diff --git a/infer/src/IR/Annot.ml b/infer/src/IR/Annot.ml index 16ca03fbe..37e753dfd 100644 --- a/infer/src/IR/Annot.ml +++ b/infer/src/IR/Annot.ml @@ -19,10 +19,6 @@ type t = ; parameters: parameters (** currently only one string parameter *) } [@@deriving compare] -let equal = [%compare.equal: t] - -let hash = Hashtbl.hash - let volatile = {class_name= "volatile"; parameters= []} let final = {class_name= "final"; parameters= []} diff --git a/infer/src/IR/Annot.mli b/infer/src/IR/Annot.mli index 50fba1d72..6e3e7b8b2 100644 --- a/infer/src/IR/Annot.mli +++ b/infer/src/IR/Annot.mli @@ -19,10 +19,6 @@ type t = ; parameters: parameters (** currently only one string parameter *) } [@@deriving compare] -val equal : t -> t -> bool - -val hash : t -> int - val volatile : t (** annotation for fields marked with the "volatile" keyword *) diff --git a/infer/src/IR/Tenv.ml b/infer/src/IR/Tenv.ml index 3c0027f5c..887f03432 100644 --- a/infer/src/IR/Tenv.ml +++ b/infer/src/IR/Tenv.ml @@ -5,13 +5,14 @@ * LICENSE file in the root directory of this source tree. *) open! IStd -module Hashtbl = Caml.Hashtbl module L = Logging (** Module for Type Environments. *) (** Hash tables on type names. *) -module TypenameHash = Hashtbl.Make (Typ.Name) +module TypenameHash = Caml.Hashtbl.Make (Typ.Name) + +module TypenameHashNormalizer = MaximumSharing.ForHashtbl (TypenameHash) (** Type for type environment. *) type t = Typ.Struct.t TypenameHash.t @@ -161,142 +162,12 @@ let store_to_filename tenv tenv_filename = if Config.debug_mode then store_debug_file tenv tenv_filename -(** Use a prime for the initial hashtable size, since, typically, we expect a lot of types. - Prime sizes make hash functions happier across table resizes. *) -let medium_size_prime = 1003 - -module type HashconsS = sig - module Hashtable : Caml.Hashtbl.S - - val reset : unit -> unit - - val hashcons : Hashtable.key -> Hashtable.key -end - -module Hashcons (H : Caml.Hashtbl.S) : HashconsS with module Hashtable = H = struct - module Hashtable = H - - let reset, hashcons = - let tbl : H.key H.t = H.create medium_size_prime in - ( (fun () -> H.reset tbl) - , fun x -> match H.find_opt tbl x with Some x' -> x' | None -> H.add tbl x x ; x ) -end - -module HashconsList (T : Caml.Hashtbl.HashedType) (H : HashconsS with type Hashtable.key = T.t) : - HashconsS with type Hashtable.key = T.t list = struct - module Hashtable = Hashtbl.Make (struct - type t = T.t list - - let hash = Hashtbl.hash - - let equal (x : t) (y : t) = - let open Polymorphic_compare in - x = y - end) - - let reset, hashcons = - let tbl : T.t list Hashtable.t = Hashtable.create medium_size_prime in - ( (fun () -> Hashtable.reset tbl) - , fun x -> - let x = IList.map_changed x ~equal:phys_equal ~f:H.hashcons in - match Hashtable.find_opt tbl x with Some x' -> x' | None -> Hashtable.add tbl x x ; x ) -end - -module PnameHC = Hashcons (Typ.Procname.Hash) -module PnameListHC = HashconsList (Typ.Procname) (PnameHC) -module TnameHC = Hashcons (TypenameHash) -module TnameListHC = HashconsList (Typ.Name) (TnameHC) -module StringHC = Hashcons (Hashtbl.Make (String)) -module StringListHC = HashconsList (String) (StringHC) -module FieldnameHC = Hashcons (Hashtbl.Make (Typ.Fieldname)) -module TypHC = Hashcons (Hashtbl.Make (Typ)) - -module AnnotHC = struct - include Hashcons (Hashtbl.Make (Annot)) - - let hashcons ({class_name; parameters} : Annot.t) : Annot.t = - {class_name= StringHC.hashcons class_name; parameters= StringListHC.hashcons parameters} -end - -module AnnotVis = struct - type t = Annot.t * bool [@@deriving compare] - - let equal = [%compare.equal: t] - - let hash = Hashtbl.hash -end - -module AnnotVisHC = struct - include Hashcons (Hashtbl.Make (AnnotVis)) - - let hashcons (annot, visibility) = hashcons (AnnotHC.hashcons annot, visibility) -end - -module AnnotItemHC = HashconsList (AnnotVis) (AnnotVisHC) - -module Field = struct - type t = Typ.Fieldname.t * Typ.t * Annot.Item.t [@@deriving compare] - - let equal = [%compare.equal: t] - - let hash = Hashtbl.hash -end - -module FieldHC = struct - include Hashcons (Hashtbl.Make (Field)) - - let hashcons (fieldname, typ, annot_item) = - hashcons (FieldnameHC.hashcons fieldname, TypHC.hashcons typ, AnnotItemHC.hashcons annot_item) -end - -module FieldListHC = HashconsList (Field) (FieldHC) - -let reset_hashtables () = - PnameHC.reset () ; - PnameListHC.reset () ; - TnameHC.reset () ; - TnameListHC.reset () ; - StringHC.reset () ; - StringListHC.reset () ; - FieldnameHC.reset () ; - TypHC.reset () ; - AnnotHC.reset () ; - AnnotVisHC.reset () ; - AnnotItemHC.reset () ; - FieldHC.reset () ; - FieldListHC.reset () - - -(** Global tenv size is a problem in the genrule capture integration for java. - This function tries to improve sharing of values in the tenv, and assumes - Java data structures (it's still correct for Clangs, just not necessarily - as effective. *) -let canonicalize tenv = - reset_hashtables () ; - let result = create () in - let canonicalize_one tname - ({fields; statics; supers; methods; exported_objc_methods; annots} : Typ.Struct.t) = - let tname = TnameHC.hashcons tname in - let tstruct = - Typ.Struct.internal_mk_struct ~supers:(TnameListHC.hashcons supers) - ~fields:(FieldListHC.hashcons fields) ~statics:(FieldListHC.hashcons statics) - ~methods:(PnameListHC.hashcons methods) - ~exported_objc_methods:(PnameListHC.hashcons exported_objc_methods) - ~annots:(AnnotItemHC.hashcons annots) () - in - TypenameHash.add result tname tstruct - in - TypenameHash.iter canonicalize_one tenv ; - reset_hashtables () ; - result - - let store_global tenv = (* update in-memory global tenv for later uses by this process, e.g. in single-core mode the frontend and backend run in the same process *) L.debug Capture Quiet "Tenv.store: global tenv has size %d bytes.@." (Obj.(reachable_words (repr tenv)) * (Sys.word_size / 8)) ; - let tenv = canonicalize tenv in + let tenv = TypenameHashNormalizer.normalize tenv in L.debug Capture Quiet "Tenv.store: canonicalized tenv has size %d bytes.@." (Obj.(reachable_words (repr tenv)) * (Sys.word_size / 8)) ; global_tenv := Some tenv ; diff --git a/infer/src/IR/Typ.ml b/infer/src/IR/Typ.ml index 769de621f..07cd36b23 100644 --- a/infer/src/IR/Typ.ml +++ b/infer/src/IR/Typ.ml @@ -231,8 +231,6 @@ end include T -let hash = Hashtbl.hash - let mk_type_quals ?default ?is_const ?is_restrict ?is_volatile () = let default_ = {is_const= false; is_restrict= false; is_volatile= false} in let mk_aux ?(default = default_) ?(is_const = default.is_const) @@ -1394,8 +1392,6 @@ module Fieldname = struct let equal = [%compare.equal: t] - let hash = Hashtbl.hash - module T = struct type nonrec t = t diff --git a/infer/src/IR/Typ.mli b/infer/src/IR/Typ.mli index bbd23408a..a0dc9d129 100644 --- a/infer/src/IR/Typ.mli +++ b/infer/src/IR/Typ.mli @@ -127,8 +127,6 @@ and template_spec_info = ; args: template_arg list } [@@deriving compare] -val hash : t -> int - val pp_template_spec_info : Pp.env -> F.formatter -> template_spec_info -> unit [@@warning "-32"] val mk : ?default:t -> ?quals:type_quals -> desc -> t @@ -627,9 +625,6 @@ module Fieldname : sig type t [@@deriving compare] val equal : t -> t -> bool - - val hash : t -> int - (** Equality for field names. *) (** Set for fieldnames *) diff --git a/infer/src/istd/MaximumSharing.ml b/infer/src/istd/MaximumSharing.ml new file mode 100644 index 000000000..b66d61cd8 --- /dev/null +++ b/infer/src/istd/MaximumSharing.ml @@ -0,0 +1,172 @@ +(* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +open! IStd + +exception MaximumSharingLazyValue + +module Hashing : sig + type hash_value = private int + + type state + + val of_int : int -> hash_value + + val of_no_scan_block : 'a -> hash_value + + val alloc_of_block : tag:int -> size:int -> state + + val fold_hash_value : state -> hash_value -> state + + val get_hash_value : state -> hash_value +end = struct + type hash_value = Hash.hash_value + + type state = Hash.state + + let of_int = Fn.id + + let of_no_scan_block = Caml.Hashtbl.hash + + let alloc_of_block ~tag ~size = + let state = Hash.alloc () in + let state = Hash.fold_int state tag in + let state = Hash.fold_int state size in + state + + + let fold_hash_value = Hash.fold_int + + let get_hash_value = Hash.get_hash_value +end + +module Sharer : sig + type t + + val create : unit -> t + + val normalize_value : t -> 'a -> 'a +end = struct + module HashedNormalizedObj = struct + type t = Hashing.hash_value * Obj.t + + let equal ((h1, o1) : t) ((h2, o2) : t) = + Int.equal (h1 :> int) (h2 :> int) + && + if Obj.tag o1 >= Obj.no_scan_tag then Polymorphic_compare.equal o1 o2 + else PhysEqual.shallow_equal o1 o2 + + + let hash ((h, _) : t) = (h :> int) + end + + module H = Caml.Hashtbl.Make (HashedNormalizedObj) + + type t = + { inplace: bool + (** Uses [Obj.set_field] on possibly immutable values, hence should be avoided when used with flambda *) + ; hash_normalized: HashedNormalizedObj.t H.t + ; fail_on_forward: bool + ; fail_on_nonstring: bool + ; fail_on_objects: bool } + + let create () = + { inplace= false + ; hash_normalized= H.create 1 + ; (* these are just for safety because the code hasn't been tested on them, it should work fine though *) + fail_on_forward= true + ; fail_on_nonstring= true + ; fail_on_objects= true } + + + let normalize_block sharer hash block = + let hash_block = (hash, block) in + match H.find_opt sharer.hash_normalized hash_block with + | Some hash_normalized -> + hash_normalized + | None -> + H.add sharer.hash_normalized hash_block hash_block ; + hash_block + + + let hash_and_normalize_int o = (Hashing.of_int (Obj.obj o : int), o) + + (* + TODO: currently the function explores the graph without sharing. It is possible to build values + on which this will behave exponentially. This could be avoided by keeping a hashtbl of visited + values. But it would be much more efficient to write it in C to be able to use the GC flags to + mark visited values. + *) + let rec hash_and_normalize_obj sharer o = + if Obj.is_int o then hash_and_normalize_int o else hash_and_normalize_block sharer o + + + and hash_and_normalize_block sharer block = + let tag = Obj.tag block in + if tag >= Obj.no_scan_tag then ( + assert ((not sharer.fail_on_nonstring) || Int.equal tag Obj.string_tag) ; + hash_and_normalize_no_scan_block sharer block ) + else if Int.equal tag Obj.forward_tag then ( + assert (not sharer.fail_on_forward) ; + hash_and_normalize_obj sharer (Obj.field block 0) ) + else if Int.equal tag Obj.lazy_tag then raise MaximumSharingLazyValue + else ( + assert ((not sharer.fail_on_objects) || not (Int.equal tag Obj.object_tag)) ; + let size = Obj.size block in + hash_and_normalize_block_fields sharer block block size 0 (Hashing.alloc_of_block ~tag ~size) ) + + + and hash_and_normalize_block_fields sharer original_block new_block size field_i hash_state = + if field_i >= size then normalize_block sharer (Hashing.get_hash_value hash_state) new_block + else + let field_v = Obj.field original_block field_i in + let field_hash, field_v' = hash_and_normalize_obj sharer field_v in + let hash_state = Hashing.fold_hash_value hash_state field_hash in + let new_block = + if phys_equal field_v field_v' then new_block + else + let new_block = + if phys_equal original_block new_block && not sharer.inplace then + (* copy-on-write *) + Obj.dup original_block + else new_block + in + Obj.set_field new_block field_i field_v' ; + new_block + in + (hash_and_normalize_block_fields [@tailcall]) sharer original_block new_block size + (field_i + 1) hash_state + + + and hash_and_normalize_no_scan_block sharer block = + normalize_block sharer (Hashing.of_no_scan_block block) block + + + (** + Returns a value structurally equal but with potentially more sharing. + Potentially unsafe if used on mutable values that are modified later. + Preserves polymorphic compare, hashing, no-sharing marshalling. + May have an impact on code using [phys_equal] or marshalling with sharing. + *) + let normalize_value sharer v = hash_and_normalize_obj sharer (Obj.repr v) |> snd |> Obj.obj +end + +module ForHashtbl (H : Caml.Hashtbl.S) = struct + let normalize h = + let sharer = Sharer.create () in + (* If a hash table has been created with [add] and not [replace] only, it is possible to + have several values for a given key. We need to collect them all and reinsert them in + the reverse order. *) + let rev_bindings = H.fold (fun k v acc -> (k, v) :: acc) h [] in + (* No need to preserve the initial size of the original hash table *) + let h' = H.create (H.length h) in + List.iter rev_bindings ~f:(fun (k, v) -> + let k' = Sharer.normalize_value sharer k in + let v' = Sharer.normalize_value sharer v in + H.add h' k' v' ) ; + h' +end diff --git a/infer/src/istd/MaximumSharing.mli b/infer/src/istd/MaximumSharing.mli new file mode 100644 index 000000000..3b5bb5f22 --- /dev/null +++ b/infer/src/istd/MaximumSharing.mli @@ -0,0 +1,20 @@ +(* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +open! IStd + +exception MaximumSharingLazyValue + +(** + Current implementation will stack overflow on deep (TODO: a tailrec version) + or circular values (much harder to detect sharing, also not needed for now). +*) + +module ForHashtbl (H : Caml.Hashtbl.S) : sig + val normalize : 'a H.t -> 'a H.t + (** Duplicate a hash table with maximum sharing. *) +end diff --git a/infer/src/istd/PhysEqual.ml b/infer/src/istd/PhysEqual.ml index 753f6b04b..2fe02e038 100644 --- a/infer/src/istd/PhysEqual.ml +++ b/infer/src/istd/PhysEqual.ml @@ -11,7 +11,7 @@ let rec compare_fields ox oy i = i < 0 || (phys_equal (Obj.field ox i) (Obj.field oy i) && compare_fields ox oy (i - 1)) -let shallow_compare x y = +let shallow_equal x y = phys_equal x y || let ox = Obj.repr x in @@ -25,7 +25,7 @@ let shallow_compare x y = Int.equal sx sy && compare_fields ox oy (sx - 1) -let optim1 ~res x = if shallow_compare res x then x else res +let optim1 ~res x = if shallow_equal res x then x else res let optim2 ~res x1 x2 = - if shallow_compare res x1 then x1 else if shallow_compare res x2 then x2 else res + if shallow_equal res x1 then x1 else if shallow_equal res x2 then x2 else res diff --git a/infer/src/istd/PhysEqual.mli b/infer/src/istd/PhysEqual.mli index 4efef660d..6f143ff6d 100644 --- a/infer/src/istd/PhysEqual.mli +++ b/infer/src/istd/PhysEqual.mli @@ -7,6 +7,8 @@ open! IStd +val shallow_equal : 'a -> 'a -> bool + (** Helpers function to enforce physical equality.