diff --git a/infer/src/IR/Annot.ml b/infer/src/IR/Annot.ml index 7f62a4027..757e7a18d 100644 --- a/infer/src/IR/Annot.ml +++ b/infer/src/IR/Annot.ml @@ -28,8 +28,6 @@ and value = | Class of Typ.t | Annot of t -let equal = [%compare.equal: t] - let volatile = {class_name= "volatile"; parameters= []} let final = {class_name= "final"; parameters= []} @@ -84,6 +82,80 @@ and pp fmt annotation = F.fprintf fmt "(%a)" (F.pp_print_list ~pp_sep:comma_sep pp_parameter) annotation.parameters +module rec ValueNormalizer : (HashNormalizer.S with type t = value) = struct + module rec V : (HashNormalizer.NormalizedT with type t = value) = struct + type t = value [@@deriving equal] + + let hash = Hashtbl.hash + + let normalize value = + match value with + | Str str -> + let str' = HashNormalizer.StringNormalizer.normalize str in + if phys_equal str str' then value else Str str' + | Bool _ -> + value + | Enum {class_typ; value= str_value} -> + let class_typ' = Typ.Normalizer.normalize class_typ in + let str_value' = HashNormalizer.StringNormalizer.normalize str_value in + if phys_equal class_typ class_typ' && phys_equal str_value str_value' then value + else Enum {class_typ= class_typ'; value= str_value'} + | Array list -> + let list' = IList.map_changed list ~equal:phys_equal ~f:N.normalize in + if phys_equal list list' then value else Array list' + | Class typ -> + let typ' = Typ.Normalizer.normalize typ in + if phys_equal typ typ' then value else Class typ' + | Annot t -> + let t' = TNormalizer.normalize t in + if phys_equal t t' then value else Annot t' + end + + and N : (HashNormalizer.S with type t = V.t) = HashNormalizer.Make (V) + + include N +end + +and ParameterNormalizer : (HashNormalizer.S with type t = parameter) = HashNormalizer.Make (struct + type t = parameter [@@deriving equal] + + let hash = Hashtbl.hash + + let normalize_str_opt str_opt = + IOption.map_changed str_opt ~equal:phys_equal ~f:HashNormalizer.StringNormalizer.normalize + + + let normalize parameter = + let name = normalize_str_opt parameter.name in + let value = ValueNormalizer.normalize parameter.value in + if phys_equal name parameter.name && phys_equal value parameter.value then parameter + else {name; value} +end) + +and TNormalizer : (HashNormalizer.S with type t = t) = HashNormalizer.Make (struct + type nonrec t = t [@@deriving equal] + + let hash = Hashtbl.hash + + let normalize t = + let class_name = HashNormalizer.StringNormalizer.normalize t.class_name in + let parameters = + IList.map_changed ~equal:phys_equal ~f:ParameterNormalizer.normalize t.parameters + in + if phys_equal class_name t.class_name && phys_equal parameters t.parameters then t + else {class_name; parameters} +end) + +module PairNormalizer = HashNormalizer.Make (struct + type nonrec t = t * bool [@@deriving equal] + + let hash = Hashtbl.hash + + let normalize ((t, b) as pair) = + let t' = TNormalizer.normalize t in + if phys_equal t t' then pair else (t', b) +end) + module Item = struct (** Annotation for one item: a list of annotations with visibility. *) type nonrec t = (t * bool) list [@@deriving compare, equal] @@ -101,6 +173,23 @@ module Item = struct let is_empty ia = List.is_empty ia let is_final ia = List.exists ia ~f:(fun (x, b) -> b && is_final x) + + module Normalizer = struct + include HashNormalizer.Make (struct + type nonrec t = t [@@deriving equal] + + let hash = Hashtbl.hash + + let normalize pairs = IList.map_changed pairs ~equal:phys_equal ~f:PairNormalizer.normalize + end) + + let reset () = + reset () ; + PairNormalizer.reset () ; + TNormalizer.reset () ; + ParameterNormalizer.reset () ; + ValueNormalizer.reset () + end end module Class = struct diff --git a/infer/src/IR/Annot.mli b/infer/src/IR/Annot.mli index f085e0497..48caf35f6 100644 --- a/infer/src/IR/Annot.mli +++ b/infer/src/IR/Annot.mli @@ -55,6 +55,8 @@ module Item : sig val is_final : t -> bool (** Check if final annotation is included in. *) + + module Normalizer : HashNormalizer.S with type t = t end module Class : sig diff --git a/infer/src/IR/Fieldname.ml b/infer/src/IR/Fieldname.ml index bf6b0cadf..ae3798253 100644 --- a/infer/src/IR/Fieldname.ml +++ b/infer/src/IR/Fieldname.ml @@ -58,3 +58,16 @@ let is_java_outer_instance ({field_name} as field) = let last_char = field_name.[String.length field_name - 1] in Char.(last_char >= '0' && last_char <= '9') && String.is_suffix field_name ~suffix:(this ^ String.of_char last_char) + + +module Normalizer = HashNormalizer.Make (struct + type nonrec t = t [@@deriving equal] + + let hash = Hashtbl.hash + + let normalize t = + let class_name = Typ.Name.Normalizer.normalize t.class_name in + let field_name = HashNormalizer.StringNormalizer.normalize t.field_name in + if phys_equal class_name t.class_name && phys_equal field_name t.field_name then t + else {class_name; field_name} +end) diff --git a/infer/src/IR/Fieldname.mli b/infer/src/IR/Fieldname.mli index 723a4f769..7be197a40 100644 --- a/infer/src/IR/Fieldname.mli +++ b/infer/src/IR/Fieldname.mli @@ -43,3 +43,5 @@ val to_simplified_string : t -> string val pp : F.formatter -> t -> unit (** Pretty print a field name. *) + +module Normalizer : HashNormalizer.S with type t = t diff --git a/infer/src/IR/JavaClassName.ml b/infer/src/IR/JavaClassName.ml index 9ecea661d..826dd866c 100644 --- a/infer/src/IR/JavaClassName.ml +++ b/infer/src/IR/JavaClassName.ml @@ -108,3 +108,18 @@ let is_external_via_config t = let pp_with_verbosity ~verbose fmt t = if verbose then pp fmt t else F.pp_print_string fmt (classname t) + + +module Normalizer = HashNormalizer.Make (struct + type nonrec t = t [@@deriving equal] + + let hash = Hashtbl.hash + + let normalize t = + let classname = HashNormalizer.StringNormalizer.normalize t.classname in + let package = + IOption.map_changed t.package ~equal:phys_equal ~f:HashNormalizer.StringNormalizer.normalize + in + if phys_equal classname t.classname && phys_equal package t.package then t + else {classname; package} +end) diff --git a/infer/src/IR/JavaClassName.mli b/infer/src/IR/JavaClassName.mli index 02aaecd07..8e30cab17 100644 --- a/infer/src/IR/JavaClassName.mli +++ b/infer/src/IR/JavaClassName.mli @@ -52,3 +52,5 @@ val get_user_defined_class_if_anonymous_inner : t -> t option SomeClass$NestedClass$1$17$5. In this example, we should return SomeClass$NestedClass. If this is not an anonymous class, returns [None]. *) + +module Normalizer : HashNormalizer.S with type t = t diff --git a/infer/src/IR/Procname.ml b/infer/src/IR/Procname.ml index d85dc17bc..7f113db07 100644 --- a/infer/src/IR/Procname.ml +++ b/infer/src/IR/Procname.ml @@ -20,7 +20,7 @@ module Java = struct | Non_Static (** in Java, procedures called with invokevirtual, invokespecial, and invokeinterface *) | Static (** in Java, procedures called with invokestatic *) - [@@deriving compare, yojson_of] + [@@deriving compare, equal, yojson_of] (** Type of java procedure names. *) type t = @@ -29,7 +29,7 @@ module Java = struct ; class_name: Typ.Name.t ; return_type: Typ.t option (* option because constructors have no return type *) ; kind: kind } - [@@deriving compare, yojson_of] + [@@deriving compare, equal, yojson_of] let ensure_java_type t = if not (Typ.is_java_type t) then @@ -171,6 +171,30 @@ module Java = struct let is_external java_pname = let package = get_package java_pname in Option.exists ~f:Config.java_package_is_external package + + + module Normalizer = HashNormalizer.Make (struct + type nonrec t = t [@@deriving equal] + + let hash = Hashtbl.hash + + let normalize t = + let method_name = HashNormalizer.StringNormalizer.normalize t.method_name in + let parameters = + IList.map_changed t.parameters ~equal:phys_equal ~f:Typ.Normalizer.normalize + in + let class_name = Typ.Name.Normalizer.normalize t.class_name in + let return_type = + IOption.map_changed t.return_type ~equal:phys_equal ~f:Typ.Normalizer.normalize + in + if + phys_equal method_name t.method_name + && phys_equal parameters t.parameters + && phys_equal class_name t.class_name + && phys_equal return_type t.return_type + then t + else {method_name; parameters; class_name; return_type; kind= t.kind} + end) end module Parameter = struct @@ -826,3 +850,23 @@ module UnitCache = struct let cache_set pname value = cache := Some (pname, value) in (cache_get, cache_set) end + +module Normalizer = struct + include HashNormalizer.Make (struct + type nonrec t = t [@@deriving equal] + + let hash = hash + + let normalize t = + match t with + | Java java_pname -> + let java_pname' = Java.Normalizer.normalize java_pname in + if phys_equal java_pname java_pname' then t else Java java_pname' + | _ -> + t + end) + + let reset () = + reset () ; + Java.Normalizer.reset () +end diff --git a/infer/src/IR/Procname.mli b/infer/src/IR/Procname.mli index 6ec4198d0..64e09baf2 100644 --- a/infer/src/IR/Procname.mli +++ b/infer/src/IR/Procname.mli @@ -346,3 +346,5 @@ val to_filename : t -> string val get_qualifiers : t -> QualifiedCppName.t (** get qualifiers of C/objc/C++ method/function *) + +module Normalizer : HashNormalizer.S with type t = t diff --git a/infer/src/IR/Struct.ml b/infer/src/IR/Struct.ml index acce23968..7afc24c85 100644 --- a/infer/src/IR/Struct.ml +++ b/infer/src/IR/Struct.ml @@ -292,3 +292,83 @@ let is_not_java_interface = function false | _ -> true + + +module FieldNormalizer = HashNormalizer.Make (struct + type t = field [@@deriving equal] + + let hash = Hashtbl.hash + + let normalize f = + let field_name, typ, annot = f in + let field_name' = Fieldname.Normalizer.normalize field_name in + let typ' = Typ.Normalizer.normalize typ in + let annot' = Annot.Item.Normalizer.normalize annot in + if phys_equal field_name field_name' && phys_equal typ typ' && phys_equal annot annot' then f + else (field_name', typ', annot') +end) + +module JavaClassInfoOptNormalizer = HashNormalizer.Make (struct + type t = java_class_info option [@@deriving equal] + + let hash = Hashtbl.hash + + let normalize_location_opt loc_opt = + IOption.map_changed loc_opt ~equal:phys_equal ~f:Location.Normalizer.normalize + + + let normalize_java_class_info java_class_info = + let loc = normalize_location_opt java_class_info.loc in + if phys_equal loc java_class_info.loc then java_class_info else {java_class_info with loc} + + + let normalize java_class_info_opt = + IOption.map_changed java_class_info_opt ~equal:phys_equal ~f:normalize_java_class_info +end) + +module Normalizer = struct + include HashNormalizer.Make (struct + type nonrec t = t [@@deriving equal] + + let hash = Hashtbl.hash + + let normalize t = + let fields = IList.map_changed ~equal:phys_equal ~f:FieldNormalizer.normalize t.fields in + let statics = IList.map_changed ~equal:phys_equal ~f:FieldNormalizer.normalize t.statics in + let supers = IList.map_changed ~equal:phys_equal ~f:Typ.Name.Normalizer.normalize t.supers in + let objc_protocols = + IList.map_changed ~equal:phys_equal ~f:Typ.Name.Normalizer.normalize t.objc_protocols + in + let methods = + IList.map_changed ~equal:phys_equal ~f:Procname.Normalizer.normalize t.methods + in + let exported_objc_methods = + IList.map_changed ~equal:phys_equal ~f:Procname.Normalizer.normalize t.exported_objc_methods + in + let annots = Annot.Item.Normalizer.normalize t.annots in + let java_class_info = JavaClassInfoOptNormalizer.normalize t.java_class_info in + if + phys_equal fields t.fields && phys_equal statics t.statics && phys_equal supers t.supers + && phys_equal objc_protocols t.objc_protocols + && phys_equal methods t.methods + && phys_equal exported_objc_methods t.exported_objc_methods + && phys_equal annots t.annots + && phys_equal java_class_info t.java_class_info + then t + else + { fields + ; statics + ; supers + ; objc_protocols + ; methods + ; exported_objc_methods + ; annots + ; java_class_info + ; dummy= t.dummy } + end) + + let reset () = + reset () ; + FieldNormalizer.reset () ; + JavaClassInfoOptNormalizer.reset () +end diff --git a/infer/src/IR/Struct.mli b/infer/src/IR/Struct.mli index 0cc963dcf..b4233bcb9 100644 --- a/infer/src/IR/Struct.mli +++ b/infer/src/IR/Struct.mli @@ -77,3 +77,5 @@ val merge : Typ.Name.t -> newer:t -> current:t -> t val is_not_java_interface : t -> bool (** check that a struct either defines a non-java type, or a non-java-interface type (abstract or normal class) *) + +module Normalizer : HashNormalizer.S with type t = t diff --git a/infer/src/IR/Tenv.ml b/infer/src/IR/Tenv.ml index 1a1d6bb6e..a911b5718 100644 --- a/infer/src/IR/Tenv.ml +++ b/infer/src/IR/Tenv.ml @@ -12,8 +12,6 @@ module L = Logging (** Hash tables on type names. *) module TypenameHash = Caml.Hashtbl.Make (Typ.Name) -module TypenameHashNormalizer = MaximumSharing.ForHashtbl (TypenameHash) - (** Type for type environment. *) type t = Struct.t TypenameHash.t @@ -171,13 +169,39 @@ let store_to_filename tenv tenv_filename = if Config.debug_mode then store_debug_file tenv tenv_filename +module Normalizer = struct + let normalize tenv = + let new_tenv = TypenameHash.create (TypenameHash.length tenv) in + let normalize_mapping name tstruct = + let name = Typ.Name.Normalizer.normalize name in + let tstruct = Struct.Normalizer.normalize tstruct in + TypenameHash.add new_tenv name tstruct + in + TypenameHash.iter normalize_mapping tenv ; + new_tenv + + + let reset () = + Typ.Normalizer.reset () ; + Typ.Name.Normalizer.reset () ; + Struct.Normalizer.reset () ; + Fieldname.Normalizer.reset () ; + Procname.Normalizer.reset () ; + SourceFile.Normalizer.reset () ; + Location.Normalizer.reset () ; + Annot.Item.Normalizer.reset () ; + JavaClassName.Normalizer.reset () ; + HashNormalizer.StringNormalizer.reset () +end + let store_global tenv = (* update in-memory global tenv for later uses by this process, e.g. in single-core mode the frontend and backend run in the same process *) if Config.debug_level_capture > 0 then L.debug Capture Quiet "Tenv.store: global tenv has size %d bytes.@." (Obj.(reachable_words (repr tenv)) * (Sys.word_size / 8)) ; - let tenv = TypenameHashNormalizer.normalize tenv in + let tenv = Normalizer.normalize tenv in + Normalizer.reset () ; if Config.debug_level_capture > 0 then L.debug Capture Quiet "Tenv.store: canonicalized tenv has size %d bytes.@." (Obj.(reachable_words (repr tenv)) * (Sys.word_size / 8)) ; diff --git a/infer/src/IR/Typ.ml b/infer/src/IR/Typ.ml index 4d32c2014..1e353d9a5 100644 --- a/infer/src/IR/Typ.ml +++ b/infer/src/IR/Typ.ml @@ -54,9 +54,7 @@ type ikind = | IULongLong (** [unsigned long long] (or [unsigned int64_] on Microsoft Visual C) *) | I128 (** [__int128_t] *) | IU128 (** [__uint128_t] *) -[@@deriving compare, yojson_of] - -let equal_ikind = [%compare.equal: ikind] +[@@deriving compare, equal, yojson_of] let ikind_to_string = function | IChar -> @@ -130,9 +128,7 @@ let ikind_is_char = function IChar | ISChar | IUChar -> true | _ -> false (** Kinds of floating-point numbers *) type fkind = FFloat (** [float] *) | FDouble (** [double] *) | FLongDouble (** [long double] *) -[@@deriving compare, yojson_of] - -let equal_fkind = [%compare.equal: fkind] +[@@deriving compare, equal, yojson_of] let fkind_to_string = function | FFloat -> @@ -150,9 +146,7 @@ type ptr_kind = | Pk_objc_weak (** Obj-C __weak pointer *) | Pk_objc_unsafe_unretained (** Obj-C __unsafe_unretained pointer *) | Pk_objc_autoreleasing (** Obj-C __autoreleasing pointer *) -[@@deriving compare, yojson_of] - -let equal_ptr_kind = [%compare.equal: ptr_kind] +[@@deriving compare, equal, yojson_of] let ptr_kind_string = function | Pk_reference -> @@ -169,7 +163,7 @@ let ptr_kind_string = function module T = struct type type_quals = {is_const: bool; is_restrict: bool; is_volatile: bool} - [@@deriving compare, yojson_of] + [@@deriving compare, equal, yojson_of] (** types for sil (structured) expressions *) type t = {desc: desc; quals: type_quals} @@ -374,8 +368,6 @@ let to_string typ = module Name = struct type t = name [@@deriving compare, equal, yojson_of] - let equal = [%compare.equal: t] - let hash = Hashtbl.hash let qual_name = function @@ -542,6 +534,20 @@ module Name = struct let pp = pp end) + + module Normalizer = HashNormalizer.Make (struct + type nonrec t = t [@@deriving equal] + + let hash = Hashtbl.hash + + let normalize t = + match t with + | CStruct _ | CUnion _ | CppClass _ | ObjcClass _ | ObjcProtocol _ -> + t + | JavaClass java_class_name -> + let java_class_name' = JavaClassName.Normalizer.normalize java_class_name in + if phys_equal java_class_name java_class_name' then t else JavaClass java_class_name' + end) end (** dump a type with all the details. *) @@ -694,3 +700,52 @@ let pointer_to_java_lang_object = mk_ptr (mk_struct Name.Java.java_lang_object) let pointer_to_java_lang_string = mk_ptr (mk_struct Name.Java.java_lang_string) let pointer_to_objc_nszone = mk_ptr (mk_struct (CStruct (QualifiedCppName.of_qual_string "NSZone"))) + +module TypeQualsNormalizer = HashNormalizer.Make (struct + type t = type_quals [@@deriving equal] + + let hash = Hashtbl.hash + + let normalize = Fn.id +end) + +module rec DescNormalizer : (HashNormalizer.S with type t = desc) = HashNormalizer.Make (struct + type t = desc [@@deriving equal] + + let hash = Hashtbl.hash + + let normalize t = + match t with + | Tint _ | Tfloat _ | Tvoid | Tfun -> + t + | Tstruct name -> + let name' = Name.Normalizer.normalize name in + if phys_equal name name' then t else Tstruct name' + | TVar str_var -> + let str_var' = HashNormalizer.StringNormalizer.normalize str_var in + if phys_equal str_var str_var' then t else TVar str_var' + | Tptr (pointed, ptr_kind) -> + let pointed' = Normalizer.normalize pointed in + if phys_equal pointed pointed' then t else Tptr (pointed', ptr_kind) + | Tarray {elt; length; stride} -> + let elt' = Normalizer.normalize elt in + if phys_equal elt elt' then t else Tarray {elt= elt'; length; stride} +end) + +and Normalizer : (HashNormalizer.S with type t = t) = struct + include HashNormalizer.Make (struct + include T + + let hash = Hashtbl.hash + + let normalize t = + let quals = TypeQualsNormalizer.normalize t.quals in + let desc = DescNormalizer.normalize t.desc in + if phys_equal desc t.desc && phys_equal quals t.quals then t else {desc; quals} + end) + + let reset () = + reset () ; + TypeQualsNormalizer.reset () ; + DescNormalizer.reset () +end diff --git a/infer/src/IR/Typ.mli b/infer/src/IR/Typ.mli index 76fd71a49..4d2f7a3c8 100644 --- a/infer/src/IR/Typ.mli +++ b/infer/src/IR/Typ.mli @@ -275,6 +275,8 @@ module Name : sig module Set : PrettyPrintable.PPSet with type elt = t module Map : PrettyPrintable.PPMap with type key = t + + module Normalizer : HashNormalizer.S with type t = t end val equal : t -> t -> bool @@ -355,3 +357,5 @@ val has_block_prefix : string -> bool val unsome : string -> t option -> t type typ = t + +module Normalizer : HashNormalizer.S with type t = t diff --git a/infer/src/base/Location.ml b/infer/src/base/Location.ml index ae4b1b0c1..a68236785 100644 --- a/infer/src/base/Location.ml +++ b/infer/src/base/Location.ml @@ -52,3 +52,13 @@ module Map = PrettyPrintable.MakePPMap (struct let pp = pp end) + +module Normalizer = HashNormalizer.Make (struct + type nonrec t = t [@@deriving equal] + + let hash = Hashtbl.hash + + let normalize t = + let file = SourceFile.Normalizer.normalize t.file in + if phys_equal file t.file then t else {t with file} +end) diff --git a/infer/src/base/Location.mli b/infer/src/base/Location.mli index e2b24d8ea..8d0b70c57 100644 --- a/infer/src/base/Location.mli +++ b/infer/src/base/Location.mli @@ -37,3 +37,5 @@ val pp_file_pos : Format.formatter -> t -> unit val pp_range : Format.formatter -> t * t -> unit module Map : PrettyPrintable.PPMap with type key = t + +module Normalizer : HashNormalizer.S with type t = t diff --git a/infer/src/base/SourceFile.ml b/infer/src/base/SourceFile.ml index e3cd888e6..0a8582859 100644 --- a/infer/src/base/SourceFile.ml +++ b/infer/src/base/SourceFile.ml @@ -257,3 +257,31 @@ module SQLite = struct RelativeProjectRootAndWorkspace {workspace_rel_root= prefix; rel_path} else L.die InternalError "Could not deserialize sourcefile with tag=%c, str= %s@." tag str end + +module Normalizer = HashNormalizer.Make (struct + type nonrec t = t [@@deriving equal] + + let hash = Hashtbl.hash + + let normalize fname = + let string_normalize = HashNormalizer.StringNormalizer.normalize in + match fname with + | Invalid {ml_source_file} -> + let ml_source_file' = string_normalize ml_source_file in + if phys_equal ml_source_file ml_source_file' then fname + else Invalid {ml_source_file= ml_source_file'} + | RelativeProjectRootAndWorkspace {workspace_rel_root; rel_path} -> + let workspace_rel_root' = string_normalize workspace_rel_root in + let rel_path' = string_normalize rel_path in + if phys_equal workspace_rel_root workspace_rel_root' && phys_equal rel_path rel_path' then + fname + else + RelativeProjectRootAndWorkspace + {workspace_rel_root= workspace_rel_root'; rel_path= rel_path'} + | RelativeProjectRoot rel_path -> + let rel_path' = string_normalize rel_path in + if phys_equal rel_path rel_path' then fname else RelativeProjectRoot rel_path' + | Absolute path -> + let path' = string_normalize path in + if phys_equal path path' then fname else Absolute path' +end) diff --git a/infer/src/base/SourceFile.mli b/infer/src/base/SourceFile.mli index 51b39b479..11259b08b 100644 --- a/infer/src/base/SourceFile.mli +++ b/infer/src/base/SourceFile.mli @@ -64,3 +64,5 @@ val has_extension : t -> ext:string -> bool (** returns whether the source file has provided extension *) module SQLite : SqliteUtils.Data with type t = t + +module Normalizer : HashNormalizer.S with type t = t diff --git a/infer/src/inferunit.ml b/infer/src/inferunit.ml index be6c7247d..ddb7e1e0e 100644 --- a/infer/src/inferunit.ml +++ b/infer/src/inferunit.ml @@ -39,7 +39,6 @@ let () = ; JavaProfilerSamplesTest.tests ; LivenessTests.tests ; LRUHashtblTests.tests - ; MaximumSharingTests.tests ; ProcCfgTests.tests ; RestartSchedulerTests.tests ; SchedulerTests.tests diff --git a/infer/src/istd/HashNormalizer.ml b/infer/src/istd/HashNormalizer.ml new file mode 100644 index 000000000..04ee86499 --- /dev/null +++ b/infer/src/istd/HashNormalizer.ml @@ -0,0 +1,47 @@ +(* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) +open! Core + +module type NormalizedT = sig + include Caml.Hashtbl.HashedType + + val normalize : t -> t +end + +module type S = sig + type t + + val normalize : t -> t + + val reset : unit -> unit +end + +module Make (T : NormalizedT) = struct + type t = T.t + + module H = Caml.Hashtbl.Make (T) + + let table : t H.t = H.create 11 + + let normalize t = + match H.find_opt table t with + | Some t' -> + t' + | None -> + let normalized = T.normalize t in + H.add table normalized normalized ; + normalized + + + let reset () = H.reset table +end + +module StringNormalizer = Make (struct + include String + + let normalize = Fn.id +end) diff --git a/infer/src/istd/HashNormalizer.mli b/infer/src/istd/HashNormalizer.mli new file mode 100644 index 000000000..5857c8385 --- /dev/null +++ b/infer/src/istd/HashNormalizer.mli @@ -0,0 +1,30 @@ +(* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +(** a hashed type with a normalization function which respects equality *) +module type NormalizedT = sig + include Caml.Hashtbl.HashedType + + val normalize : t -> t +end + +(** normalizer module which uses a hashtable to store normalized representatives *) +module type S = sig + (** type the normalizer works on *) + type t + + val normalize : t -> t + (** return equal normalized representative *) + + val reset : unit -> unit + (** reset underlying hashtable *) +end + +module Make (T : NormalizedT) : S with type t = T.t + +(** normalizer for strings *) +module StringNormalizer : S with type t = string diff --git a/infer/src/istd/IOption.ml b/infer/src/istd/IOption.ml index 1257821e3..9d9f240cc 100644 --- a/infer/src/istd/IOption.ml +++ b/infer/src/istd/IOption.ml @@ -17,6 +17,15 @@ let if_none_eval = value_default_f let exists2 x y ~f = match (x, y) with Some x, Some y -> f x y | _, _ -> false +let map_changed opt ~equal ~f = + match opt with + | None -> + opt + | Some x -> + let x' = f x in + if equal x x' then opt else Some x' + + module Let_syntax = struct include Option.Let_syntax diff --git a/infer/src/istd/IOption.mli b/infer/src/istd/IOption.mli index 4f36f7126..4ba38ca55 100644 --- a/infer/src/istd/IOption.mli +++ b/infer/src/istd/IOption.mli @@ -25,6 +25,9 @@ val if_none_eval : f:(unit -> 'a) -> 'a option -> 'a val exists2 : 'a option -> 'b option -> f:('a -> 'b -> bool) -> bool (** Like [Option.exists] but gets two parameters. *) +val map_changed : 'a option -> equal:('a -> 'a -> bool) -> f:('a -> 'a) -> 'a option +(** Like [Option.map] but maintain physical equality *) + include sig [@@@warning "-32-60"] diff --git a/infer/src/istd/MaximumSharing.ml b/infer/src/istd/MaximumSharing.ml deleted file mode 100644 index b926a1dc3..000000000 --- a/infer/src/istd/MaximumSharing.ml +++ /dev/null @@ -1,287 +0,0 @@ -(* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - *) - -open! IStd - -exception MaximumSharingLazyValue - -module Hashing : sig - type hash_value = private int - - type state - - val of_int : int -> hash_value - - val shallow : 'a -> hash_value - (** A deterministic hash function that visits O(1) objects, to ensure termination on cyclic - values. *) - - val alloc_of_block : tag:int -> size:int -> state - - val fold_hash_value : state -> hash_value -> state - - val get_hash_value : state -> hash_value -end = struct - type hash_value = Hash.hash_value - - type state = Hash.state - - let of_int = Fn.id - - let shallow = - (* - [hash x] is defined as [seeded_hash_param 10 100 0 x]. - Here we don't care about the specific numbers as long as the function is deterministic. - *) - Caml.Hashtbl.hash - - - let alloc_of_block ~tag ~size = - let state = Hash.alloc () in - let state = Hash.fold_int state tag in - let state = Hash.fold_int state size in - state - - - let fold_hash_value = Hash.fold_int - - let get_hash_value = Hash.get_hash_value -end - -module Sharer : sig - type t - - val create : unit -> t - - val normalize_value : t -> 'a -> 'a -end = struct - let hashed_obj eq = - ( module struct - type t = Hashing.hash_value * Obj.t - - let hash ((h, _) : t) = (h :> int) - - let equal ((h1, o1) : t) ((h2, o2) : t) = Int.equal (h1 :> int) (h2 :> int) && eq o1 o2 - end : Caml.Hashtbl.HashedType - with type t = Hashing.hash_value * Obj.t ) - - - module HashedNoscanBlock = (val hashed_obj Poly.equal) - - module PhysEqualedHashedScannableBlock = (val hashed_obj phys_equal) - - module HashedNormalizedScannableBlock = (val hashed_obj PhysEqual.shallow_equal) - - module HNoscan = Caml.Hashtbl.Make (HashedNoscanBlock) - module HPhysEq = Caml.Hashtbl.Make (PhysEqualedHashedScannableBlock) - module HNorm = Caml.Hashtbl.Make (HashedNormalizedScannableBlock) - - type visited = - | Visiting of {mutable to_patch: (PhysEqualedHashedScannableBlock.t * int) list} - | Normalized of HashedNormalizedScannableBlock.t - - type t = - { noscan_blocks: HashedNoscanBlock.t HNoscan.t - ; visited_blocks: visited HPhysEq.t - ; hash_normalized: HashedNormalizedScannableBlock.t HNorm.t - ; fail_on_forward: bool - ; fail_on_nonstring: bool - ; fail_on_objects: bool } - - let create () = - { noscan_blocks= HNoscan.create 1 - ; visited_blocks= HPhysEq.create 1 - ; hash_normalized= HNorm.create 1 - ; (* these are just for safety because the code hasn't been tested on them, it should work fine though *) - fail_on_forward= true - ; fail_on_nonstring= true - ; fail_on_objects= true } - - - let hash_and_normalize_int o = (Hashing.of_int (Obj.obj o : int), o) - - let dummy_should_not_be_hashed_or_used = - (* - Must be different than any block found in values. - Must fail if hashed (there is actually no way to ensure that :( )) - *) - Obj.repr (lazy (assert false)) - - - (* - TODO: be much more efficient and write it in C to be able to use the GC flags to - mark visited values. - *) - let rec hash_and_normalize_obj sharer o parent_shallow_hash_block parent_field_i = - if Obj.is_int o then hash_and_normalize_int o - else hash_and_normalize_block sharer o parent_shallow_hash_block parent_field_i - - - and hash_and_normalize_block sharer block parent_shallow_hash_block parent_field_i = - let shallow_hash = Hashing.shallow block in - let shallow_hash_block = (shallow_hash, block) in - let tag = Obj.tag block in - if tag >= Obj.no_scan_tag then ( - (* - No-scan blocks (strings, int64, closures, weird stuff) are treated separately. - They are hashed and compared using the Stdlib polymorphic functions. - *) - assert ((not sharer.fail_on_nonstring) || Int.equal tag Obj.string_tag) ; - match HNoscan.find_opt sharer.noscan_blocks shallow_hash_block with - | Some hash_normalized -> - hash_normalized - | None -> - HNoscan.add sharer.noscan_blocks shallow_hash_block shallow_hash_block ; - shallow_hash_block ) - else if Int.equal tag Obj.lazy_tag then - (* - For now MaximumSharing is used before marshalling. - It makes little sense to marshal lazy values. - Because lazy blocks are normal scannable blocks, this special case could be safely removed. - *) - raise MaximumSharingLazyValue - else - (* - This is where we could win by mutating the value directly. - Instead we need to use a hashtbl using a shallow hash (for termination), which adds a - multiplicative factor to the running time. - *) - match HPhysEq.find_opt sharer.visited_blocks shallow_hash_block with - | Some (Normalized hash_normalized) -> - (* The block has already been visited, we can reuse the result. *) - hash_normalized - | Some (Visiting visiting) -> - (* - The block is being visited, which means we have a cycle. - We record fields to be patched after we have finished treating the cycle. - - For termination we have to return a shallow hash. - We also need to return a phys_equally different value so that it will trigger - copy-on-write on the whole cycle (then patch can safely be applied and in any order), - even though it may not be necessary if the whole cycle and its dependencies could be - kept as-is. - The value that is returned should not be hashed or used. The current implementation - respects it. - *) - visiting.to_patch <- (parent_shallow_hash_block, parent_field_i) :: visiting.to_patch ; - (shallow_hash, dummy_should_not_be_hashed_or_used) - | None -> - let visited = Visiting {to_patch= []} in - let[@warning "-8"] (Visiting visiting) = visited in - HPhysEq.add sharer.visited_blocks shallow_hash_block visited ; - let hash_normalized = - if Int.equal tag Obj.forward_tag then ( - assert (not sharer.fail_on_forward) ; - (* - Forward_tag is an intermediate block resulting from the evaluating of a lazy. - As an optimization, let's replace it directly with the normalization of the result - as if this intermediate block didn't exist. - - This remains untested for now (hence the assertion above). - Not obvious to test as optimizations or the GC can already do the substitution. - *) - hash_and_normalize_obj sharer (Obj.field block 0) parent_shallow_hash_block - parent_field_i ) - else ( - (* For regular blocks, normalize each field then use a shallow comparison. *) - assert ((not sharer.fail_on_objects) || not (Int.equal tag Obj.object_tag)) ; - let hash_shallow_normalized = - let size = Obj.size block in - hash_and_normalize_block_fields sharer shallow_hash_block block block size 0 - (Hashing.alloc_of_block ~tag ~size) - in - match HNorm.find_opt sharer.hash_normalized hash_shallow_normalized with - | Some hash_normalized -> - hash_normalized - | None -> - HNorm.add sharer.hash_normalized hash_shallow_normalized hash_shallow_normalized ; - hash_shallow_normalized ) - in - let hash_normalized = - match visiting.to_patch with - | [] (* not the head of a cycle *) -> - hash_normalized - | _ :: _ as to_patch -> - (* - The whole cycle has been treated, we now need to patch values that pointed to - this block. We need to look them up in the [visited_blocks] hash table because - they have been duplicated since we recorded them. - *) - let _, normalized = hash_normalized in - List.iter to_patch ~f:(fun (hash_block_to_patch, field_i_to_patch) -> - let normalized_block_to_patch = - if phys_equal hash_block_to_patch shallow_hash_block then - (* Self-cycle, e.g. [let rec x = 1 :: x]. No lookup! *) - normalized - else - let[@warning "-8"] (Normalized (_, normalized_block_to_patch)) = - HPhysEq.find sharer.visited_blocks hash_block_to_patch - in - normalized_block_to_patch - in - Obj.set_field normalized_block_to_patch field_i_to_patch normalized ) ; - (* - For cycle heads, for consistency with the [Visiting] case above we need to - use the shallow hash. - *) - (shallow_hash, normalized) - in - HPhysEq.replace sharer.visited_blocks shallow_hash_block (Normalized hash_normalized) ; - hash_normalized - - - and hash_and_normalize_block_fields sharer original_shallow_hash_block original_block new_block - size field_i hash_state = - if field_i >= size then (Hashing.get_hash_value hash_state, new_block) - else - let field_v = Obj.field original_block field_i in - let field_hash, field_v' = - hash_and_normalize_obj sharer field_v original_shallow_hash_block field_i - in - let hash_state = Hashing.fold_hash_value hash_state field_hash in - let new_block = - if phys_equal field_v field_v' then new_block - else - let new_block = - if phys_equal original_block new_block then (* copy-on-write *) - Obj.dup original_block - else new_block - in - Obj.set_field new_block field_i field_v' ; - new_block - in - (hash_and_normalize_block_fields [@tailcall]) sharer original_shallow_hash_block - original_block new_block size (field_i + 1) hash_state - - - let dummy_should_not_be_patched = - (Hashing.of_int 0, (* Make sure it fails hard if [Obj.set_field] is called on it *) Obj.repr 0) - - - (** Returns a value structurally equal but with potentially more sharing. Potentially unsafe if - used on mutable values that are modified later. Preserves polymorphic compare, hashing, - no-sharing marshalling. May have an impact on code using [phys_equal] or marshalling with - sharing. *) - let normalize_value sharer v = - hash_and_normalize_obj sharer (Obj.repr v) dummy_should_not_be_patched (-1) |> snd |> Obj.obj -end - -module ForHashtbl (H : Caml.Hashtbl.S) = struct - let normalize h = - let sharer = Sharer.create () in - (* If a hash table has been created with [add] and not [replace] only, it is possible to - have several values for a given key. We need to collect them all and reinsert them in - the reverse order. *) - let rev_bindings = H.fold (fun k v acc -> (k, v) :: acc) h [] in - (* No need to preserve the initial size of the original hash table *) - let h' = H.create (H.length h) in - List.iter rev_bindings ~f:(fun (k, v) -> - let k' = Sharer.normalize_value sharer k in - let v' = Sharer.normalize_value sharer v in - H.add h' k' v' ) ; - h' -end diff --git a/infer/src/istd/MaximumSharing.mli b/infer/src/istd/MaximumSharing.mli deleted file mode 100644 index cb545f62a..000000000 --- a/infer/src/istd/MaximumSharing.mli +++ /dev/null @@ -1,23 +0,0 @@ -(* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - *) - -open! IStd - -(** Current implementation will stack overflow on deep values (TODO: a tailrec version). *) - -module Sharer : sig - type t - - val create : unit -> t - - val normalize_value : t -> 'a -> 'a -end - -module ForHashtbl (H : Caml.Hashtbl.S) : sig - val normalize : 'a H.t -> 'a H.t - (** Duplicate a hash table with maximum sharing. *) -end diff --git a/infer/src/istd/PhysEqual.mli b/infer/src/istd/PhysEqual.mli index 79821ecca..0f4910c5e 100644 --- a/infer/src/istd/PhysEqual.mli +++ b/infer/src/istd/PhysEqual.mli @@ -7,8 +7,6 @@ open! IStd -val shallow_equal : 'a -> 'a -> bool - (** Helpers function to enforce physical equality. Let suppose [construct/deconstruct] is a 1-level-allocation OCaml construction/deconstruction, diff --git a/infer/src/unit/MaximumSharingTests.ml b/infer/src/unit/MaximumSharingTests.ml deleted file mode 100644 index 1b86f3b3a..000000000 --- a/infer/src/unit/MaximumSharingTests.ml +++ /dev/null @@ -1,79 +0,0 @@ -(* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - *) - -open! IStd -open OUnit2 - -let inputs = - let a = `A 'a' in - let b = Array.create ~len:10_000 a in - let c = Array.create ~len:1_000 b in - let d = Array.create ~len:1_000 c in - let rec e = 1 :: e in - let rec f = 1 :: 2 :: f in - [ ("unit", Obj.repr (), `PhysEqual) - ; ("same representation", Obj.repr ([42], [|42; 0|]), `MarshalNoSharing_MustBeBetter) - ; ("10K times the same element", Obj.repr b, `PhysEqual) - ; ("1K times 10K times the same element", Obj.repr c, `PhysEqual) - ; ("1K times 1K times 10K times the same element", Obj.repr d, `PhysEqual) - ; ("Self cycle", Obj.repr e, `MarshalWithSharing (* ideally `PhysEqual *)) - ; ("Cyclic value", Obj.repr f, `MarshalWithSharing (* ideally `PhysEqual *)) ] - - -let tests = - let normalize input = - let sharer = MaximumSharing.Sharer.create () in - MaximumSharing.Sharer.normalize_value sharer input - in - let test_one input checks _ = - (* Save this now, in case `MaximumSharing` mutates the [input], even though it shouldn't *) - let serialized_input_with_sharing = Marshal.to_string input [] in - let serialized_input_no_sharing = - match checks with - | `PhysEqual | `MarshalWithSharing -> - "UNUSED" - | `MarshalNoSharing_MustBeBetter -> - (* OOMs for big or cyclic values *) - Marshal.to_string input [Marshal.No_sharing] - in - let reachable_words_input = Obj.reachable_words input in - let normalized = normalize input in - (* - We can't really check [input] hasn't been mutated but its marshalling with sharing - shouldn't have changed - *) - let serialized_input_with_sharing' = Marshal.to_string input [] in - assert_equal serialized_input_with_sharing serialized_input_with_sharing' ; - (* - The whole goal of [MaximumSharing] is to reduce the memory footprint. - Let's make sure this contract is fulfilled. - There is no guarantee the serialized version will be smaller, e.g. - [let x = Some 0 in (x, Array.init 254 (fun i -> Some i), x)] - is smaller with no sharing. - *) - let reachable_words_normalized = Obj.reachable_words normalized in - assert_bool "less reachable words" (reachable_words_normalized <= reachable_words_input) ; - (* Cannot use [assert_equal] because it doesn't shortcut physical equalities *) - match checks with - | `PhysEqual -> - assert_bool "phys_equal" (phys_equal input normalized) - | `MarshalWithSharing -> - let serialized_normalized_with_sharing = Marshal.to_string normalized [] in - assert_equal serialized_input_with_sharing serialized_normalized_with_sharing - | `MarshalNoSharing_MustBeBetter -> - assert_bool "equal" (Poly.equal input normalized) ; - assert_bool "strictly less reachable words" - (reachable_words_normalized < reachable_words_input) ; - (* - In case structural equality and marshalling have slightly different semantics, - let's also make sure the serialized versions are indistinguishable - *) - let serialized_normalized_no_sharing = Marshal.to_string normalized [Marshal.No_sharing] in - assert_equal serialized_input_no_sharing serialized_normalized_no_sharing - in - let tests_ = List.map inputs ~f:(fun (name, input, checks) -> name >:: test_one input checks) in - "MaximumSharing_tests" >::: tests_