[tenv][java] hashcons instead of Obj magic

Summary:
The global type environment is loaded in all analysis workers, so it is best to minimise its memory footprint. `MaxSharing` does a great job at this but
- it is very slow
- it may be involved in observed segfaults, since it uses potentially unsafe operations

This diff replaces `MaxSharing` with a post-facto poor-man's hashcons pass through the parts involved in a java type environment. It is not as efficient as `MaxSharing` at reducing footprint, but it's much faster.

Reviewed By: jberdine

Differential Revision: D24590400

fbshipit-source-id: c37100325
master
Nikos Gorogiannis 4 years ago committed by Facebook GitHub Bot
parent fcc27cf7a2
commit 4fb2fde4af

@ -28,8 +28,6 @@ and value =
| Class of Typ.t
| Annot of t
let equal = [%compare.equal: t]
let volatile = {class_name= "volatile"; parameters= []}
let final = {class_name= "final"; parameters= []}
@ -84,6 +82,80 @@ and pp fmt annotation =
F.fprintf fmt "(%a)" (F.pp_print_list ~pp_sep:comma_sep pp_parameter) annotation.parameters
module rec ValueNormalizer : (HashNormalizer.S with type t = value) = struct
module rec V : (HashNormalizer.NormalizedT with type t = value) = struct
type t = value [@@deriving equal]
let hash = Hashtbl.hash
let normalize value =
match value with
| Str str ->
let str' = HashNormalizer.StringNormalizer.normalize str in
if phys_equal str str' then value else Str str'
| Bool _ ->
value
| Enum {class_typ; value= str_value} ->
let class_typ' = Typ.Normalizer.normalize class_typ in
let str_value' = HashNormalizer.StringNormalizer.normalize str_value in
if phys_equal class_typ class_typ' && phys_equal str_value str_value' then value
else Enum {class_typ= class_typ'; value= str_value'}
| Array list ->
let list' = IList.map_changed list ~equal:phys_equal ~f:N.normalize in
if phys_equal list list' then value else Array list'
| Class typ ->
let typ' = Typ.Normalizer.normalize typ in
if phys_equal typ typ' then value else Class typ'
| Annot t ->
let t' = TNormalizer.normalize t in
if phys_equal t t' then value else Annot t'
end
and N : (HashNormalizer.S with type t = V.t) = HashNormalizer.Make (V)
include N
end
and ParameterNormalizer : (HashNormalizer.S with type t = parameter) = HashNormalizer.Make (struct
type t = parameter [@@deriving equal]
let hash = Hashtbl.hash
let normalize_str_opt str_opt =
IOption.map_changed str_opt ~equal:phys_equal ~f:HashNormalizer.StringNormalizer.normalize
let normalize parameter =
let name = normalize_str_opt parameter.name in
let value = ValueNormalizer.normalize parameter.value in
if phys_equal name parameter.name && phys_equal value parameter.value then parameter
else {name; value}
end)
and TNormalizer : (HashNormalizer.S with type t = t) = HashNormalizer.Make (struct
type nonrec t = t [@@deriving equal]
let hash = Hashtbl.hash
let normalize t =
let class_name = HashNormalizer.StringNormalizer.normalize t.class_name in
let parameters =
IList.map_changed ~equal:phys_equal ~f:ParameterNormalizer.normalize t.parameters
in
if phys_equal class_name t.class_name && phys_equal parameters t.parameters then t
else {class_name; parameters}
end)
module PairNormalizer = HashNormalizer.Make (struct
type nonrec t = t * bool [@@deriving equal]
let hash = Hashtbl.hash
let normalize ((t, b) as pair) =
let t' = TNormalizer.normalize t in
if phys_equal t t' then pair else (t', b)
end)
module Item = struct
(** Annotation for one item: a list of annotations with visibility. *)
type nonrec t = (t * bool) list [@@deriving compare, equal]
@ -101,6 +173,23 @@ module Item = struct
let is_empty ia = List.is_empty ia
let is_final ia = List.exists ia ~f:(fun (x, b) -> b && is_final x)
module Normalizer = struct
include HashNormalizer.Make (struct
type nonrec t = t [@@deriving equal]
let hash = Hashtbl.hash
let normalize pairs = IList.map_changed pairs ~equal:phys_equal ~f:PairNormalizer.normalize
end)
let reset () =
reset () ;
PairNormalizer.reset () ;
TNormalizer.reset () ;
ParameterNormalizer.reset () ;
ValueNormalizer.reset ()
end
end
module Class = struct

@ -55,6 +55,8 @@ module Item : sig
val is_final : t -> bool
(** Check if final annotation is included in. *)
module Normalizer : HashNormalizer.S with type t = t
end
module Class : sig

@ -58,3 +58,16 @@ let is_java_outer_instance ({field_name} as field) =
let last_char = field_name.[String.length field_name - 1] in
Char.(last_char >= '0' && last_char <= '9')
&& String.is_suffix field_name ~suffix:(this ^ String.of_char last_char)
module Normalizer = HashNormalizer.Make (struct
type nonrec t = t [@@deriving equal]
let hash = Hashtbl.hash
let normalize t =
let class_name = Typ.Name.Normalizer.normalize t.class_name in
let field_name = HashNormalizer.StringNormalizer.normalize t.field_name in
if phys_equal class_name t.class_name && phys_equal field_name t.field_name then t
else {class_name; field_name}
end)

@ -43,3 +43,5 @@ val to_simplified_string : t -> string
val pp : F.formatter -> t -> unit
(** Pretty print a field name. *)
module Normalizer : HashNormalizer.S with type t = t

@ -108,3 +108,18 @@ let is_external_via_config t =
let pp_with_verbosity ~verbose fmt t =
if verbose then pp fmt t else F.pp_print_string fmt (classname t)
module Normalizer = HashNormalizer.Make (struct
type nonrec t = t [@@deriving equal]
let hash = Hashtbl.hash
let normalize t =
let classname = HashNormalizer.StringNormalizer.normalize t.classname in
let package =
IOption.map_changed t.package ~equal:phys_equal ~f:HashNormalizer.StringNormalizer.normalize
in
if phys_equal classname t.classname && phys_equal package t.package then t
else {classname; package}
end)

@ -52,3 +52,5 @@ val get_user_defined_class_if_anonymous_inner : t -> t option
SomeClass$NestedClass$1$17$5. In this example, we should return SomeClass$NestedClass.
If this is not an anonymous class, returns [None]. *)
module Normalizer : HashNormalizer.S with type t = t

@ -20,7 +20,7 @@ module Java = struct
| Non_Static
(** in Java, procedures called with invokevirtual, invokespecial, and invokeinterface *)
| Static (** in Java, procedures called with invokestatic *)
[@@deriving compare, yojson_of]
[@@deriving compare, equal, yojson_of]
(** Type of java procedure names. *)
type t =
@ -29,7 +29,7 @@ module Java = struct
; class_name: Typ.Name.t
; return_type: Typ.t option (* option because constructors have no return type *)
; kind: kind }
[@@deriving compare, yojson_of]
[@@deriving compare, equal, yojson_of]
let ensure_java_type t =
if not (Typ.is_java_type t) then
@ -171,6 +171,30 @@ module Java = struct
let is_external java_pname =
let package = get_package java_pname in
Option.exists ~f:Config.java_package_is_external package
module Normalizer = HashNormalizer.Make (struct
type nonrec t = t [@@deriving equal]
let hash = Hashtbl.hash
let normalize t =
let method_name = HashNormalizer.StringNormalizer.normalize t.method_name in
let parameters =
IList.map_changed t.parameters ~equal:phys_equal ~f:Typ.Normalizer.normalize
in
let class_name = Typ.Name.Normalizer.normalize t.class_name in
let return_type =
IOption.map_changed t.return_type ~equal:phys_equal ~f:Typ.Normalizer.normalize
in
if
phys_equal method_name t.method_name
&& phys_equal parameters t.parameters
&& phys_equal class_name t.class_name
&& phys_equal return_type t.return_type
then t
else {method_name; parameters; class_name; return_type; kind= t.kind}
end)
end
module Parameter = struct
@ -826,3 +850,23 @@ module UnitCache = struct
let cache_set pname value = cache := Some (pname, value) in
(cache_get, cache_set)
end
module Normalizer = struct
include HashNormalizer.Make (struct
type nonrec t = t [@@deriving equal]
let hash = hash
let normalize t =
match t with
| Java java_pname ->
let java_pname' = Java.Normalizer.normalize java_pname in
if phys_equal java_pname java_pname' then t else Java java_pname'
| _ ->
t
end)
let reset () =
reset () ;
Java.Normalizer.reset ()
end

@ -346,3 +346,5 @@ val to_filename : t -> string
val get_qualifiers : t -> QualifiedCppName.t
(** get qualifiers of C/objc/C++ method/function *)
module Normalizer : HashNormalizer.S with type t = t

@ -292,3 +292,83 @@ let is_not_java_interface = function
false
| _ ->
true
module FieldNormalizer = HashNormalizer.Make (struct
type t = field [@@deriving equal]
let hash = Hashtbl.hash
let normalize f =
let field_name, typ, annot = f in
let field_name' = Fieldname.Normalizer.normalize field_name in
let typ' = Typ.Normalizer.normalize typ in
let annot' = Annot.Item.Normalizer.normalize annot in
if phys_equal field_name field_name' && phys_equal typ typ' && phys_equal annot annot' then f
else (field_name', typ', annot')
end)
module JavaClassInfoOptNormalizer = HashNormalizer.Make (struct
type t = java_class_info option [@@deriving equal]
let hash = Hashtbl.hash
let normalize_location_opt loc_opt =
IOption.map_changed loc_opt ~equal:phys_equal ~f:Location.Normalizer.normalize
let normalize_java_class_info java_class_info =
let loc = normalize_location_opt java_class_info.loc in
if phys_equal loc java_class_info.loc then java_class_info else {java_class_info with loc}
let normalize java_class_info_opt =
IOption.map_changed java_class_info_opt ~equal:phys_equal ~f:normalize_java_class_info
end)
module Normalizer = struct
include HashNormalizer.Make (struct
type nonrec t = t [@@deriving equal]
let hash = Hashtbl.hash
let normalize t =
let fields = IList.map_changed ~equal:phys_equal ~f:FieldNormalizer.normalize t.fields in
let statics = IList.map_changed ~equal:phys_equal ~f:FieldNormalizer.normalize t.statics in
let supers = IList.map_changed ~equal:phys_equal ~f:Typ.Name.Normalizer.normalize t.supers in
let objc_protocols =
IList.map_changed ~equal:phys_equal ~f:Typ.Name.Normalizer.normalize t.objc_protocols
in
let methods =
IList.map_changed ~equal:phys_equal ~f:Procname.Normalizer.normalize t.methods
in
let exported_objc_methods =
IList.map_changed ~equal:phys_equal ~f:Procname.Normalizer.normalize t.exported_objc_methods
in
let annots = Annot.Item.Normalizer.normalize t.annots in
let java_class_info = JavaClassInfoOptNormalizer.normalize t.java_class_info in
if
phys_equal fields t.fields && phys_equal statics t.statics && phys_equal supers t.supers
&& phys_equal objc_protocols t.objc_protocols
&& phys_equal methods t.methods
&& phys_equal exported_objc_methods t.exported_objc_methods
&& phys_equal annots t.annots
&& phys_equal java_class_info t.java_class_info
then t
else
{ fields
; statics
; supers
; objc_protocols
; methods
; exported_objc_methods
; annots
; java_class_info
; dummy= t.dummy }
end)
let reset () =
reset () ;
FieldNormalizer.reset () ;
JavaClassInfoOptNormalizer.reset ()
end

@ -77,3 +77,5 @@ val merge : Typ.Name.t -> newer:t -> current:t -> t
val is_not_java_interface : t -> bool
(** check that a struct either defines a non-java type, or a non-java-interface type (abstract or
normal class) *)
module Normalizer : HashNormalizer.S with type t = t

@ -12,8 +12,6 @@ module L = Logging
(** Hash tables on type names. *)
module TypenameHash = Caml.Hashtbl.Make (Typ.Name)
module TypenameHashNormalizer = MaximumSharing.ForHashtbl (TypenameHash)
(** Type for type environment. *)
type t = Struct.t TypenameHash.t
@ -171,13 +169,39 @@ let store_to_filename tenv tenv_filename =
if Config.debug_mode then store_debug_file tenv tenv_filename
module Normalizer = struct
let normalize tenv =
let new_tenv = TypenameHash.create (TypenameHash.length tenv) in
let normalize_mapping name tstruct =
let name = Typ.Name.Normalizer.normalize name in
let tstruct = Struct.Normalizer.normalize tstruct in
TypenameHash.add new_tenv name tstruct
in
TypenameHash.iter normalize_mapping tenv ;
new_tenv
let reset () =
Typ.Normalizer.reset () ;
Typ.Name.Normalizer.reset () ;
Struct.Normalizer.reset () ;
Fieldname.Normalizer.reset () ;
Procname.Normalizer.reset () ;
SourceFile.Normalizer.reset () ;
Location.Normalizer.reset () ;
Annot.Item.Normalizer.reset () ;
JavaClassName.Normalizer.reset () ;
HashNormalizer.StringNormalizer.reset ()
end
let store_global tenv =
(* update in-memory global tenv for later uses by this process, e.g. in single-core mode the
frontend and backend run in the same process *)
if Config.debug_level_capture > 0 then
L.debug Capture Quiet "Tenv.store: global tenv has size %d bytes.@."
(Obj.(reachable_words (repr tenv)) * (Sys.word_size / 8)) ;
let tenv = TypenameHashNormalizer.normalize tenv in
let tenv = Normalizer.normalize tenv in
Normalizer.reset () ;
if Config.debug_level_capture > 0 then
L.debug Capture Quiet "Tenv.store: canonicalized tenv has size %d bytes.@."
(Obj.(reachable_words (repr tenv)) * (Sys.word_size / 8)) ;

@ -54,9 +54,7 @@ type ikind =
| IULongLong (** [unsigned long long] (or [unsigned int64_] on Microsoft Visual C) *)
| I128 (** [__int128_t] *)
| IU128 (** [__uint128_t] *)
[@@deriving compare, yojson_of]
let equal_ikind = [%compare.equal: ikind]
[@@deriving compare, equal, yojson_of]
let ikind_to_string = function
| IChar ->
@ -130,9 +128,7 @@ let ikind_is_char = function IChar | ISChar | IUChar -> true | _ -> false
(** Kinds of floating-point numbers *)
type fkind = FFloat (** [float] *) | FDouble (** [double] *) | FLongDouble (** [long double] *)
[@@deriving compare, yojson_of]
let equal_fkind = [%compare.equal: fkind]
[@@deriving compare, equal, yojson_of]
let fkind_to_string = function
| FFloat ->
@ -150,9 +146,7 @@ type ptr_kind =
| Pk_objc_weak (** Obj-C __weak pointer *)
| Pk_objc_unsafe_unretained (** Obj-C __unsafe_unretained pointer *)
| Pk_objc_autoreleasing (** Obj-C __autoreleasing pointer *)
[@@deriving compare, yojson_of]
let equal_ptr_kind = [%compare.equal: ptr_kind]
[@@deriving compare, equal, yojson_of]
let ptr_kind_string = function
| Pk_reference ->
@ -169,7 +163,7 @@ let ptr_kind_string = function
module T = struct
type type_quals = {is_const: bool; is_restrict: bool; is_volatile: bool}
[@@deriving compare, yojson_of]
[@@deriving compare, equal, yojson_of]
(** types for sil (structured) expressions *)
type t = {desc: desc; quals: type_quals}
@ -374,8 +368,6 @@ let to_string typ =
module Name = struct
type t = name [@@deriving compare, equal, yojson_of]
let equal = [%compare.equal: t]
let hash = Hashtbl.hash
let qual_name = function
@ -542,6 +534,20 @@ module Name = struct
let pp = pp
end)
module Normalizer = HashNormalizer.Make (struct
type nonrec t = t [@@deriving equal]
let hash = Hashtbl.hash
let normalize t =
match t with
| CStruct _ | CUnion _ | CppClass _ | ObjcClass _ | ObjcProtocol _ ->
t
| JavaClass java_class_name ->
let java_class_name' = JavaClassName.Normalizer.normalize java_class_name in
if phys_equal java_class_name java_class_name' then t else JavaClass java_class_name'
end)
end
(** dump a type with all the details. *)
@ -694,3 +700,52 @@ let pointer_to_java_lang_object = mk_ptr (mk_struct Name.Java.java_lang_object)
let pointer_to_java_lang_string = mk_ptr (mk_struct Name.Java.java_lang_string)
let pointer_to_objc_nszone = mk_ptr (mk_struct (CStruct (QualifiedCppName.of_qual_string "NSZone")))
module TypeQualsNormalizer = HashNormalizer.Make (struct
type t = type_quals [@@deriving equal]
let hash = Hashtbl.hash
let normalize = Fn.id
end)
module rec DescNormalizer : (HashNormalizer.S with type t = desc) = HashNormalizer.Make (struct
type t = desc [@@deriving equal]
let hash = Hashtbl.hash
let normalize t =
match t with
| Tint _ | Tfloat _ | Tvoid | Tfun ->
t
| Tstruct name ->
let name' = Name.Normalizer.normalize name in
if phys_equal name name' then t else Tstruct name'
| TVar str_var ->
let str_var' = HashNormalizer.StringNormalizer.normalize str_var in
if phys_equal str_var str_var' then t else TVar str_var'
| Tptr (pointed, ptr_kind) ->
let pointed' = Normalizer.normalize pointed in
if phys_equal pointed pointed' then t else Tptr (pointed', ptr_kind)
| Tarray {elt; length; stride} ->
let elt' = Normalizer.normalize elt in
if phys_equal elt elt' then t else Tarray {elt= elt'; length; stride}
end)
and Normalizer : (HashNormalizer.S with type t = t) = struct
include HashNormalizer.Make (struct
include T
let hash = Hashtbl.hash
let normalize t =
let quals = TypeQualsNormalizer.normalize t.quals in
let desc = DescNormalizer.normalize t.desc in
if phys_equal desc t.desc && phys_equal quals t.quals then t else {desc; quals}
end)
let reset () =
reset () ;
TypeQualsNormalizer.reset () ;
DescNormalizer.reset ()
end

@ -275,6 +275,8 @@ module Name : sig
module Set : PrettyPrintable.PPSet with type elt = t
module Map : PrettyPrintable.PPMap with type key = t
module Normalizer : HashNormalizer.S with type t = t
end
val equal : t -> t -> bool
@ -355,3 +357,5 @@ val has_block_prefix : string -> bool
val unsome : string -> t option -> t
type typ = t
module Normalizer : HashNormalizer.S with type t = t

@ -52,3 +52,13 @@ module Map = PrettyPrintable.MakePPMap (struct
let pp = pp
end)
module Normalizer = HashNormalizer.Make (struct
type nonrec t = t [@@deriving equal]
let hash = Hashtbl.hash
let normalize t =
let file = SourceFile.Normalizer.normalize t.file in
if phys_equal file t.file then t else {t with file}
end)

@ -37,3 +37,5 @@ val pp_file_pos : Format.formatter -> t -> unit
val pp_range : Format.formatter -> t * t -> unit
module Map : PrettyPrintable.PPMap with type key = t
module Normalizer : HashNormalizer.S with type t = t

@ -257,3 +257,31 @@ module SQLite = struct
RelativeProjectRootAndWorkspace {workspace_rel_root= prefix; rel_path}
else L.die InternalError "Could not deserialize sourcefile with tag=%c, str= %s@." tag str
end
module Normalizer = HashNormalizer.Make (struct
type nonrec t = t [@@deriving equal]
let hash = Hashtbl.hash
let normalize fname =
let string_normalize = HashNormalizer.StringNormalizer.normalize in
match fname with
| Invalid {ml_source_file} ->
let ml_source_file' = string_normalize ml_source_file in
if phys_equal ml_source_file ml_source_file' then fname
else Invalid {ml_source_file= ml_source_file'}
| RelativeProjectRootAndWorkspace {workspace_rel_root; rel_path} ->
let workspace_rel_root' = string_normalize workspace_rel_root in
let rel_path' = string_normalize rel_path in
if phys_equal workspace_rel_root workspace_rel_root' && phys_equal rel_path rel_path' then
fname
else
RelativeProjectRootAndWorkspace
{workspace_rel_root= workspace_rel_root'; rel_path= rel_path'}
| RelativeProjectRoot rel_path ->
let rel_path' = string_normalize rel_path in
if phys_equal rel_path rel_path' then fname else RelativeProjectRoot rel_path'
| Absolute path ->
let path' = string_normalize path in
if phys_equal path path' then fname else Absolute path'
end)

@ -64,3 +64,5 @@ val has_extension : t -> ext:string -> bool
(** returns whether the source file has provided extension *)
module SQLite : SqliteUtils.Data with type t = t
module Normalizer : HashNormalizer.S with type t = t

@ -39,7 +39,6 @@ let () =
; JavaProfilerSamplesTest.tests
; LivenessTests.tests
; LRUHashtblTests.tests
; MaximumSharingTests.tests
; ProcCfgTests.tests
; RestartSchedulerTests.tests
; SchedulerTests.tests

@ -0,0 +1,47 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! Core
module type NormalizedT = sig
include Caml.Hashtbl.HashedType
val normalize : t -> t
end
module type S = sig
type t
val normalize : t -> t
val reset : unit -> unit
end
module Make (T : NormalizedT) = struct
type t = T.t
module H = Caml.Hashtbl.Make (T)
let table : t H.t = H.create 11
let normalize t =
match H.find_opt table t with
| Some t' ->
t'
| None ->
let normalized = T.normalize t in
H.add table normalized normalized ;
normalized
let reset () = H.reset table
end
module StringNormalizer = Make (struct
include String
let normalize = Fn.id
end)

@ -0,0 +1,30 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
(** a hashed type with a normalization function which respects equality *)
module type NormalizedT = sig
include Caml.Hashtbl.HashedType
val normalize : t -> t
end
(** normalizer module which uses a hashtable to store normalized representatives *)
module type S = sig
(** type the normalizer works on *)
type t
val normalize : t -> t
(** return equal normalized representative *)
val reset : unit -> unit
(** reset underlying hashtable *)
end
module Make (T : NormalizedT) : S with type t = T.t
(** normalizer for strings *)
module StringNormalizer : S with type t = string

@ -17,6 +17,15 @@ let if_none_eval = value_default_f
let exists2 x y ~f = match (x, y) with Some x, Some y -> f x y | _, _ -> false
let map_changed opt ~equal ~f =
match opt with
| None ->
opt
| Some x ->
let x' = f x in
if equal x x' then opt else Some x'
module Let_syntax = struct
include Option.Let_syntax

@ -25,6 +25,9 @@ val if_none_eval : f:(unit -> 'a) -> 'a option -> 'a
val exists2 : 'a option -> 'b option -> f:('a -> 'b -> bool) -> bool
(** Like [Option.exists] but gets two parameters. *)
val map_changed : 'a option -> equal:('a -> 'a -> bool) -> f:('a -> 'a) -> 'a option
(** Like [Option.map] but maintain physical equality *)
include sig
[@@@warning "-32-60"]

@ -1,287 +0,0 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
exception MaximumSharingLazyValue
module Hashing : sig
type hash_value = private int
type state
val of_int : int -> hash_value
val shallow : 'a -> hash_value
(** A deterministic hash function that visits O(1) objects, to ensure termination on cyclic
values. *)
val alloc_of_block : tag:int -> size:int -> state
val fold_hash_value : state -> hash_value -> state
val get_hash_value : state -> hash_value
end = struct
type hash_value = Hash.hash_value
type state = Hash.state
let of_int = Fn.id
let shallow =
(*
[hash x] is defined as [seeded_hash_param 10 100 0 x].
Here we don't care about the specific numbers as long as the function is deterministic.
*)
Caml.Hashtbl.hash
let alloc_of_block ~tag ~size =
let state = Hash.alloc () in
let state = Hash.fold_int state tag in
let state = Hash.fold_int state size in
state
let fold_hash_value = Hash.fold_int
let get_hash_value = Hash.get_hash_value
end
module Sharer : sig
type t
val create : unit -> t
val normalize_value : t -> 'a -> 'a
end = struct
let hashed_obj eq =
( module struct
type t = Hashing.hash_value * Obj.t
let hash ((h, _) : t) = (h :> int)
let equal ((h1, o1) : t) ((h2, o2) : t) = Int.equal (h1 :> int) (h2 :> int) && eq o1 o2
end : Caml.Hashtbl.HashedType
with type t = Hashing.hash_value * Obj.t )
module HashedNoscanBlock = (val hashed_obj Poly.equal)
module PhysEqualedHashedScannableBlock = (val hashed_obj phys_equal)
module HashedNormalizedScannableBlock = (val hashed_obj PhysEqual.shallow_equal)
module HNoscan = Caml.Hashtbl.Make (HashedNoscanBlock)
module HPhysEq = Caml.Hashtbl.Make (PhysEqualedHashedScannableBlock)
module HNorm = Caml.Hashtbl.Make (HashedNormalizedScannableBlock)
type visited =
| Visiting of {mutable to_patch: (PhysEqualedHashedScannableBlock.t * int) list}
| Normalized of HashedNormalizedScannableBlock.t
type t =
{ noscan_blocks: HashedNoscanBlock.t HNoscan.t
; visited_blocks: visited HPhysEq.t
; hash_normalized: HashedNormalizedScannableBlock.t HNorm.t
; fail_on_forward: bool
; fail_on_nonstring: bool
; fail_on_objects: bool }
let create () =
{ noscan_blocks= HNoscan.create 1
; visited_blocks= HPhysEq.create 1
; hash_normalized= HNorm.create 1
; (* these are just for safety because the code hasn't been tested on them, it should work fine though *)
fail_on_forward= true
; fail_on_nonstring= true
; fail_on_objects= true }
let hash_and_normalize_int o = (Hashing.of_int (Obj.obj o : int), o)
let dummy_should_not_be_hashed_or_used =
(*
Must be different than any block found in values.
Must fail if hashed (there is actually no way to ensure that :( ))
*)
Obj.repr (lazy (assert false))
(*
TODO: be much more efficient and write it in C to be able to use the GC flags to
mark visited values.
*)
let rec hash_and_normalize_obj sharer o parent_shallow_hash_block parent_field_i =
if Obj.is_int o then hash_and_normalize_int o
else hash_and_normalize_block sharer o parent_shallow_hash_block parent_field_i
and hash_and_normalize_block sharer block parent_shallow_hash_block parent_field_i =
let shallow_hash = Hashing.shallow block in
let shallow_hash_block = (shallow_hash, block) in
let tag = Obj.tag block in
if tag >= Obj.no_scan_tag then (
(*
No-scan blocks (strings, int64, closures, weird stuff) are treated separately.
They are hashed and compared using the Stdlib polymorphic functions.
*)
assert ((not sharer.fail_on_nonstring) || Int.equal tag Obj.string_tag) ;
match HNoscan.find_opt sharer.noscan_blocks shallow_hash_block with
| Some hash_normalized ->
hash_normalized
| None ->
HNoscan.add sharer.noscan_blocks shallow_hash_block shallow_hash_block ;
shallow_hash_block )
else if Int.equal tag Obj.lazy_tag then
(*
For now MaximumSharing is used before marshalling.
It makes little sense to marshal lazy values.
Because lazy blocks are normal scannable blocks, this special case could be safely removed.
*)
raise MaximumSharingLazyValue
else
(*
This is where we could win by mutating the value directly.
Instead we need to use a hashtbl using a shallow hash (for termination), which adds a
multiplicative factor to the running time.
*)
match HPhysEq.find_opt sharer.visited_blocks shallow_hash_block with
| Some (Normalized hash_normalized) ->
(* The block has already been visited, we can reuse the result. *)
hash_normalized
| Some (Visiting visiting) ->
(*
The block is being visited, which means we have a cycle.
We record fields to be patched after we have finished treating the cycle.
For termination we have to return a shallow hash.
We also need to return a phys_equally different value so that it will trigger
copy-on-write on the whole cycle (then patch can safely be applied and in any order),
even though it may not be necessary if the whole cycle and its dependencies could be
kept as-is.
The value that is returned should not be hashed or used. The current implementation
respects it.
*)
visiting.to_patch <- (parent_shallow_hash_block, parent_field_i) :: visiting.to_patch ;
(shallow_hash, dummy_should_not_be_hashed_or_used)
| None ->
let visited = Visiting {to_patch= []} in
let[@warning "-8"] (Visiting visiting) = visited in
HPhysEq.add sharer.visited_blocks shallow_hash_block visited ;
let hash_normalized =
if Int.equal tag Obj.forward_tag then (
assert (not sharer.fail_on_forward) ;
(*
Forward_tag is an intermediate block resulting from the evaluating of a lazy.
As an optimization, let's replace it directly with the normalization of the result
as if this intermediate block didn't exist.
This remains untested for now (hence the assertion above).
Not obvious to test as optimizations or the GC can already do the substitution.
*)
hash_and_normalize_obj sharer (Obj.field block 0) parent_shallow_hash_block
parent_field_i )
else (
(* For regular blocks, normalize each field then use a shallow comparison. *)
assert ((not sharer.fail_on_objects) || not (Int.equal tag Obj.object_tag)) ;
let hash_shallow_normalized =
let size = Obj.size block in
hash_and_normalize_block_fields sharer shallow_hash_block block block size 0
(Hashing.alloc_of_block ~tag ~size)
in
match HNorm.find_opt sharer.hash_normalized hash_shallow_normalized with
| Some hash_normalized ->
hash_normalized
| None ->
HNorm.add sharer.hash_normalized hash_shallow_normalized hash_shallow_normalized ;
hash_shallow_normalized )
in
let hash_normalized =
match visiting.to_patch with
| [] (* not the head of a cycle *) ->
hash_normalized
| _ :: _ as to_patch ->
(*
The whole cycle has been treated, we now need to patch values that pointed to
this block. We need to look them up in the [visited_blocks] hash table because
they have been duplicated since we recorded them.
*)
let _, normalized = hash_normalized in
List.iter to_patch ~f:(fun (hash_block_to_patch, field_i_to_patch) ->
let normalized_block_to_patch =
if phys_equal hash_block_to_patch shallow_hash_block then
(* Self-cycle, e.g. [let rec x = 1 :: x]. No lookup! *)
normalized
else
let[@warning "-8"] (Normalized (_, normalized_block_to_patch)) =
HPhysEq.find sharer.visited_blocks hash_block_to_patch
in
normalized_block_to_patch
in
Obj.set_field normalized_block_to_patch field_i_to_patch normalized ) ;
(*
For cycle heads, for consistency with the [Visiting] case above we need to
use the shallow hash.
*)
(shallow_hash, normalized)
in
HPhysEq.replace sharer.visited_blocks shallow_hash_block (Normalized hash_normalized) ;
hash_normalized
and hash_and_normalize_block_fields sharer original_shallow_hash_block original_block new_block
size field_i hash_state =
if field_i >= size then (Hashing.get_hash_value hash_state, new_block)
else
let field_v = Obj.field original_block field_i in
let field_hash, field_v' =
hash_and_normalize_obj sharer field_v original_shallow_hash_block field_i
in
let hash_state = Hashing.fold_hash_value hash_state field_hash in
let new_block =
if phys_equal field_v field_v' then new_block
else
let new_block =
if phys_equal original_block new_block then (* copy-on-write *)
Obj.dup original_block
else new_block
in
Obj.set_field new_block field_i field_v' ;
new_block
in
(hash_and_normalize_block_fields [@tailcall]) sharer original_shallow_hash_block
original_block new_block size (field_i + 1) hash_state
let dummy_should_not_be_patched =
(Hashing.of_int 0, (* Make sure it fails hard if [Obj.set_field] is called on it *) Obj.repr 0)
(** Returns a value structurally equal but with potentially more sharing. Potentially unsafe if
used on mutable values that are modified later. Preserves polymorphic compare, hashing,
no-sharing marshalling. May have an impact on code using [phys_equal] or marshalling with
sharing. *)
let normalize_value sharer v =
hash_and_normalize_obj sharer (Obj.repr v) dummy_should_not_be_patched (-1) |> snd |> Obj.obj
end
module ForHashtbl (H : Caml.Hashtbl.S) = struct
let normalize h =
let sharer = Sharer.create () in
(* If a hash table has been created with [add] and not [replace] only, it is possible to
have several values for a given key. We need to collect them all and reinsert them in
the reverse order. *)
let rev_bindings = H.fold (fun k v acc -> (k, v) :: acc) h [] in
(* No need to preserve the initial size of the original hash table *)
let h' = H.create (H.length h) in
List.iter rev_bindings ~f:(fun (k, v) ->
let k' = Sharer.normalize_value sharer k in
let v' = Sharer.normalize_value sharer v in
H.add h' k' v' ) ;
h'
end

@ -1,23 +0,0 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
(** Current implementation will stack overflow on deep values (TODO: a tailrec version). *)
module Sharer : sig
type t
val create : unit -> t
val normalize_value : t -> 'a -> 'a
end
module ForHashtbl (H : Caml.Hashtbl.S) : sig
val normalize : 'a H.t -> 'a H.t
(** Duplicate a hash table with maximum sharing. *)
end

@ -7,8 +7,6 @@
open! IStd
val shallow_equal : 'a -> 'a -> bool
(** Helpers function to enforce physical equality.
Let suppose [construct/deconstruct] is a 1-level-allocation OCaml construction/deconstruction,

@ -1,79 +0,0 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
open OUnit2
let inputs =
let a = `A 'a' in
let b = Array.create ~len:10_000 a in
let c = Array.create ~len:1_000 b in
let d = Array.create ~len:1_000 c in
let rec e = 1 :: e in
let rec f = 1 :: 2 :: f in
[ ("unit", Obj.repr (), `PhysEqual)
; ("same representation", Obj.repr ([42], [|42; 0|]), `MarshalNoSharing_MustBeBetter)
; ("10K times the same element", Obj.repr b, `PhysEqual)
; ("1K times 10K times the same element", Obj.repr c, `PhysEqual)
; ("1K times 1K times 10K times the same element", Obj.repr d, `PhysEqual)
; ("Self cycle", Obj.repr e, `MarshalWithSharing (* ideally `PhysEqual *))
; ("Cyclic value", Obj.repr f, `MarshalWithSharing (* ideally `PhysEqual *)) ]
let tests =
let normalize input =
let sharer = MaximumSharing.Sharer.create () in
MaximumSharing.Sharer.normalize_value sharer input
in
let test_one input checks _ =
(* Save this now, in case `MaximumSharing` mutates the [input], even though it shouldn't *)
let serialized_input_with_sharing = Marshal.to_string input [] in
let serialized_input_no_sharing =
match checks with
| `PhysEqual | `MarshalWithSharing ->
"UNUSED"
| `MarshalNoSharing_MustBeBetter ->
(* OOMs for big or cyclic values *)
Marshal.to_string input [Marshal.No_sharing]
in
let reachable_words_input = Obj.reachable_words input in
let normalized = normalize input in
(*
We can't really check [input] hasn't been mutated but its marshalling with sharing
shouldn't have changed
*)
let serialized_input_with_sharing' = Marshal.to_string input [] in
assert_equal serialized_input_with_sharing serialized_input_with_sharing' ;
(*
The whole goal of [MaximumSharing] is to reduce the memory footprint.
Let's make sure this contract is fulfilled.
There is no guarantee the serialized version will be smaller, e.g.
[let x = Some 0 in (x, Array.init 254 (fun i -> Some i), x)]
is smaller with no sharing.
*)
let reachable_words_normalized = Obj.reachable_words normalized in
assert_bool "less reachable words" (reachable_words_normalized <= reachable_words_input) ;
(* Cannot use [assert_equal] because it doesn't shortcut physical equalities *)
match checks with
| `PhysEqual ->
assert_bool "phys_equal" (phys_equal input normalized)
| `MarshalWithSharing ->
let serialized_normalized_with_sharing = Marshal.to_string normalized [] in
assert_equal serialized_input_with_sharing serialized_normalized_with_sharing
| `MarshalNoSharing_MustBeBetter ->
assert_bool "equal" (Poly.equal input normalized) ;
assert_bool "strictly less reachable words"
(reachable_words_normalized < reachable_words_input) ;
(*
In case structural equality and marshalling have slightly different semantics,
let's also make sure the serialized versions are indistinguishable
*)
let serialized_normalized_no_sharing = Marshal.to_string normalized [Marshal.No_sharing] in
assert_equal serialized_input_no_sharing serialized_normalized_no_sharing
in
let tests_ = List.map inputs ~f:(fun (name, input, checks) -> name >:: test_one input checks) in
"MaximumSharing_tests" >::: tests_
Loading…
Cancel
Save