(*
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 *)

open! IStd

exception MaximumSharingLazyValue

module Hashing : sig
  type hash_value = private int

  type state

  val of_int : int -> hash_value

  val shallow : 'a -> hash_value
  (** A deterministic hash function that visits O(1) objects, to ensure termination on cyclic
      values. *)

  val alloc_of_block : tag:int -> size:int -> state

  val fold_hash_value : state -> hash_value -> state

  val get_hash_value : state -> hash_value
end = struct
  type hash_value = Hash.hash_value

  type state = Hash.state

  let of_int = Fn.id

  let shallow =
    (*
      [hash x] is defined as [seeded_hash_param 10 100 0 x].
      Here we don't care about the specific numbers as long as the function is deterministic.
    *)
    Caml.Hashtbl.hash


  let alloc_of_block ~tag ~size =
    let state = Hash.alloc () in
    let state = Hash.fold_int state tag in
    let state = Hash.fold_int state size in
    state


  let fold_hash_value = Hash.fold_int

  let get_hash_value = Hash.get_hash_value
end
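
(* Illustrative note (added; not part of the original code): the [Hashing.state] API above is
   meant to be threaded as a left fold over a block's tag, size, and field hashes. For example,
   a hypothetical block with tag [tag] and two fields whose hashes are [h1] and [h2] would be
   hashed as

     let hash_of_two_field_block ~tag h1 h2 =
       let state = Hashing.alloc_of_block ~tag ~size:2 in
       let state = Hashing.fold_hash_value state h1 in
       let state = Hashing.fold_hash_value state h2 in
       Hashing.get_hash_value state

   which mirrors how [hash_and_normalize_block_fields] below threads its [hash_state]. *)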

module Sharer : sig
  type t

  val create : unit -> t

  val normalize_value : t -> 'a -> 'a
end = struct
  let hashed_obj eq =
    ( module struct
      type t = Hashing.hash_value * Obj.t

      let hash ((h, _) : t) = (h :> int)

      let equal ((h1, o1) : t) ((h2, o2) : t) = Int.equal (h1 :> int) (h2 :> int) && eq o1 o2
    end : Caml.Hashtbl.HashedType
      with type t = Hashing.hash_value * Obj.t )


  module HashedNoscanBlock = (val hashed_obj Poly.equal)

  module PhysEqualedHashedScannableBlock = (val hashed_obj phys_equal)

  module HashedNormalizedScannableBlock = (val hashed_obj PhysEqual.shallow_equal)

  module HNoscan = Caml.Hashtbl.Make (HashedNoscanBlock)

  module HPhysEq = Caml.Hashtbl.Make (PhysEqualedHashedScannableBlock)

  module HNorm = Caml.Hashtbl.Make (HashedNormalizedScannableBlock)

  type visited =
    | Visiting of {mutable to_patch: (PhysEqualedHashedScannableBlock.t * int) list}
    | Normalized of HashedNormalizedScannableBlock.t

  type t =
    { noscan_blocks: HashedNoscanBlock.t HNoscan.t
    ; visited_blocks: visited HPhysEq.t
    ; hash_normalized: HashedNormalizedScannableBlock.t HNorm.t
    ; fail_on_forward: bool
    ; fail_on_nonstring: bool
    ; fail_on_objects: bool }

  let create () =
    { noscan_blocks= HNoscan.create 1
    ; visited_blocks= HPhysEq.create 1
    ; hash_normalized= HNorm.create 1
    ; (* these are just for safety because the code hasn't been tested on them; it should work fine though *)
      fail_on_forward= true
    ; fail_on_nonstring= true
    ; fail_on_objects= true }


  let hash_and_normalize_int o = (Hashing.of_int (Obj.obj o : int), o)


  let dummy_should_not_be_hashed_or_used =
    (*
      Must be different than any block found in values.
      Must fail if hashed (there is actually no way to ensure that :( ))
    *)
    Obj.repr (lazy (assert false))


  (*
    TODO: be much more efficient and write it in C to be able to use the GC flags to
    mark visited values.
  *)
  let rec hash_and_normalize_obj sharer o parent_shallow_hash_block parent_field_i =
    if Obj.is_int o then hash_and_normalize_int o
    else hash_and_normalize_block sharer o parent_shallow_hash_block parent_field_i


  and hash_and_normalize_block sharer block parent_shallow_hash_block parent_field_i =
    let shallow_hash = Hashing.shallow block in
    let shallow_hash_block = (shallow_hash, block) in
    let tag = Obj.tag block in
    if tag >= Obj.no_scan_tag then (
      (*
        No-scan blocks (strings, int64, closures, weird stuff) are treated separately.
        They are hashed and compared using the Stdlib polymorphic functions.
      *)
      assert ((not sharer.fail_on_nonstring) || Int.equal tag Obj.string_tag) ;
      match HNoscan.find_opt sharer.noscan_blocks shallow_hash_block with
      | Some hash_normalized ->
          hash_normalized
      | None ->
          HNoscan.add sharer.noscan_blocks shallow_hash_block shallow_hash_block ;
          shallow_hash_block )
    else if Int.equal tag Obj.lazy_tag then
      (*
        For now MaximumSharing is used before marshalling.
        It makes little sense to marshal lazy values.
        Because lazy blocks are normal scannable blocks, this special case could be safely removed.
      *)
      raise MaximumSharingLazyValue
    else
      (*
        This is where we could win by mutating the value directly.
        Instead we need to use a hashtbl keyed on a shallow hash (for termination), which adds a
        multiplicative factor to the running time.
      *)
      match HPhysEq.find_opt sharer.visited_blocks shallow_hash_block with
      | Some (Normalized hash_normalized) ->
          (* The block has already been visited, we can reuse the result. *)
          hash_normalized
      | Some (Visiting visiting) ->
          (*
            The block is being visited, which means we have a cycle.
            We record fields to be patched after we have finished treating the cycle.

            For termination we have to return a shallow hash.
            We also need to return a phys_equally different value so that it will trigger
            copy-on-write on the whole cycle (the patches can then safely be applied, in any
            order), even though it may not be necessary if the whole cycle and its dependencies
            could be kept as-is.
            The value that is returned should not be hashed or used. The current implementation
            respects this.
          *)
          visiting.to_patch <- (parent_shallow_hash_block, parent_field_i) :: visiting.to_patch ;
          (shallow_hash, dummy_should_not_be_hashed_or_used)
      | None ->
          let visited = Visiting {to_patch= []} in
          let[@warning "-8"] (Visiting visiting) = visited in
          HPhysEq.add sharer.visited_blocks shallow_hash_block visited ;
          let hash_normalized =
            if Int.equal tag Obj.forward_tag then (
              assert (not sharer.fail_on_forward) ;
              (*
                Forward_tag is an intermediate block resulting from the evaluation of a lazy.
                As an optimization, let's replace it directly with the normalization of the result
                as if this intermediate block didn't exist.

                This remains untested for now (hence the assertion above).
                Not obvious to test, as optimizations or the GC can already do the substitution.
              *)
              hash_and_normalize_obj sharer (Obj.field block 0) parent_shallow_hash_block
                parent_field_i )
            else (
              (* For regular blocks, normalize each field then use a shallow comparison. *)
              assert ((not sharer.fail_on_objects) || not (Int.equal tag Obj.object_tag)) ;
              let hash_shallow_normalized =
                let size = Obj.size block in
                hash_and_normalize_block_fields sharer shallow_hash_block block block size 0
                  (Hashing.alloc_of_block ~tag ~size)
              in
              match HNorm.find_opt sharer.hash_normalized hash_shallow_normalized with
              | Some hash_normalized ->
                  hash_normalized
              | None ->
                  HNorm.add sharer.hash_normalized hash_shallow_normalized hash_shallow_normalized ;
                  hash_shallow_normalized )
          in
          let hash_normalized =
            match visiting.to_patch with
            | [] (* not the head of a cycle *) ->
                hash_normalized
            | _ :: _ as to_patch ->
                (*
                  The whole cycle has been treated; we now need to patch the values that pointed
                  to this block. We need to look them up in the [visited_blocks] hash table
                  because they have been duplicated since we recorded them.
                *)
                let _, normalized = hash_normalized in
                List.iter to_patch ~f:(fun (hash_block_to_patch, field_i_to_patch) ->
                    let normalized_block_to_patch =
                      if phys_equal hash_block_to_patch shallow_hash_block then
                        (* Self-cycle, e.g. [let rec x = 1 :: x]. No lookup! *)
                        normalized
                      else
                        let[@warning "-8"] (Normalized (_, normalized_block_to_patch)) =
                          HPhysEq.find sharer.visited_blocks hash_block_to_patch
                        in
                        normalized_block_to_patch
                    in
                    Obj.set_field normalized_block_to_patch field_i_to_patch normalized ) ;
                (*
                  For cycle heads, for consistency with the [Visiting] case above, we need to
                  use the shallow hash.
                *)
                (shallow_hash, normalized)
          in
          HPhysEq.replace sharer.visited_blocks shallow_hash_block (Normalized hash_normalized) ;
          hash_normalized


  and hash_and_normalize_block_fields sharer original_shallow_hash_block original_block new_block
      size field_i hash_state =
    if field_i >= size then (Hashing.get_hash_value hash_state, new_block)
    else
      let field_v = Obj.field original_block field_i in
      let field_hash, field_v' =
        hash_and_normalize_obj sharer field_v original_shallow_hash_block field_i
      in
      let hash_state = Hashing.fold_hash_value hash_state field_hash in
      let new_block =
        if phys_equal field_v field_v' then new_block
        else
          let new_block =
            if phys_equal original_block new_block then (* copy-on-write *)
              Obj.dup original_block
            else new_block
          in
          Obj.set_field new_block field_i field_v' ;
          new_block
      in
      (hash_and_normalize_block_fields [@tailcall]) sharer original_shallow_hash_block
        original_block new_block size (field_i + 1) hash_state


  let dummy_should_not_be_patched =
    (Hashing.of_int 0, (* Make sure it fails hard if [Obj.set_field] is called on it *) Obj.repr 0)


  (** Returns a value structurally equal but with potentially more sharing. Potentially unsafe if
      used on mutable values that are modified later. Preserves polymorphic compare, hashing, and
      no-sharing marshalling. May have an impact on code using [phys_equal] or marshalling with
      sharing. *)
  let normalize_value sharer v =
    hash_and_normalize_obj sharer (Obj.repr v) dummy_should_not_be_patched (-1) |> snd |> Obj.obj
end
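
(* Illustrative note (added; not part of the original code): a typical use is to create one
   [Sharer.t] and normalize values with it before storing or marshalling them, e.g.

     let compact () =
       let sharer = Sharer.create () in
       (* the two sublists are structurally equal, so after normalization they are also
          physically equal and the value occupies less memory *)
       Sharer.normalize_value sharer [[1; 2; 3]; [1; 2; 3]]

   Values normalized with the same sharer share their structurally equal subvalues with each
   other, which is what [ForHashtbl] below relies on. *)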

module ForHashtbl (H : Caml.Hashtbl.S) = struct
  let normalize h =
    let sharer = Sharer.create () in
    (* If a hash table has been created with [add] and not [replace] only, it is possible to
       have several values for a given key. We need to collect them all and reinsert them in
       the reverse order. *)
    let rev_bindings = H.fold (fun k v acc -> (k, v) :: acc) h [] in
    (* No need to preserve the initial size of the original hash table *)
    let h' = H.create (H.length h) in
    List.iter rev_bindings ~f:(fun (k, v) ->
        let k' = Sharer.normalize_value sharer k in
        let v' = Sharer.normalize_value sharer v in
        H.add h' k' v' ) ;
    h'
end
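
(* Illustrative note (added; not part of the original code, and assuming this file is compiled
   as the [MaximumSharing] module): [ForHashtbl] can be instantiated on any [Caml.Hashtbl.S],
   e.g.

     module StringHashtbl = Caml.Hashtbl.Make (struct
       type t = string

       let equal = String.equal

       let hash = Caml.Hashtbl.hash
     end)

     module StringHashtblSharing = MaximumSharing.ForHashtbl (StringHashtbl)

   [StringHashtblSharing.normalize table] then builds a new table with the same bindings, in
   which structurally equal keys and values are physically shared. *)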