(*
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 *)
open! IStd

exception MaximumSharingLazyValue

module Hashing : sig
  type hash_value = private int

  type state

  val of_int : int -> hash_value

  val shallow : 'a -> hash_value
  (** A deterministic hash function that visits O(1) objects, to ensure termination on cyclic
      values. *)

  val alloc_of_block : tag:int -> size:int -> state

  val fold_hash_value : state -> hash_value -> state

  val get_hash_value : state -> hash_value
end = struct
  type hash_value = Hash.hash_value

  type state = Hash.state

  let of_int = Fn.id

  let shallow =
    (*
      [hash x] is defined as [seeded_hash_param 10 100 0 x].
      Here we don't care about the specific numbers as long as the function is deterministic.
    *)
    Caml.Hashtbl.hash

  let alloc_of_block ~tag ~size =
    let state = Hash.alloc () in
    let state = Hash.fold_int state tag in
    let state = Hash.fold_int state size in
    state

  let fold_hash_value = Hash.fold_int

  let get_hash_value = Hash.get_hash_value
end
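
(* Illustrative aside, not from the original file: [Hashing.shallow] terminates even on
   cyclic values because [Caml.Hashtbl.hash] only visits a bounded number of meaningful
   nodes. A hedged sketch:

     let rec xs = 1 :: xs          (* cyclic list *)
     let h = Hashing.shallow xs    (* returns; a naive structural hash would diverge *)
*)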

module Sharer : sig
  type t

  val create : unit -> t

  val normalize_value : t -> 'a -> 'a
end = struct
  let hashed_obj eq =
    ( module struct
      type t = Hashing.hash_value * Obj.t

      let hash ((h, _) : t) = (h :> int)

      let equal ((h1, o1) : t) ((h2, o2) : t) = Int.equal (h1 :> int) (h2 :> int) && eq o1 o2
    end : Caml.Hashtbl.HashedType
      with type t = Hashing.hash_value * Obj.t )

  module HashedNoscanBlock = (val hashed_obj Poly.equal)

  module PhysEqualedHashedScannableBlock = (val hashed_obj phys_equal)

  module HashedNormalizedScannableBlock = (val hashed_obj PhysEqual.shallow_equal)

  module HNoscan = Caml.Hashtbl.Make (HashedNoscanBlock)

  module HPhysEq = Caml.Hashtbl.Make (PhysEqualedHashedScannableBlock)

  module HNorm = Caml.Hashtbl.Make (HashedNormalizedScannableBlock)

  type visited =
    | Visiting of {mutable to_patch: (PhysEqualedHashedScannableBlock.t * int) list}
    | Normalized of HashedNormalizedScannableBlock.t

  type t =
    { noscan_blocks: HashedNoscanBlock.t HNoscan.t
    ; visited_blocks: visited HPhysEq.t
    ; hash_normalized: HashedNormalizedScannableBlock.t HNorm.t
    ; fail_on_forward: bool
    ; fail_on_nonstring: bool
    ; fail_on_objects: bool }

  let create () =
    { noscan_blocks= HNoscan.create 1
    ; visited_blocks= HPhysEq.create 1
    ; hash_normalized= HNorm.create 1
    ; (* these are just for safety because the code hasn't been tested on them; it should
         work fine though *)
      fail_on_forward= true
    ; fail_on_nonstring= true
    ; fail_on_objects= true }

  let hash_and_normalize_int o = (Hashing.of_int (Obj.obj o : int), o)

  let dummy_should_not_be_hashed_or_used =
    (*
      Must be different from any block found in values.
      Must fail if hashed (there is actually no way to ensure that, unfortunately).
    *)
    Obj.repr (lazy (assert false))

  (*
    TODO: be much more efficient and write it in C to be able to use the GC flags to
    mark visited values.
  *)
  let rec hash_and_normalize_obj sharer o parent_shallow_hash_block parent_field_i =
    if Obj.is_int o then hash_and_normalize_int o
    else hash_and_normalize_block sharer o parent_shallow_hash_block parent_field_i

  and hash_and_normalize_block sharer block parent_shallow_hash_block parent_field_i =
    let shallow_hash = Hashing.shallow block in
    let shallow_hash_block = (shallow_hash, block) in
    let tag = Obj.tag block in
    if tag >= Obj.no_scan_tag then (
      (*
        No-scan blocks (strings, int64, closures, weird stuff) are treated separately.
        They are hashed and compared using the Stdlib polymorphic functions.
      *)
      assert ((not sharer.fail_on_nonstring) || Int.equal tag Obj.string_tag) ;
      match HNoscan.find_opt sharer.noscan_blocks shallow_hash_block with
      | Some hash_normalized ->
          hash_normalized
      | None ->
          HNoscan.add sharer.noscan_blocks shallow_hash_block shallow_hash_block ;
          shallow_hash_block )
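    (* Hedged illustration, not in the original: the no-scan path above is what lets two
       structurally equal but separately allocated strings, e.g. the results of two
       independent [String.concat] calls, normalize to a single shared string. *)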
    else if Int.equal tag Obj.lazy_tag then
      (*
        For now MaximumSharing is used before marshalling.
        It makes little sense to marshal lazy values.
        Because lazy blocks are normal scannable blocks, this special case could be safely
        removed.
      *)
      raise MaximumSharingLazyValue
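      (* Illustrative note, not in the original: normalizing any value that still contains
         an unforced thunk, e.g. [Some (lazy (expensive ()))] where [expensive] is a
         hypothetical function, raises [MaximumSharingLazyValue] here. *)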
    else
      (*
        This is where we could win by mutating the value directly.
        Instead we need to use a hashtbl using a shallow hash (for termination), which adds a
        multiplicative factor to the running time.
      *)
      match HPhysEq.find_opt sharer.visited_blocks shallow_hash_block with
      | Some (Normalized hash_normalized) ->
          (* The block has already been visited; we can reuse the result. *)
          hash_normalized
      | Some (Visiting visiting) ->
          (*
            The block is currently being visited, which means we have a cycle.
            We record the fields to be patched once the whole cycle has been treated.
            For termination we have to return a shallow hash.
            We also need to return a physically different value, so that copy-on-write is
            triggered on the whole cycle (patches can then safely be applied, in any order),
            even though this may not be necessary if the whole cycle and its dependencies
            could have been kept as-is.
            The value that is returned should not be hashed or used; the current
            implementation respects this.
          *)
          visiting.to_patch <- (parent_shallow_hash_block, parent_field_i) :: visiting.to_patch ;
          (shallow_hash, dummy_should_not_be_hashed_or_used)
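      (* Hedged sketch of this case, not in the original: for [let rec x = 1 :: x], the
         recursive visit of the cons cell reaches this arm; the parent block and field
         index are queued in [to_patch] and patched once the cycle head is normalized. *)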
      | None ->
          let visited = Visiting {to_patch= []} in
          let[@warning "-8"] (Visiting visiting) = visited in
          HPhysEq.add sharer.visited_blocks shallow_hash_block visited ;
          let hash_normalized =
            if Int.equal tag Obj.forward_tag then (
              assert (not sharer.fail_on_forward) ;
              (*
                Forward_tag marks an intermediate block resulting from the evaluation of a
                lazy value.
                As an optimization, let's replace it directly with the normalization of the
                result, as if this intermediate block didn't exist.
                This remains untested for now (hence the assertion above): it is not obvious
                to test, as optimizations or the GC may already have done the substitution.
              *)
              hash_and_normalize_obj sharer (Obj.field block 0) parent_shallow_hash_block
                parent_field_i )
            else (
              (* For regular blocks, normalize each field then use a shallow comparison. *)
              assert ((not sharer.fail_on_objects) || not (Int.equal tag Obj.object_tag)) ;
              let hash_shallow_normalized =
                let size = Obj.size block in
                hash_and_normalize_block_fields sharer shallow_hash_block block block size 0
                  (Hashing.alloc_of_block ~tag ~size)
              in
              match HNorm.find_opt sharer.hash_normalized hash_shallow_normalized with
              | Some hash_normalized ->
                  hash_normalized
              | None ->
                  HNorm.add sharer.hash_normalized hash_shallow_normalized hash_shallow_normalized ;
                  hash_shallow_normalized )
          in
          let hash_normalized =
            match visiting.to_patch with
            | [] (* not the head of a cycle *) ->
                hash_normalized
            | _ :: _ as to_patch ->
                (*
                  The whole cycle has been treated; we now need to patch the values that
                  pointed to this block. We have to look them up in the [visited_blocks]
                  hash table because they have been duplicated since we recorded them.
                *)
                let _, normalized = hash_normalized in
                List.iter to_patch ~f:(fun (hash_block_to_patch, field_i_to_patch) ->
                    let normalized_block_to_patch =
                      if phys_equal hash_block_to_patch shallow_hash_block then
                        (* Self-cycle, e.g. [let rec x = 1 :: x]. No lookup! *)
                        normalized
                      else
                        let[@warning "-8"] (Normalized (_, normalized_block_to_patch)) =
                          HPhysEq.find sharer.visited_blocks hash_block_to_patch
                        in
                        normalized_block_to_patch
                    in
                    Obj.set_field normalized_block_to_patch field_i_to_patch normalized ) ;
                (*
                  For cycle heads, we need to use the shallow hash, for consistency with the
                  [Visiting] case above.
                *)
                (shallow_hash, normalized)
          in
          HPhysEq.replace sharer.visited_blocks shallow_hash_block (Normalized hash_normalized) ;
          hash_normalized
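
  (* Hedged aside, not in the original: [hash_and_normalize_block_fields] below is
     copy-on-write. If every field normalizes to itself, the original block is returned
     unchanged; the first field that changes triggers a single [Obj.dup], after which the
     private copy is mutated in place for the remaining fields. *)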
  and hash_and_normalize_block_fields sharer original_shallow_hash_block original_block new_block
      size field_i hash_state =
    if field_i >= size then (Hashing.get_hash_value hash_state, new_block)
    else
      let field_v = Obj.field original_block field_i in
      let field_hash, field_v' =
        hash_and_normalize_obj sharer field_v original_shallow_hash_block field_i
      in
      let hash_state = Hashing.fold_hash_value hash_state field_hash in
      let new_block =
        if phys_equal field_v field_v' then new_block
        else
          let new_block =
            if phys_equal original_block new_block then (* copy-on-write *)
              Obj.dup original_block
            else new_block
          in
          Obj.set_field new_block field_i field_v' ;
          new_block
      in
      (hash_and_normalize_block_fields [@tailcall]) sharer original_shallow_hash_block
        original_block new_block size (field_i + 1) hash_state

  let dummy_should_not_be_patched =
    (Hashing.of_int 0, (* Make sure it fails hard if [Obj.set_field] is called on it *) Obj.repr 0)

  (** Returns a value structurally equal to the input but with potentially more sharing.
      Potentially unsafe if used on mutable values that are modified later. Preserves
      polymorphic compare, hashing, and no-sharing marshalling. May have an impact on code
      using [phys_equal] or marshalling with sharing. *)
  let normalize_value sharer v =
    hash_and_normalize_obj sharer (Obj.repr v) dummy_should_not_be_patched (-1) |> snd |> Obj.obj
end
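
(* Illustrative usage of [Sharer], not part of the original file; assumes a fresh sharer:

     let sharer = Sharer.create () in
     let x = [1; 2; 3] in
     let y = [1; 2; 3] in
     (* structurally equal, physically distinct *)
     let x' = Sharer.normalize_value sharer x in
     let y' = Sharer.normalize_value sharer y in
     (* with the same sharer, both normal forms should now be physically equal *)
     assert (phys_equal x' y')
*)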

module ForHashtbl (H : Caml.Hashtbl.S) = struct
  let normalize h =
    let sharer = Sharer.create () in
    (* If a hash table has been built with [add] rather than [replace] only, a key may be
       bound to several values. We need to collect them all and reinsert them in reverse
       order to preserve the original binding order. *)
    let rev_bindings = H.fold (fun k v acc -> (k, v) :: acc) h [] in
    (* No need to preserve the initial size of the original hash table *)
    let h' = H.create (H.length h) in
    List.iter rev_bindings ~f:(fun (k, v) ->
        let k' = Sharer.normalize_value sharer k in
        let v' = Sharer.normalize_value sharer v in
        H.add h' k' v' ) ;
    h'
end
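
(* Illustrative usage of [ForHashtbl], not part of the original file; [IntHashtbl] and
   [my_table] are hypothetical:

     module IntHashtbl = Caml.Hashtbl.Make (Int)
     module N = ForHashtbl (IntHashtbl)

     let compacted = N.normalize my_table

   [compacted] has the same bindings as [my_table] (duplicate bindings per key keep their
   order), with keys and values maximally shared among themselves. *)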