Summary: Reduces the size of the `tenv` by sharing values as most as possible, in an untyped - but supposedly safe - way, by using black magic on objects. Can be reused for other things later. Reviewed By: ngorogiannis Differential Revision: D15855870 fbshipit-source-id: 169a4b86bmaster
parent
384b3c5798
commit
0efd8960e1
@ -0,0 +1,172 @@
|
|||||||
|
(*
|
||||||
|
* Copyright (c) Facebook, Inc. and its affiliates.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the MIT license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree.
|
||||||
|
*)
|
||||||
|
|
||||||
|
open! IStd
|
||||||
|
|
||||||
|
exception MaximumSharingLazyValue
|
||||||
|
|
||||||
|
module Hashing : sig
|
||||||
|
type hash_value = private int
|
||||||
|
|
||||||
|
type state
|
||||||
|
|
||||||
|
val of_int : int -> hash_value
|
||||||
|
|
||||||
|
val of_no_scan_block : 'a -> hash_value
|
||||||
|
|
||||||
|
val alloc_of_block : tag:int -> size:int -> state
|
||||||
|
|
||||||
|
val fold_hash_value : state -> hash_value -> state
|
||||||
|
|
||||||
|
val get_hash_value : state -> hash_value
|
||||||
|
end = struct
|
||||||
|
type hash_value = Hash.hash_value
|
||||||
|
|
||||||
|
type state = Hash.state
|
||||||
|
|
||||||
|
let of_int = Fn.id
|
||||||
|
|
||||||
|
let of_no_scan_block = Caml.Hashtbl.hash
|
||||||
|
|
||||||
|
let alloc_of_block ~tag ~size =
|
||||||
|
let state = Hash.alloc () in
|
||||||
|
let state = Hash.fold_int state tag in
|
||||||
|
let state = Hash.fold_int state size in
|
||||||
|
state
|
||||||
|
|
||||||
|
|
||||||
|
let fold_hash_value = Hash.fold_int
|
||||||
|
|
||||||
|
let get_hash_value = Hash.get_hash_value
|
||||||
|
end
|
||||||
|
|
||||||
|
module Sharer : sig
|
||||||
|
type t
|
||||||
|
|
||||||
|
val create : unit -> t
|
||||||
|
|
||||||
|
val normalize_value : t -> 'a -> 'a
|
||||||
|
end = struct
|
||||||
|
module HashedNormalizedObj = struct
|
||||||
|
type t = Hashing.hash_value * Obj.t
|
||||||
|
|
||||||
|
let equal ((h1, o1) : t) ((h2, o2) : t) =
|
||||||
|
Int.equal (h1 :> int) (h2 :> int)
|
||||||
|
&&
|
||||||
|
if Obj.tag o1 >= Obj.no_scan_tag then Polymorphic_compare.equal o1 o2
|
||||||
|
else PhysEqual.shallow_equal o1 o2
|
||||||
|
|
||||||
|
|
||||||
|
let hash ((h, _) : t) = (h :> int)
|
||||||
|
end
|
||||||
|
|
||||||
|
module H = Caml.Hashtbl.Make (HashedNormalizedObj)
|
||||||
|
|
||||||
|
type t =
|
||||||
|
{ inplace: bool
|
||||||
|
(** Uses [Obj.set_field] on possibly immutable values, hence should be avoided when used with flambda *)
|
||||||
|
; hash_normalized: HashedNormalizedObj.t H.t
|
||||||
|
; fail_on_forward: bool
|
||||||
|
; fail_on_nonstring: bool
|
||||||
|
; fail_on_objects: bool }
|
||||||
|
|
||||||
|
let create () =
|
||||||
|
{ inplace= false
|
||||||
|
; hash_normalized= H.create 1
|
||||||
|
; (* these are just for safety because the code hasn't been tested on them, it should work fine though *)
|
||||||
|
fail_on_forward= true
|
||||||
|
; fail_on_nonstring= true
|
||||||
|
; fail_on_objects= true }
|
||||||
|
|
||||||
|
|
||||||
|
let normalize_block sharer hash block =
|
||||||
|
let hash_block = (hash, block) in
|
||||||
|
match H.find_opt sharer.hash_normalized hash_block with
|
||||||
|
| Some hash_normalized ->
|
||||||
|
hash_normalized
|
||||||
|
| None ->
|
||||||
|
H.add sharer.hash_normalized hash_block hash_block ;
|
||||||
|
hash_block
|
||||||
|
|
||||||
|
|
||||||
|
let hash_and_normalize_int o = (Hashing.of_int (Obj.obj o : int), o)
|
||||||
|
|
||||||
|
(*
|
||||||
|
TODO: currently the function explores the graph without sharing. It is possible to build values
|
||||||
|
on which this will behave exponentially. This could be avoided by keeping a hashtbl of visited
|
||||||
|
values. But it would be much more efficient to write it in C to be able to use the GC flags to
|
||||||
|
mark visited values.
|
||||||
|
*)
|
||||||
|
let rec hash_and_normalize_obj sharer o =
|
||||||
|
if Obj.is_int o then hash_and_normalize_int o else hash_and_normalize_block sharer o
|
||||||
|
|
||||||
|
|
||||||
|
and hash_and_normalize_block sharer block =
|
||||||
|
let tag = Obj.tag block in
|
||||||
|
if tag >= Obj.no_scan_tag then (
|
||||||
|
assert ((not sharer.fail_on_nonstring) || Int.equal tag Obj.string_tag) ;
|
||||||
|
hash_and_normalize_no_scan_block sharer block )
|
||||||
|
else if Int.equal tag Obj.forward_tag then (
|
||||||
|
assert (not sharer.fail_on_forward) ;
|
||||||
|
hash_and_normalize_obj sharer (Obj.field block 0) )
|
||||||
|
else if Int.equal tag Obj.lazy_tag then raise MaximumSharingLazyValue
|
||||||
|
else (
|
||||||
|
assert ((not sharer.fail_on_objects) || not (Int.equal tag Obj.object_tag)) ;
|
||||||
|
let size = Obj.size block in
|
||||||
|
hash_and_normalize_block_fields sharer block block size 0 (Hashing.alloc_of_block ~tag ~size) )
|
||||||
|
|
||||||
|
|
||||||
|
and hash_and_normalize_block_fields sharer original_block new_block size field_i hash_state =
|
||||||
|
if field_i >= size then normalize_block sharer (Hashing.get_hash_value hash_state) new_block
|
||||||
|
else
|
||||||
|
let field_v = Obj.field original_block field_i in
|
||||||
|
let field_hash, field_v' = hash_and_normalize_obj sharer field_v in
|
||||||
|
let hash_state = Hashing.fold_hash_value hash_state field_hash in
|
||||||
|
let new_block =
|
||||||
|
if phys_equal field_v field_v' then new_block
|
||||||
|
else
|
||||||
|
let new_block =
|
||||||
|
if phys_equal original_block new_block && not sharer.inplace then
|
||||||
|
(* copy-on-write *)
|
||||||
|
Obj.dup original_block
|
||||||
|
else new_block
|
||||||
|
in
|
||||||
|
Obj.set_field new_block field_i field_v' ;
|
||||||
|
new_block
|
||||||
|
in
|
||||||
|
(hash_and_normalize_block_fields [@tailcall]) sharer original_block new_block size
|
||||||
|
(field_i + 1) hash_state
|
||||||
|
|
||||||
|
|
||||||
|
and hash_and_normalize_no_scan_block sharer block =
|
||||||
|
normalize_block sharer (Hashing.of_no_scan_block block) block
|
||||||
|
|
||||||
|
|
||||||
|
(**
|
||||||
|
Returns a value structurally equal but with potentially more sharing.
|
||||||
|
Potentially unsafe if used on mutable values that are modified later.
|
||||||
|
Preserves polymorphic compare, hashing, no-sharing marshalling.
|
||||||
|
May have an impact on code using [phys_equal] or marshalling with sharing.
|
||||||
|
*)
|
||||||
|
let normalize_value sharer v = hash_and_normalize_obj sharer (Obj.repr v) |> snd |> Obj.obj
|
||||||
|
end
|
||||||
|
|
||||||
|
module ForHashtbl (H : Caml.Hashtbl.S) = struct
|
||||||
|
let normalize h =
|
||||||
|
let sharer = Sharer.create () in
|
||||||
|
(* If a hash table has been created with [add] and not [replace] only, it is possible to
|
||||||
|
have several values for a given key. We need to collect them all and reinsert them in
|
||||||
|
the reverse order. *)
|
||||||
|
let rev_bindings = H.fold (fun k v acc -> (k, v) :: acc) h [] in
|
||||||
|
(* No need to preserve the initial size of the original hash table *)
|
||||||
|
let h' = H.create (H.length h) in
|
||||||
|
List.iter rev_bindings ~f:(fun (k, v) ->
|
||||||
|
let k' = Sharer.normalize_value sharer k in
|
||||||
|
let v' = Sharer.normalize_value sharer v in
|
||||||
|
H.add h' k' v' ) ;
|
||||||
|
h'
|
||||||
|
end
|
@ -0,0 +1,20 @@
|
|||||||
|
(*
|
||||||
|
* Copyright (c) Facebook, Inc. and its affiliates.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the MIT license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree.
|
||||||
|
*)
|
||||||
|
|
||||||
|
open! IStd
|
||||||
|
|
||||||
|
exception MaximumSharingLazyValue
|
||||||
|
|
||||||
|
(**
|
||||||
|
Current implementation will stack overflow on deep (TODO: a tailrec version)
|
||||||
|
or circular values (much harder to detect sharing, also not needed for now).
|
||||||
|
*)
|
||||||
|
|
||||||
|
module ForHashtbl (H : Caml.Hashtbl.S) : sig
|
||||||
|
val normalize : 'a H.t -> 'a H.t
|
||||||
|
(** Duplicate a hash table with maximum sharing. *)
|
||||||
|
end
|
Loading…
Reference in new issue