From 4c50dc096c687664c9d70b4c9cfe7f412945928f Mon Sep 17 00:00:00 2001 From: Mehdi Bouaziz Date: Mon, 1 Jul 2019 02:43:30 -0700 Subject: [PATCH] MaximumSharing: handle cycles Summary: Do not fail on cycles, normalize values issuing from cycles, but do not try to recognize equal cycles like `let rec x = 1 :: x` and `let rec y = 1 :: 1 :: y`. This is unlikely to happen in our code. Reviewed By: ngorogiannis Differential Revision: D16017365 fbshipit-source-id: 691bb756c --- infer/src/istd/MaximumSharing.ml | 93 ++++++++++++++++++++++----- infer/src/istd/MaximumSharing.mli | 3 +- infer/src/unit/MaximumSharingTests.ml | 30 ++++----- 3 files changed, 92 insertions(+), 34 deletions(-) diff --git a/infer/src/istd/MaximumSharing.ml b/infer/src/istd/MaximumSharing.ml index 24051b8bb..ab39f7050 100644 --- a/infer/src/istd/MaximumSharing.ml +++ b/infer/src/istd/MaximumSharing.ml @@ -7,8 +7,6 @@ open! IStd -exception MaximumSharingCyclicValue - exception MaximumSharingLazyValue module Hashing : sig @@ -82,7 +80,9 @@ end = struct module HPhysEq = Caml.Hashtbl.Make (PhysEqualedHashedScannableBlock) module HNorm = Caml.Hashtbl.Make (HashedNormalizedScannableBlock) - type visited = Visiting | Normalized of HashedNormalizedScannableBlock.t + type visited = + | Visiting of {mutable to_patch: (PhysEqualedHashedScannableBlock.t * int) list} + | Normalized of HashedNormalizedScannableBlock.t type t = { inplace: bool @@ -107,15 +107,24 @@ end = struct let hash_and_normalize_int o = (Hashing.of_int (Obj.obj o : int), o) + let dummy_should_not_be_hashed_or_used = + (* + Must be different than any block found in values. + Must fail if hashed (there is actually no way to ensure that :( )) + *) + Obj.repr (lazy (assert false)) + + (* TODO: be much more efficient and write it in C to be able to use the GC flags to mark visited values. - *) - let rec hash_and_normalize_obj sharer o = - if Obj.is_int o then hash_and_normalize_int o else hash_and_normalize_block sharer o + *) + let rec hash_and_normalize_obj sharer o parent_shallow_hash_block parent_field_i = + if Obj.is_int o then hash_and_normalize_int o + else hash_and_normalize_block sharer o parent_shallow_hash_block parent_field_i - and hash_and_normalize_block sharer block = + and hash_and_normalize_block sharer block parent_shallow_hash_block parent_field_i = let shallow_hash = Hashing.shallow block in let shallow_hash_block = (shallow_hash, block) in let tag = Obj.tag block in @@ -148,13 +157,25 @@ end = struct | Some (Normalized hash_normalized) -> (* The block has already been visited, we can reuse the result. *) hash_normalized - | Some Visiting -> + | Some (Visiting visiting) -> (* The block is being visited, which means we have a cycle. + We record fields to be patched after we have finished treating the cycle. + + For termination we have to return a shallow hash. + We also need to return a phys_equally different value so that it will trigger + copy-on-write on the whole cycle (then patch can safely be applied and in any order), + even though it may not be necessary if the whole cycle and its dependencies could be + kept as-is. + The value that is returned should not be hashed or used. The current implementation + respects it. *) - raise MaximumSharingCyclicValue + visiting.to_patch <- (parent_shallow_hash_block, parent_field_i) :: visiting.to_patch ; + (shallow_hash, dummy_should_not_be_hashed_or_used) | None -> - HPhysEq.add sharer.visited_blocks shallow_hash_block Visiting ; + let visited = Visiting {to_patch= []} in + let[@warning "-8"] (Visiting visiting) = visited in + HPhysEq.add sharer.visited_blocks shallow_hash_block visited ; let hash_normalized = if Int.equal tag Obj.forward_tag then ( assert (not sharer.fail_on_forward) ; @@ -166,13 +187,14 @@ end = struct This remains untested for now (hence the assertion above). Not obvious to test as optimizations or the GC can already do the substitution. *) - hash_and_normalize_obj sharer (Obj.field block 0) ) + hash_and_normalize_obj sharer (Obj.field block 0) parent_shallow_hash_block + parent_field_i ) else ( (* For regular blocks, normalize each field then use a shallow comparison. *) assert ((not sharer.fail_on_objects) || not (Int.equal tag Obj.object_tag)) ; let hash_shallow_normalized = let size = Obj.size block in - hash_and_normalize_block_fields sharer block block size 0 + hash_and_normalize_block_fields sharer shallow_hash_block block block size 0 (Hashing.alloc_of_block ~tag ~size) in match HNorm.find_opt sharer.hash_normalized hash_shallow_normalized with @@ -182,15 +204,47 @@ end = struct HNorm.add sharer.hash_normalized hash_shallow_normalized hash_shallow_normalized ; hash_shallow_normalized ) in + let hash_normalized = + match visiting.to_patch with + | [] (* not the head of a cycle *) -> + hash_normalized + | _ :: _ as to_patch -> + (* + The whole cycle has been treated, we now need to patch values that pointed to + this block. We need to look them up in the [visited_blocks] hash table because + they have been duplicated since we recorded them. + *) + let _, normalized = hash_normalized in + List.iter to_patch ~f:(fun (hash_block_to_patch, field_i_to_patch) -> + let normalized_block_to_patch = + if phys_equal hash_block_to_patch shallow_hash_block then + (* Self-cycle, e.g. [let rec x = 1 :: x]. No lookup! *) + normalized + else + let[@warning "-8"] (Normalized (_, normalized_block_to_patch)) = + HPhysEq.find sharer.visited_blocks hash_block_to_patch + in + normalized_block_to_patch + in + Obj.set_field normalized_block_to_patch field_i_to_patch normalized ) ; + (* + For cycle heads, for consistency with the [Visiting] case above we need to + use the shallow hash. + *) + (shallow_hash, normalized) + in HPhysEq.replace sharer.visited_blocks shallow_hash_block (Normalized hash_normalized) ; hash_normalized - and hash_and_normalize_block_fields sharer original_block new_block size field_i hash_state = + and hash_and_normalize_block_fields sharer original_shallow_hash_block original_block new_block + size field_i hash_state = if field_i >= size then (Hashing.get_hash_value hash_state, new_block) else let field_v = Obj.field original_block field_i in - let field_hash, field_v' = hash_and_normalize_obj sharer field_v in + let field_hash, field_v' = + hash_and_normalize_obj sharer field_v original_shallow_hash_block field_i + in let hash_state = Hashing.fold_hash_value hash_state field_hash in let new_block = if phys_equal field_v field_v' then new_block @@ -204,8 +258,12 @@ end = struct Obj.set_field new_block field_i field_v' ; new_block in - (hash_and_normalize_block_fields [@tailcall]) sharer original_block new_block size - (field_i + 1) hash_state + (hash_and_normalize_block_fields [@tailcall]) sharer original_shallow_hash_block + original_block new_block size (field_i + 1) hash_state + + + let dummy_should_not_be_patched = + (Hashing.of_int 0, (* Make sure it fails hard if [Obj.set_field] is called on it *) Obj.repr 0) (** @@ -214,7 +272,8 @@ end = struct Preserves polymorphic compare, hashing, no-sharing marshalling. May have an impact on code using [phys_equal] or marshalling with sharing. *) - let normalize_value sharer v = hash_and_normalize_obj sharer (Obj.repr v) |> snd |> Obj.obj + let normalize_value sharer v = + hash_and_normalize_obj sharer (Obj.repr v) dummy_should_not_be_patched (-1) |> snd |> Obj.obj end module ForHashtbl (H : Caml.Hashtbl.S) = struct diff --git a/infer/src/istd/MaximumSharing.mli b/infer/src/istd/MaximumSharing.mli index 985fa4b9e..a43187136 100644 --- a/infer/src/istd/MaximumSharing.mli +++ b/infer/src/istd/MaximumSharing.mli @@ -8,8 +8,7 @@ open! IStd (** - Current implementation will stack overflow on deep values (TODO: a tailrec version) - and throw on circular values. + Current implementation will stack overflow on deep values (TODO: a tailrec version). *) module Sharer : sig diff --git a/infer/src/unit/MaximumSharingTests.ml b/infer/src/unit/MaximumSharingTests.ml index 481bf9b35..9d483336f 100644 --- a/infer/src/unit/MaximumSharingTests.ml +++ b/infer/src/unit/MaximumSharingTests.ml @@ -13,11 +13,15 @@ let inputs = let b = Array.create ~len:10_000 a in let c = Array.create ~len:1_000 b in let d = Array.create ~len:1_000 c in + let rec e = 1 :: e in + let rec f = 1 :: 2 :: f in [ ("unit", Obj.repr (), `PhysEqual) - ; ("same representation", Obj.repr ([42], [|42; 0|]), `Marshal_MustBeBetter) + ; ("same representation", Obj.repr ([42], [|42; 0|]), `MarshalNoSharing_MustBeBetter) ; ("10K times the same element", Obj.repr b, `PhysEqual) ; ("1K times 10K times the same element", Obj.repr c, `PhysEqual) - ; ("1K times 1K times 10K times the same element", Obj.repr d, `PhysEqual) ] + ; ("1K times 1K times 10K times the same element", Obj.repr d, `PhysEqual) + ; ("Self cycle", Obj.repr e, `MarshalWithSharing (* ideally `PhysEqual *)) + ; ("Cyclic value", Obj.repr f, `MarshalWithSharing (* ideally `PhysEqual *)) ] let tests = @@ -30,9 +34,9 @@ let tests = let serialized_input_with_sharing = Marshal.to_string input [] in let serialized_input_no_sharing = match checks with - | `PhysEqual -> + | `PhysEqual | `MarshalWithSharing -> "UNUSED" - | `Marshal_MustBeBetter -> + | `MarshalNoSharing_MustBeBetter -> (* OOMs for big or cyclic values *) Marshal.to_string input [Marshal.No_sharing] in @@ -53,19 +57,15 @@ let tests = *) let reachable_words_normalized = Obj.reachable_words normalized in assert_bool "less reachable words" (reachable_words_normalized <= reachable_words_input) ; - let eq = - (* Cannot use [assert_equal] because it doesn't shortcut physical equalities *) - match checks with - | `PhysEqual -> - phys_equal input normalized - | `Marshal_MustBeBetter -> - Polymorphic_compare.equal input normalized - in - assert_bool "equal" eq ; + (* Cannot use [assert_equal] because it doesn't shortcut physical equalities *) match checks with | `PhysEqual -> - () - | `Marshal_MustBeBetter -> + assert_bool "phys_equal" (phys_equal input normalized) + | `MarshalWithSharing -> + let serialized_normalized_with_sharing = Marshal.to_string normalized [] in + assert_equal serialized_input_with_sharing serialized_normalized_with_sharing + | `MarshalNoSharing_MustBeBetter -> + assert_bool "equal" (Polymorphic_compare.equal input normalized) ; assert_bool "strictly less reachable words" (reachable_words_normalized < reachable_words_input) ; (*