From a89d3db364a1562aae1b7c50feefad4e6c654723 Mon Sep 17 00:00:00 2001 From: Jules Villard Date: Thu, 2 Jul 2020 04:38:36 -0700 Subject: [PATCH] [pulse] change recency maps to be backed by lists Summary: This one is observed to be more memory efficient. Intuitively, maps need to be re-allocated more often than lists for balancing. In pulse, we'll often only ever add new values, in increasing order (when they are fresh variables created as we symbolically execute the program), which pushes maps into their worst-case allocation pattern. At least I suspect that's what happens. With lists, this case is handled much better as lists are not re-allocated when adding elements. This is somewhat confirmed by benchmarking and observing GC stats. Reviewed By: skcho Differential Revision: D22140908 fbshipit-source-id: 29815112f --- infer/src/base/CommandLineOption.mli | 2 - infer/src/checkers/impurity.ml | 2 +- infer/src/istd/RecencyMap.ml | 225 +++++++++++++++------------ infer/src/istd/RecencyMap.mli | 29 ++-- infer/src/pulse/PulseBaseDomain.ml | 2 +- infer/src/pulse/PulseInterproc.ml | 40 ++--- infer/src/pulse/PulseOperations.ml | 5 +- 7 files changed, 147 insertions(+), 158 deletions(-) diff --git a/infer/src/base/CommandLineOption.mli b/infer/src/base/CommandLineOption.mli index 7aa2f53ed..02b2fc93e 100644 --- a/infer/src/base/CommandLineOption.mli +++ b/infer/src/base/CommandLineOption.mli @@ -209,8 +209,6 @@ val mk_subcommand : val args_env_var : string (** environment variable use to pass arguments from parent to child processes *) -val strict_mode : bool - val strict_mode_env_var : string val env_var_sep : char diff --git a/infer/src/checkers/impurity.ml b/infer/src/checkers/impurity.ml index 16f9a0e6a..9ae14e9f1 100644 --- a/infer/src/checkers/impurity.ml +++ b/infer/src/checkers/impurity.ml @@ -106,7 +106,7 @@ let get_modified_params pname post_stack pre_heap post formals = | Some (addr, _) when Typ.is_pointer typ -> ( match BaseMemory.find_opt addr pre_heap with | Some edges_pre -> - BaseMemory.Edges.fold edges_pre ~init:acc ~f:(fun acc access (addr, _) -> + BaseMemory.Edges.fold edges_pre ~init:acc ~f:(fun acc (access, (addr, _)) -> add_to_modified ~var ~access ~addr pre_heap post acc ) | None -> debug "The address is not materialized in in pre-heap." ; diff --git a/infer/src/istd/RecencyMap.ml b/infer/src/istd/RecencyMap.ml index 6c3412145..00595b676 100644 --- a/infer/src/istd/RecencyMap.ml +++ b/infer/src/istd/RecencyMap.ml @@ -13,13 +13,13 @@ module type Config = sig end module type S = sig - type t [@@deriving compare] + type t type key type value - val equal : (value -> value -> bool) -> t -> t -> bool + val equal : t -> t -> bool val pp : F.formatter -> t -> unit @@ -27,33 +27,31 @@ module type S = sig val add : key -> value -> t -> t - val find_opt : key -> t -> value option + val bindings : t -> (key * value) list - val fold : t -> init:'acc -> f:('acc -> key -> value -> 'acc) -> 'acc + val filter : t -> f:(key * value -> bool) -> t - val fold_bindings : t -> init:'acc -> f:('acc -> key * value -> 'acc) -> 'acc + val find_opt : key -> t -> value option + + val fold : t -> init:'acc -> f:('acc -> key * value -> 'acc) -> 'acc val fold_map : t -> init:'acc -> f:('acc -> value -> 'acc * value) -> 'acc * t val is_empty : t -> bool - val bindings : t -> (key * value) list - - val union : f:(key -> value -> value -> value option) -> t -> t -> t + val mem : t -> key -> bool - val merge : f:(key -> value option -> value option -> value option) -> t -> t -> t + val union_left_biased : t -> t -> t end module Make - (Key : PrettyPrintable.PrintableOrderedType) + (Key : PrettyPrintable.PrintableEquatableOrderedType) (Value : PrettyPrintable.PrintableOrderedType) (Config : Config) : S with type key = Key.t and type value = Value.t = struct type key = Key.t type value = Value.t [@@deriving compare] - module M = Caml.Map.Make (Key) - (** [new_] and [old] together represent the map. Keys may be present in both [old] and [new_], in which case bindings in [new_] take precendence. @@ -62,70 +60,109 @@ module Make [Config.limit] most recently modified elements will always be in the map, but we may temporarily have up to [2*Config.limit] elements in memory. - Using [Caml.Map]s as our backing data-structure allows us to avoid having to scan the [new_] - collection before adding an element in order to avoid adding it multiple times (which would - force us to either go over the [2*Config.limit] max number of elements or risk us losing some - of the [N] most recently-used keys because of repeating keys). *) + Adding elements requires scanning [new_] to possibly remove it, which is [O(Config.limit)]. + This is assumed small (eg, around 50). *) type t = { count_new: int (** invariant: [count_new] is [M.length new_] *) - ; new_: value M.t (** invariant: [M.length new_ ≤ Config.limit] *) - ; old: value M.t - (** invariant: [M.length old ≤ Config.limit]. Actually, the length of [old] is always + ; new_: (key, value) List.Assoc.t (** invariant: [List.length new_ ≤ Config.limit] *) + ; old: (key, value) List.Assoc.t + (** invariant: [List.length old ≤ Config.limit]. Actually, the length of [old] is always either [0] or [N], except possibly after a call to [merge]. *) } - [@@deriving compare] - - let empty = {count_new= 0; old= M.empty; new_= M.empty} - let edges map = - M.union - (fun _addr edge_new _edge_old -> (* bindings in [new_] take precedence *) Some edge_new) - map.new_ map.old + let empty = {count_new= 0; old= []; new_= []} + + let rec remove_aux front_rev key = function + | [] -> + List.rev front_rev + | (key', _) :: rest when Key.equal key key' -> + List.rev_append front_rev rest + | binding :: rest -> + remove_aux (binding :: front_rev) key rest + + + (** optimised: unroll [map.new_] a few times to try and avoid allocating unecessary intermediate + lists *) + let remove_from_new key map = + match map.new_ with + | [] -> + (false, []) + | (key', _) :: rest when Key.equal key key' -> + (true, rest) + | binding :: (key', _) :: rest when Key.equal key key' -> + (true, binding :: rest) + | binding1 :: binding2 :: (key', _) :: rest when Key.equal key key' -> + (true, binding1 :: binding2 :: rest) + | [_] | [_; _] | [_; _; _] -> + (* not found in the unrollings and no more elements *) (false, map.new_) + | binding1 :: binding2 :: binding3 :: rest -> + if List.exists rest ~f:(fun (key', _) -> Key.equal key key') then + (true, binding1 :: binding2 :: binding3 :: remove_aux [] key rest) + else (false, map.new_) (** {2 External API} *) - let is_empty map = map.count_new = 0 && M.is_empty map.old + let is_empty map = map.count_new = 0 && List.is_empty map.old let find_opt key map = - match M.find_opt key map.new_ with + match List.Assoc.find ~equal:Key.equal map.new_ key with | Some _ as value_yes -> value_yes | None -> (* Ideally we would want to put the binding in [new_] to reflect that it was recently accessed but that would imply returning the new map, which is odd for a [find_opt] function. Could be done in the future. *) - M.find_opt key map.old + List.Assoc.find ~equal:Key.equal map.old key + let mem map key = find_opt key map |> Option.is_some + let add key value map = + let removed_from_new, new_without_key = remove_from_new key map in (* compute the final count first *) - let next_count_new = if M.mem key map.new_ then map.count_new else map.count_new + 1 in + let next_count_new = if removed_from_new then map.count_new else map.count_new + 1 in (* shrink if we are about to go past the limit size of [new_] *) - if next_count_new > Config.limit then {count_new= 1; new_= M.singleton key value; old= map.new_} - else {count_new= next_count_new; new_= M.add key value map.new_; old= map.old} - + if next_count_new > Config.limit then {count_new= 1; new_= [(key, value)]; old= new_without_key} + else {count_new= next_count_new; new_= (key, value) :: new_without_key; old= map.old} - let equal equal_value map1 map2 = - let edges1 = edges map1 in - let edges2 = edges map2 in - M.equal equal_value edges1 edges2 + let equal map1 map2 = phys_equal map1 map2 let fold map ~init ~f = - let acc = M.fold (fun key value acc -> f acc key value) map.new_ init in - M.fold (fun key value acc -> if M.mem key map.new_ then acc else f acc key value) map.old acc + let acc = List.fold map.new_ ~init ~f in + (* this is quadratic time but the lists are at most [Config.limit] long, assumed small *) + List.fold map.old ~init:acc ~f:(fun acc binding -> + if List.Assoc.mem ~equal:Key.equal map.new_ (fst binding) then acc else f acc binding ) - let fold_bindings map ~init ~f = fold map ~init ~f:(fun acc k v -> f acc (k, v)) + let bindings map = fold ~init:[] ~f:(fun bindings binding -> binding :: bindings) map + + let filter map ~f = + let count_new = ref map.count_new in + let f_update_count_new binding = + let r = f binding in + if not r then decr count_new ; + r + in + let new_ = List.filter map.new_ ~f:f_update_count_new in + let old = List.filter map.old ~f in + {new_; count_new= !count_new; old} + let pp fmt map = let pp_item fmt (k, v) = F.fprintf fmt "%a -> %a" Key.pp k Value.pp v in - IContainer.pp_collection ~pp_item fmt map ~fold:fold_bindings + IContainer.pp_collection ~pp_item fmt map ~fold let map m ~f = - let new_ = M.map f m.new_ in - let old = M.mapi (fun key value -> if M.mem key m.new_ then value else f value) m.old in + let new_ = List.Assoc.map m.new_ ~f in + let old = + List.map m.old ~f:(fun ((key, value) as binding) -> + if List.Assoc.mem ~equal:Key.equal m.new_ key then binding + else + let value' = f value in + if phys_equal value value' then binding else (key, value') ) + in {m with new_; old} @@ -140,66 +177,48 @@ module Make (!acc_ref, m') - let bindings map = M.bindings (edges map) - - (** [merge ~f map1 map2] has at least all the elements [M.merge f map1.new_ map2.new_] but if - there are more than [N] then some will be arbitrarily demoted to [old]. Elements of - [M.merge f map1.old map2.old] may not make it into the final map if [old] then overflows too. - In other words: merge new first, overflow arbitrarily into old, then overflow old arbitrarily - into nothingness. *) - let merge ~f map1 map2 = - let count_new = ref 0 in - let count_old = ref 0 in - let old = ref M.empty in - let new_ = - M.merge - (fun addr val1_opt val2_opt -> - match f addr val1_opt val2_opt with - | None -> - None - | Some value as value_opt -> - incr count_new ; - if !count_new > Config.limit then ( - incr count_old ; - old := M.add addr value !old ; - None ) - else value_opt ) - map1.new_ map2.new_ + (** return [List.take (l1 @ l2) Config.limit], the length thereof, and the rest of [l1 @ l2] *) + let concat_and_spill l1 count1 l2 = + if Int.equal count1 0 then (l2, List.length l2, []) + else if Int.equal count1 Config.limit then (l1, count1, l2) + else + (* take elements from the start of [l2] to add to [l1], in order to preserve recency ordering + *) + let l2_rev, count_l2, rest_rev = + List.fold l2 ~init:([], 0, []) + ~f:(fun ((l2_rev, count_l2, rest_rev) as acc) ((key2, _) as binding) -> + if count_l2 + count1 >= Config.limit then (l2_rev, count_l2, binding :: rest_rev) + else if List.Assoc.mem ~equal:Key.equal l1 key2 then acc + else (binding :: l2_rev, count_l2 + 1, rest_rev) ) + in + (l1 @ List.rev l2_rev, count1 + count_l2, List.rev rest_rev) + + + let union_left_biased map1 map2 = + (* first take everything from [map1] *) + let new_, count_new, rest = + (* hack: reverse because [bindings] reverses the recency order (clients do not need to know + that but we care) *) + let bindings1 = List.rev (bindings map1) in + let count = List.length bindings1 in + if count <= Config.limit then (bindings1, count, []) + else + let new_, rest = List.split_n bindings1 Config.limit in + (new_, Config.limit, rest) in - (* save [old] as it was after having added just the [new_] components *) - let old_from_new_ = !old in - let old = - M.merge - (fun addr val1_opt val2_opt -> - (* Check if we have already seen [addr] when merging the [new_] maps. As bindings in - [new_] override those in [old] we don't want to add it again in case it was spilled - into [old_from_new_], but it's fine to add it again if it made it to [new_]. - - Also if [!old] is already full then we cannot add any more elements so return [None]. - *) - if !count_old >= Config.limit || M.mem addr old_from_new_ then None - else - match f addr val1_opt val2_opt with - | None -> - None - | Some _ as value_opt -> - incr count_old ; - value_opt ) - map1.old map2.old + let new_, count_new, rest = + if count_new < Config.limit then + (* still room: fill with [map2.new_] *) + concat_and_spill new_ count_new map2.new_ + else (new_, count_new, rest) + in + let new_, count_new, rest = + if count_new < Config.limit then + (* still room: fill with [map2.old] *) + concat_and_spill new_ count_new map2.old + else (new_, count_new, rest) in - {count_new= !count_new; old= M.union (fun _k _v1 _v2 -> assert false) old_from_new_ old; new_} - - - (** standard-looking implementation of [union] based on [merge] *) - let union ~f map1 map2 = - merge - ~f:(fun addr val1_opt val2_opt -> - match (val1_opt, val2_opt) with - | None, None -> - None - | (Some _ as val_), None | None, (Some _ as val_) -> - val_ - | Some val1, Some val2 -> - f addr val1 val2 ) - map1 map2 + (* Note: we haven't bothered trying to fill [rest] as much as possible because the contract is + to keep between [Config.limit] and [2*Config.limit] elements so this is good enough. *) + {new_; count_new; old= rest} end diff --git a/infer/src/istd/RecencyMap.mli b/infer/src/istd/RecencyMap.mli index a660d744c..400457485 100644 --- a/infer/src/istd/RecencyMap.mli +++ b/infer/src/istd/RecencyMap.mli @@ -14,17 +14,15 @@ module type Config = sig end (** A functional map interface where only the [N] most recently-accessed elements are guaranteed to - be persisted, similarly to an LRU cache. The map stores at most [2*N] elements. - - Operations on the map have the same asymptotic complexity as {!Map.Make}. *) + be persisted, similarly to an LRU cache. The map stores at most [2*N] elements. *) module type S = sig - type t [@@deriving compare] + type t type key type value - val equal : (value -> value -> bool) -> t -> t -> bool + val equal : t -> t -> bool val pp : F.formatter -> t -> unit @@ -32,29 +30,24 @@ module type S = sig val add : key -> value -> t -> t - val find_opt : key -> t -> value option + val bindings : t -> (key * value) list - val fold : t -> init:'acc -> f:('acc -> key -> value -> 'acc) -> 'acc + val filter : t -> f:(key * value -> bool) -> t - val fold_bindings : t -> init:'acc -> f:('acc -> key * value -> 'acc) -> 'acc - (** convenience function based on [fold] *) + val find_opt : key -> t -> value option + + val fold : t -> init:'acc -> f:('acc -> key * value -> 'acc) -> 'acc val fold_map : t -> init:'acc -> f:('acc -> value -> 'acc * value) -> 'acc * t val is_empty : t -> bool - val bindings : t -> (key * value) list - - val union : f:(key -> value -> value -> value option) -> t -> t -> t - (** same caveat as {!merge} *) + val mem : t -> key -> bool - val merge : f:(key -> value option -> value option -> value option) -> t -> t -> t - (** if the maps passed as arguments disagree over which keys are the most recent and there are - more than [2*N] different keys between the two maps then some bindings will be arbitrarily - lost *) + val union_left_biased : t -> t -> t end module Make - (Key : PrettyPrintable.PrintableOrderedType) + (Key : PrettyPrintable.PrintableEquatableOrderedType) (Value : PrettyPrintable.PrintableOrderedType) (Config : Config) : S with type key = Key.t and type value = Value.t diff --git a/infer/src/pulse/PulseBaseDomain.ml b/infer/src/pulse/PulseBaseDomain.ml index 132ac7918..3aeac7dc7 100644 --- a/infer/src/pulse/PulseBaseDomain.ml +++ b/infer/src/pulse/PulseBaseDomain.ml @@ -231,7 +231,7 @@ end = struct and visit_edges orig_var ~f rev_accesses ~edges astate visited_accum = let finish visited_accum = Continue visited_accum in - Container.fold_until edges ~fold:Memory.Edges.fold_bindings ~finish ~init:visited_accum + Container.fold_until edges ~fold:Memory.Edges.fold ~finish ~init:visited_accum ~f:(fun visited_accum (access, (address, _trace)) -> match visit_address orig_var ~f (access :: rev_accesses) astate address visited_accum with | Continue _ as cont -> diff --git a/infer/src/pulse/PulseInterproc.ml b/infer/src/pulse/PulseInterproc.ml index 61177401c..d8749b0bc 100644 --- a/infer/src/pulse/PulseInterproc.ml +++ b/infer/src/pulse/PulseInterproc.ml @@ -134,7 +134,7 @@ let rec materialize_pre_from_address callee_proc_name call_location ~pre ~addr_p | None -> Ok call_state | Some edges_pre -> - Container.fold_result ~fold:Memory.Edges.fold_bindings ~init:call_state edges_pre + Container.fold_result ~fold:Memory.Edges.fold ~init:call_state edges_pre ~f:(fun call_state (access, (addr_pre_dest, _)) -> let astate, addr_hist_dest_caller = Memory.eval_edge addr_hist_caller access call_state.astate @@ -156,25 +156,8 @@ let materialize_pre_from_actual callee_proc_name call_location ~pre ~formal ~act |> function Some result -> result | None -> Ok call_state -let is_cell_read_only ~edges_pre_opt ~cell_post:(edges_post, attrs_post) = - match edges_pre_opt with - | None -> - false - | Some edges_pre when not (Attributes.is_modified attrs_post) -> - let are_edges_equal = - BaseMemory.Edges.equal - (fun (addr_dest_pre, _) (addr_dest_post, _) -> - (* NOTE: ignores traces - - TODO: can the traces be leveraged here? maybe easy to detect writes by looking at - the post trace *) - AbstractValue.equal addr_dest_pre addr_dest_post ) - edges_pre edges_post - in - if CommandLineOption.strict_mode then assert are_edges_equal ; - are_edges_equal - | _ -> - false +let is_cell_read_only ~edges_pre_opt ~cell_post:(_, attrs_post) = + match edges_pre_opt with None -> false | Some _ -> not (Attributes.is_modified attrs_post) let materialize_pre_for_parameters callee_proc_name call_location pre_post ~formals ~actuals @@ -292,13 +275,11 @@ let delete_edges_in_callee_pre_from_caller ~addr_callee:_ ~edges_pre_opt ~addr_c | None -> old_post_edges | Some edges_pre -> - BaseMemory.Edges.merge old_post_edges edges_pre ~f:(fun _access old_opt pre_opt -> - (* TODO: should apply [call_state.subst] to [_access]! Actually, should rewrite the - whole [cell_pre] beforehand so that [Edges.merge] makes sense. *) - if Option.is_some pre_opt then - (* delete edge if some edge for the same access exists in the pre *) - None - else (* keep old edge if it exists *) old_opt ) ) + (* TODO: should apply [call_state.subst] to [_access]! Actually, should rewrite the + whole [cell_pre] beforehand so that [Edges.merge] makes sense. *) + BaseMemory.Edges.filter old_post_edges ~f:(fun (access, _) -> + (* delete edge if some edge for the same access exists in the pre *) + not (BaseMemory.Edges.mem edges_pre access) ) ) let record_post_cell callee_proc_name call_loc ~addr_callee ~edges_pre_opt @@ -327,8 +308,7 @@ let record_post_cell callee_proc_name call_loc ~addr_callee ~edges_pre_opt delete_edges_in_callee_pre_from_caller ~addr_callee ~edges_pre_opt ~addr_caller call_state in let edges_post_caller = - BaseMemory.Edges.union post_edges_minus_pre translated_post_edges ~f:(fun _ _ post_cell -> - Some post_cell ) + BaseMemory.Edges.union_left_biased translated_post_edges post_edges_minus_pre in AbductiveDomain.set_post_edges addr_caller edges_post_caller call_state.astate in @@ -354,7 +334,7 @@ let rec record_post_for_address callee_proc_name call_loc ({AbductiveDomain.pre; ~cell_callee_post call_state in Memory.Edges.fold ~init:call_state_after_post edges_post - ~f:(fun call_state _access (addr_callee_dest, _) -> + ~f:(fun call_state (_access, (addr_callee_dest, _)) -> let call_state, addr_hist_curr_dest = call_state_subst_find_or_new call_state addr_callee_dest ~default_hist_caller:(snd addr_hist_caller) diff --git a/infer/src/pulse/PulseOperations.ml b/infer/src/pulse/PulseOperations.ml index 86ff89a8f..d30bd5df0 100644 --- a/infer/src/pulse/PulseOperations.ml +++ b/infer/src/pulse/PulseOperations.ml @@ -56,8 +56,7 @@ module Closures = struct let+ () = IContainer.iter_result ~fold:Attributes.fold attributes ~f:(function | Attribute.Closure _ -> - IContainer.iter_result ~fold:Memory.Edges.fold_bindings edges - ~f:(fun (access, addr_trace) -> + IContainer.iter_result ~fold:Memory.Edges.fold edges ~f:(fun (access, addr_trace) -> if is_captured_fake_access access then let+ _ = check_addr_access action addr_trace astate in () @@ -241,7 +240,7 @@ let invalidate_array_elements location cause addr_trace astate = | None -> astate | Some edges -> - Memory.Edges.fold edges ~init:astate ~f:(fun astate access dest_addr_trace -> + Memory.Edges.fold edges ~init:astate ~f:(fun astate (access, dest_addr_trace) -> match (access : Memory.Access.t) with | ArrayAccess _ -> AddressAttributes.invalidate dest_addr_trace cause location astate