From ab2813e355bc5bbfa36920609f335ca0042aa687 Mon Sep 17 00:00:00 2001 From: Jules Villard Date: Fri, 4 Dec 2020 07:30:05 -0800 Subject: [PATCH] [pulse] canonicalize wrt equality relation Summary: When we know `v1 = v2`, canonicalize `v2 -> v1 * v3 -> v2` to `v1 -> v1 * v3 -> v1`. Only do this when creating summaries (and so also when reporting errors) for now. This only takes into account the equality relation between variables for now. It needs to be extended to take into account other ways variables can be equal, eg when two variables are equal to the same constant or the same term. Reviewed By: skcho Differential Revision: D25092158 fbshipit-source-id: 9e589b631 --- infer/src/checkers/impurity.ml | 20 ++++--- infer/src/pulse/PulseAbductiveDomain.ml | 79 +++++++++++++++++-------- infer/src/pulse/PulseBaseMemory.ml | 54 +++++++++++++++-- infer/src/pulse/PulseBaseMemory.mli | 8 +++ infer/src/pulse/PulseBaseStack.ml | 30 +++++++--- infer/src/pulse/PulseBaseStack.mli | 4 ++ infer/src/pulse/PulseFormula.ml | 7 ++- infer/src/pulse/PulseFormula.mli | 3 + infer/src/pulse/PulsePathCondition.ml | 2 + infer/src/pulse/PulsePathCondition.mli | 3 + 10 files changed, 163 insertions(+), 47 deletions(-) diff --git a/infer/src/checkers/impurity.ml b/infer/src/checkers/impurity.ml index da3b06df5..2aad81d8c 100644 --- a/infer/src/checkers/impurity.ml +++ b/infer/src/checkers/impurity.ml @@ -67,7 +67,7 @@ let add_invalid_and_modified ~pvar ~access ~check_empty attrs access_list acc = invalid_and_modified ) -let add_to_modified ~pvar ~access ~addr pre_heap post modified_vars = +let add_to_modified pname ~pvar ~access ~addr pre_heap post modified_vars = let rec aux (access_list, modified_vars) ~addr_to_explore ~visited = match addr_to_explore with | [] -> @@ -83,8 +83,12 @@ let add_to_modified ~pvar ~access ~addr pre_heap post modified_vars = | None, None -> aux (access_list, modified_vars) ~addr_to_explore ~visited | Some _, None -> - L.(die InternalError) - "It is unexpected to have an address which has a binding in pre but not in post!" + L.die InternalError + "It is unexpected to have an address which has a binding in pre but not in post!@\n\ + %a is in the pre but not the post of the call to %a@\n\ + callee heap pre: @[%a@]@\n\ + callee post: @[%a@]@\n" + AbstractValue.pp addr Procname.pp pname BaseMemory.pp pre_heap BaseDomain.pp post | None, Some (_, attrs_post) -> aux (add_invalid_and_modified ~pvar ~access ~check_empty:false attrs_post access_list @@ -107,7 +111,7 @@ let add_to_modified ~pvar ~access ~addr pre_heap post modified_vars = addr_list | None -> aux - (add_invalid_and_modified ~pvar ~access ~check_empty:true attrs_post access_list + (add_invalid_and_modified ~pvar ~access ~check_empty:false attrs_post access_list modified_vars) ~addr_to_explore ~visited ) ) in @@ -122,7 +126,7 @@ let get_modified_params pname post_stack pre_heap post formals = match BaseMemory.find_opt addr pre_heap with | Some edges_pre -> BaseMemory.Edges.fold edges_pre ~init:acc ~f:(fun acc (access, (addr, _)) -> - add_to_modified ~pvar ~access ~addr pre_heap post acc ) + add_to_modified pname ~pvar ~access ~addr pre_heap post acc ) | None -> debug "The address is not materialized in in pre-heap." ; acc ) @@ -130,14 +134,14 @@ let get_modified_params pname post_stack pre_heap post formals = acc ) -let get_modified_globals pre_heap post post_stack = +let get_modified_globals pname pre_heap post post_stack = BaseStack.fold (fun var (addr, _) modified_globals -> if Var.is_global var then (* since global vars are rooted in the stack, we don't have access here but we still want to pick up changes to globals. *) - add_to_modified + add_to_modified pname ~pvar:(Option.value_exn (Var.get_pvar var)) ~access:HilExp.Access.Dereference ~addr pre_heap post modified_globals else modified_globals ) @@ -168,7 +172,7 @@ let extract_impurity tenv pname formals (exec_state : ExecutionDomain.t) : Impur let post = AbductiveDomain.get_post astate in let post_stack = post.BaseDomain.stack in let modified_params = get_modified_params pname post_stack pre_heap post formals in - let modified_globals = get_modified_globals pre_heap post post_stack in + let modified_globals = get_modified_globals pname pre_heap post post_stack in let skipped_calls = SkippedCalls.filter (fun proc_name _ -> diff --git a/infer/src/pulse/PulseAbductiveDomain.ml b/infer/src/pulse/PulseAbductiveDomain.ml index cb86c2c5c..8e32f6d65 100644 --- a/infer/src/pulse/PulseAbductiveDomain.ml +++ b/infer/src/pulse/PulseAbductiveDomain.ml @@ -387,22 +387,13 @@ let set_post_cell (addr, history) (edges, attr_set) location astate = |> BaseAddressAttributes.add addr attr_set ) -let filter_for_summary astate = +let filter_stack_for_summary astate = let post_stack = BaseStack.filter (fun var _ -> Var.appears_in_source_code var && not (is_local var astate)) (astate.post :> BaseDomain.t).stack in - (* deregister empty edges *) - let deregister_empty heap = - BaseMemory.filter (fun _addr edges -> not (BaseMemory.Edges.is_empty edges)) heap - in - let pre_heap = deregister_empty (astate.pre :> base_domain).heap in - let post_heap = deregister_empty (astate.post :> base_domain).heap in - { astate with - pre= PreDomain.update astate.pre ~heap:pre_heap - ; post= PostDomain.update ~stack:post_stack ~heap:post_heap astate.post - ; topl= PulseTopl.filter_for_summary astate.path_condition astate.topl } + {astate with post= PostDomain.update ~stack:post_stack astate.post} let add_out_of_scope_attribute addr pvar location history heap typ = @@ -440,10 +431,6 @@ let invalidate_locals pdesc astate : t = type summary = t [@@deriving yojson_of] let is_allocated {post; pre} v = - let is_heap_allocated base_mem v = - BaseMemory.find_opt v base_mem.heap - |> Option.exists ~f:(fun edges -> not (BaseMemory.Edges.is_empty edges)) - in let is_pvar = function Var.ProgramVar _ -> true | Var.LogicalVar _ -> false in let is_stack_allocated base_mem v = BaseStack.exists @@ -451,8 +438,8 @@ let is_allocated {post; pre} v = base_mem.stack in (* OPTIM: the post stack contains at least the pre stack so no need to check both *) - is_heap_allocated (post :> BaseDomain.t) v - || is_heap_allocated (pre :> BaseDomain.t) v + BaseMemory.is_allocated (post :> BaseDomain.t).heap v + || BaseMemory.is_allocated (pre :> BaseDomain.t).heap v || is_stack_allocated (post :> BaseDomain.t) v @@ -474,18 +461,58 @@ let incorporate_new_eqs astate new_eqs = Continue () ) -let summary_of_post pdesc astate = +(** it's a good idea to normalize the path condition before calling this function *) +let canonicalize astate = let open SatUnsat.Import in - let astate = filter_for_summary astate in - let astate, live_addresses, _ = discard_unreachable astate in - let* path_condition, new_eqs = - PathCondition.simplify ~keep:live_addresses astate.path_condition + let get_var_repr v = PathCondition.get_var_repr astate.path_condition v in + let canonicalize_base stack heap = + let stack' = BaseStack.canonicalize ~get_var_repr stack in + (* note: this step also de-registers addresses pointing to empty edges *) + let+ heap' = BaseMemory.canonicalize ~get_var_repr heap in + (stack', heap') in - let+ () = incorporate_new_eqs astate new_eqs in - let astate = - {astate with path_condition; topl= PulseTopl.simplify ~keep:live_addresses astate.topl} + (* need different functions for pre and post to appease the type system *) + let canonicalize_pre (pre : PreDomain.t) = + let+ stack', heap' = canonicalize_base (pre :> BaseDomain.t).stack (pre :> BaseDomain.t).heap in + PreDomain.update ~stack:stack' ~heap:heap' pre in - invalidate_locals pdesc astate + let canonicalize_post (post : PostDomain.t) = + let+ stack', heap' = + canonicalize_base (post :> BaseDomain.t).stack (post :> BaseDomain.t).heap + in + PostDomain.update ~stack:stack' ~heap:heap' post + in + let* pre = canonicalize_pre astate.pre in + let+ post = canonicalize_post astate.post in + {astate with pre; post} + + +let summary_of_post pdesc astate = + let open SatUnsat.Import in + (* NOTE: we normalize (to strengthen the equality relation used by canonicalization) then + canonicalize *before* garbage collecting unused addresses in case we detect any last-minute + contradictions about addresses we are about to garbage collect *) + let path_condition, is_unsat, new_eqs = PathCondition.is_unsat_expensive astate.path_condition in + if is_unsat then Unsat + else + let astate = {astate with path_condition} in + let* () = incorporate_new_eqs astate new_eqs in + L.d_printfln "Canonicalizing...@\n" ; + let* astate = canonicalize astate in + L.d_printfln "Canonicalized state: %a@\n" pp astate ; + let astate = filter_stack_for_summary astate in + let astate = + {astate with topl= PulseTopl.filter_for_summary astate.path_condition astate.topl} + in + let astate, live_addresses, _ = discard_unreachable astate in + let* path_condition, new_eqs = + PathCondition.simplify ~keep:live_addresses astate.path_condition + in + let+ () = incorporate_new_eqs astate new_eqs in + let astate = + {astate with path_condition; topl= PulseTopl.simplify ~keep:live_addresses astate.topl} + in + invalidate_locals pdesc astate let get_pre {pre} = (pre :> BaseDomain.t) diff --git a/infer/src/pulse/PulseBaseMemory.ml b/infer/src/pulse/PulseBaseMemory.ml index e0a55a537..9c1b8cb26 100644 --- a/infer/src/pulse/PulseBaseMemory.ml +++ b/infer/src/pulse/PulseBaseMemory.ml @@ -6,6 +6,7 @@ *) open! IStd +module L = Logging open PulseBasicInterface (* {3 Heap domain } *) @@ -16,6 +17,13 @@ module Access = struct let equal = [%compare.equal: t] let pp = HilExp.Access.pp AbstractValue.pp + + let canonicalize ~get_var_repr (access : t) = + match access with + | ArrayAccess (typ, addr) -> + HilExp.Access.ArrayAccess (typ, get_var_repr addr) + | FieldAccess _ | TakeAddress | Dereference -> + access end module AddrTrace = struct @@ -28,12 +36,22 @@ module AddrTrace = struct end module Edges = struct - include RecencyMap.Make (Access) (AddrTrace) - (struct - let limit = Config.pulse_recency_limit - end) + module M = + RecencyMap.Make (Access) (AddrTrace) + (struct + let limit = Config.pulse_recency_limit + end) + + let yojson_of_t edges = [%yojson_of: (Access.t * AddrTrace.t) list] (M.bindings edges) + + let canonicalize ~get_var_repr edges = + M.fold ~init:M.empty edges ~f:(fun edges' (access, (addr, hist)) -> + let addr' = get_var_repr addr in + let access' = Access.canonicalize ~get_var_repr access in + M.add access' (addr', hist) edges' ) - let yojson_of_t edges = [%yojson_of: (Access.t * AddrTrace.t) list] (bindings edges) + + include M end module Graph = PrettyPrintable.MakePPMonoMap (AbstractValue) (Edges) @@ -55,4 +73,30 @@ let find_edge_opt addr access memory = let yojson_of_t g = [%yojson_of: (AbstractValue.t * Edges.t) list] (Graph.bindings g) +let is_allocated memory v = + Graph.find_opt v memory |> Option.exists ~f:(fun edges -> not (Edges.is_empty edges)) + + +let canonicalize ~get_var_repr memory = + let exception AliasingContradiction in + try + let memory = + Graph.fold + (fun addr edges g -> + if Edges.is_empty edges then g + else + let addr' = get_var_repr addr in + if is_allocated g addr' then ( + L.d_printfln "CONTRADICTION: %a = %a, which is already allocated in %a@\n" + AbstractValue.pp addr AbstractValue.pp addr' Graph.pp g ; + raise_notrace AliasingContradiction ) + else + let edges' = Edges.canonicalize ~get_var_repr edges in + Graph.add addr' edges' g ) + memory Graph.empty + in + Sat memory + with AliasingContradiction -> Unsat + + include Graph diff --git a/infer/src/pulse/PulseBaseMemory.mli b/infer/src/pulse/PulseBaseMemory.mli index 17a8e0c96..b17bec9bb 100644 --- a/infer/src/pulse/PulseBaseMemory.mli +++ b/infer/src/pulse/PulseBaseMemory.mli @@ -29,3 +29,11 @@ val add_edge : AbstractValue.t -> Access.t -> AddrTrace.t -> t -> t val find_edge_opt : AbstractValue.t -> Access.t -> t -> AddrTrace.t option val yojson_of_t : t -> Yojson.Safe.t + +val is_allocated : t -> AbstractValue.t -> bool +(** whether the address has a non-empty set of edges *) + +val canonicalize : get_var_repr:(AbstractValue.t -> AbstractValue.t) -> t -> t SatUnsat.t +(** replace each address in the heap by its canonical representative according to the current + equality relation, represented by [get_var_repr]; also remove addresses that point to empty + edges *) diff --git a/infer/src/pulse/PulseBaseStack.ml b/infer/src/pulse/PulseBaseStack.ml index 94248c977..ac2215607 100644 --- a/infer/src/pulse/PulseBaseStack.ml +++ b/infer/src/pulse/PulseBaseStack.ml @@ -27,15 +27,31 @@ module AddrHistPair = struct else AbstractValue.pp f (fst addr_trace) end -include PrettyPrintable.MakePPMonoMap (VarAddress) (AddrHistPair) +module M = PrettyPrintable.MakePPMonoMap (VarAddress) (AddrHistPair) + +let yojson_of_t m = [%yojson_of: (VarAddress.t * AddrHistPair.t) list] (M.bindings m) + +let canonicalize ~get_var_repr stack = + let changed = ref false in + let stack' = + M.map + (fun ((addr, hist) as addr_hist) -> + let addr' = get_var_repr addr in + if phys_equal addr addr' then addr_hist + else ( + changed := true ; + (addr', hist) ) ) + stack + in + if !changed then stack' else stack + + +include M + +let compare = M.compare AddrHistPair.compare let pp fmt m = let pp_item fmt (var_address, v) = F.fprintf fmt "%a=%a" VarAddress.pp var_address AddrHistPair.pp v in - PrettyPrintable.pp_collection ~pp_item fmt (bindings m) - - -let compare = compare AddrHistPair.compare - -let yojson_of_t m = [%yojson_of: (VarAddress.t * AddrHistPair.t) list] (bindings m) + PrettyPrintable.pp_collection ~pp_item fmt (M.bindings m) diff --git a/infer/src/pulse/PulseBaseStack.mli b/infer/src/pulse/PulseBaseStack.mli index 516f02b35..58a0bfa2d 100644 --- a/infer/src/pulse/PulseBaseStack.mli +++ b/infer/src/pulse/PulseBaseStack.mli @@ -18,3 +18,7 @@ val compare : t -> t -> int [@@warning "-32"] val pp : F.formatter -> t -> unit val yojson_of_t : t -> Yojson.Safe.t + +val canonicalize : get_var_repr:(AbstractValue.t -> AbstractValue.t) -> t -> t +(** replace each address in the stack by its canonical representative according to the current + equality relation, represented by [get_var_repr] *) diff --git a/infer/src/pulse/PulseFormula.ml b/infer/src/pulse/PulseFormula.ml index 79e914027..c542d63e9 100644 --- a/infer/src/pulse/PulseFormula.ml +++ b/infer/src/pulse/PulseFormula.ml @@ -1020,6 +1020,8 @@ module Formula = struct val normalize : t -> (t * new_eqs) SatUnsat.t val implies_atom : t -> Atom.t -> bool + + val get_repr : t -> Var.t -> VarUF.repr end = struct (* Use the monadic notations when normalizing formulas. *) open SatUnsat.Import @@ -1190,7 +1192,7 @@ module Formula = struct let normalize_atom phi (atom : Atom.t) = let normalize_term phi t = Term.subst_variables t ~f:(fun v -> - let v_canon = (VarUF.find phi.var_eqs v :> Var.t) in + let v_canon = (get_repr phi v :> Var.t) in match Var.Map.find_opt v_canon phi.linear_eqs with | None -> VarSubst v_canon @@ -1533,3 +1535,6 @@ let as_int phi v = (** test if [phi.known ⊢ phi.pruned] *) let has_no_assumptions phi = Atom.Set.for_all (fun atom -> Formula.Normalizer.implies_atom phi.known atom) phi.pruned + + +let get_var_repr phi v = (Formula.Normalizer.get_repr phi.both v :> Var.t) diff --git a/infer/src/pulse/PulseFormula.mli b/infer/src/pulse/PulseFormula.mli index 813608f5d..46ac49503 100644 --- a/infer/src/pulse/PulseFormula.mli +++ b/infer/src/pulse/PulseFormula.mli @@ -68,3 +68,6 @@ val is_known_zero : t -> Var.t -> bool val as_int : t -> Var.t -> int option val has_no_assumptions : t -> bool + +val get_var_repr : t -> Var.t -> Var.t +(** get the canonical representative for the variable according to the equality relation *) diff --git a/infer/src/pulse/PulsePathCondition.ml b/infer/src/pulse/PulsePathCondition.ml index 0e5923c66..6011a349c 100644 --- a/infer/src/pulse/PulsePathCondition.ml +++ b/infer/src/pulse/PulsePathCondition.ml @@ -423,3 +423,5 @@ let as_int phi v = let has_no_assumptions phi = Formula.has_no_assumptions phi.formula + +let get_var_repr phi v = Formula.get_var_repr phi.formula v diff --git a/infer/src/pulse/PulsePathCondition.mli b/infer/src/pulse/PulsePathCondition.mli index ea1ea8702..b070564ea 100644 --- a/infer/src/pulse/PulsePathCondition.mli +++ b/infer/src/pulse/PulsePathCondition.mli @@ -75,3 +75,6 @@ val as_int : t -> AbstractValue.t -> int option val has_no_assumptions : t -> bool (** whether the current path is independent of any calling context *) + +val get_var_repr : t -> AbstractValue.t -> AbstractValue.t +(** get the canonical representative for the variable according to the equality relation *)