From 7207e05682222c8fe895c69f324bdfaed798ff86 Mon Sep 17 00:00:00 2001 From: Jules Villard Date: Thu, 29 Apr 2021 10:21:57 -0700 Subject: [PATCH] [pulse] discard "pruned" atoms that refer to variables outside the pre Summary: See added test: pulse sometimes insisted that an issue was latent even though the condition that made it latent could not be influenced (hence could the issue could never become manifest) by callers because it was unrelated to the pre, i.e. it came from a mutation inside the function. In these cases, we want to report the issue straight away instead of keeping it latent. Reviewed By: skcho Differential Revision: D28002725 fbshipit-source-id: ce9e6f190 --- infer/src/istd/UnionFind.ml | 16 +++-- infer/src/istd/UnionFind.mli | 12 ++-- infer/src/pulse/PulseAbductiveDomain.ml | 12 ++-- infer/src/pulse/PulseAbductiveDomain.mli | 5 +- infer/src/pulse/PulseFormula.ml | 58 ++++++++++++------- infer/src/pulse/PulseFormula.mli | 3 +- infer/src/pulse/PulseOperations.ml | 4 +- infer/src/pulse/PulsePathCondition.ml | 6 +- infer/src/pulse/PulsePathCondition.mli | 3 +- infer/src/pulse/unit/PulseFormulaTest.ml | 10 ++-- .../codetoanalyze/c/pulse/interprocedural.c | 10 ++++ infer/tests/codetoanalyze/c/pulse/issues.exp | 1 + 12 files changed, 84 insertions(+), 56 deletions(-) diff --git a/infer/src/istd/UnionFind.ml b/infer/src/istd/UnionFind.ml index 9f59e51b7..ab495c8c0 100644 --- a/infer/src/istd/UnionFind.ml +++ b/infer/src/istd/UnionFind.ml @@ -167,11 +167,10 @@ struct of_classes classes_keep - let reorient ~keep uf = - let should_keep x = XSet.mem x keep in + let reorient ~should_keep uf = fold_congruences uf ~init:XMap.empty ~f:(fun subst (repr, clazz) -> - (* map every variable in [repr::clazz] to either [repr] if [repr ∈ keep], or to the smallest - representative of [clazz] that's in [keep], if any *) + (* map every variable in [repr::clazz] to either [repr] if [should_keep repr], or to the + smallest representative of [clazz] that satisfies [should_keep], if any *) if should_keep (repr :> X.t) then XSet.fold (fun x subst -> if should_keep x then subst else XMap.add x (repr :> X.t) subst) @@ -190,14 +189,13 @@ struct clazz subst ) - let filter_not_in_closed_set ~keep uf = + let filter_morphism ~f uf = let classes = UF.Map.filter (fun x _ -> - (* here we take advantage of the fact [keep] is transitively closed already to drop - entire classes at once iff their representative is not in [keep]: if the class - contains *one* item in [keep] then *all* of its items are in [keep] *) - XSet.mem (x :> X.t) keep ) + (* here we take advantage of the fact [f] is transitively closed already to drop + entire classes at once iff their representative does not satisfy [f] *) + f (x :> X.t) ) uf.classes in (* rebuild [reprs] directly from [classes]: does path compression and garbage collection on the diff --git a/infer/src/istd/UnionFind.mli b/infer/src/istd/UnionFind.mli index 6e2f65bac..0d5259187 100644 --- a/infer/src/istd/UnionFind.mli +++ b/infer/src/istd/UnionFind.mli @@ -42,17 +42,17 @@ module Make (** fold over the equivalence classes of the relation, singling out the representative for each class *) - val reorient : keep:XSet.t -> t -> X.t XMap.t + val reorient : should_keep:(X.t -> bool) -> t -> X.t XMap.t (** the relation [x -> x'] derived from the equality relation that relates all [x], [x'] such that - [x∉keep], [x'∈keep], and [x=x'], as well as [y -> y'] when no element in the equivalence - class of [y] belongs to [keep] and [y'] is the representative of the class *) + [¬(should_keep x)], [should_keep x'], and [x=x'], as well as [y -> y'] when no element in the + equivalence class of [y] satisfies [should_keep] and [y'] is the representative of the class *) val apply_subst : _ XMap.t -> t -> t (** [apply_subst subst uf] eliminate all variables in the domain of [subst] from [uf], keeping the smallest representative not in the domain of [subst] for each class. Classes without any such elements are kept intact. *) - val filter_not_in_closed_set : keep:XSet.t -> t -> t - (** only keep items in [keep], assuming that [keep] is closed under the relation, i.e. that if an - item [x] is in [keep] then so are all the [y] such that [x=y] according to the relation *) + val filter_morphism : f:(X.t -> bool) -> t -> t + (** only keep items satisfying [f], assuming that [f] is invariant under the relation, i.e. that + if [f x] and [x=y] according to the relation then [f y] *) end diff --git a/infer/src/pulse/PulseAbductiveDomain.ml b/infer/src/pulse/PulseAbductiveDomain.ml index 8151d43d3..78e99a40b 100644 --- a/infer/src/pulse/PulseAbductiveDomain.ml +++ b/infer/src/pulse/PulseAbductiveDomain.ml @@ -614,10 +614,11 @@ let discard_unreachable ({pre; post} as astate) = PreDomain.filter_addr ~f:(fun address -> AbstractValue.Set.mem address pre_addresses) pre in let post_addresses = BaseDomain.reachable_addresses (post :> BaseDomain.t) in - let live_addresses = AbstractValue.Set.union pre_addresses post_addresses in let post_new, discard_addresses = PostDomain.filter_addr_with_discarded_addrs - ~f:(fun address -> AbstractValue.Set.mem address live_addresses) + ~f:(fun address -> + AbstractValue.Set.mem address pre_addresses || AbstractValue.Set.mem address post_addresses + ) post in (* note: we don't call {!PulsePathCondition.simplify} *) @@ -625,7 +626,7 @@ let discard_unreachable ({pre; post} as astate) = if phys_equal pre_new pre && phys_equal post_new post then astate else {astate with pre= pre_new; post= post_new} in - (astate, live_addresses, discard_addresses) + (astate, pre_addresses, post_addresses, discard_addresses) let is_local var astate = not (Var.is_return var || Stack.is_abducible astate var) @@ -869,13 +870,14 @@ let filter_for_summary tenv astate0 = this. *) let astate = restore_formals_for_summary astate_before_filter in let astate = {astate with topl= PulseTopl.filter_for_summary astate.path_condition astate.topl} in - let astate, live_addresses, _ = discard_unreachable astate in + let astate, pre_live_addresses, post_live_addresses, _ = discard_unreachable astate in let+ path_condition, new_eqs = PathCondition.simplify tenv ~get_dynamic_type: (BaseAddressAttributes.get_dynamic_type (astate_before_filter.post :> BaseDomain.t).attrs) - ~keep:live_addresses astate.path_condition + ~keep_pre:pre_live_addresses ~keep_post:post_live_addresses astate.path_condition in + let live_addresses = AbstractValue.Set.union pre_live_addresses post_live_addresses in ({astate with path_condition; topl= PulseTopl.simplify ~keep:live_addresses astate.topl}, new_eqs) diff --git a/infer/src/pulse/PulseAbductiveDomain.mli b/infer/src/pulse/PulseAbductiveDomain.mli index 91c70aa85..868cbe2d1 100644 --- a/infer/src/pulse/PulseAbductiveDomain.mli +++ b/infer/src/pulse/PulseAbductiveDomain.mli @@ -173,9 +173,10 @@ val is_local : Var.t -> t -> bool val find_post_cell_opt : AbstractValue.t -> t -> BaseDomain.cell option -val discard_unreachable : t -> t * AbstractValue.Set.t * AbstractValue.t list +val discard_unreachable : t -> t * AbstractValue.Set.t * AbstractValue.Set.t * AbstractValue.t list (** garbage collect unreachable addresses in the state to make it smaller and return the new state, - the live addresses, and the discarded addresses that used to have attributes attached *) + the live pre addresses, the live post addresses, and the discarded addresses that used to have + attributes attached *) val add_skipped_call : Procname.t -> Trace.t -> t -> t diff --git a/infer/src/pulse/PulseFormula.ml b/infer/src/pulse/PulseFormula.ml index 350881517..6b7ec4602 100644 --- a/infer/src/pulse/PulseFormula.ml +++ b/infer/src/pulse/PulseFormula.ml @@ -1704,9 +1704,10 @@ let and_fold_subst_variables phi0 ~up_to_f:phi_foreign ~init ~f:f_var = module QuantifierElimination : sig - val eliminate_vars : keep:Var.Set.t -> t -> t SatUnsat.t - (** [eliminate_vars ~keep φ] substitutes every variable [x] in [φ] with [x'] whenever [x'] is a - distinguished representative of the equivalence class of [x] in [φ] such that [x' ∈ keep] *) + val eliminate_vars : keep_pre:Var.Set.t -> keep_post:Var.Set.t -> t -> t SatUnsat.t + (** [eliminate_vars ~keep_pre ~keep_post φ] substitutes every variable [x] in [φ] with [x'] + whenever [x'] is a distinguished representative of the equivalence class of [x] in [φ] such + that [x' ∈ keep_pre ∪ keep_post] *) end = struct exception Contradiction @@ -1748,8 +1749,12 @@ end = struct ; both= subst_var_formula subst phi.both } - let eliminate_vars ~keep phi = - let subst = VarUF.reorient ~keep phi.known.var_eqs in + let eliminate_vars ~keep_pre ~keep_post phi = + let subst = + VarUF.reorient + ~should_keep:(fun x -> Var.Set.mem x keep_pre || Var.Set.mem x keep_post) + phi.known.var_eqs + in try Sat (subst_var subst phi) with Contradiction -> Unsat end @@ -1836,25 +1841,23 @@ module DeadVariables = struct (** Get rid of atoms when they contain only variables that do not appear in atoms mentioning - variables in [keep], or variables appearing in atoms together with variables in [keep], and so - on. In other words, the variables to keep are all the ones transitively reachable from - variables in [keep] in the graph connecting two variables whenever they appear together in a - same atom of the formula. *) - let eliminate ~keep phi = + variables in [keep_pre] or [keep_post], or variables appearing in atoms together with + variables in these sets, and so on. In other words, the variables to keep are all the ones + transitively reachable from variables in [keep_pre ∪ keep_post] in the graph connecting two + variables whenever they appear together in a same atom of the formula. *) + let eliminate ~keep_pre ~keep_post phi = (* We only consider [phi.both] when building the relation. Considering [phi.known] and [phi.pruned] as well could lead to us keeping more variables around, but that's not necessarily a good idea. Ignoring them means we err on the side of reporting potentially slightly more issues than we would otherwise, as some atoms in [phi.pruned] may vanish unfairly as a result. *) let var_graph = build_var_graph phi.both in - let vars_to_keep = get_reachable_from var_graph keep in + let vars_to_keep = get_reachable_from var_graph (Var.Set.union keep_pre keep_post) in L.d_printfln "Reachable vars: %a" Var.Set.pp vars_to_keep ; - (* discard atoms which have variables *not* in [vars_to_keep], which in particular is enough - to guarantee that *none* of their variables are in [vars_to_keep] thanks to transitive - closure on the graph above *) - let filter_atom atom = not (Atom.has_var_notin vars_to_keep atom) in let simplify_phi phi = - let var_eqs = VarUF.filter_not_in_closed_set ~keep:vars_to_keep phi.Formula.var_eqs in + let var_eqs = + VarUF.filter_morphism ~f:(fun x -> Var.Set.mem x vars_to_keep) phi.Formula.var_eqs + in let linear_eqs = Var.Map.filter (fun v _ -> Var.Set.mem v vars_to_keep) phi.Formula.linear_eqs in @@ -1863,24 +1866,35 @@ module DeadVariables = struct (fun t v -> Var.Set.mem v vars_to_keep && not (Term.has_var_notin vars_to_keep t)) phi.Formula.term_eqs in - let atoms = Atom.Set.filter filter_atom phi.Formula.atoms in + (* discard atoms which have variables *not* in [vars_to_keep], which in particular is enough + to guarantee that *none* of their variables are in [vars_to_keep] thanks to transitive + closure on the graph above *) + let atoms = + Atom.Set.filter (fun atom -> not (Atom.has_var_notin vars_to_keep atom)) phi.Formula.atoms + in {Formula.var_eqs; linear_eqs; term_eqs; atoms} in let known = simplify_phi phi.known in let both = simplify_phi phi.both in - let pruned = Atom.Set.filter filter_atom phi.pruned in + let pruned = + (* discard atoms that callers have no way of influencing, i.e. those that do not contain + variables related to variables in the pre *) + let vars_to_keep_pre = get_reachable_from var_graph keep_pre in + Atom.Set.filter (fun atom -> not (Atom.has_var_notin vars_to_keep_pre atom)) phi.pruned + in {known; pruned; both} end -let simplify tenv ~get_dynamic_type ~keep phi = +let simplify tenv ~get_dynamic_type ~keep_pre ~keep_post phi = let open SatUnsat.Import in let* phi, new_eqs = normalize tenv ~get_dynamic_type phi in - L.d_printfln_escaped "@[Simplifying %a@,wrt %a@]" pp phi Var.Set.pp keep ; + L.d_printfln_escaped "@[Simplifying %a@,wrt %a (pre) and %a (post)@]" pp phi Var.Set.pp keep_pre + Var.Set.pp keep_post ; (* get rid of as many variables as possible *) - let+ phi = QuantifierElimination.eliminate_vars ~keep phi in + let+ phi = QuantifierElimination.eliminate_vars ~keep_pre ~keep_post phi in (* TODO: doing [QuantifierElimination.eliminate_vars; DeadVariables.eliminate] a few times may eliminate even more variables *) - let phi = DeadVariables.eliminate ~keep phi in + let phi = DeadVariables.eliminate ~keep_pre ~keep_post phi in (phi, new_eqs) diff --git a/infer/src/pulse/PulseFormula.mli b/infer/src/pulse/PulseFormula.mli index 04657888c..d96a6e5bf 100644 --- a/infer/src/pulse/PulseFormula.mli +++ b/infer/src/pulse/PulseFormula.mli @@ -59,7 +59,8 @@ val normalize : Tenv.t -> get_dynamic_type:(Var.t -> Typ.t option) -> t -> (t * val simplify : Tenv.t -> get_dynamic_type:(Var.t -> Typ.t option) - -> keep:Var.Set.t + -> keep_pre:Var.Set.t + -> keep_post:Var.Set.t -> t -> (t * new_eqs) SatUnsat.t diff --git a/infer/src/pulse/PulseOperations.ml b/infer/src/pulse/PulseOperations.ml index 91990eb7a..07c57bf35 100644 --- a/infer/src/pulse/PulseOperations.ml +++ b/infer/src/pulse/PulseOperations.ml @@ -516,7 +516,7 @@ let get_dynamic_type_unreachable_values vars astate = astate None in let astate' = Stack.remove_vars vars astate in - let _, _, unreachable_addrs = AbductiveDomain.discard_unreachable astate' in + let _, _, _, unreachable_addrs = AbductiveDomain.discard_unreachable astate' in let res = List.fold unreachable_addrs ~init:[] ~f:(fun res addr -> (let open IOption.Let_syntax in @@ -552,7 +552,7 @@ let remove_vars tenv vars location orig_astate = let astate' = Stack.remove_vars vars astate in if phys_equal astate' astate then Ok astate else - let astate, _, unreachable_addrs = AbductiveDomain.discard_unreachable astate' in + let astate, _, _, unreachable_addrs = AbductiveDomain.discard_unreachable astate' in let+ () = check_memory_leak_unreachable unreachable_addrs location orig_astate in astate diff --git a/infer/src/pulse/PulsePathCondition.ml b/infer/src/pulse/PulsePathCondition.ml index dce47f36a..4270c513e 100644 --- a/infer/src/pulse/PulsePathCondition.ml +++ b/infer/src/pulse/PulsePathCondition.ml @@ -107,11 +107,11 @@ let and_eq_vars v1 v2 phi = ({is_unsat; bo_itvs; citvs; formula}, new_eqs) -let simplify tenv ~keep ~get_dynamic_type phi = +let simplify tenv ~keep_pre ~keep_post ~get_dynamic_type phi = let result = let+ {is_unsat; bo_itvs; citvs; formula} = phi in - let+| formula, new_eqs = Formula.simplify tenv ~keep ~get_dynamic_type formula in - let is_in_keep v _ = AbstractValue.Set.mem v keep in + let+| formula, new_eqs = Formula.simplify tenv ~keep_pre ~keep_post ~get_dynamic_type formula in + let is_in_keep v _ = AbstractValue.Set.mem v keep_pre || AbstractValue.Set.mem v keep_post in ( { is_unsat ; bo_itvs= BoItvs.filter is_in_keep bo_itvs ; citvs= CItvs.filter is_in_keep citvs diff --git a/infer/src/pulse/PulsePathCondition.mli b/infer/src/pulse/PulsePathCondition.mli index e098c0b9d..aca1d7280 100644 --- a/infer/src/pulse/PulsePathCondition.mli +++ b/infer/src/pulse/PulsePathCondition.mli @@ -36,7 +36,8 @@ val and_eq_vars : AbstractValue.t -> AbstractValue.t -> t -> t * new_eqs val simplify : Tenv.t - -> keep:AbstractValue.Set.t + -> keep_pre:AbstractValue.Set.t + -> keep_post:AbstractValue.Set.t -> get_dynamic_type:(AbstractValue.t -> Typ.t option) -> t -> (t * new_eqs) SatUnsat.t diff --git a/infer/src/pulse/unit/PulseFormulaTest.ml b/infer/src/pulse/unit/PulseFormulaTest.ml index 5618ceaa2..4d477212f 100644 --- a/infer/src/pulse/unit/PulseFormulaTest.ml +++ b/infer/src/pulse/unit/PulseFormulaTest.ml @@ -129,11 +129,11 @@ let dummy_get_dynamic_type _ = None let normalize phi = test ~f:(normalize dummy_tenv ~get_dynamic_type:dummy_get_dynamic_type) phi let simplify ~keep phi = - test - ~f: - (simplify dummy_tenv ~get_dynamic_type:dummy_get_dynamic_type - ~keep:(AbstractValue.Set.of_list keep)) - phi + test phi ~f:(fun phi -> + (* keep variables as if they were in the pre-condition, which makes [simplify] keeps the most + facts (eg atoms in [pruned] may be discarded if their variables are not in the pre) *) + simplify dummy_tenv ~get_dynamic_type:dummy_get_dynamic_type + ~keep_pre:(AbstractValue.Set.of_list keep) ~keep_post:AbstractValue.Set.empty phi ) let%test_module "normalization" = diff --git a/infer/tests/codetoanalyze/c/pulse/interprocedural.c b/infer/tests/codetoanalyze/c/pulse/interprocedural.c index a1f6ab68e..0c40c84ce 100644 --- a/infer/tests/codetoanalyze/c/pulse/interprocedural.c +++ b/infer/tests/codetoanalyze/c/pulse/interprocedural.c @@ -20,3 +20,13 @@ void call_if_freed_invalid_latent(int x) { } void call_if_freed_invalid2_bad() { call_if_freed_invalid_latent(7); } + +// make sure this isn't classified as latent as callers have no control over the +// value of x being tested in the body of the function +void test_modified_value_then_error_bad(int* x) { + *x = random(); + if (*x == 5) { + int* p = NULL; + *p = 42; + } +} diff --git a/infer/tests/codetoanalyze/c/pulse/issues.exp b/infer/tests/codetoanalyze/c/pulse/issues.exp index 59d5a0ee7..74515e5c0 100644 --- a/infer/tests/codetoanalyze/c/pulse/issues.exp +++ b/infer/tests/codetoanalyze/c/pulse/issues.exp @@ -3,6 +3,7 @@ codetoanalyze/c/pulse/frontend.c, assign_paren_bad, 8, NULLPTR_DEREFERENCE, no_b codetoanalyze/c/pulse/interprocedural.c, call_if_freed_invalid2_bad, 0, NULLPTR_DEREFERENCE, no_bucket, ERROR, [calling context starts here,in call to `call_if_freed_invalid_latent`,source of the null value part of the trace starts here,assigned,is the null pointer,null pointer dereference part of the trace starts here,when calling `if_freed_invalid_latent` here,parameter `y` of if_freed_invalid_latent,invalid access occurs here] codetoanalyze/c/pulse/interprocedural.c, call_if_freed_invalid_latent, 2, NULLPTR_DEREFERENCE, no_bucket, ERROR, [*** LATENT ***,source of the null value part of the trace starts here,assigned,is the null pointer,null pointer dereference part of the trace starts here,when calling `if_freed_invalid_latent` here,parameter `y` of if_freed_invalid_latent,invalid access occurs here] codetoanalyze/c/pulse/interprocedural.c, if_freed_invalid_latent, 3, USE_AFTER_FREE, no_bucket, ERROR, [*** LATENT ***,invalidation part of the trace starts here,parameter `y` of if_freed_invalid_latent,was invalidated by call to `free()`,use-after-lifetime part of the trace starts here,parameter `y` of if_freed_invalid_latent,invalid access occurs here] +codetoanalyze/c/pulse/interprocedural.c, test_modified_value_then_error_bad, 4, NULLPTR_DEREFERENCE, no_bucket, ERROR, [source of the null value part of the trace starts here,assigned,is the null pointer,null pointer dereference part of the trace starts here,assigned,invalid access occurs here] codetoanalyze/c/pulse/memory_leak.c, malloc_interproc_no_free_bad, 0, MEMORY_LEAK, no_bucket, ERROR, [allocation part of the trace starts here,when calling `create_p` here,allocated by call to `malloc` (modelled),allocation part of the trace ends here,memory becomes unreachable here] codetoanalyze/c/pulse/memory_leak.c, malloc_interproc_no_free_bad2, 4, MEMORY_LEAK, no_bucket, ERROR, [allocation part of the trace starts here,allocated by call to `malloc` (modelled),allocation part of the trace ends here,memory becomes unreachable here] codetoanalyze/c/pulse/memory_leak.c, malloc_no_free_bad, 0, MEMORY_LEAK, no_bucket, ERROR, [allocation part of the trace starts here,allocated by call to `malloc` (modelled),allocation part of the trace ends here,memory becomes unreachable here]