From 9b0f1ab4157f4e0dbd982fc4412b2a9f884aabdd Mon Sep 17 00:00:00 2001 From: Jules Villard Date: Thu, 29 Apr 2021 07:25:40 -0700 Subject: [PATCH] [pulse] make sure we do not leak local mutations of formals into the summary Summary: Before returning a summary, restore formals to their initial values. This gets rid of a false latent because the value in the path condition is now garbage-collected. Added a test for the tricky case of structs passed as values. Reviewed By: skcho Differential Revision: D28001229 fbshipit-source-id: 23dda5b43 --- infer/src/pulse/PulseAbductiveDomain.ml | 61 ++++++++++++++++++- infer/src/pulse/PulseBaseMemory.ml | 2 + infer/src/pulse/PulseBaseMemory.mli | 2 + infer/tests/codetoanalyze/c/pulse/issues.exp | 2 +- infer/tests/codetoanalyze/c/pulse/nullptr.c | 2 +- .../codetoanalyze/c/pulse/struct_values.c | 34 +++++++++++ 6 files changed, 98 insertions(+), 5 deletions(-) create mode 100644 infer/tests/codetoanalyze/c/pulse/struct_values.c diff --git a/infer/src/pulse/PulseAbductiveDomain.ml b/infer/src/pulse/PulseAbductiveDomain.ml index fcef3e661..8151d43d3 100644 --- a/infer/src/pulse/PulseAbductiveDomain.ml +++ b/infer/src/pulse/PulseAbductiveDomain.ml @@ -646,13 +646,64 @@ let set_post_cell (addr, history) (edges, attr_set) location astate = |> BaseAddressAttributes.add addr attr_set ) -let filter_stack_for_summary astate = +(** Inside a function, formals can be used as local variables. But when exiting the function, the + initial values of formals must be restored. This recreates the graph structure from addresses of + formals in the precondition into the post-condition, up to the first dereference. The reason for + this complexity is purely to handle formals that are struct values. For example, if + [pre=&x=vx, vx --.f-> v1 --.g-> v2 --*-> v3] and + [post = &x=vx, vx --.f-> v1 --.g-> v2 --*-> v4, vx --.h->v5] then we change [post] to + [vx --.f-> v1 --.g-> v2 --*-> v3] (i.e. [post]=[pre]). *) +let restore_formals_for_summary astate = + (* The [visited] accumulator is to avoid cycles; they are impossible in theory at the moment but + this could change. *) + let rec restore_pre_var_value visited ~is_value_visible_outside addr astate = + if AbstractValue.Set.mem addr visited then ( + L.internal_error + "Found a cycle when restoring the values of formals, how did this happen?@\n%a@\n" pp astate ; + astate ) + else + let visited = AbstractValue.Set.add addr visited in + let pre_heap = (astate.pre :> BaseDomain.t).heap in + let post_heap = (astate.post :> BaseDomain.t).heap in + match BaseMemory.find_opt addr pre_heap with + | None -> + if is_value_visible_outside then astate + else Memory.map_post_heap astate ~f:(fun post_heap -> BaseMemory.remove addr post_heap) + | Some pre_edges -> + BaseMemory.Edges.fold pre_edges ~init:astate + ~f:(fun astate (access, ((addr_dest, _) as addr_hist_dest)) -> + match access with + | Dereference -> + (* dereference: we reached the actual value and are done *) + if is_value_visible_outside && BaseMemory.has_edge addr access post_heap then + (* the changes are visible even outside of the procedure: do not restore the pre value + if the post has a value for this access too *) + astate + else + Memory.map_post_heap astate ~f:(fun post_heap -> + BaseMemory.add_edge addr access addr_hist_dest post_heap ) + | FieldAccess _ | ArrayAccess _ -> + (* inlined aggregate value: just an offset, not the actual value yet; recurse *) + Memory.map_post_heap astate ~f:(fun post_heap -> + BaseMemory.add_edge addr access addr_hist_dest post_heap ) + |> restore_pre_var_value visited ~is_value_visible_outside addr_dest + | TakeAddress -> + assert false ) + in + let restore_pre_var x ((addr, _) as addr_hist) astate = + (* the address of a variable never changes *) + let astate = Stack.add x addr_hist astate in + restore_pre_var_value AbstractValue.Set.empty + ~is_value_visible_outside:(Var.is_global x || Var.is_return x) + addr astate + in let post_stack = BaseStack.filter (fun var _ -> Var.appears_in_source_code var && not (is_local var astate)) (astate.post :> BaseDomain.t).stack in - {astate with post= PostDomain.update ~stack:post_stack astate.post} + BaseStack.fold restore_pre_var (astate.pre :> BaseDomain.t).stack + {astate with post= PostDomain.update ~stack:post_stack astate.post} let add_out_of_scope_attribute addr pvar location history heap typ = @@ -812,7 +863,11 @@ let filter_for_summary tenv astate0 = L.d_printfln "Canonicalizing...@\n" ; let* astate_before_filter = canonicalize astate0 in L.d_printfln "Canonicalized state: %a@\n" pp astate_before_filter ; - let astate = filter_stack_for_summary astate_before_filter in + (* Remove the stack from the post as it's not used: the values of formals are the same as in the + pre. Moreover, formals can be treated as local variables inside the function's body so we need + to restore their initial values at the end of the function. Removing them altogether achieves + this. *) + let astate = restore_formals_for_summary astate_before_filter in let astate = {astate with topl= PulseTopl.filter_for_summary astate.path_condition astate.topl} in let astate, live_addresses, _ = discard_unreachable astate in let+ path_condition, new_eqs = diff --git a/infer/src/pulse/PulseBaseMemory.ml b/infer/src/pulse/PulseBaseMemory.ml index 6ea76b71c..f55ec030a 100644 --- a/infer/src/pulse/PulseBaseMemory.ml +++ b/infer/src/pulse/PulseBaseMemory.ml @@ -87,6 +87,8 @@ let find_edge_opt addr access memory = Graph.find_opt addr memory >>= Edges.find_opt access +let has_edge addr access memory = find_edge_opt addr access memory |> Option.is_some + let yojson_of_t g = [%yojson_of: (AbstractValue.t * Edges.t) list] (Graph.bindings g) let is_allocated memory v = diff --git a/infer/src/pulse/PulseBaseMemory.mli b/infer/src/pulse/PulseBaseMemory.mli index 4822fdfd0..35dbbeb63 100644 --- a/infer/src/pulse/PulseBaseMemory.mli +++ b/infer/src/pulse/PulseBaseMemory.mli @@ -34,6 +34,8 @@ val add_edge : AbstractValue.t -> Access.t -> AddrTrace.t -> t -> t val find_edge_opt : AbstractValue.t -> Access.t -> t -> AddrTrace.t option +val has_edge : AbstractValue.t -> Access.t -> t -> bool + val yojson_of_t : t -> Yojson.Safe.t val is_allocated : t -> AbstractValue.t -> bool diff --git a/infer/tests/codetoanalyze/c/pulse/issues.exp b/infer/tests/codetoanalyze/c/pulse/issues.exp index 07239bb67..59d5a0ee7 100644 --- a/infer/tests/codetoanalyze/c/pulse/issues.exp +++ b/infer/tests/codetoanalyze/c/pulse/issues.exp @@ -6,8 +6,8 @@ codetoanalyze/c/pulse/interprocedural.c, if_freed_invalid_latent, 3, USE_AFTER_F codetoanalyze/c/pulse/memory_leak.c, malloc_interproc_no_free_bad, 0, MEMORY_LEAK, no_bucket, ERROR, [allocation part of the trace starts here,when calling `create_p` here,allocated by call to `malloc` (modelled),allocation part of the trace ends here,memory becomes unreachable here] codetoanalyze/c/pulse/memory_leak.c, malloc_interproc_no_free_bad2, 4, MEMORY_LEAK, no_bucket, ERROR, [allocation part of the trace starts here,allocated by call to `malloc` (modelled),allocation part of the trace ends here,memory becomes unreachable here] codetoanalyze/c/pulse/memory_leak.c, malloc_no_free_bad, 0, MEMORY_LEAK, no_bucket, ERROR, [allocation part of the trace starts here,allocated by call to `malloc` (modelled),allocation part of the trace ends here,memory becomes unreachable here] -codetoanalyze/c/pulse/nullptr.c, FN_bug_after_malloc_result_test_bad, 4, NULLPTR_DEREFERENCE, no_bucket, ERROR, [*** LATENT ***,source of the null value part of the trace starts here,assigned,is the null pointer,null pointer dereference part of the trace starts here,assigned,invalid access occurs here] codetoanalyze/c/pulse/nullptr.c, bug_after_abduction_bad, 3, NULLPTR_DEREFERENCE, no_bucket, ERROR, [source of the null value part of the trace starts here,assigned,is the null pointer,null pointer dereference part of the trace starts here,assigned,invalid access occurs here] +codetoanalyze/c/pulse/nullptr.c, bug_after_malloc_result_test_bad, 4, NULLPTR_DEREFERENCE, no_bucket, ERROR, [source of the null value part of the trace starts here,assigned,is the null pointer,null pointer dereference part of the trace starts here,assigned,invalid access occurs here] codetoanalyze/c/pulse/nullptr.c, bug_with_allocation_bad, 3, NULLPTR_DEREFERENCE, no_bucket, ERROR, [source of the null value part of the trace starts here,assigned,is the null pointer,null pointer dereference part of the trace starts here,assigned,invalid access occurs here] codetoanalyze/c/pulse/nullptr.c, malloc_no_check_bad, 2, NULLPTR_DEREFERENCE, no_bucket, ERROR, [source of the null value part of the trace starts here,allocated by call to `malloc` (modelled),is the null pointer,null pointer dereference part of the trace starts here,allocated by call to `malloc` (modelled),assigned,invalid access occurs here] codetoanalyze/c/pulse/nullptr.c, malloc_then_call_create_null_path_then_deref_unconditionally_latent, 7, NULLPTR_DEREFERENCE, no_bucket, ERROR, [*** LATENT ***,source of the null value part of the trace starts here,allocated by call to `malloc` (modelled),is the null pointer,null pointer dereference part of the trace starts here,parameter `p` of malloc_then_call_create_null_path_then_deref_unconditionally_latent,invalid access occurs here] diff --git a/infer/tests/codetoanalyze/c/pulse/nullptr.c b/infer/tests/codetoanalyze/c/pulse/nullptr.c index 083b97853..9f2774357 100644 --- a/infer/tests/codetoanalyze/c/pulse/nullptr.c +++ b/infer/tests/codetoanalyze/c/pulse/nullptr.c @@ -96,7 +96,7 @@ void call_no_return_good() { free(x); } -void FN_bug_after_malloc_result_test_bad(int* x) { +void bug_after_malloc_result_test_bad(int* x) { x = (int*)malloc(sizeof(int)); if (x) { int* y = NULL; diff --git a/infer/tests/codetoanalyze/c/pulse/struct_values.c b/infer/tests/codetoanalyze/c/pulse/struct_values.c new file mode 100644 index 000000000..196b99b33 --- /dev/null +++ b/infer/tests/codetoanalyze/c/pulse/struct_values.c @@ -0,0 +1,34 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ +#include + +struct inlined { + int x; + int y; +}; + +struct s { + struct inlined i; + int f; + int g; +}; + +// the analysis should not accidentally expose the mutations to callees +void changes_fields_locally(struct s a) { + int u = a.i.x; + a.f = 42; + a.i.y = 15; +} + +void struct_value_in_callee_ok() { + struct s b = {{11, 22}, 33, 44}; + changes_fields_locally(b); + if (b.i.x != 11 || b.i.y != 22 || b.f != 33 || b.g != 44) { + int* p = NULL; + *p = 42; + } +}