From 497720386e3c986ad0442bdea05b1c1d1bf5988c Mon Sep 17 00:00:00 2001 From: Jules Villard Date: Mon, 22 Oct 2018 07:58:59 -0700 Subject: [PATCH] [pulse] join of memory graphs Summary: Instead of the non-sensical piecewise join we had until now write a proper one. Hopefully the comments explain what it does. Main one: ``` (* high-level idea: maintain some union-find data structure to identify locations in one heap with locations in the other heap. Build the initial join state as follows: - equate all locations that correspond to identical variables in both stacks, eg joining stacks {x=1} and {x=2} adds "1=2" to the unification. - add all addresses reachable from stack variables to the join state heap This gives us an abstract state that is the union of both abstract states, but more states can still be made equal. For instance, if 1 points to 3 in the first heap and 2 points to 4 in the second heap and we deduced "1 = 2" from the stacks already (as in the example just above) then we can deduce "3 = 4". Proceed in this fashion until no more equalities are discovered, and return the abstract state where a canonical representative has been chosen consistently for each equivalence class (this is what the union-find data structure gives us). *) ``` Reviewed By: mbouaziz Differential Revision: D10483978 fbshipit-source-id: f6ffd7528 --- infer/src/checkers/PulseDomain.ml | 291 +++++++++++++++--- infer/src/checkers/PulseDomain.mli | 29 +- .../tests/codetoanalyze/cpp/pulse/issues.exp | 4 + infer/tests/codetoanalyze/cpp/pulse/join.cpp | 28 ++ 4 files changed, 274 insertions(+), 78 deletions(-) create mode 100644 infer/tests/codetoanalyze/cpp/pulse/join.cpp diff --git a/infer/src/checkers/PulseDomain.ml b/infer/src/checkers/PulseDomain.ml index f4ad1da85..1449b1b44 100644 --- a/infer/src/checkers/PulseDomain.ml +++ b/infer/src/checkers/PulseDomain.ml @@ -9,6 +9,8 @@ module F = Format module L = Logging open Result.Monad_infix +(* {2 Abstract domain description } *) + (** An abstract address in memory. *) module AbstractAddress : sig type t = private int [@@deriving compare] @@ -33,44 +35,63 @@ end = struct let pp = F.pp_print_int end -module AbstractAddressDomain : AbstractDomain.S with type astate = AbstractAddress.t = struct - type astate = AbstractAddress.t +module Access = struct + type t = AccessPath.access [@@deriving compare] - let ( <= ) ~lhs ~rhs = AbstractAddress.equal lhs rhs + let pp = AccessPath.pp_access +end - let join l1 l2 = - if AbstractAddress.equal l1 l2 then l1 else (* TODO: scary *) AbstractAddress.mk_fresh () +module Memory = struct + module Edges = PrettyPrintable.MakePPMap (Access) + module Graph = PrettyPrintable.MakePPMap (AbstractAddress) + (* {3 Monomorphic {!PPMap} interface as needed } *) - let widen ~prev ~next ~num_iters:_ = join prev next + type t = AbstractAddress.t Edges.t Graph.t - let pp = AbstractAddress.pp -end + let empty = Graph.empty -module Access = struct - type t = AccessPath.access [@@deriving compare] + let find_opt = Graph.find_opt - let pp = AccessPath.pp_access -end + let for_all = Graph.for_all + + let fold = Graph.fold -module MemoryEdges = AbstractDomain.InvertedMap (Access) (AbstractAddressDomain) + let pp = Graph.pp ~pp_value:(Edges.pp ~pp_value:AbstractAddress.pp) -module MemoryDomain = struct - include AbstractDomain.InvertedMap (AbstractAddress) (MemoryEdges) + (* {3 Helper functions to traverse the two maps at once } *) let add_edge addr_src access addr_end memory = let edges = - match find_opt addr_src memory with Some edges -> edges | None -> MemoryEdges.empty + match Graph.find_opt addr_src memory with Some edges -> edges | None -> Edges.empty in - add addr_src (MemoryEdges.add access addr_end edges) memory + Graph.add addr_src (Edges.add access addr_end edges) memory let find_edge_opt addr access memory = let open Option.Monad_infix in - find_opt addr memory >>= MemoryEdges.find_opt access + Graph.find_opt addr memory >>= Edges.find_opt access end -module AliasingDomain = AbstractDomain.InvertedMap (Var) (AbstractAddressDomain) +(** to be used as maps values *) +module AbstractAddressDomain_JoinIsMin : AbstractDomain.S with type astate = AbstractAddress.t = +struct + type astate = AbstractAddress.t + + let ( <= ) ~lhs ~rhs = AbstractAddress.equal lhs rhs + + let join l1 l2 = min l1 l2 + + let widen ~prev ~next ~num_iters:_ = join prev next + + let pp = AbstractAddress.pp +end + +(* It so happens that the join we want on stacks is this followed by normalization wrt the + unification found between abstract locations, so it's convenient to define stacks as elements of + this domain. Do not use the domain operations outside of {!Domain} though as they are mostly + meaningless on their own. *) +module AliasingDomain = AbstractDomain.Map (Var) (AbstractAddressDomain_JoinIsMin) type actor = {access_expr: AccessExpression.t; location: Location.t} @@ -78,6 +99,7 @@ let pp_actor f {access_expr; location} = F.fprintf f "%a@%a" AccessExpression.pp access_expr Location.pp location +(** Locations known to be invalid for a reason described by an {!actor}. *) module type InvalidAddressesDomain = sig include AbstractDomain.S @@ -86,6 +108,10 @@ module type InvalidAddressesDomain = sig val add : AbstractAddress.t -> actor -> astate -> astate val get_invalidation : AbstractAddress.t -> astate -> actor option + (** return [Some actor] if the location was invalid by [actor], [None] if it is valid *) + + val map : (AbstractAddress.t -> AbstractAddress.t) -> astate -> astate + (** translate invalid addresses according to the mapping *) end module InvalidAddressesDomain : InvalidAddressesDomain = struct @@ -94,6 +120,7 @@ module InvalidAddressesDomain : InvalidAddressesDomain = struct let join actor _ = actor + (* actors do not participate in the comparison of sets of invalid locations *) let ( <= ) ~lhs:_ ~rhs:_ = true let widen ~prev ~next:_ ~num_iters:_ = prev @@ -104,60 +131,221 @@ module InvalidAddressesDomain : InvalidAddressesDomain = struct include AbstractDomain.Map (AbstractAddress) (InvalidationReason) let get_invalidation address invalids = find_opt address invalids + + let map f invalids = fold (fun key actor invalids -> add (f key) actor invalids) invalids empty end -type t = - {heap: MemoryDomain.astate; stack: AliasingDomain.astate; invalids: InvalidAddressesDomain.astate} +type t = {heap: Memory.t; stack: AliasingDomain.astate; invalids: InvalidAddressesDomain.astate} let initial = - {heap= MemoryDomain.empty; stack= AliasingDomain.empty; invalids= InvalidAddressesDomain.empty} + {heap= Memory.empty; stack= AliasingDomain.empty; invalids= InvalidAddressesDomain.empty} module Domain : AbstractDomain.S with type astate = t = struct type astate = t - (* This is very naive and should be improved. We can compare two memory graphs by trying to - establish that the graphs reachable from each root are the same up to some additional - unfolding, i.e. are not incompatible when we prune everything that is not reachable from the - roots. Here the roots are the known aliases in the stack since that's the only way to address - into the heap. *) - let ( <= ) ~lhs ~rhs = - phys_equal lhs rhs - || InvalidAddressesDomain.( <= ) ~lhs:lhs.invalids ~rhs:rhs.invalids - && AliasingDomain.( <= ) ~lhs:lhs.stack ~rhs:rhs.stack - && MemoryDomain.( <= ) ~lhs:lhs.heap ~rhs:rhs.heap + let piecewise_lessthan lhs rhs = + InvalidAddressesDomain.( <= ) ~lhs:lhs.invalids ~rhs:rhs.invalids + && AliasingDomain.( <= ) ~lhs:lhs.stack ~rhs:rhs.stack + && Memory.for_all + (fun addr_src edges -> + Memory.Edges.for_all + (fun edge addr_dst -> + Memory.find_edge_opt addr_src edge rhs.heap + |> Option.exists ~f:(fun addr -> AbstractAddress.equal addr addr_dst) ) + edges ) + lhs.heap + + + module JoinState = struct + module AddressUnionSet = struct + module Set = PrettyPrintable.MakePPSet (AbstractAddress) + type elt = AbstractAddress.t [@@deriving compare] - (* Like (<=) this is probably too naive *) + type t = Set.t ref + + let create x = ref (Set.singleton x) + + let compare_size _ _ = 0 + + let merge ~from ~to_ = to_ := Set.union !from !to_ + + let pp f x = Set.pp f !x + end + + module AddressUF = ImperativeUnionFind.Make (AddressUnionSet) + + (** just to get the correct type coercion *) + let to_canonical_address subst addr = (AddressUF.find subst addr :> AbstractAddress.t) + + type nonrec t = {subst: AddressUF.t; astate: t} + + (** adds [(src_addr, access, dst_addr)] to [union_heap] and record potential new equality that + results from it in [subst] *) + let union_one_edge subst src_addr access dst_addr union_heap = + let src_addr = to_canonical_address subst src_addr in + let dst_addr = to_canonical_address subst dst_addr in + match Memory.find_edge_opt src_addr access union_heap with + | None -> + (Memory.add_edge src_addr access dst_addr union_heap, `No_new_equality) + | Some dst_addr' -> + (* new equality [dst_addr = dst_addr'] found *) + ignore (AddressUF.union subst dst_addr dst_addr') ; + (union_heap, `New_equality) + + + module Addresses = Caml.Set.Make (AbstractAddress) + + let rec visit_address subst visited heap addr union_heap = + if Addresses.mem addr visited then (visited, union_heap) + else + let visited = Addresses.add addr visited in + let visit_edge access addr_dst (visited, union_heap) = + union_one_edge subst addr access addr_dst union_heap + |> fst + |> visit_address subst visited heap addr_dst + in + Memory.find_opt addr heap + |> Option.fold ~init:(visited, union_heap) ~f:(fun visited_union_heap edges -> + Memory.Edges.fold visit_edge edges visited_union_heap ) + + + let visit_stack subst heap stack union_heap = + (* start graph exploration *) + let visited = Addresses.empty in + let _, union_heap = + AliasingDomain.fold + (fun _var addr (visited, union_heap) -> visit_address subst visited heap addr union_heap) + stack (visited, union_heap) + in + union_heap + + + let populate_subst_from_stacks subst stack1 stack2 = + ignore + ((* Use [Caml.Map.merge] to detect the variables present in both stacks. Build an empty + result map since we don't use the result. *) + AliasingDomain.merge + (fun _var addr1_opt addr2_opt -> + Option.both addr1_opt addr2_opt + |> Option.iter ~f:(fun (addr1, addr2) -> + (* stack1 says [_var = addr1] and stack2 says [_var = addr2]: unify the + addresses since they are equal to the same variable *) + ignore (AddressUF.union subst addr1 addr2) ) ; + (* empty result map *) + None ) + stack1 stack2) + + + let from_astate_union {heap= heap1; stack= stack1; invalids= invalids1} + {heap= heap2; stack= stack2; invalids= invalids2} = + let subst = AddressUF.create () in + (* gather equalities from the stacks *) + populate_subst_from_stacks subst stack1 stack2 ; + (* union the heaps, take this opportunity to do garbage collection of unreachable values by + only copying the addresses reachable from the variables in the stacks *) + let heap = visit_stack subst heap1 stack1 Memory.empty |> visit_stack subst heap2 stack2 in + (* This keeps all the variables and picks one representative address for each variable in + common thanks to [AbstractAddressDomain_JoinIsMin] *) + let stack = AliasingDomain.join stack1 stack2 in + (* basically union *) + let invalids = InvalidAddressesDomain.join invalids1 invalids2 in + {subst; astate= {heap; stack; invalids}} + + + let rec normalize state = + let one_addr subst addr edges heap_has_converged = + Memory.Edges.fold + (fun access addr_dest (heap, has_converged) -> + match union_one_edge subst addr access addr_dest heap with + | heap, `No_new_equality -> + (heap, has_converged) + | heap, `New_equality -> + (heap, false) ) + edges heap_has_converged + in + let heap, has_converged = + Memory.fold (one_addr state.subst) state.astate.heap (Memory.empty, true) + in + if has_converged then ( + L.d_strln "Join unified addresses:" ; + L.d_increase_indent 1 ; + Container.iter state.subst ~fold:AddressUF.fold_sets + ~f:(fun ((repr : AddressUF.Repr.t), set) -> + L.d_strln + (F.asprintf "%a=%a" AbstractAddress.pp + (repr :> AbstractAddress.t) + AddressUnionSet.pp set) ) ; + L.d_decrease_indent 1 ; + let stack = AliasingDomain.map (to_canonical_address state.subst) state.astate.stack in + let invalids = + InvalidAddressesDomain.map (to_canonical_address state.subst) state.astate.invalids + in + {heap; stack; invalids} ) + else normalize {state with astate= {state.astate with heap}} + end + + (** Given + + - stacks S1, S2 : Var -> Address, + + - graphs G1, G2 : Address -> Access -> Address, + + - and invalid sets I1, I2 : 2^Address + + (all finite), the join of 2 abstract states (S1, G1, I1) and (S2, G2, I2) is (S, G, A) where + there exists a substitution σ from addresses to addresses such that the following holds. Given + addresses l, l', access path a, and graph G, we write l –a–> l' ∈ G if there is a path labelled + by a from l to l' in G (in particular, if a is empty then l –a–> l' ∈ G for all l, l'). + + ∀ i ∈ {1,2}, ∀ l, x, a, ∀ l' ∈ Ii, ((x, l) ∈ Si ∧ l –a–> l' ∈ Gi) + => (x, σ(l)) ∈ S ∧ σ(l) –a–> σ(l') ∈ G ∧ σ(l') ∈ I + + For now the implementation gives back a larger heap than necessary, where all the previously + reachable location are still reachable (up to the substitution) instead of only the locations + leading to invalid ones. + *) let join astate1 astate2 = if phys_equal astate1 astate2 then astate1 else - { heap= MemoryDomain.join astate1.heap astate2.heap - ; stack= AliasingDomain.join astate1.stack astate2.stack - ; invalids= InvalidAddressesDomain.join astate1.invalids astate2.invalids } + (* high-level idea: maintain some union-find data structure to identify locations in one heap + with locations in the other heap. Build the initial join state as follows: + + - equate all locations that correspond to identical variables in both stacks, eg joining + stacks {x=1} and {x=2} adds "1=2" to the unification. + + - add all addresses reachable from stack variables to the join state heap + This gives us an abstract state that is the union of both abstract states, but more states + can still be made equal. For instance, if 1 points to 3 in the first heap and 2 points to 4 + in the second heap and we deduced "1 = 2" from the stacks already (as in the example just + above) then we can deduce "3 = 4". Proceed in this fashion until no more equalities are + discovered, and return the abstract state where a canonical representative has been chosen + consistently for each equivalence class (this is what the union-find data structure gives + us). *) + JoinState.from_astate_union astate1 astate2 |> JoinState.normalize + + + (* TODO: this could be [piecewise_lessthan lhs' (join lhs rhs)] where [lhs'] is [lhs] renamed + according to the unification discovered while joining [lhs] and [rhs]. *) + let ( <= ) ~lhs ~rhs = phys_equal lhs rhs || piecewise_lessthan lhs rhs let max_widening = 5 let widen ~prev ~next ~num_iters = - (* probably pretty obvious but that widening is just bad... We need to add a wildcard [*] to - access path elements in our graph representing repeated paths if we hope to converge (like - {!AccessPath.Abs.Abstracted}, we actually need something very similar). *) + (* widening is underapproximation for now... TODO *) if num_iters > max_widening then prev else if phys_equal prev next then prev - else - { heap= MemoryDomain.widen ~num_iters ~prev:prev.heap ~next:next.heap - ; stack= AliasingDomain.widen ~num_iters ~prev:prev.stack ~next:next.stack - ; invalids= InvalidAddressesDomain.widen ~num_iters ~prev:prev.invalids ~next:next.invalids - } + else join prev next let pp fmt {heap; stack; invalids} = - F.fprintf fmt "{@[ heap=@[%a@];@;stack=@[%a@];@;invalids=@[%a@];@]}" - MemoryDomain.pp heap AliasingDomain.pp stack InvalidAddressesDomain.pp invalids + F.fprintf fmt "{@[ heap=@[%a@];@;stack=@[%a@];@;invalids=@[%a@];@]}" Memory.pp + heap AliasingDomain.pp stack InvalidAddressesDomain.pp invalids end -include Domain +(* {2 Access operations on the domain} *) module Diagnostic = struct type t = @@ -212,15 +400,15 @@ let rec walk actor ~overwrite_last addr path astate = | [a], Some new_addr -> check_addr_access actor addr astate >>| fun astate -> - let heap = MemoryDomain.add_edge addr a new_addr astate.heap in + let heap = Memory.add_edge addr a new_addr astate.heap in ({astate with heap}, new_addr) | a :: path, _ -> ( check_addr_access actor addr astate >>= fun astate -> - match MemoryDomain.find_edge_opt addr a astate.heap with + match Memory.find_edge_opt addr a astate.heap with | None -> let addr' = AbstractAddress.mk_fresh () in - let heap = MemoryDomain.add_edge addr a addr' astate.heap in + let heap = Memory.add_edge addr a addr' astate.heap in let astate = {astate with heap} in walk actor ~overwrite_last addr' path astate | Some addr' -> @@ -292,3 +480,6 @@ let invalidate location access_expr astate = >>= fun (astate, addr) -> let actor = {access_expr; location} in check_addr_access actor addr astate >>| mark_invalid actor addr + + +include Domain diff --git a/infer/src/checkers/PulseDomain.mli b/infer/src/checkers/PulseDomain.mli index 90d7825a5..a10c5b868 100644 --- a/infer/src/checkers/PulseDomain.mli +++ b/infer/src/checkers/PulseDomain.mli @@ -6,41 +6,14 @@ *) open! IStd -module F = Format module AbstractAddress : sig type t = private int [@@deriving compare] val mk_fresh : unit -> t - - val pp : F.formatter -> t -> unit end -module Access : sig - type t = AccessPath.access [@@deriving compare] - - val pp : F.formatter -> t -> unit -end - -module AbstractAddressDomain : AbstractDomain.S with type astate = AbstractAddress.t - -module MemoryEdges : module type of AbstractDomain.InvertedMap (Access) (AbstractAddressDomain) - -module MemoryDomain : module type of AbstractDomain.InvertedMap (AbstractAddress) (MemoryEdges) - -module AliasingDomain : module type of AbstractDomain.InvertedMap (Var) (AbstractAddressDomain) - -module InvalidAddressesDomain : AbstractDomain.S - -type t = - { heap: MemoryDomain.astate - (** Symbolic representation of the heap: a graph where nodes are abstract addresses and edges are - access path elements. *) - ; stack: AliasingDomain.astate - (** Symbolic representation of the stack: which memory address do variables point to. No other - values are being tracked. *) - ; invalids: InvalidAddressesDomain.astate - (** Set of addresses known to be in an invalid state. *) } +type t include AbstractDomain.S with type astate = t diff --git a/infer/tests/codetoanalyze/cpp/pulse/issues.exp b/infer/tests/codetoanalyze/cpp/pulse/issues.exp index 9831a68f0..733539526 100644 --- a/infer/tests/codetoanalyze/cpp/pulse/issues.exp +++ b/infer/tests/codetoanalyze/cpp/pulse/issues.exp @@ -1,6 +1,9 @@ +codetoanalyze/cpp/ownership/basics.cpp, multiple_invalidations_branch_bad, 6, USE_AFTER_LIFETIME, no_bucket, ERROR, [invalidated `ptr` here,accessed `*(ptr)` here] +codetoanalyze/cpp/ownership/basics.cpp, multiple_invalidations_loop_bad, 8, USE_AFTER_LIFETIME, no_bucket, ERROR, [invalidated `ptr` here,accessed `*(ptr)` here] codetoanalyze/cpp/ownership/returns.cpp, returns::return_deleted_bad, 4, USE_AFTER_LIFETIME, no_bucket, ERROR, [invalidated `x` here,accessed `x` here] codetoanalyze/cpp/ownership/use_after_delete.cpp, delete_in_branch_bad, 5, USE_AFTER_LIFETIME, no_bucket, ERROR, [invalidated `s` here,accessed `s` here] codetoanalyze/cpp/ownership/use_after_delete.cpp, delete_in_loop_bad, 3, USE_AFTER_LIFETIME, no_bucket, ERROR, [invalidated `s` here,accessed `s` here] +codetoanalyze/cpp/ownership/use_after_delete.cpp, delete_ref_in_loop_ok, 3, USE_AFTER_LIFETIME, no_bucket, ERROR, [invalidated `s` here,accessed `v.__internal_array[_]` here] codetoanalyze/cpp/ownership/use_after_delete.cpp, deref_deleted_bad, 3, USE_AFTER_LIFETIME, no_bucket, ERROR, [invalidated `s` here,accessed `s` here] codetoanalyze/cpp/ownership/use_after_delete.cpp, double_delete_bad, 3, USE_AFTER_LIFETIME, no_bucket, ERROR, [invalidated `s` here,accessed `s` here] codetoanalyze/cpp/ownership/use_after_delete.cpp, gated_delete_abort_ok, 6, USE_AFTER_LIFETIME, no_bucket, ERROR, [invalidated `s` here,accessed `s` here] @@ -10,4 +13,5 @@ codetoanalyze/cpp/ownership/use_after_delete.cpp, reassign_field_of_deleted_bad, codetoanalyze/cpp/ownership/use_after_delete.cpp, return_deleted_bad, 3, USE_AFTER_LIFETIME, no_bucket, ERROR, [invalidated `s` here,accessed `s` here] codetoanalyze/cpp/ownership/use_after_delete.cpp, use_in_branch_bad, 4, USE_AFTER_LIFETIME, no_bucket, ERROR, [invalidated `s` here,accessed `s` here] codetoanalyze/cpp/ownership/use_after_delete.cpp, use_in_loop_bad, 4, USE_AFTER_LIFETIME, no_bucket, ERROR, [invalidated `s` here,accessed `s->f` here] +codetoanalyze/cpp/pulse/join.cpp, visit_list, 11, USE_AFTER_LIFETIME, no_bucket, ERROR, [invalidated `result` here,accessed `*(result)` here] codetoanalyze/cpp/pulse/vector.cpp, deref_vector_element_after_lifetime_bad, 4, USE_AFTER_LIFETIME, no_bucket, ERROR, [invalidated `&(x).__internal_array[_]` here,accessed `*(y)` here] diff --git a/infer/tests/codetoanalyze/cpp/pulse/join.cpp b/infer/tests/codetoanalyze/cpp/pulse/join.cpp new file mode 100644 index 000000000..c5f3d57a0 --- /dev/null +++ b/infer/tests/codetoanalyze/cpp/pulse/join.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2018-present, Facebook, Inc. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ +struct foo { + int* val; +}; + +struct list { + struct list* next; + struct foo* foo; +}; + +int visit_list(struct list* head, int cond) { + int* result = 0; + struct list* x = head; + if (cond) { + result = x->next->foo->val; + delete result; + } else { + x = x->next; + struct list* y = x->next; + result = x->foo->val; + } + return *result; +}