From d0cf7e313552975d260683a0b97df2005bfca6fa Mon Sep 17 00:00:00 2001 From: Jules Villard Date: Wed, 17 Mar 2021 03:51:53 -0700 Subject: [PATCH] [pulse] refine canonicalisation Summary: Mostly refactoring, get rid of some minor TODOs in the process. Reviewed By: skcho Differential Revision: D26916013 fbshipit-source-id: 53c34af05 --- infer/src/IR/Var.ml | 2 + infer/src/IR/Var.mli | 2 + infer/src/absint/AbstractDomain.ml | 2 + infer/src/istd/PrettyPrintable.ml | 17 ++++ infer/src/istd/PrettyPrintable.mli | 4 + infer/src/pulse/PulseAbductiveDomain.ml | 109 +++++++++++------------- infer/src/pulse/PulseBaseDomain.ml | 2 +- infer/src/pulse/PulseBaseMemory.ml | 32 ++++--- infer/src/pulse/PulseBaseStack.ml | 36 +++++--- infer/src/pulse/PulseBaseStack.mli | 4 +- 10 files changed, 126 insertions(+), 84 deletions(-) diff --git a/infer/src/IR/Var.ml b/infer/src/IR/Var.ml index cac1821d5..d540a0a2d 100644 --- a/infer/src/IR/Var.ml +++ b/infer/src/IR/Var.ml @@ -36,6 +36,8 @@ let get_ident = function ProgramVar _ -> None | LogicalVar id -> Some id let get_pvar = function ProgramVar pvar -> Some pvar | LogicalVar _ -> None +let is_pvar = function ProgramVar _ -> true | LogicalVar _ -> false + let is_global = function ProgramVar pvar -> Pvar.is_global pvar | LogicalVar _ -> false let is_return = function ProgramVar pvar -> Pvar.is_return pvar | LogicalVar _ -> false diff --git a/infer/src/IR/Var.mli b/infer/src/IR/Var.mli index 484edeb07..027a52fa9 100644 --- a/infer/src/IR/Var.mli +++ b/infer/src/IR/Var.mli @@ -31,6 +31,8 @@ val get_ident : t -> Ident.t option val get_pvar : t -> Pvar.t option +val is_pvar : t -> bool + val is_global : t -> bool val is_local_to_procedure : Procname.t -> t -> bool diff --git a/infer/src/absint/AbstractDomain.ml b/infer/src/absint/AbstractDomain.ml index 1af8ed3c5..8a4b83ea7 100644 --- a/infer/src/absint/AbstractDomain.ml +++ b/infer/src/absint/AbstractDomain.ml @@ -641,6 +641,8 @@ module SafeInvertedMap (Key : PrettyPrintable.PrintableOrderedType) (ValueDomain let fold_map = M.fold_map + let fold_mapi = M.fold_mapi + let of_seq = M.of_seq let mapi f m = diff --git a/infer/src/istd/PrettyPrintable.ml b/infer/src/istd/PrettyPrintable.ml index ccbb7e7b1..13aec2d18 100644 --- a/infer/src/istd/PrettyPrintable.ml +++ b/infer/src/istd/PrettyPrintable.ml @@ -123,6 +123,8 @@ module type MonoMap = sig val fold_map : t -> init:'a -> f:('a -> value -> 'a * value) -> 'a * t + val fold_mapi : t -> init:'a -> f:(key -> 'a -> value -> 'a * value) -> 'a * t + val of_seq : (key * value) Seq.t -> t end @@ -131,6 +133,8 @@ module type PPMap = sig val fold_map : 'a t -> init:'b -> f:('b -> 'a -> 'b * 'c) -> 'b * 'c t + val fold_mapi : 'a t -> init:'b -> f:(key -> 'b -> 'a -> 'b * 'c) -> 'b * 'c t + val is_singleton_or_more : 'a t -> (key * 'a) IContainer.singleton_or_more val pp_key : F.formatter -> key -> unit @@ -159,6 +163,19 @@ end module MakePPMap (Ord : PrintableOrderedType) = struct include Caml.Map.Make (Ord) + let fold_mapi m ~init ~f = + let acc = ref init in + let new_map = + mapi + (fun key value -> + let acc', res = f key !acc value in + acc := acc' ; + res ) + m + in + (!acc, new_map) + + let fold_map m ~init ~f = let acc = ref init in let new_map = diff --git a/infer/src/istd/PrettyPrintable.mli b/infer/src/istd/PrettyPrintable.mli index 5c0bd0ef8..0001cab25 100644 --- a/infer/src/istd/PrettyPrintable.mli +++ b/infer/src/istd/PrettyPrintable.mli @@ -125,6 +125,8 @@ module type MonoMap = sig val fold_map : t -> init:'a -> f:('a -> value -> 'a * value) -> 'a * t + val fold_mapi : t -> init:'a -> f:(key -> 'a -> value -> 'a * value) -> 'a * t + val of_seq : (key * value) Seq.t -> t end @@ -133,6 +135,8 @@ module type PPMap = sig val fold_map : 'a t -> init:'b -> f:('b -> 'a -> 'b * 'c) -> 'b * 'c t + val fold_mapi : 'a t -> init:'b -> f:(key -> 'b -> 'a -> 'b * 'c) -> 'b * 'c t + val is_singleton_or_more : 'a t -> (key * 'a) IContainer.singleton_or_more val pp_key : F.formatter -> key -> unit diff --git a/infer/src/pulse/PulseAbductiveDomain.ml b/infer/src/pulse/PulseAbductiveDomain.ml index 6a37f9313..8ae08a7af 100644 --- a/infer/src/pulse/PulseAbductiveDomain.ml +++ b/infer/src/pulse/PulseAbductiveDomain.ml @@ -673,47 +673,44 @@ let invalidate_locals pdesc astate : t = type summary = t [@@deriving compare, equal, yojson_of] -let is_allocated {post; pre} v = - let is_pvar = function Var.ProgramVar _ -> true | Var.LogicalVar _ -> false in - let is_stack_allocated base_mem v = - BaseStack.exists - (fun var (address, _) -> is_pvar var && AbstractValue.equal address v) - base_mem.stack - in - (* OPTIM: the post stack contains at least the pre stack so no need to check both *) +let is_allocated stack_allocations {post; pre} v = BaseMemory.is_allocated (post :> BaseDomain.t).heap v || BaseMemory.is_allocated (pre :> BaseDomain.t).heap v - || is_stack_allocated (post :> BaseDomain.t) v + || AbstractValue.Set.mem v (Lazy.force stack_allocations) -let subst_var subst astate = +let subst_var_in_post subst astate = let open SatUnsat.Import in let+ post = PostDomain.subst_var subst astate.post in if phys_equal astate.post post then astate else {astate with post} +let get_stack_allocated {post} = + (* OPTIM: the post stack contains at least the pre stack so no need to check both *) + BaseStack.fold + (fun var (address, _) allocated -> + if Var.is_pvar var then AbstractValue.Set.add address allocated else allocated ) + (post :> BaseDomain.t).stack AbstractValue.Set.empty + + let incorporate_new_eqs astate new_eqs = + let stack_allocations = lazy (get_stack_allocated astate) in List.fold_until new_eqs ~init:astate ~finish:(fun astate -> Sat astate) ~f:(fun astate (new_eq : PulseFormula.new_eq) -> match new_eq with - | EqZero v when is_allocated astate v -> - L.d_printfln "CONTRADICTION: %a = 0 but is allocated" AbstractValue.pp v ; - Stop Unsat | Equal (v1, v2) when AbstractValue.equal v1 v2 -> Continue astate | Equal (v1, v2) -> ( - if is_allocated astate v1 && is_allocated astate v2 then ( - L.d_printfln "CONTRADICTION: %a = %a but both are separately allocated" AbstractValue.pp - v1 AbstractValue.pp v2 ; - Stop Unsat ) - else - match subst_var (v1, v2) astate with - | Unsat -> - Stop Unsat - | Sat astate' -> - Continue astate' ) - | _ -> + match subst_var_in_post (v1, v2) astate with + | Unsat -> + Stop Unsat + | Sat astate' -> + Continue astate' ) + | EqZero v when is_allocated stack_allocations astate v -> + L.d_printfln "CONTRADICTION: %a = 0 but is allocated" AbstractValue.pp v ; + Stop Unsat + | EqZero _ (* [v] not allocated *) -> Continue astate ) @@ -722,59 +719,57 @@ let canonicalize astate = let open SatUnsat.Import in let get_var_repr v = PathCondition.get_var_repr astate.path_condition v in let canonicalize_pre (pre : PreDomain.t) = - (* TODO: detect contradictions when equal addresses are pointing to different locations *) - let heap' = - BaseMemory.filter - (fun _ edges -> not (BaseMemory.Edges.is_empty edges)) - (pre :> BaseDomain.t).heap - in - PreDomain.update ~heap:heap' pre + (* (ab)use canonicalization to filter out empty edges in the heap and detect aliasing + contradictions *) + let* stack' = BaseStack.canonicalize ~get_var_repr:Fn.id (pre :> BaseDomain.t).stack in + let+ heap' = BaseMemory.canonicalize ~get_var_repr:Fn.id (pre :> BaseDomain.t).heap in + PreDomain.update ~stack:stack' ~heap:heap' pre in let canonicalize_post (post : PostDomain.t) = - let stack' = BaseStack.canonicalize ~get_var_repr (post :> BaseDomain.t).stack in + let* stack' = BaseStack.canonicalize ~get_var_repr (post :> BaseDomain.t).stack in (* note: this step also de-registers addresses pointing to empty edges *) let+ heap' = BaseMemory.canonicalize ~get_var_repr (post :> BaseDomain.t).heap in let attrs' = BaseAddressAttributes.canonicalize ~get_var_repr (post :> BaseDomain.t).attrs in PostDomain.update ~stack:stack' ~heap:heap' ~attrs:attrs' post in - let pre = canonicalize_pre astate.pre in + let* pre = canonicalize_pre astate.pre in let+ post = canonicalize_post astate.post in {astate with pre; post} +let filter_for_summary tenv astate0 = + let open SatUnsat.Import in + L.d_printfln "Canonicalizing...@\n" ; + let* astate_before_filter = canonicalize astate0 in + L.d_printfln "Canonicalized state: %a@\n" pp astate_before_filter ; + let astate = filter_stack_for_summary astate_before_filter in + let astate = {astate with topl= PulseTopl.filter_for_summary astate.path_condition astate.topl} in + let astate, live_addresses, _ = discard_unreachable astate in + let+ path_condition, new_eqs = + PathCondition.simplify tenv + ~get_dynamic_type: + (BaseAddressAttributes.get_dynamic_type (astate_before_filter.post :> BaseDomain.t).attrs) + ~keep:live_addresses astate.path_condition + in + ({astate with path_condition; topl= PulseTopl.simplify ~keep:live_addresses astate.topl}, new_eqs) + + let summary_of_post tenv pdesc astate = let open SatUnsat.Import in (* NOTE: we normalize (to strengthen the equality relation used by canonicalization) then canonicalize *before* garbage collecting unused addresses in case we detect any last-minute contradictions about addresses we are about to garbage collect *) - let attrs = (astate.post :> BaseDomain.t).attrs in let path_condition, is_unsat, new_eqs = PathCondition.is_unsat_expensive tenv - ~get_dynamic_type:(BaseAddressAttributes.get_dynamic_type attrs) + ~get_dynamic_type:(BaseAddressAttributes.get_dynamic_type (astate.post :> BaseDomain.t).attrs) astate.path_condition in - if is_unsat then Unsat - else - let astate = {astate with path_condition} in - let* astate = incorporate_new_eqs astate new_eqs in - L.d_printfln "Canonicalizing...@\n" ; - let* astate = canonicalize astate in - L.d_printfln "Canonicalized state: %a@\n" pp astate ; - let astate = filter_stack_for_summary astate in - let astate = - {astate with topl= PulseTopl.filter_for_summary astate.path_condition astate.topl} - in - let astate, live_addresses, _ = discard_unreachable astate in - let* path_condition, new_eqs = - PathCondition.simplify tenv ~keep:live_addresses - ~get_dynamic_type:(BaseAddressAttributes.get_dynamic_type attrs) - astate.path_condition - in - let+ astate = incorporate_new_eqs astate new_eqs in - let astate = - {astate with path_condition; topl= PulseTopl.simplify ~keep:live_addresses astate.topl} - in - invalidate_locals pdesc astate + let* () = if is_unsat then Unsat else Sat () in + let astate = {astate with path_condition} in + let* astate = incorporate_new_eqs astate new_eqs in + let* astate, new_eqs = filter_for_summary tenv astate in + let+ astate = incorporate_new_eqs astate new_eqs in + invalidate_locals pdesc astate let get_pre {pre} = (pre :> BaseDomain.t) diff --git a/infer/src/pulse/PulseBaseDomain.ml b/infer/src/pulse/PulseBaseDomain.ml index 134c2eaee..dcf529be4 100644 --- a/infer/src/pulse/PulseBaseDomain.ml +++ b/infer/src/pulse/PulseBaseDomain.ml @@ -305,8 +305,8 @@ let reachable_addresses_from addresses astate = let subst_var subst ({heap; stack; attrs} as astate) = let open SatUnsat.Import in + let* stack' = Stack.subst_var subst stack in let+ heap' = Memory.subst_var subst heap in - let stack' = Stack.subst_var subst stack in let attrs' = AddressAttributes.subst_var subst attrs in if phys_equal heap heap' && phys_equal stack stack' && phys_equal attrs attrs' then astate else {heap= heap'; stack= stack'; attrs= attrs'} diff --git a/infer/src/pulse/PulseBaseMemory.ml b/infer/src/pulse/PulseBaseMemory.ml index 0dfed7479..6ea76b71c 100644 --- a/infer/src/pulse/PulseBaseMemory.ml +++ b/infer/src/pulse/PulseBaseMemory.ml @@ -23,7 +23,8 @@ module Access = struct let canonicalize ~get_var_repr (access : t) = match access with | ArrayAccess (typ, addr) -> - HilExp.Access.ArrayAccess (typ, get_var_repr addr) + let addr' = get_var_repr addr in + if AbstractValue.equal addr addr' then access else HilExp.Access.ArrayAccess (typ, addr') | FieldAccess _ | TakeAddress | Dereference -> access end @@ -49,10 +50,16 @@ module Edges = struct let yojson_of_t edges = [%yojson_of: (Access.t * AddrTrace.t) list] (M.bindings edges) let canonicalize ~get_var_repr edges = - M.fold ~init:M.empty edges ~f:(fun edges' (access, (addr, hist)) -> - let addr' = get_var_repr addr in - let access' = Access.canonicalize ~get_var_repr access in - M.add access' (addr', hist) edges' ) + let edges', changed = + M.fold edges ~init:(M.empty, false) ~f:(fun (edges', changed) (access, (addr, hist)) -> + let addr' = get_var_repr addr in + let access' = Access.canonicalize ~get_var_repr access in + let changed = + changed || not (AbstractValue.equal addr addr' && phys_equal access access') + in + (M.add access' (addr', hist) edges', changed) ) + in + if changed then edges' else edges let subst_var (v, v') edges = @@ -89,10 +96,10 @@ let is_allocated memory v = let canonicalize ~get_var_repr memory = let exception AliasingContradiction in try - let memory = + let memory', changed = Graph.fold - (fun addr edges g -> - if Edges.is_empty edges then g + (fun addr edges (g, changed) -> + if Edges.is_empty edges then (g, true) else let addr' = get_var_repr addr in if is_allocated g addr' then ( @@ -101,10 +108,13 @@ let canonicalize ~get_var_repr memory = raise_notrace AliasingContradiction ) else let edges' = Edges.canonicalize ~get_var_repr edges in - Graph.add addr' edges' g ) - memory Graph.empty + let changed = + changed || not (AbstractValue.equal addr addr' && phys_equal edges edges') + in + (Graph.add addr' edges' g, changed) ) + memory (Graph.empty, false) in - Sat memory + if changed then Sat memory' else Sat memory with AliasingContradiction -> Unsat diff --git a/infer/src/pulse/PulseBaseStack.ml b/infer/src/pulse/PulseBaseStack.ml index 2d7dead17..fade3d746 100644 --- a/infer/src/pulse/PulseBaseStack.ml +++ b/infer/src/pulse/PulseBaseStack.ml @@ -6,6 +6,7 @@ *) open! IStd module F = Format +module L = Logging open PulseBasicInterface (** Stacks: map addresses of variables to values and histoy. *) @@ -32,19 +33,28 @@ module M = PrettyPrintable.MakePPMonoMap (VarAddress) (AddrHistPair) let yojson_of_t m = [%yojson_of: (VarAddress.t * AddrHistPair.t) list] (M.bindings m) let canonicalize ~get_var_repr stack = - (* TODO: detect contradictions when the addresses of two different program variables are equal *) - let changed = ref false in - let stack' = - M.map - (fun ((addr, hist) as addr_hist) -> - let addr' = get_var_repr addr in - if phys_equal addr addr' then addr_hist - else ( - changed := true ; - (addr', hist) ) ) - stack - in - if !changed then stack' else stack + let exception AliasingContradiction in + try + let (_allocated, changed), stack' = + M.fold_mapi stack ~init:(AbstractValue.Set.empty, false) + ~f:(fun var (allocated, changed) ((addr, hist) as addr_hist) -> + let addr' = get_var_repr addr in + if Var.is_pvar var && AbstractValue.Set.mem addr' allocated then ( + L.d_printfln + "CONTRADICTION: %a = %a makes two stack variables' addresses equal (%a=%a) in %a@\n" + AbstractValue.pp addr AbstractValue.pp addr' AbstractValue.pp addr Var.pp var M.pp + stack ; + raise AliasingContradiction ) ; + let allocated = + if Var.is_pvar var then AbstractValue.Set.add addr' allocated else allocated + in + if phys_equal addr addr' then ((allocated, changed), addr_hist) + else + let changed = true in + ((allocated, changed), (addr', hist)) ) + in + Sat (if changed then stack' else stack) + with AliasingContradiction -> Unsat let subst_var (v, v') stack = diff --git a/infer/src/pulse/PulseBaseStack.mli b/infer/src/pulse/PulseBaseStack.mli index 6b7f6243a..2e5c2a443 100644 --- a/infer/src/pulse/PulseBaseStack.mli +++ b/infer/src/pulse/PulseBaseStack.mli @@ -21,8 +21,8 @@ val pp : F.formatter -> t -> unit val yojson_of_t : t -> Yojson.Safe.t -val canonicalize : get_var_repr:(AbstractValue.t -> AbstractValue.t) -> t -> t +val canonicalize : get_var_repr:(AbstractValue.t -> AbstractValue.t) -> t -> t SatUnsat.t (** replace each address in the stack by its canonical representative according to the current equality relation, represented by [get_var_repr] *) -val subst_var : AbstractValue.t * AbstractValue.t -> t -> t +val subst_var : AbstractValue.t * AbstractValue.t -> t -> t SatUnsat.t