From 3332dc1a42fa091fa5a4fe2653f4dd860d9021f6 Mon Sep 17 00:00:00 2001 From: Jules Villard Date: Wed, 22 Apr 2020 01:48:36 -0700 Subject: [PATCH] [AI] improve disjunctive domain Summary: Replace horrible hack with ok hack. The main difficulty in implementing the disjunctive domain is to avoid the quadratic time complexity of executing the same disjuncts over and over again when going around loops: First time around a loop, assuming for example a single disjunct `d`: ``` [d] loop body [d1' \/ d2'] ``` Second time around the same loop: the new pre will be the join of the posts of predecessor nodes, so `old_pre \/ post(loop,old_pre)`, i.e. `d \/ d1' \/ d2'`. Now we need to execute `loop body` again *without running the symbolic execution of `d` again* (and the time after that we'll want to not execute `d`, `d1'`, or `d2'`). Horrible hack (before): Disjuncts have a boolean "visited" attached that does its best to keep track of whether a given disjunct is old or new. When executing a single *instruction* look at the flag and skip the state if it's old. Of course we have no way to know for sure so it turns out it was often wrongly re-executing old disjuncts. This was also producing the wrong results over even simple loops: only the last iteration would make it outside the loop for some reason. Overall, the semantics were pretty untractable and shady at best. New hack (this diff): only run instructions of a given *node* on disjuncts that are not physically equal to the "pre" ones already in the invariant map for the current node. This gives the correct result over simple loops and a nice performance improvement in general (probably the old heuristic was hitting the quadratic bad case more often). Reviewed By: skcho Differential Revision: D21154063 fbshipit-source-id: 5ee38c68c --- infer/src/absint/AbstractInterpreter.ml | 148 +++++++++--------- infer/src/absint/AbstractInterpreter.mli | 20 +-- infer/src/pulse/Pulse.ml | 6 +- .../tests/codetoanalyze/cpp/pulse/issues.exp | 1 + .../codetoanalyze/java/impurity/issues.exp | 2 +- 5 files changed, 84 insertions(+), 93 deletions(-) diff --git a/infer/src/absint/AbstractInterpreter.ml b/infer/src/absint/AbstractInterpreter.ml index 7de5734eb..d7de2da69 100644 --- a/infer/src/absint/AbstractInterpreter.ml +++ b/infer/src/absint/AbstractInterpreter.ml @@ -83,6 +83,29 @@ end module type Make = functor (TransferFunctions : TransferFunctions.SIL) -> S with module TransferFunctions = TransferFunctions +(** internal module that extends transfer functions *) +module type NodeTransferFunctions = sig + include TransferFunctions.SIL + + val exec_node_instrs : + Domain.t State.t option + -> exec_instr:(Domain.t -> Sil.instr -> Domain.t) + -> Domain.t + -> _ Instrs.t + -> Domain.t + (** specifies how to symbolically execute the instructions of a node, using [exec_instr] to go + over a single instruction *) +end + +(** most transfer functions will use this simple [Instrs.fold] approach *) +module SimpleNodeTransferFunctions (T : TransferFunctions.SIL) = struct + include T + + let exec_node_instrs _old_state_opt ~exec_instr pre instrs = + Instrs.fold ~init:pre instrs ~f:exec_instr +end + +(** build a disjunctive domain and transfer functions *) module MakeDisjunctiveTransferFunctions (T : TransferFunctions.DisjReady) (DConfig : TransferFunctions.DisjunctiveConfig) = @@ -91,47 +114,18 @@ struct type extras = T.extras - module Disjuncts = struct - (* The disjunctive domain transformer should really be part of the Abstract Interpreter instead - of the Transfer Functions as the scheduler needs to know which disjuncts have already been - explored. But for now let's emulate that with a disgusting hack in the transfer functions - that try to skip disjuncts we have already explored, using the [visited] flag. *) - type disjunct = {visited: bool; astate: T.Domain.t} - - let pp_disjunct fmt ({visited; astate}[@warning "+9"]) = - F.fprintf fmt "@[visited=%b;@;@[%a@]@]" visited T.Domain.pp astate - - - type t = disjunct list - - let mk_disjunct astate = {visited= false; astate} - - let singleton astate = [mk_disjunct astate] - - let elements disjuncts = List.map disjuncts ~f:(fun {astate} -> astate) - - let pp f disjuncts = - let pp_disjuncts f disjuncts = - List.iteri disjuncts ~f:(fun i disjunct -> - F.fprintf f "#%d: @[%a@]@;" i pp_disjunct disjunct ) - in - F.fprintf f "@[%d disjuncts:@;%a@]" (List.length disjuncts) pp_disjuncts disjuncts - end - - type disjunct_t = Disjuncts.disjunct = {visited: bool; astate: T.Domain.t} - module Domain = struct - type t = Disjuncts.t + (** a list [\[x1; x2; ...; xN\]] represents a disjunction [x1 ∨ x2 ∨ ... ∨ xN] *) + type t = T.Domain.t list let rev_filter_not_over_approximated disjuncts ~not_in = List.rev_filter disjuncts ~f:(fun disjunct -> - List.exists not_in ~f:(fun disj_not_in -> - T.Domain.leq ~lhs:disjunct.astate ~rhs:disj_not_in.astate ) + List.exists not_in ~f:(fun disj_not_in -> T.Domain.leq ~lhs:disjunct ~rhs:disj_not_in) |> not ) - (* Ignore states in [lhs] that are over-approximated in [rhs] and vice-versa. Favors keeping - states in [lhs]. *) + (** Ignore states in [lhs] that are over-approximated in [rhs] and vice-versa. Favors keeping + states in [lhs]. *) let join_up_to_imply lhs rhs = let rev_rhs_not_in_lhs = rev_filter_not_over_approximated rhs ~not_in:lhs in (* cheeky: this is only used in pulse, whose (<=) is actually a symmetric relation so there's @@ -151,18 +145,13 @@ struct if lhs_length >= n then lhs else List.rev_append rhs lhs - let set_visited b disjuncts = - if List.for_all disjuncts ~f:(fun {visited} -> Bool.equal visited b) then disjuncts - else List.map disjuncts ~f:(fun disjunct -> {disjunct with visited= b}) - - (** check if elements of [disj] appear in [of_] in the same order, using pointer equality on abstract states to compare elements quickly *) let rec is_trivial_subset disj ~of_ = match (disj, of_) with | [], _ -> true - | x :: disj', y :: of' when phys_equal x.astate y.astate -> + | x :: disj', y :: of' when phys_equal x y -> is_trivial_subset disj' ~of_:of' | _, _ :: of' -> is_trivial_subset disj ~of_:of' @@ -174,41 +163,57 @@ struct let widen ~prev ~next ~num_iters = let (`UnderApproximateAfterNumIterations max_iter) = DConfig.widen_policy in - if phys_equal prev next || num_iters > max_iter then set_visited false prev + if phys_equal prev next then prev + else if num_iters > max_iter then ( + L.d_printfln "Iteration %d is greater than max iter %d, stopping." num_iters max_iter ; + prev ) else - let visited_prev = set_visited true prev in - let post = join_up_to_imply visited_prev next in - if leq ~lhs:post ~rhs:prev then set_visited false prev else post + let post = join_up_to_imply prev next in + if leq ~lhs:post ~rhs:prev then prev else post - let pp = Disjuncts.pp + let pp f disjuncts = + let pp_disjuncts f disjuncts = + List.iteri disjuncts ~f:(fun i disjunct -> + F.fprintf f "#%d: @[%a@]@;" i T.Domain.pp disjunct ) + in + F.fprintf f "@[%d disjuncts:@;%a@]" (List.length disjuncts) pp_disjuncts disjuncts end let exec_instr pre_disjuncts extras node instr = List.foldi pre_disjuncts ~init:[] ~f:(fun i post_disjuncts pre_disjunct -> - if pre_disjunct.visited then - (* SUBTLE/WORST HACK EVER: ignore pres that we know we have gone through already. This - means that the invariant map at that program point will be junk since they are going to - miss some states, but the overall result will still be ok because the loop heads are - ok. - - This should really be implemented in {!AbstractInterpreter}. *) - post_disjuncts + L.d_printfln "@[Executing instruction from disjunct #%d@;" i ; + let disjuncts' = T.exec_instr pre_disjunct extras node instr in + ( if Config.write_html then + let n = List.length disjuncts' in + L.d_printfln "@]@\n@[Got %d disjunct%s back@]" n (if Int.equal n 1 then "" else "s") ) ; + Domain.join post_disjuncts disjuncts' ) + + + let exec_node_instrs old_state_opt ~exec_instr pre instrs = + let is_new_pre disjunct = + match old_state_opt with + | None -> + true + | Some {State.pre= previous_pre; _} -> + not (List.mem ~equal:phys_equal previous_pre disjunct) + in + let current_post = match old_state_opt with None -> [] | Some {State.post; _} -> post in + List.foldi pre ~init:current_post ~f:(fun i post_disjuncts pre_disjunct -> + if is_new_pre pre_disjunct then ( + L.d_printfln "@[Executing node from disjunct #%d@;" i ; + let disjuncts' = Instrs.fold ~init:[pre_disjunct] instrs ~f:exec_instr in + L.d_printfln "@]@\n" ; + Domain.join post_disjuncts disjuncts' ) else ( - L.d_printfln "@[Executing from disjunct #%d@;" i ; - let disjuncts' = - T.exec_instr pre_disjunct.astate extras node instr |> List.map ~f:Disjuncts.mk_disjunct - in - ( if Config.write_html then - let n = List.length disjuncts' in - L.d_printfln "@]@\n@[Got %d disjunct%s back@]" n (if Int.equal n 1 then "" else "s") ) ; - Domain.join post_disjuncts disjuncts' ) ) + L.d_printfln "@[Skipping already-visited disjunct #%d@]@;" i ; + post_disjuncts ) ) let pp_session_name node f = T.pp_session_name node f end -module AbstractInterpreterCommon (TransferFunctions : TransferFunctions.SIL) = struct +module AbstractInterpreterCommon (TransferFunctions : NodeTransferFunctions) = struct module CFG = TransferFunctions.CFG module Node = CFG.Node module TransferFunctions = TransferFunctions @@ -277,10 +282,10 @@ module AbstractInterpreterCommon (TransferFunctions : TransferFunctions.SIL) = s L.d_printfln_escaped "%t" pp_all - let exec_instrs ~pp_instr proc_data node pre = + let exec_node_instrs old_state_opt ~pp_instr proc_data node pre = let instrs = CFG.instrs node in if Config.write_html then L.d_printfln_escaped "PRE STATE:@\n@[%a@]@\n" Domain.pp pre ; - let compute_post pre instr = + let exec_instr pre instr = AnalysisState.set_instr instr ; let result = try @@ -307,7 +312,7 @@ module AbstractInterpreterCommon (TransferFunctions : TransferFunctions.SIL) = s in (* hack to ensure that we call `exec_instr` on a node even if it has no instructions *) let instrs = if Instrs.is_empty instrs then Instrs.singleton Sil.skip_instr else instrs in - Instrs.fold ~init:pre ~f:compute_post instrs + TransferFunctions.exec_node_instrs old_state_opt ~exec_instr pre instrs (* Note on narrowing operations: we defines the narrowing operations simply to take a smaller one. @@ -316,7 +321,7 @@ module AbstractInterpreterCommon (TransferFunctions : TransferFunctions.SIL) = s astate_pre inv_map = let node_id = Node.id node in let update_inv_map inv_map new_pre old_state_opt = - let new_post = exec_instrs ~pp_instr proc_data node new_pre in + let new_post = exec_node_instrs old_state_opt ~pp_instr proc_data node new_pre in let new_visit_count = match old_state_opt with | None -> @@ -416,7 +421,7 @@ module MakeWithScheduler (Scheduler : Scheduler.S) (TransferFunctions : TransferFunctions.SIL with module CFG = Scheduler.CFG) = struct - include AbstractInterpreterCommon (TransferFunctions) + include AbstractInterpreterCommon (SimpleNodeTransferFunctions (TransferFunctions)) let rec exec_worklist ~pp_instr cfg ({ProcData.summary} as proc_data) work_queue inv_map = match Scheduler.pop work_queue with @@ -464,7 +469,7 @@ end module MakeRPO (T : TransferFunctions.SIL) = MakeWithScheduler (Scheduler.ReversePostorder (T.CFG)) (T) -module MakeWTO (TransferFunctions : TransferFunctions.SIL) = struct +module MakeWTONode (TransferFunctions : NodeTransferFunctions) = struct include AbstractInterpreterCommon (TransferFunctions) let debug_wto wto node = @@ -584,11 +589,8 @@ module MakeWTO (TransferFunctions : TransferFunctions.SIL) = struct let compute_post ?(do_narrowing = false) = make_compute_post ~exec_cfg_internal ~do_narrowing end +module MakeWTO (T : TransferFunctions.SIL) = MakeWTONode (SimpleNodeTransferFunctions (T)) module MakeDisjunctive (T : TransferFunctions.DisjReady) (DConfig : TransferFunctions.DisjunctiveConfig) = -struct - module DisjT = MakeDisjunctiveTransferFunctions (T) (DConfig) - module Disjuncts = DisjT.Disjuncts - include MakeWTO (DisjT) -end + MakeWTONode (MakeDisjunctiveTransferFunctions (T) (DConfig)) diff --git a/infer/src/absint/AbstractInterpreter.mli b/infer/src/absint/AbstractInterpreter.mli index 35184ab4f..cd2eabe9e 100644 --- a/infer/src/absint/AbstractInterpreter.mli +++ b/infer/src/absint/AbstractInterpreter.mli @@ -75,18 +75,8 @@ module MakeWTO : Make [DConfig]. *) module MakeDisjunctive (T : TransferFunctions.DisjReady) - (DConfig : TransferFunctions.DisjunctiveConfig) : sig - module Disjuncts : sig - type t - - val singleton : T.Domain.t -> t - - val elements : t -> T.Domain.t list - end - - include - S - with type TransferFunctions.extras = T.extras - and module TransferFunctions.CFG = T.CFG - and type TransferFunctions.Domain.t = Disjuncts.t -end + (DConfig : TransferFunctions.DisjunctiveConfig) : + S + with type TransferFunctions.extras = T.extras + and module TransferFunctions.CFG = T.CFG + and type TransferFunctions.Domain.t = T.Domain.t list diff --git a/infer/src/pulse/Pulse.ml b/infer/src/pulse/Pulse.ml index ff08af419..b8939ee40 100644 --- a/infer/src/pulse/Pulse.ml +++ b/infer/src/pulse/Pulse.ml @@ -232,16 +232,14 @@ let checker {Callbacks.exe_env; summary} = let tenv = Exe_env.get_tenv exe_env (Summary.get_proc_name summary) in AbstractValue.State.reset () ; let pdesc = Summary.get_proc_desc summary in - let initial = DisjunctiveAnalyzer.Disjuncts.singleton (ExecutionDomain.mk_initial pdesc) in + let initial = [ExecutionDomain.mk_initial pdesc] in let get_formals callee_pname = Ondemand.get_proc_desc callee_pname |> Option.map ~f:Procdesc.get_pvar_formals in let proc_data = ProcData.make summary tenv get_formals in match DisjunctiveAnalyzer.compute_post proc_data ~initial with | Some posts -> - PulsePayload.update_summary - (PulseSummary.of_posts pdesc (DisjunctiveAnalyzer.Disjuncts.elements posts)) - summary + PulsePayload.update_summary (PulseSummary.of_posts pdesc posts) summary | None -> summary | exception exn -> diff --git a/infer/tests/codetoanalyze/cpp/pulse/issues.exp b/infer/tests/codetoanalyze/cpp/pulse/issues.exp index 5a8206eb9..6b4c8e350 100644 --- a/infer/tests/codetoanalyze/cpp/pulse/issues.exp +++ b/infer/tests/codetoanalyze/cpp/pulse/issues.exp @@ -2,6 +2,7 @@ codetoanalyze/cpp/pulse/basic_string.cpp, use_range_of_invalidated_temporary_str codetoanalyze/cpp/pulse/basics.cpp, multiple_invalidations_branch_bad, 6, USE_AFTER_DELETE, no_bucket, ERROR, [invalidation part of the trace starts here,parameter `ptr` of multiple_invalidations_branch_bad,was invalidated by `delete`,use-after-lifetime part of the trace starts here,parameter `ptr` of multiple_invalidations_branch_bad,invalid access occurs here] codetoanalyze/cpp/pulse/basics.cpp, multiple_invalidations_branch_bad, 6, USE_AFTER_DELETE, no_bucket, ERROR, [invalidation part of the trace starts here,parameter `ptr` of multiple_invalidations_branch_bad,was invalidated by `delete`,use-after-lifetime part of the trace starts here,parameter `ptr` of multiple_invalidations_branch_bad,invalid access occurs here] codetoanalyze/cpp/pulse/basics.cpp, multiple_invalidations_loop_bad, 5, USE_AFTER_DELETE, no_bucket, ERROR, [invalidation part of the trace starts here,parameter `ptr` of multiple_invalidations_loop_bad,was invalidated by `delete`,use-after-lifetime part of the trace starts here,parameter `ptr` of multiple_invalidations_loop_bad,invalid access occurs here] +codetoanalyze/cpp/pulse/basics.cpp, multiple_invalidations_loop_bad, 8, USE_AFTER_DELETE, no_bucket, ERROR, [invalidation part of the trace starts here,parameter `ptr` of multiple_invalidations_loop_bad,was invalidated by `delete`,use-after-lifetime part of the trace starts here,parameter `ptr` of multiple_invalidations_loop_bad,invalid access occurs here] codetoanalyze/cpp/pulse/closures.cpp, call_lambda_bad, 4, USE_AFTER_DELETE, no_bucket, ERROR, [invalidation part of the trace starts here,assigned,was invalidated by `delete`,use-after-lifetime part of the trace starts here,assigned,when calling `call_lambda_bad::lambda_closures.cpp:163:12::operator()` here,parameter `s` of call_lambda_bad::lambda_closures.cpp:163:12::operator(),invalid access occurs here] codetoanalyze/cpp/pulse/closures.cpp, implicit_ref_capture_destroy_invoke_bad, 6, USE_AFTER_LIFETIME, no_bucket, ERROR, [invalidation part of the trace starts here,variable `s` declared here,is the address of a stack variable `s` whose lifetime has ended,use-after-lifetime part of the trace starts here,variable `s` declared here,value captured as `s`,invalid access occurs here] codetoanalyze/cpp/pulse/closures.cpp, reassign_lambda_capture_destroy_invoke_bad, 9, USE_AFTER_LIFETIME, no_bucket, ERROR, [invalidation part of the trace starts here,variable `s` declared here,is the address of a stack variable `s` whose lifetime has ended,use-after-lifetime part of the trace starts here,variable `s` declared here,value captured as `s`,invalid access occurs here] diff --git a/infer/tests/codetoanalyze/java/impurity/issues.exp b/infer/tests/codetoanalyze/java/impurity/issues.exp index 894365845..7eb9213a1 100644 --- a/infer/tests/codetoanalyze/java/impurity/issues.exp +++ b/infer/tests/codetoanalyze/java/impurity/issues.exp @@ -16,7 +16,7 @@ codetoanalyze/java/impurity/Localities.java, Localities.modify_first_el_impure(j codetoanalyze/java/impurity/Localities.java, Localities.modify_via_call_impure(java.util.ArrayList):void, 0, IMPURE_FUNCTION, no_bucket, ERROR, [Impure function void Localities.modify_via_call_impure(ArrayList),when calling `void Localities$Foo.inc_impure()` here,parameter `list` modified here] codetoanalyze/java/impurity/Localities.java, Localities.newHashCode_impure():int, 0, IMPURE_FUNCTION, no_bucket, ERROR, [Impure function int Localities.newHashCode_impure(),call to skipped function int Object.hashCode() occurs here] codetoanalyze/java/impurity/PurityModeled.java, PurityModeled.append_impure(java.lang.StringBuilder):void, 0, IMPURE_FUNCTION, no_bucket, ERROR, [Impure function void PurityModeled.append_impure(StringBuilder),parameter `strBuilder` modified here] -codetoanalyze/java/impurity/PurityModeled.java, PurityModeled.call_timing_symb_impure(int):void, 0, IMPURE_FUNCTION, no_bucket, ERROR, [Impure function void PurityModeled.call_timing_symb_impure(int) with empty pulse summary] +codetoanalyze/java/impurity/PurityModeled.java, PurityModeled.call_timing_symb_impure(int):void, 0, IMPURE_FUNCTION, no_bucket, ERROR, [Impure function void PurityModeled.call_timing_symb_impure(int),when calling `void PurityModeled.timing_call_in_loop_symb_impure(int)` here,call to skipped function long System.nanoTime() occurs here] codetoanalyze/java/impurity/PurityModeled.java, PurityModeled.call_timing_symb_unrelated_impure(int,int):void, 0, IMPURE_FUNCTION, no_bucket, ERROR, [Impure function void PurityModeled.call_timing_symb_unrelated_impure(int,int),when calling `void PurityModeled.timing_call_in_loop_symb_impure(int)` here,call to skipped function long System.nanoTime() occurs here] codetoanalyze/java/impurity/PurityModeled.java, PurityModeled.call_write_impure():void, 0, IMPURE_FUNCTION, no_bucket, ERROR, [Impure function void PurityModeled.call_write_impure(),when calling `void PurityModeled.write_impure()` here,call to skipped function void PrintStream.write(byte[],int,int) occurs here] codetoanalyze/java/impurity/PurityModeled.java, PurityModeled.constant_loop_pure_FP():void, 0, IMPURE_FUNCTION, no_bucket, ERROR, [Impure function void PurityModeled.constant_loop_pure_FP() with empty pulse summary]