From 94ceebfef801d8ae5590675f556129e122e05972 Mon Sep 17 00:00:00 2001 From: Sam Blackshear Date: Tue, 22 Aug 2017 10:38:28 -0700 Subject: [PATCH] [quandary] represent footprint as unified set of access path rather than conjunction of special sources Summary: In looking at summaries that Quandary took a long time to compute, one thing I notice frequently is redundancy in the footprint sources (e.g., I might see `Footprint(x), Footprint(x.f), Footprint(x*)`). `sudo perf top` indicates that joining big sets of sources is a major performance bottleneck, and a large number of footprint sources is surely a big part of this (since we expect the number of non-footprint sources to be small). This diff addresses the redundancy issue by using a more complex representation for a set of sources. The "known" sources are still in a set, but the footprint sources are now represented as a set of access paths (via an access trie). The access path trie is a minimal representation of a set of access paths, so it would represent the example above as a simple `x*`. This should make join/widen/<= faster and improve performance Reviewed By: jberdine Differential Revision: D5663980 fbshipit-source-id: 9fb66f8 --- infer/src/absint/AbstractDomain.ml | 2 + infer/src/absint/AbstractDomain.mli | 2 +- infer/src/checkers/Source.ml | 2 +- infer/src/checkers/Trace.ml | 148 +++++++++++++----- infer/src/checkers/Trace.mli | 27 +++- infer/src/checkers/accessPath.ml | 11 +- infer/src/checkers/accessPath.mli | 6 +- infer/src/checkers/accessTree.ml | 2 + infer/src/checkers/accessTree.mli | 4 + infer/src/quandary/TaintAnalysis.ml | 67 ++++---- infer/src/unit/TaintTests.ml | 4 +- infer/src/unit/TraceTests.ml | 4 +- .../codetoanalyze/cpp/quandary/execs.cpp | 2 +- .../codetoanalyze/cpp/quandary/issues.exp | 1 - 14 files changed, 191 insertions(+), 91 deletions(-) diff --git a/infer/src/absint/AbstractDomain.ml b/infer/src/absint/AbstractDomain.ml index e62a3df38..675b6ab73 100644 --- a/infer/src/absint/AbstractDomain.ml +++ b/infer/src/absint/AbstractDomain.ml @@ -262,6 +262,8 @@ end module BooleanOr = struct type astate = bool + let empty = false + let ( <= ) ~lhs ~rhs = not lhs || rhs let join = ( || ) diff --git a/infer/src/absint/AbstractDomain.mli b/infer/src/absint/AbstractDomain.mli index 168602349..a1793af12 100644 --- a/infer/src/absint/AbstractDomain.mli +++ b/infer/src/absint/AbstractDomain.mli @@ -99,4 +99,4 @@ module BooleanAnd : S with type astate = bool (** Boolean domain ordered by ~p || q. Useful when you want a boolean that's true only when it's true in one conditional branch. *) -module BooleanOr : S with type astate = bool +module BooleanOr : WithBottom with type astate = bool diff --git a/infer/src/checkers/Source.ml b/infer/src/checkers/Source.ml index 00898116c..7dc8c452a 100644 --- a/infer/src/checkers/Source.ml +++ b/infer/src/checkers/Source.ml @@ -120,7 +120,7 @@ module Dummy = struct let make_footprint _ _ = assert false - let get_footprint_access_path _ = assert false + let get_footprint_access_path _ = None let get _ _ _ = None diff --git a/infer/src/checkers/Trace.ml b/infer/src/checkers/Trace.ml index 3fc5f3cf5..baf24112e 100644 --- a/infer/src/checkers/Trace.ml +++ b/infer/src/checkers/Trace.ml @@ -29,7 +29,26 @@ module type S = sig include AbstractDomain.WithBottom with type astate := astate - module Sources = Source.Set + module Sources : sig + module Known : module type of AbstractDomain.FiniteSet (Source) + + module Footprint = AccessTree.PathSet + + type astate = {known: Known.astate; footprint: Footprint.astate} + + type t = astate + + val empty : t + + val is_empty : t -> bool + + val of_source : Source.t -> t + + val add : Source.t -> t -> t + + val get_footprint_indexes : t -> IntSet.t + end + module Sinks = Sink.Set module Passthroughs = Passthrough.Set @@ -87,10 +106,6 @@ module type S = sig val is_empty : t -> bool (** return true if this trace has no source or sink data *) - val compare : t -> t -> int - - val equal : t -> t -> bool - val pp : F.formatter -> t -> unit val pp_path : Typ.Procname.t -> F.formatter -> path -> unit @@ -133,7 +148,73 @@ end module Make (Spec : Spec) = struct include Spec - module Sources = Source.Set + + module Sources = struct + module Known = AbstractDomain.FiniteSet (Source) + module Footprint = AccessTree.PathSet + + type astate = {known: Known.astate; footprint: Footprint.astate} + + type t = astate + + let ( <= ) ~lhs ~rhs = + if phys_equal lhs rhs then true + else Known.( <= ) ~lhs:lhs.known ~rhs:rhs.known + && Footprint.( <= ) ~lhs:lhs.footprint ~rhs:rhs.footprint + + let join astate1 astate2 = + if phys_equal astate1 astate2 then astate1 + else + let known = Known.join astate1.known astate2.known in + let footprint = Footprint.join astate1.footprint astate2.footprint in + {known; footprint} + + let widen ~prev ~next ~num_iters = + if phys_equal prev next then prev + else + let known = Known.widen ~prev:prev.known ~next:next.known ~num_iters in + let footprint = Footprint.widen ~prev:prev.footprint ~next:next.footprint ~num_iters in + {known; footprint} + + let pp fmt {known; footprint} = + F.fprintf fmt "Known: %a@\nFootprint: %a@\n" Known.pp known Footprint.pp footprint + + let empty = {known= Known.empty; footprint= Footprint.empty} + + let is_empty {known; footprint} = Known.is_empty known && Footprint.BaseMap.is_empty footprint + + let add_footprint_access_path access_path footprint = + Footprint.add_trace access_path true footprint + + let of_source source = + match Source.get_footprint_access_path source with + | Some access_path + -> let footprint = add_footprint_access_path access_path Footprint.empty in + {empty with footprint} + | None + -> let known = Known.singleton source in + {empty with known} + + let add source astate = + match Source.get_footprint_access_path source with + | Some access_path + -> let footprint = add_footprint_access_path access_path astate.footprint in + {astate with footprint} + | None + -> let known = Known.add source astate.known in + {astate with known} + + let get_footprint_indexes {footprint} = + Footprint.BaseMap.fold + (fun base _ acc -> + match AccessPath.Abs.get_footprint_index_base base with + | Some footprint_index + -> IntSet.add footprint_index acc + | None + -> acc) + footprint IntSet.empty + end + module Sinks = Sink.Set module Passthroughs = Passthrough.Set module SourceExpander = Expander (Source) @@ -144,9 +225,6 @@ module Make (Spec : Spec) = struct ; sinks: Sinks.t (** last callees in the trace that transitively called a tainted function (if any) *) ; passthroughs: Passthrough.Set.t (** calls that occurred between source and sink *) } - [@@deriving compare] - - let equal = [%compare.equal : t] type astate = t @@ -190,7 +268,7 @@ module Make (Spec : Spec) = struct Sinks.fold report_one sinks acc0 in let report_sources source acc = report_source source t.sinks acc in - Sources.fold report_sources t.sources [] + Sources.Known.fold report_sources t.sources.known [] let pp_path cur_pname fmt (cur_passthroughs, sources_passthroughs, sinks_passthroughs) = let pp_passthroughs fmt passthroughs = @@ -241,7 +319,7 @@ module Make (Spec : Spec) = struct let expand_path source sink = let sources_of_pname pname = let trace = trace_of_pname pname in - (Sources.elements (sources trace), passthroughs trace) + (Sources.Known.elements (sources trace).known, passthroughs trace) in let sinks_of_pname pname = let trace = trace_of_pname pname in @@ -331,7 +409,7 @@ module Make (Spec : Spec) = struct ~init:trace_prefix sources_with_level let of_source source = - let sources = Sources.singleton source in + let sources = Sources.of_source source in let passthroughs = Passthroughs.empty in let sinks = Sinks.empty in {sources; passthroughs; sinks} @@ -348,38 +426,24 @@ module Make (Spec : Spec) = struct let update_sinks t sinks = {t with sinks} - let get_footprint_index source = - match Source.get_footprint_access_path source with - | Some access_path - -> AccessPath.Abs.get_footprint_index access_path - | None - -> None - - let get_footprint_indexes trace = - Sources.fold - (fun source acc -> - match get_footprint_index source with - | Some footprint_index - -> IntSet.add footprint_index acc - | None - -> acc) - (sources trace) IntSet.empty + let get_footprint_indexes trace = Sources.get_footprint_indexes trace.sources (** compute caller_trace + callee_trace *) let append caller_trace callee_trace callee_site = if is_empty callee_trace then caller_trace else - let non_footprint_callee_sources = - Sources.filter (fun source -> not (Source.is_footprint source)) callee_trace.sources - in + let non_footprint_callee_sources = callee_trace.sources.known in let sources = - if Sources.subset non_footprint_callee_sources caller_trace.sources then + if Sources.Known.subset non_footprint_callee_sources caller_trace.sources.known then caller_trace.sources else - List.map - ~f:(fun sink -> Source.with_callsite sink callee_site) - (Sources.elements non_footprint_callee_sources) - |> Sources.of_list |> Sources.union caller_trace.sources + let known = + List.map + ~f:(fun sink -> Source.with_callsite sink callee_site) + (Sources.Known.elements non_footprint_callee_sources) + |> Sources.Known.of_list |> Sources.Known.union caller_trace.sources.known + in + {caller_trace.sources with Sources.known= known} in let sinks = if Sinks.subset callee_trace.sinks caller_trace.sinks then caller_trace.sinks @@ -407,16 +471,22 @@ module Make (Spec : Spec) = struct let ( <= ) ~lhs ~rhs = phys_equal lhs rhs - || Sources.subset lhs.sources rhs.sources && Sinks.subset lhs.sinks rhs.sinks + || Sources.( <= ) ~lhs:lhs.sources ~rhs:rhs.sources && Sinks.subset lhs.sinks rhs.sinks && Passthroughs.subset lhs.passthroughs rhs.passthroughs let join t1 t2 = if phys_equal t1 t2 then t1 else - let sources = Sources.union t1.sources t2.sources in + let sources = Sources.join t1.sources t2.sources in let sinks = Sinks.union t1.sinks t2.sinks in let passthroughs = Passthroughs.union t1.passthroughs t2.passthroughs in {sources; sinks; passthroughs} - let widen ~prev ~next ~num_iters:_ = join prev next + let widen ~prev ~next ~num_iters = + if phys_equal prev next then prev + else + let sources = Sources.widen ~prev:prev.sources ~next:next.sources ~num_iters in + let sinks = Sinks.union prev.sinks next.sinks in + let passthroughs = Passthroughs.union prev.passthroughs next.passthroughs in + {sources; sinks; passthroughs} end diff --git a/infer/src/checkers/Trace.mli b/infer/src/checkers/Trace.mli index 73b57b999..13a4b76d4 100644 --- a/infer/src/checkers/Trace.mli +++ b/infer/src/checkers/Trace.mli @@ -29,7 +29,28 @@ module type S = sig include AbstractDomain.WithBottom with type astate := astate - module Sources = Source.Set + module Sources : sig + (** Set of sources returned by callees of the current function *) + module Known : module type of AbstractDomain.FiniteSet (Source) + + (** Set of access paths representing the sources that may flow in from the caller *) + module Footprint = AccessTree.PathSet + + type astate = {known: Known.astate; footprint: Footprint.astate} + + type t = astate + + val empty : t + + val is_empty : t -> bool + + val of_source : Source.t -> t + + val add : Source.t -> t -> t + + val get_footprint_indexes : t -> IntSet.t + end + module Sinks = Sink.Set module Passthroughs = Passthrough.Set @@ -90,10 +111,6 @@ module type S = sig val is_empty : t -> bool (** return true if this trace has no source or sink data *) - val compare : t -> t -> int - - val equal : t -> t -> bool - val pp : F.formatter -> t -> unit val pp_path : Typ.Procname.t -> F.formatter -> path -> unit diff --git a/infer/src/checkers/accessPath.ml b/infer/src/checkers/accessPath.ml index 243ac0a88..4212c73b6 100644 --- a/infer/src/checkers/accessPath.ml +++ b/infer/src/checkers/accessPath.ml @@ -210,14 +210,17 @@ module Abs = struct let _, base_typ = fst (extract access_path) in with_base (Var.of_formal_index formal_index, base_typ) access_path - let get_footprint_index access_path = - let raw_access_path = extract access_path in - match raw_access_path with - | (Var.LogicalVar id, _), _ when Ident.is_footprint id + let get_footprint_index_base base = + match base with + | Var.LogicalVar id, _ when Ident.is_footprint id -> Some (Ident.get_stamp id) | _ -> None + let get_footprint_index access_path = + let base, _ = extract access_path in + get_footprint_index_base base + let is_exact = function Exact _ -> true | Abstracted _ -> false let ( <= ) ~lhs ~rhs = diff --git a/infer/src/checkers/accessPath.mli b/infer/src/checkers/accessPath.mli index bacd0d54a..f2896c9fc 100644 --- a/infer/src/checkers/accessPath.mli +++ b/infer/src/checkers/accessPath.mli @@ -93,10 +93,14 @@ module Abs : sig val to_footprint : int -> t -> t (** replace the base var with a footprint variable rooted at formal index [formal_index] *) - val get_footprint_index : t -> int option + val get_footprint_index_base : base -> int option (** return the formal index associated with the base of this access path if there is one, or None otherwise *) + val get_footprint_index : t -> int option + (** return the formal index associated with the base of this access path if there is one, or None + otherwise *) + val with_base : base -> t -> t (** swap base of existing access path for [base_var] (e.g., `with_base_bvar x y.f.g` produces `x.f.g` *) diff --git a/infer/src/checkers/accessTree.ml b/infer/src/checkers/accessTree.ml index 99c8d8a24..da7e44ea8 100644 --- a/infer/src/checkers/accessTree.ml +++ b/infer/src/checkers/accessTree.ml @@ -319,3 +319,5 @@ module Make (TraceDomain : AbstractDomain.WithBottom) = struct let pp fmt base_tree = BaseMap.pp ~pp_value:pp_node fmt base_tree end + +module PathSet = Make (AbstractDomain.BooleanOr) diff --git a/infer/src/checkers/accessTree.mli b/infer/src/checkers/accessTree.mli index 94642067e..72b1f34ed 100644 --- a/infer/src/checkers/accessTree.mli +++ b/infer/src/checkers/accessTree.mli @@ -14,6 +14,7 @@ module type S = sig module TraceDomain : AbstractDomain.WithBottom module AccessMap : PrettyPrintable.PPMap with type key = AccessPath.access + module BaseMap = AccessPath.BaseMap type node = TraceDomain.astate * tree @@ -78,3 +79,6 @@ module type S = sig end module Make (TraceDomain : AbstractDomain.WithBottom) : S with module TraceDomain = TraceDomain + +(** Concise representation of a set of access paths *) +module PathSet : module type of Make (AbstractDomain.BooleanOr) diff --git a/infer/src/quandary/TaintAnalysis.ml b/infer/src/quandary/TaintAnalysis.ml index f7451be88..335e381ab 100644 --- a/infer/src/quandary/TaintAnalysis.ml +++ b/infer/src/quandary/TaintAnalysis.ml @@ -138,7 +138,7 @@ module Make (TaintSpecification : TaintSpec.S) = struct ~f:(fun source -> [%compare.equal : Source.Kind.t] kind (Source.kind source) && not (is_recursive source)) - (Sources.elements (sources trace)) + (Sources.Known.elements (sources trace).Sources.known) with | Some matching_source -> (Some access_path, matching_source) :: acc @@ -312,19 +312,17 @@ module Make (TaintSpecification : TaintSpec.S) = struct Option.map (get_caller_ap ap) ~f:get_caller_node in let replace_footprint_sources callee_trace caller_trace access_tree = - let replace_footprint_source source acc = - match TraceDomain.Source.get_footprint_access_path source with - | Some footprint_access_path -> ( + let replace_footprint_source acc footprint_access_path (is_mem, _) = + if is_mem then match get_caller_ap_node_opt footprint_access_path access_tree with | Some (_, (caller_ap_trace, _)) -> TraceDomain.join caller_ap_trace acc | None - -> acc ) - | None - -> acc + -> acc + else acc in - TraceDomain.Sources.fold replace_footprint_source (TraceDomain.sources callee_trace) - caller_trace + let {TraceDomain.Sources.footprint} = TraceDomain.sources callee_trace in + TraceDomain.Sources.Footprint.fold replace_footprint_source footprint caller_trace in let instantiate_and_report callee_trace caller_trace access_tree = let caller_trace' = replace_footprint_sources callee_trace caller_trace access_tree in @@ -427,18 +425,19 @@ module Make (TaintSpecification : TaintSpec.S) = struct let trace_with_propagation = List.fold ~f:exp_join_traces ~init:initial_trace actuals in - let filtered_sources = - TraceDomain.Sources.filter - (fun source -> - match TraceDomain.Source.get_footprint_access_path source with - | Some access_path - -> Option.exists - (AccessPath.get_typ (AccessPath.Abs.extract access_path) proc_data.tenv) - ~f:should_taint_typ - | None - -> true) - (TraceDomain.sources trace_with_propagation) + let sources = TraceDomain.sources trace_with_propagation in + let filtered_footprint = + TraceDomain.Sources.Footprint.fold + (fun acc access_path (is_mem, _) -> + if is_mem + && Option.exists + (AccessPath.get_typ (AccessPath.Abs.extract access_path) proc_data.tenv) + ~f:should_taint_typ + then TraceDomain.Sources.Footprint.add_trace access_path true acc + else acc) + sources.footprint TraceDomain.Sources.Footprint.empty in + let filtered_sources = {sources with footprint= filtered_footprint} in if TraceDomain.Sources.is_empty filtered_sources then access_tree else let trace' = TraceDomain.update_sources trace_with_propagation filtered_sources in @@ -608,21 +607,19 @@ module Make (TaintSpecification : TaintSpec.S) = struct (* if this trace has no sinks, we don't need to attach it to anything *) access_tree_acc else - TraceDomain.Sources.fold - (fun source acc -> - match TraceDomain.Source.get_footprint_access_path source with - | Some footprint_access_path - -> let node' = - match TaintDomain.get_node footprint_access_path acc with - | Some n - -> TaintDomain.node_join node n - | None - -> node - in - TaintDomain.add_node footprint_access_path node' acc - | None - -> acc) - (TraceDomain.sources trace) access_tree_acc) + TraceDomain.Sources.Footprint.fold + (fun acc footprint_access_path (is_mem, _) -> + if is_mem then + let node' = + match TaintDomain.get_node footprint_access_path acc with + | Some n + -> TaintDomain.node_join node n + | None + -> node + in + TaintDomain.add_node footprint_access_path node' acc + else acc) + (TraceDomain.sources trace).TraceDomain.Sources.footprint access_tree_acc) access_tree access_tree in (* should only be used on nodes associated with a footprint base *) diff --git a/infer/src/unit/TaintTests.ml b/infer/src/unit/TaintTests.ml index 10378a24d..e4f9850f3 100644 --- a/infer/src/unit/TaintTests.ml +++ b/infer/src/unit/TaintTests.ml @@ -69,9 +69,9 @@ let tests = let pp_sources fmt sources = if MockTrace.Sources.is_empty sources then F.fprintf fmt "?" else - MockTrace.Sources.iter + MockTrace.Sources.Known.iter (fun source -> pp_call_site fmt (MockTrace.Source.call_site source)) - sources + sources.MockTrace.Sources.known in let pp_sinks fmt sinks = if MockTrace.Sinks.is_empty sinks then F.fprintf fmt "?" diff --git a/infer/src/unit/TraceTests.ml b/infer/src/unit/TraceTests.ml index aa8e25fd7..3df384f82 100644 --- a/infer/src/unit/TraceTests.ml +++ b/infer/src/unit/TraceTests.ml @@ -83,6 +83,8 @@ module MockTrace = Trace.Make (struct [%compare.equal : MockTraceElem.t] (Source.kind source) (Sink.kind sink) end) +let trace_equal t1 t2 = MockTrace.( <= ) ~lhs:t1 ~rhs:t2 && MockTrace.( <= ) ~lhs:t2 ~rhs:t1 + let tests = let source1 = MockSource.make MockTraceElem.Kind1 CallSite.dummy in let source2 = MockSource.make MockTraceElem.Kind2 CallSite.dummy in @@ -125,7 +127,7 @@ let tests = let footprint_trace = MockTrace.of_source footprint_source |> MockTrace.add_sink sink1 in let expected_trace = MockTrace.of_source source1 |> MockTrace.add_sink sink1 in assert_bool "Appended trace should contain source and sink" - (MockTrace.equal (MockTrace.append source_trace footprint_trace call_site) expected_trace) + (trace_equal (MockTrace.append source_trace footprint_trace call_site) expected_trace) in "append" >:: append_ in diff --git a/infer/tests/codetoanalyze/cpp/quandary/execs.cpp b/infer/tests/codetoanalyze/cpp/quandary/execs.cpp index 90c835b9f..f5cba3a19 100644 --- a/infer/tests/codetoanalyze/cpp/quandary/execs.cpp +++ b/infer/tests/codetoanalyze/cpp/quandary/execs.cpp @@ -99,7 +99,7 @@ void customGetEnvOk() { return execl(NULL, source); } -void exec_flag_bad() { execl(FLAGS_cli_string, NULL); } +void FN_exec_flag_bad() { execl(FLAGS_cli_string, NULL); } void sql_on_env_var_bad() { std::string source = (std::string)std::getenv("ENV_VAR"); diff --git a/infer/tests/codetoanalyze/cpp/quandary/issues.exp b/infer/tests/codetoanalyze/cpp/quandary/issues.exp index ded6da025..4bb406b51 100644 --- a/infer/tests/codetoanalyze/cpp/quandary/issues.exp +++ b/infer/tests/codetoanalyze/cpp/quandary/issues.exp @@ -47,7 +47,6 @@ codetoanalyze/cpp/quandary/execs.cpp, execs::callExecBad, 29, QUANDARY_TAINT_ERR codetoanalyze/cpp/quandary/execs.cpp, execs::callExecBad, 31, QUANDARY_TAINT_ERROR, [Return from getenv,Call to execve] codetoanalyze/cpp/quandary/execs.cpp, execs::callExecBad, 33, QUANDARY_TAINT_ERROR, [Return from getenv,Call to system] codetoanalyze/cpp/quandary/execs.cpp, execs::callExecBad, 35, QUANDARY_TAINT_ERROR, [Return from getenv,Call to popen] -codetoanalyze/cpp/quandary/execs.cpp, execs::exec_flag_bad, 0, QUANDARY_TAINT_ERROR, [Return from execs::exec_flag_bad,Call to execl] codetoanalyze/cpp/quandary/execs.cpp, execs::sql_on_env_var_bad, 2, QUANDARY_TAINT_ERROR, [Return from getenv,Call to __infer_sql_sink] codetoanalyze/cpp/quandary/files.cpp, files::read_file_call_exec_bad1, 5, QUANDARY_TAINT_ERROR, [Return from std::basic_istream>_read,Call to execle] codetoanalyze/cpp/quandary/files.cpp, files::read_file_call_exec_bad2, 5, QUANDARY_TAINT_ERROR, [Return from std::basic_istream>_readsome,Call to execle]