[quandary] represent footprint as unified set of access path rather than conjunction of special sources

Summary:
In looking at summaries that Quandary took a long time to compute, one thing I notice frequently is redundancy in the footprint sources (e.g., I might see `Footprint(x), Footprint(x.f), Footprint(x*)`).
`sudo perf top` indicates that joining big sets of sources is a major performance bottleneck, and a large number of footprint sources is surely a big part of this (since we expect the number of non-footprint sources to be small).

This diff addresses the redundancy issue by using a more complex representation for a set of sources. The "known" sources are still in a set, but the footprint sources are now represented as a set of access paths (via an access trie).
The access path trie is a minimal representation of a set of access paths, so it would represent the example above as a simple `x*`.
This should make join/widen/<= faster and improve performance

Reviewed By: jberdine

Differential Revision: D5663980

fbshipit-source-id: 9fb66f8
master
Sam Blackshear 7 years ago committed by Facebook Github Bot
parent 3e6e76a2b2
commit 94ceebfef8

@ -262,6 +262,8 @@ end
module BooleanOr = struct
type astate = bool
let empty = false
let ( <= ) ~lhs ~rhs = not lhs || rhs
let join = ( || )

@ -99,4 +99,4 @@ module BooleanAnd : S with type astate = bool
(** Boolean domain ordered by ~p || q. Useful when you want a boolean that's true only when it's
true in one conditional branch. *)
module BooleanOr : S with type astate = bool
module BooleanOr : WithBottom with type astate = bool

@ -120,7 +120,7 @@ module Dummy = struct
let make_footprint _ _ = assert false
let get_footprint_access_path _ = assert false
let get_footprint_access_path _ = None
let get _ _ _ = None

@ -29,7 +29,26 @@ module type S = sig
include AbstractDomain.WithBottom with type astate := astate
module Sources = Source.Set
module Sources : sig
module Known : module type of AbstractDomain.FiniteSet (Source)
module Footprint = AccessTree.PathSet
type astate = {known: Known.astate; footprint: Footprint.astate}
type t = astate
val empty : t
val is_empty : t -> bool
val of_source : Source.t -> t
val add : Source.t -> t -> t
val get_footprint_indexes : t -> IntSet.t
end
module Sinks = Sink.Set
module Passthroughs = Passthrough.Set
@ -87,10 +106,6 @@ module type S = sig
val is_empty : t -> bool
(** return true if this trace has no source or sink data *)
val compare : t -> t -> int
val equal : t -> t -> bool
val pp : F.formatter -> t -> unit
val pp_path : Typ.Procname.t -> F.formatter -> path -> unit
@ -133,7 +148,73 @@ end
module Make (Spec : Spec) = struct
include Spec
module Sources = Source.Set
module Sources = struct
module Known = AbstractDomain.FiniteSet (Source)
module Footprint = AccessTree.PathSet
type astate = {known: Known.astate; footprint: Footprint.astate}
type t = astate
let ( <= ) ~lhs ~rhs =
if phys_equal lhs rhs then true
else Known.( <= ) ~lhs:lhs.known ~rhs:rhs.known
&& Footprint.( <= ) ~lhs:lhs.footprint ~rhs:rhs.footprint
let join astate1 astate2 =
if phys_equal astate1 astate2 then astate1
else
let known = Known.join astate1.known astate2.known in
let footprint = Footprint.join astate1.footprint astate2.footprint in
{known; footprint}
let widen ~prev ~next ~num_iters =
if phys_equal prev next then prev
else
let known = Known.widen ~prev:prev.known ~next:next.known ~num_iters in
let footprint = Footprint.widen ~prev:prev.footprint ~next:next.footprint ~num_iters in
{known; footprint}
let pp fmt {known; footprint} =
F.fprintf fmt "Known: %a@\nFootprint: %a@\n" Known.pp known Footprint.pp footprint
let empty = {known= Known.empty; footprint= Footprint.empty}
let is_empty {known; footprint} = Known.is_empty known && Footprint.BaseMap.is_empty footprint
let add_footprint_access_path access_path footprint =
Footprint.add_trace access_path true footprint
let of_source source =
match Source.get_footprint_access_path source with
| Some access_path
-> let footprint = add_footprint_access_path access_path Footprint.empty in
{empty with footprint}
| None
-> let known = Known.singleton source in
{empty with known}
let add source astate =
match Source.get_footprint_access_path source with
| Some access_path
-> let footprint = add_footprint_access_path access_path astate.footprint in
{astate with footprint}
| None
-> let known = Known.add source astate.known in
{astate with known}
let get_footprint_indexes {footprint} =
Footprint.BaseMap.fold
(fun base _ acc ->
match AccessPath.Abs.get_footprint_index_base base with
| Some footprint_index
-> IntSet.add footprint_index acc
| None
-> acc)
footprint IntSet.empty
end
module Sinks = Sink.Set
module Passthroughs = Passthrough.Set
module SourceExpander = Expander (Source)
@ -144,9 +225,6 @@ module Make (Spec : Spec) = struct
; sinks: Sinks.t
(** last callees in the trace that transitively called a tainted function (if any) *)
; passthroughs: Passthrough.Set.t (** calls that occurred between source and sink *) }
[@@deriving compare]
let equal = [%compare.equal : t]
type astate = t
@ -190,7 +268,7 @@ module Make (Spec : Spec) = struct
Sinks.fold report_one sinks acc0
in
let report_sources source acc = report_source source t.sinks acc in
Sources.fold report_sources t.sources []
Sources.Known.fold report_sources t.sources.known []
let pp_path cur_pname fmt (cur_passthroughs, sources_passthroughs, sinks_passthroughs) =
let pp_passthroughs fmt passthroughs =
@ -241,7 +319,7 @@ module Make (Spec : Spec) = struct
let expand_path source sink =
let sources_of_pname pname =
let trace = trace_of_pname pname in
(Sources.elements (sources trace), passthroughs trace)
(Sources.Known.elements (sources trace).known, passthroughs trace)
in
let sinks_of_pname pname =
let trace = trace_of_pname pname in
@ -331,7 +409,7 @@ module Make (Spec : Spec) = struct
~init:trace_prefix sources_with_level
let of_source source =
let sources = Sources.singleton source in
let sources = Sources.of_source source in
let passthroughs = Passthroughs.empty in
let sinks = Sinks.empty in
{sources; passthroughs; sinks}
@ -348,38 +426,24 @@ module Make (Spec : Spec) = struct
let update_sinks t sinks = {t with sinks}
let get_footprint_index source =
match Source.get_footprint_access_path source with
| Some access_path
-> AccessPath.Abs.get_footprint_index access_path
| None
-> None
let get_footprint_indexes trace =
Sources.fold
(fun source acc ->
match get_footprint_index source with
| Some footprint_index
-> IntSet.add footprint_index acc
| None
-> acc)
(sources trace) IntSet.empty
let get_footprint_indexes trace = Sources.get_footprint_indexes trace.sources
(** compute caller_trace + callee_trace *)
let append caller_trace callee_trace callee_site =
if is_empty callee_trace then caller_trace
else
let non_footprint_callee_sources =
Sources.filter (fun source -> not (Source.is_footprint source)) callee_trace.sources
in
let non_footprint_callee_sources = callee_trace.sources.known in
let sources =
if Sources.subset non_footprint_callee_sources caller_trace.sources then
if Sources.Known.subset non_footprint_callee_sources caller_trace.sources.known then
caller_trace.sources
else
List.map
~f:(fun sink -> Source.with_callsite sink callee_site)
(Sources.elements non_footprint_callee_sources)
|> Sources.of_list |> Sources.union caller_trace.sources
let known =
List.map
~f:(fun sink -> Source.with_callsite sink callee_site)
(Sources.Known.elements non_footprint_callee_sources)
|> Sources.Known.of_list |> Sources.Known.union caller_trace.sources.known
in
{caller_trace.sources with Sources.known= known}
in
let sinks =
if Sinks.subset callee_trace.sinks caller_trace.sinks then caller_trace.sinks
@ -407,16 +471,22 @@ module Make (Spec : Spec) = struct
let ( <= ) ~lhs ~rhs =
phys_equal lhs rhs
|| Sources.subset lhs.sources rhs.sources && Sinks.subset lhs.sinks rhs.sinks
|| Sources.( <= ) ~lhs:lhs.sources ~rhs:rhs.sources && Sinks.subset lhs.sinks rhs.sinks
&& Passthroughs.subset lhs.passthroughs rhs.passthroughs
let join t1 t2 =
if phys_equal t1 t2 then t1
else
let sources = Sources.union t1.sources t2.sources in
let sources = Sources.join t1.sources t2.sources in
let sinks = Sinks.union t1.sinks t2.sinks in
let passthroughs = Passthroughs.union t1.passthroughs t2.passthroughs in
{sources; sinks; passthroughs}
let widen ~prev ~next ~num_iters:_ = join prev next
let widen ~prev ~next ~num_iters =
if phys_equal prev next then prev
else
let sources = Sources.widen ~prev:prev.sources ~next:next.sources ~num_iters in
let sinks = Sinks.union prev.sinks next.sinks in
let passthroughs = Passthroughs.union prev.passthroughs next.passthroughs in
{sources; sinks; passthroughs}
end

@ -29,7 +29,28 @@ module type S = sig
include AbstractDomain.WithBottom with type astate := astate
module Sources = Source.Set
module Sources : sig
(** Set of sources returned by callees of the current function *)
module Known : module type of AbstractDomain.FiniteSet (Source)
(** Set of access paths representing the sources that may flow in from the caller *)
module Footprint = AccessTree.PathSet
type astate = {known: Known.astate; footprint: Footprint.astate}
type t = astate
val empty : t
val is_empty : t -> bool
val of_source : Source.t -> t
val add : Source.t -> t -> t
val get_footprint_indexes : t -> IntSet.t
end
module Sinks = Sink.Set
module Passthroughs = Passthrough.Set
@ -90,10 +111,6 @@ module type S = sig
val is_empty : t -> bool
(** return true if this trace has no source or sink data *)
val compare : t -> t -> int
val equal : t -> t -> bool
val pp : F.formatter -> t -> unit
val pp_path : Typ.Procname.t -> F.formatter -> path -> unit

@ -210,14 +210,17 @@ module Abs = struct
let _, base_typ = fst (extract access_path) in
with_base (Var.of_formal_index formal_index, base_typ) access_path
let get_footprint_index access_path =
let raw_access_path = extract access_path in
match raw_access_path with
| (Var.LogicalVar id, _), _ when Ident.is_footprint id
let get_footprint_index_base base =
match base with
| Var.LogicalVar id, _ when Ident.is_footprint id
-> Some (Ident.get_stamp id)
| _
-> None
let get_footprint_index access_path =
let base, _ = extract access_path in
get_footprint_index_base base
let is_exact = function Exact _ -> true | Abstracted _ -> false
let ( <= ) ~lhs ~rhs =

@ -93,10 +93,14 @@ module Abs : sig
val to_footprint : int -> t -> t
(** replace the base var with a footprint variable rooted at formal index [formal_index] *)
val get_footprint_index : t -> int option
val get_footprint_index_base : base -> int option
(** return the formal index associated with the base of this access path if there is one, or None
otherwise *)
val get_footprint_index : t -> int option
(** return the formal index associated with the base of this access path if there is one, or None
otherwise *)
val with_base : base -> t -> t
(** swap base of existing access path for [base_var] (e.g., `with_base_bvar x y.f.g` produces
`x.f.g` *)

@ -319,3 +319,5 @@ module Make (TraceDomain : AbstractDomain.WithBottom) = struct
let pp fmt base_tree = BaseMap.pp ~pp_value:pp_node fmt base_tree
end
module PathSet = Make (AbstractDomain.BooleanOr)

@ -14,6 +14,7 @@ module type S = sig
module TraceDomain : AbstractDomain.WithBottom
module AccessMap : PrettyPrintable.PPMap with type key = AccessPath.access
module BaseMap = AccessPath.BaseMap
type node = TraceDomain.astate * tree
@ -78,3 +79,6 @@ module type S = sig
end
module Make (TraceDomain : AbstractDomain.WithBottom) : S with module TraceDomain = TraceDomain
(** Concise representation of a set of access paths *)
module PathSet : module type of Make (AbstractDomain.BooleanOr)

@ -138,7 +138,7 @@ module Make (TaintSpecification : TaintSpec.S) = struct
~f:(fun source ->
[%compare.equal : Source.Kind.t] kind (Source.kind source)
&& not (is_recursive source))
(Sources.elements (sources trace))
(Sources.Known.elements (sources trace).Sources.known)
with
| Some matching_source
-> (Some access_path, matching_source) :: acc
@ -312,19 +312,17 @@ module Make (TaintSpecification : TaintSpec.S) = struct
Option.map (get_caller_ap ap) ~f:get_caller_node
in
let replace_footprint_sources callee_trace caller_trace access_tree =
let replace_footprint_source source acc =
match TraceDomain.Source.get_footprint_access_path source with
| Some footprint_access_path -> (
let replace_footprint_source acc footprint_access_path (is_mem, _) =
if is_mem then
match get_caller_ap_node_opt footprint_access_path access_tree with
| Some (_, (caller_ap_trace, _))
-> TraceDomain.join caller_ap_trace acc
| None
-> acc )
| None
-> acc
-> acc
else acc
in
TraceDomain.Sources.fold replace_footprint_source (TraceDomain.sources callee_trace)
caller_trace
let {TraceDomain.Sources.footprint} = TraceDomain.sources callee_trace in
TraceDomain.Sources.Footprint.fold replace_footprint_source footprint caller_trace
in
let instantiate_and_report callee_trace caller_trace access_tree =
let caller_trace' = replace_footprint_sources callee_trace caller_trace access_tree in
@ -427,18 +425,19 @@ module Make (TaintSpecification : TaintSpec.S) = struct
let trace_with_propagation =
List.fold ~f:exp_join_traces ~init:initial_trace actuals
in
let filtered_sources =
TraceDomain.Sources.filter
(fun source ->
match TraceDomain.Source.get_footprint_access_path source with
| Some access_path
-> Option.exists
(AccessPath.get_typ (AccessPath.Abs.extract access_path) proc_data.tenv)
~f:should_taint_typ
| None
-> true)
(TraceDomain.sources trace_with_propagation)
let sources = TraceDomain.sources trace_with_propagation in
let filtered_footprint =
TraceDomain.Sources.Footprint.fold
(fun acc access_path (is_mem, _) ->
if is_mem
&& Option.exists
(AccessPath.get_typ (AccessPath.Abs.extract access_path) proc_data.tenv)
~f:should_taint_typ
then TraceDomain.Sources.Footprint.add_trace access_path true acc
else acc)
sources.footprint TraceDomain.Sources.Footprint.empty
in
let filtered_sources = {sources with footprint= filtered_footprint} in
if TraceDomain.Sources.is_empty filtered_sources then access_tree
else
let trace' = TraceDomain.update_sources trace_with_propagation filtered_sources in
@ -608,21 +607,19 @@ module Make (TaintSpecification : TaintSpec.S) = struct
(* if this trace has no sinks, we don't need to attach it to anything *)
access_tree_acc
else
TraceDomain.Sources.fold
(fun source acc ->
match TraceDomain.Source.get_footprint_access_path source with
| Some footprint_access_path
-> let node' =
match TaintDomain.get_node footprint_access_path acc with
| Some n
-> TaintDomain.node_join node n
| None
-> node
in
TaintDomain.add_node footprint_access_path node' acc
| None
-> acc)
(TraceDomain.sources trace) access_tree_acc)
TraceDomain.Sources.Footprint.fold
(fun acc footprint_access_path (is_mem, _) ->
if is_mem then
let node' =
match TaintDomain.get_node footprint_access_path acc with
| Some n
-> TaintDomain.node_join node n
| None
-> node
in
TaintDomain.add_node footprint_access_path node' acc
else acc)
(TraceDomain.sources trace).TraceDomain.Sources.footprint access_tree_acc)
access_tree access_tree
in
(* should only be used on nodes associated with a footprint base *)

@ -69,9 +69,9 @@ let tests =
let pp_sources fmt sources =
if MockTrace.Sources.is_empty sources then F.fprintf fmt "?"
else
MockTrace.Sources.iter
MockTrace.Sources.Known.iter
(fun source -> pp_call_site fmt (MockTrace.Source.call_site source))
sources
sources.MockTrace.Sources.known
in
let pp_sinks fmt sinks =
if MockTrace.Sinks.is_empty sinks then F.fprintf fmt "?"

@ -83,6 +83,8 @@ module MockTrace = Trace.Make (struct
[%compare.equal : MockTraceElem.t] (Source.kind source) (Sink.kind sink)
end)
let trace_equal t1 t2 = MockTrace.( <= ) ~lhs:t1 ~rhs:t2 && MockTrace.( <= ) ~lhs:t2 ~rhs:t1
let tests =
let source1 = MockSource.make MockTraceElem.Kind1 CallSite.dummy in
let source2 = MockSource.make MockTraceElem.Kind2 CallSite.dummy in
@ -125,7 +127,7 @@ let tests =
let footprint_trace = MockTrace.of_source footprint_source |> MockTrace.add_sink sink1 in
let expected_trace = MockTrace.of_source source1 |> MockTrace.add_sink sink1 in
assert_bool "Appended trace should contain source and sink"
(MockTrace.equal (MockTrace.append source_trace footprint_trace call_site) expected_trace)
(trace_equal (MockTrace.append source_trace footprint_trace call_site) expected_trace)
in
"append" >:: append_
in

@ -99,7 +99,7 @@ void customGetEnvOk() {
return execl(NULL, source);
}
void exec_flag_bad() { execl(FLAGS_cli_string, NULL); }
void FN_exec_flag_bad() { execl(FLAGS_cli_string, NULL); }
void sql_on_env_var_bad() {
std::string source = (std::string)std::getenv("ENV_VAR");

@ -47,7 +47,6 @@ codetoanalyze/cpp/quandary/execs.cpp, execs::callExecBad, 29, QUANDARY_TAINT_ERR
codetoanalyze/cpp/quandary/execs.cpp, execs::callExecBad, 31, QUANDARY_TAINT_ERROR, [Return from getenv,Call to execve]
codetoanalyze/cpp/quandary/execs.cpp, execs::callExecBad, 33, QUANDARY_TAINT_ERROR, [Return from getenv,Call to system]
codetoanalyze/cpp/quandary/execs.cpp, execs::callExecBad, 35, QUANDARY_TAINT_ERROR, [Return from getenv,Call to popen]
codetoanalyze/cpp/quandary/execs.cpp, execs::exec_flag_bad, 0, QUANDARY_TAINT_ERROR, [Return from execs::exec_flag_bad,Call to execl]
codetoanalyze/cpp/quandary/execs.cpp, execs::sql_on_env_var_bad, 2, QUANDARY_TAINT_ERROR, [Return from getenv,Call to __infer_sql_sink]
codetoanalyze/cpp/quandary/files.cpp, files::read_file_call_exec_bad1, 5, QUANDARY_TAINT_ERROR, [Return from std::basic_istream<char,std::char_traits<char>>_read,Call to execle]
codetoanalyze/cpp/quandary/files.cpp, files::read_file_call_exec_bad2, 5, QUANDARY_TAINT_ERROR, [Return from std::basic_istream<char,std::char_traits<char>>_readsome,Call to execle]

Loading…
Cancel
Save