[quandary] summaries are access trees too

Summary:
Previously, summaries worked by flattening the access tree representing the post of the procedure into (in essence) a list of functions from caller input traces to callee output traces.
This is inefficient in many ways, and is also much more complex than just using the original access tree as the summary.

One big inefficiency of the old way is this: calling `Trace.append` is slow, and we want to do it as few times as possible.
Under the old summary system, we would do it at most once for each "function" in the summary list.
Now, we'll do it at most once for each node in the access tree summary.
This will be a smaller number of calls, since each node can summarize many input/output relationships.

Reviewed By: jeremydubreil

Differential Revision: D4271579

fbshipit-source-id: 34e407a
master
Sam Blackshear 8 years ago committed by Facebook Github Bot
parent cd1c9750f4
commit 2a567d3abe

@ -234,7 +234,7 @@ module Make (Spec : Spec) = struct
let between_start_and_end passthrough =
let passthrough_line = line_number (Passthrough.site passthrough) in
match passthrough_kind with
| Source -> passthrough_line <= start_line
| Source -> passthrough_line >= end_line
| Sink -> passthrough_line <= end_line
| Top_level -> passthrough_line >= start_line && passthrough_line <= end_line in
Passthrough.Set.filter between_start_and_end passthroughs in

@ -15,11 +15,12 @@ module L = Logging
include
TaintAnalysis.Make(struct
module Trace = CppTrace
module AccessTree = AccessTree.Make(Trace)
let to_summary_trace trace = QuandarySummary.Cpp trace
let to_summary_access_tree tree = QuandarySummary.AccessTree.Cpp tree
let of_summary_trace = function
| QuandarySummary.Cpp trace -> trace
let of_summary_access_tree = function
| QuandarySummary.AccessTree.Cpp tree -> tree
| _ -> assert false
let handle_unknown_call _ _ =

@ -15,11 +15,12 @@ module L = Logging
include
TaintAnalysis.Make(struct
module Trace = JavaTrace
module AccessTree = AccessTree.Make(Trace)
let to_summary_trace trace = QuandarySummary.Java trace
let to_summary_access_tree access_tree = QuandarySummary.AccessTree.Java access_tree
let of_summary_trace = function
| QuandarySummary.Java trace -> trace
let of_summary_access_tree = function
| QuandarySummary.AccessTree.Java access_tree -> access_tree
| _ -> assert false
let handle_unknown_call pname ret_typ_opt =

@ -14,94 +14,19 @@ open! IStd
module F = Format
module L = Logging
type input =
| In_formal of int * AccessPath.t
(** input flows via parameter at given index at offset in given access path *)
| In_global of AccessPath.t
(** input flows from global at offset in given access path *)
| In_empty
(** no input value *)
module Java = AccessTree.Make(JavaTrace)
module Cpp = AccessTree.Make(CppTrace)
type output =
| Out_formal of int * AccessPath.t
(** output flows into parameter at given index at offset in given access path *)
| Out_global of AccessPath.t
(** output flows into global at offset in given access path *)
| Out_return of AccessPath.t
(** output flows into return value at offset in given access path *)
module AccessTree = struct
type t =
| Java of Java.t
| Cpp of Cpp.t
(** enumeration of all the different trace types that are possible (just Java, for now). needed
because we can't functorize Specs.summary's *)
type summary_trace =
| Java of JavaTrace.t
| Cpp of CppTrace.t
| Unknown
let pp fmt = function
| Java access_tree -> Java.pp fmt access_tree
| Cpp access_tree -> Cpp.pp fmt access_tree
end
(** input-output summary for a pair of values. intuitively, read it as
"if [input] is associated with trace T before the call, then [output] is
associated with trace (T + [output_trace]) after the call" (where `+` denotes trace
concatenation). *)
type in_out_summary =
{
input : input;
output : output;
output_trace : summary_trace;
}
type t = AccessTree.t
type t = in_out_summary list
let make_formal_input index access_path =
In_formal (index, access_path)
let make_global_input access_path =
In_global access_path
let empty_input = In_empty
let make_formal_output index access_path =
Out_formal (index, access_path)
let make_global_output access_path =
Out_global access_path
let make_return_output access_path =
Out_return access_path
let make_in_out_summary input output output_trace =
{ input; output; output_trace; }
let pp_trace fmt = function
| Java trace -> JavaTrace.pp fmt trace
| Cpp trace -> CppTrace.pp fmt trace
| Unknown -> assert false
let pp_input fmt = function
| In_formal (index, access_path) -> F.fprintf fmt "%a (formal %d)" AccessPath.pp access_path index
| In_global access_path -> F.fprintf fmt "%a (global)" AccessPath.pp access_path
| In_empty -> F.fprintf fmt "_"
let pp_output fmt = function
| Out_formal (index, access_path) ->
F.fprintf fmt "%a (formal %d)" AccessPath.pp access_path index
| Out_global access_path ->
F.fprintf fmt "%a (global)" AccessPath.pp access_path
| Out_return access_path ->
F.fprintf fmt "%a (return)" AccessPath.pp access_path
let pp_in_out_summary fmt summary =
match summary.input with
| In_empty ->
F.fprintf fmt
"%a => (%a, %a)"
pp_input summary.input
pp_output summary.output
pp_trace summary.output_trace
| In_formal _ | In_global _ ->
F.fprintf fmt
"(%a, T) => (%a, T :: %a)"
pp_input summary.input
pp_output summary.output
pp_trace summary.output_trace
let pp fmt t =
PrettyPrintable.pp_collection ~pp_item:pp_in_out_summary fmt t
let pp = AccessTree.pp

@ -14,65 +14,15 @@ open! IStd
module F = Format
type input =
private
| In_formal of int * AccessPath.t
(** input flows via parameter at given index at offset in given access path *)
| In_global of AccessPath.t
(** input flows from global at offset in given access path *)
| In_empty
(** no input value *)
module Java : module type of (AccessTree.Make(JavaTrace))
module Cpp : module type of (AccessTree.Make(CppTrace))
type output =
private
| Out_formal of int * AccessPath.t
(** output flows into parameter at given index at offset in given access path *)
| Out_global of AccessPath.t
(** output flows into global at offset in given access path *)
| Out_return of AccessPath.t
(** output flows into return value at offset in given access path *)
module AccessTree : sig
type t =
| Java of Java.t
| Cpp of Cpp.t
end
(** enumeration of all the different trace types that are possible (just Java, for now). needed
because we can't functorize Specs.summary's *)
type summary_trace =
| Java of JavaTrace.t
| Cpp of CppTrace.t
| Unknown
(** input-output summary for a pair of values. intuitively, read it as
"if [input] is associated with trace T before the call, then [output] is
associated with trace (T + [output_trace]) after the call" (where `+` denotes trace
concatenation). *)
type in_out_summary =
private
{
input : input;
output : output;
output_trace : summary_trace;
}
type t = in_out_summary list
val make_formal_input : int -> AccessPath.t -> input
val make_global_input : AccessPath.t -> input
val empty_input : input
val make_formal_output : int -> AccessPath.t -> output
val make_global_output : AccessPath.t -> output
val make_return_output : AccessPath.t -> output
val make_in_out_summary : input -> output -> summary_trace -> in_out_summary
val pp_trace : F.formatter -> summary_trace -> unit
val pp_input : F.formatter -> input -> unit
val pp_output : F.formatter -> output -> unit
val pp_in_out_summary : F.formatter -> in_out_summary -> unit
type t = AccessTree.t
val pp : F.formatter -> t -> unit

@ -12,7 +12,15 @@ open! IStd
module F = Format
module L = Logging
(** Create a taint analysis from a trace domain *)
(** Create a taint analysis from a specification *)
module Make (TaintSpecification : TaintSpec.S) = struct
module TraceDomain = TaintSpecification.Trace
module TaintDomain = TaintSpecification.AccessTree
module IdMapDomain = IdAccessPathMapDomain
(** map from formals to their position *)
type formal_map = int AccessPath.BaseMap.t
module Summary = Summary.Make(struct
type summary = QuandarySummary.t
@ -22,22 +30,10 @@ module Summary = Summary.Make(struct
let read_from_payload payload =
match payload.Specs.quandary with
| None ->
(* abstract/native/interface methods will have None as a summary. treat them as skip *)
Some []
| summary_opt ->
summary_opt
| None -> Some (TaintSpecification.to_summary_access_tree TaintDomain.initial)
| summary_opt -> summary_opt
end)
module Make (TaintSpecification : TaintSpec.S) = struct
module TraceDomain = TaintSpecification.Trace
module TaintDomain = AccessTree.Make (TraceDomain)
module IdMapDomain = IdAccessPathMapDomain
(** map from formals to their position *)
type formal_map = int AccessPath.BaseMap.t
module Domain = struct
type astate =
{
@ -82,6 +78,12 @@ module Make (TaintSpecification : TaintSpec.S) = struct
| Var.ProgramVar pvar -> Pvar.is_global pvar
| Var.LogicalVar _ -> false
let make_footprint_var formal_num =
Var.of_id (Ident.create_footprint Ident.name_spec formal_num)
let make_footprint_access_path formal_num access_path =
AccessPath.with_base_var (make_footprint_var formal_num) access_path
module TransferFunctions (CFG : ProcCfg.S) = struct
module CFG = CFG
module Domain = Domain
@ -104,13 +106,22 @@ module Make (TaintSpecification : TaintSpec.S) = struct
match TaintDomain.get_node access_path access_tree with
| Some _ as node_opt ->
node_opt
| None when is_rooted_in_environment access_path proc_data.extras ->
let call_site = CallSite.make (Procdesc.get_proc_name proc_data.ProcData.pdesc) loc in
let trace =
TraceDomain.of_source (TraceDomain.Source.make_footprint access_path call_site) in
Some (TaintDomain.make_normal_leaf trace)
| None ->
None
let make_footprint_trace footprint_ap =
let call_site =
CallSite.make (Procdesc.get_proc_name proc_data.ProcData.pdesc) loc in
let trace =
TraceDomain.of_source
(TraceDomain.Source.make_footprint footprint_ap call_site) in
Some (TaintDomain.make_normal_leaf trace) in
let root, _ = AccessPath.extract access_path in
try
let formal_num = AccessPath.BaseMap.find root proc_data.extras in
make_footprint_trace (make_footprint_access_path formal_num access_path)
with Not_found ->
if is_global root
then make_footprint_trace access_path
else None
(* get the trace associated with [access_path] in [access_tree]. *)
let access_path_get_trace access_path access_tree proc_data loc =
@ -159,9 +170,10 @@ module Make (TaintSpecification : TaintSpec.S) = struct
let trace_of_pname pname =
match Summary.read_summary proc_data.pdesc pname with
| Some summary ->
let join_output_trace acc { QuandarySummary.output_trace; } =
TraceDomain.join (TaintSpecification.of_summary_trace output_trace) acc in
IList.fold_left join_output_trace TraceDomain.initial summary
TaintDomain.fold
(fun acc _ trace -> TraceDomain.join trace acc)
(TaintSpecification.of_summary_access_tree summary)
TraceDomain.initial
| None ->
TraceDomain.initial in
@ -219,81 +231,95 @@ module Make (TaintSpecification : TaintSpec.S) = struct
{ astate with Domain.access_tree = access_tree'; }
let apply_summary
ret_id
ret_opt
actuals
summary
(astate_in : Domain.astate)
proc_data
callee_site =
let callee_loc = CallSite.loc callee_site in
let caller_access_tree = astate_in.access_tree in
let apply_return ret_ap = function
let get_caller_ap formal_ap =
let apply_return ret_ap = match ret_opt with
| Some (ret_id, _) -> AccessPath.with_base_var (Var.of_id ret_id) ret_ap
| None -> failwith "Have summary for retval, but no ret id to bind it to!" in
let get_actual_ap_trace formal_num formal_ap access_tree =
let get_actual_ap formal_num =
let f_resolve_id = resolve_id astate_in.id_map in
try
let actual_exp, actual_typ =
try IList.nth actuals formal_num
with Failure _ -> failwithf "Bad formal number %d" formal_num in
AccessPath.of_lhs_exp actual_exp actual_typ ~f_resolve_id in
IList.nth actuals formal_num in
AccessPath.of_lhs_exp actual_exp actual_typ ~f_resolve_id
with Failure _ ->
None in
let project ~formal_ap ~actual_ap =
let projected_ap = AccessPath.append actual_ap (snd (AccessPath.extract formal_ap)) in
if AccessPath.is_exact formal_ap
then AccessPath.Exact projected_ap
else AccessPath.Abstracted projected_ap in
match get_actual_ap formal_num with
let base_var, _ = fst (AccessPath.extract formal_ap) in
match base_var with
| Var.ProgramVar pvar ->
if Pvar.is_return pvar
then Some (apply_return formal_ap)
else Some formal_ap
| Var.LogicalVar id ->
begin
(* summaries store the index of the formal parameter in the ident stamp *)
match get_actual_ap (Ident.get_stamp id) with
| Some actual_ap ->
let projected_ap = project ~formal_ap ~actual_ap in
let projected_trace =
access_path_get_trace projected_ap access_tree proc_data callee_loc in
Some (projected_ap, projected_trace)
Some projected_ap
| None ->
None in
None
end in
let apply_one access_tree (in_out_summary : QuandarySummary.in_out_summary) =
let in_trace = match in_out_summary.input with
| In_empty ->
TraceDomain.initial
| In_formal (formal_num, formal_ap) ->
begin
match get_actual_ap_trace formal_num formal_ap access_tree with
| Some (_, actual_trace) -> actual_trace
| None -> TraceDomain.initial
end
| In_global global_ap ->
access_path_get_trace global_ap access_tree proc_data callee_loc in
let caller_ap_trace_opt =
match in_out_summary.output with
| Out_return ret_ap ->
let caller_ret_ap = apply_return ret_ap ret_id in
let ret_trace =
access_path_get_trace caller_ret_ap access_tree proc_data callee_loc in
Some (caller_ret_ap, ret_trace)
| Out_formal (formal_num, formal_ap) ->
get_actual_ap_trace formal_num formal_ap access_tree
| Out_global global_ap ->
let global_trace = access_path_get_trace global_ap access_tree proc_data callee_loc in
Some (global_ap, global_trace) in
match caller_ap_trace_opt with
| Some (caller_ap, caller_trace) ->
let output_trace = TaintSpecification.of_summary_trace in_out_summary.output_trace in
let appended_trace = TraceDomain.append in_trace output_trace callee_site in
let joined_trace = TraceDomain.join caller_trace appended_trace in
if phys_equal appended_trace joined_trace
then
access_tree
else
begin
report_trace joined_trace callee_site proc_data;
TaintDomain.add_trace caller_ap joined_trace access_tree
end
let get_caller_ap_node ap =
match get_caller_ap ap with
| Some caller_ap ->
let caller_node_opt =
access_path_get_node caller_ap caller_access_tree proc_data callee_loc in
let caller_node = match caller_node_opt with
| Some caller_node -> caller_node | None -> TaintDomain.empty_node in
caller_ap, caller_node
| None ->
access_tree in
ap, TaintDomain.empty_node in
let access_tree = IList.fold_left apply_one astate_in.access_tree summary in
let replace_footprint_sources callee_trace caller_trace =
let replace_footprint_source source acc =
match TraceDomain.Source.get_footprint_access_path source with
| Some footprint_access_path ->
let _, (caller_ap_trace, _) = get_caller_ap_node footprint_access_path in
TraceDomain.join caller_ap_trace acc
| None ->
acc in
TraceDomain.Sources.fold
replace_footprint_source (TraceDomain.sources callee_trace) caller_trace in
let add_to_caller_tree acc callee_ap callee_trace =
let can_assign ap =
let (base, _), accesses = AccessPath.extract ap in
match base with
| Var.LogicalVar id ->
(* Java is pass-by-value, so we can't assign directly to a formal *)
Ident.is_footprint id && accesses <> []
| Var.ProgramVar pvar ->
(* can't assign to callee locals in the caller *)
Pvar.is_global pvar || Pvar.is_return pvar in
let caller_ap, (caller_trace, caller_tree) = get_caller_ap_node callee_ap in
let caller_trace' = replace_footprint_sources callee_trace caller_trace in
let appended_trace =
TraceDomain.append caller_trace' callee_trace callee_site in
report_trace appended_trace callee_site proc_data;
if can_assign callee_ap
then TaintDomain.add_node caller_ap (appended_trace, caller_tree) acc
else acc in
let access_tree =
TaintDomain.fold
add_to_caller_tree
(TaintSpecification.of_summary_access_tree summary)
caller_access_tree in
{ astate_in with access_tree; }
let exec_instr (astate : Domain.astate) (proc_data : formal_map ProcData.t) _ instr =
@ -305,7 +331,7 @@ module Make (TaintSpecification : TaintSpec.S) = struct
analyze_id_assignment (Var.of_pvar lhs_pvar) rhs_exp lhs_typ astate
| Sil.Store (Exp.Lvar lhs_pvar, _, Exp.Exn _, _) when Pvar.is_return lhs_pvar ->
(* the Java frontend translates `throw Exception` as `return Exception`, which is a bit
wonky. this tranlsation causes problems for us in computing a summary when an
wonky. this translation causes problems for us in computing a summary when an
exception is "returned" from a void function. skip code like this for now
(fix via t14159157 later *)
astate
@ -470,83 +496,21 @@ module Make (TaintSpecification : TaintSpec.S) = struct
(Scheduler.ReversePostorder)
(TransferFunctions)
(** grab footprint traces in [access_tree] and make them into inputs for the summary. for each
trace Footprint(T_out, a.b.c) associated with access path x.z.y, we will produce a summary of
the form (x.z.y, T_in) => (T_in + T_out, a.b.c) *)
let make_summary formal_map access_tree =
let is_return (var, _) = match var with
| Var.ProgramVar pvar -> Pvar.is_return pvar
| Var.LogicalVar _ -> false in
let add_summaries_for_trace summary_acc access_path trace =
let summary_trace = TaintSpecification.to_summary_trace trace in
let output_opt =
let base, accesses = AccessPath.extract access_path in
match AccessPath.BaseMap.find base formal_map with
| index ->
(* Java is pass-by-value. thus, summaries should not include assignments to the formal
parameters (first part of the check) . however, they should reflect when a formal
passes through a sink (second part of the check) *)
if accesses = [] && TraceDomain.Sinks.is_empty (TraceDomain.sinks trace)
(* TODO: and if [base] is not passed by reference, for C/C++/Obj-C *)
then None
else Some (QuandarySummary.make_formal_output index access_path)
| exception Not_found ->
if is_return base
then Some (QuandarySummary.make_return_output access_path)
else if is_global base
then Some (QuandarySummary.make_global_output access_path)
else None in
let add_summary_for_source source acc =
match TraceDomain.Source.get_footprint_access_path source with
| Some footprint_ap ->
let footprint_ap_base = fst (AccessPath.extract footprint_ap) in
begin
match AccessPath.BaseMap.find footprint_ap_base formal_map with
| formal_index ->
let input = QuandarySummary.make_formal_input formal_index footprint_ap in
begin
match output_opt with
| Some output ->
(QuandarySummary.make_in_out_summary input output summary_trace) :: acc
| None ->
if not (TraceDomain.Sinks.is_empty (TraceDomain.sinks trace))
then
let output =
QuandarySummary.make_formal_output formal_index footprint_ap in
(QuandarySummary.make_in_out_summary input output summary_trace) :: acc
else
(* output access path is same as input access path and there were no sinks
in this function. summary would be the identity function *)
acc
end
| exception Not_found ->
if is_global footprint_ap_base
then
let input = QuandarySummary.make_global_input footprint_ap in
let output =
match output_opt with
| Some output -> output
| None -> QuandarySummary.make_global_output footprint_ap in
(QuandarySummary.make_in_out_summary input output summary_trace) :: acc
else
failwithf
"Couldn't find formal number for %a@." AccessPath.pp_base footprint_ap_base
end
| None ->
begin
match output_opt with
| Some output ->
let summary =
QuandarySummary.make_in_out_summary
QuandarySummary.empty_input output summary_trace in
summary :: acc
| None ->
acc
end in
let access_tree' =
AccessPath.BaseMap.fold
(fun base node acc ->
let base' =
try
let formal_index = AccessPath.BaseMap.find base formal_map in
make_footprint_var formal_index, snd base
with Not_found ->
base in
AccessPath.BaseMap.add base' node acc)
access_tree
TaintDomain.initial in
TraceDomain.Source.Set.fold add_summary_for_source (TraceDomain.sources trace) summary_acc in
TaintDomain.fold add_summaries_for_trace access_tree []
TaintSpecification.to_summary_access_tree access_tree'
module Interprocedural = AbstractInterpreter.Interprocedural(Summary)

@ -18,14 +18,13 @@ type handle_unknown =
module type S = sig
module Trace : Trace.S
module AccessTree : module type of AccessTree.Make(Trace)
(** return a summary for handling an unknown call at the given site with the given return type
and actuals *)
val handle_unknown_call : Procname.t -> Typ.t option -> handle_unknown list
(** convert a trace type into a summary trace. can be killed if we functorize specs.ml *)
val to_summary_trace : Trace.t -> QuandarySummary.summary_trace
val to_summary_access_tree : AccessTree.t -> QuandarySummary.AccessTree.t
(** convert summary trace into a trace type. can be killed if we functorized specs.ml *)
val of_summary_trace : QuandarySummary.summary_trace -> Trace.t
val of_summary_access_tree : QuandarySummary.AccessTree.t -> AccessTree.t
end

@ -64,9 +64,10 @@ module MockTrace = Trace.Make(struct
module MockTaintAnalysis = TaintAnalysis.Make(struct
module Trace = MockTrace
module AccessTree = AccessTree.Make(Trace)
let of_summary_trace _ = assert false
let to_summary_trace _ = assert false
let of_summary_access_tree _ = assert false
let to_summary_access_tree _ = assert false
let handle_unknown_call _ _ = []
end)

@ -115,7 +115,7 @@ codetoanalyze/java/quandary/Interprocedural.java, void Interprocedural.callSinkP
codetoanalyze/java/quandary/Interprocedural.java, void Interprocedural.callSinkParam2Bad(), 1, QUANDARY_TAINT_ERROR, [return from Object InferTaint.inferSecretSource(),call to void Interprocedural.callSinkParam2(Object,Object),call to void InferTaint.inferSensitiveSink(Object)]
codetoanalyze/java/quandary/Interprocedural.java, void Interprocedural.callSinkVariadicBad(), 1, QUANDARY_TAINT_ERROR, [return from Object InferTaint.inferSecretSource(),call to void Interprocedural.callSinkVariadic(java.lang.Object[]),call to void InferTaint.inferSensitiveSink(Object)]
codetoanalyze/java/quandary/Interprocedural.java, void Interprocedural.doublePassthroughBad(), 4, QUANDARY_TAINT_ERROR, [return from Object InferTaint.inferSecretSource(),flow through Object Interprocedural.id(Object),flow through Object Interprocedural.id(Object),call to void InferTaint.inferSensitiveSink(Object)]
codetoanalyze/java/quandary/Interprocedural.java, void Interprocedural.getGlobalThenCallSinkBad(), 2, QUANDARY_TAINT_ERROR, [return from Object InferTaint.inferSecretSource(),flow through void Interprocedural.getGlobalThenCallSink(),call to void Interprocedural.getGlobalThenCallSink(),flow through Object Interprocedural.getGlobal(),call to void InferTaint.inferSensitiveSink(Object)]
codetoanalyze/java/quandary/Interprocedural.java, void Interprocedural.getGlobalThenCallSinkBad(), 2, QUANDARY_TAINT_ERROR, [return from Object InferTaint.inferSecretSource(),call to void Interprocedural.getGlobalThenCallSink(),flow through Object Interprocedural.getGlobal(),call to void InferTaint.inferSensitiveSink(Object)]
codetoanalyze/java/quandary/Interprocedural.java, void Interprocedural.returnSourceDirectBad(), 1, QUANDARY_TAINT_ERROR, [return from Object InferTaint.inferSecretSource(),return from Object Interprocedural.returnSourceDirect(),call to void InferTaint.inferSensitiveSink(Object)]
codetoanalyze/java/quandary/Interprocedural.java, void Interprocedural.returnSourceDirectViaVarBad(), 2, QUANDARY_TAINT_ERROR, [return from Object InferTaint.inferSecretSource(),return from Object Interprocedural.returnSourceDirect(),call to void InferTaint.inferSensitiveSink(Object)]
codetoanalyze/java/quandary/Interprocedural.java, void Interprocedural.returnSourceIndirectBad(), 1, QUANDARY_TAINT_ERROR, [return from Object InferTaint.inferSecretSource(),return from Object Interprocedural.returnSourceDirect(),return from Object Interprocedural.returnSourceIndirect(),call to void InferTaint.inferSensitiveSink(Object)]

Loading…
Cancel
Save