From 14aef012f6b9ae32abd469a6d6d7e1c67f3874be Mon Sep 17 00:00:00 2001 From: Sam Blackshear Date: Wed, 6 Sep 2017 14:04:16 -0700 Subject: [PATCH] [quandary] allow specifying globals as sources Summary: We supported globals as sources before, but we did so by allowing ClangTrace etc. to match against any access path in the footprint of the trace. This is very powerful/flexible, but it's ultimately not a good idea because it leads to traces that are hard to read. This is because a footprint source doesn't have any information about its provenance: we might know that the value came from a global, but we don't know where the read occurred. The mechanism for handling procedure calls as sources already knows how to solve this problem. This diff implements globals as sources as a special case of procedure call sources instead. This will give us much nicer traces with full provenance of the read from the global. Reviewed By: mbouaziz Differential Revision: D5772299 fbshipit-source-id: 491ae81 --- infer/src/IR/BUILTINS.ml | 2 + infer/src/IR/BuiltinDecl.ml | 2 + infer/src/backend/BuiltinDefn.ml | 3 + infer/src/checkers/SinkTrace.ml | 2 - infer/src/checkers/Trace.ml | 11 --- infer/src/checkers/Trace.mli | 4 -- infer/src/quandary/ClangTrace.ml | 69 +++++++++++-------- infer/src/quandary/JavaTrace.ml | 2 - infer/src/quandary/TaintAnalysis.ml | 32 +++++++-- infer/src/unit/TaintTests.ml | 2 - infer/src/unit/TraceTests.ml | 2 - .../codetoanalyze/cpp/quandary/execs.cpp | 10 +++ .../codetoanalyze/cpp/quandary/issues.exp | 3 +- 13 files changed, 85 insertions(+), 59 deletions(-) diff --git a/infer/src/IR/BUILTINS.ml b/infer/src/IR/BUILTINS.ml index 6d91833ac..2b1218099 100644 --- a/infer/src/IR/BUILTINS.ml +++ b/infer/src/IR/BUILTINS.ml @@ -44,6 +44,8 @@ module type S = sig val __get_type_of : t + val __global_access : t + val __infer_assume : t val __infer_fail : t diff --git a/infer/src/IR/BuiltinDecl.ml b/infer/src/IR/BuiltinDecl.ml index d95ed1bec..ba7b40c3d 100644 --- a/infer/src/IR/BuiltinDecl.ml +++ b/infer/src/IR/BuiltinDecl.ml @@ -60,6 +60,8 @@ let __get_hidden_field = create_procname "__get_hidden_field" let __get_type_of = create_procname "__get_type_of" +let __global_access = create_procname "__global_access" + let __infer_assume = create_procname "__infer_assume" let __infer_fail = create_procname "__infer_fail" diff --git a/infer/src/backend/BuiltinDefn.ml b/infer/src/backend/BuiltinDefn.ml index 0fb26c534..d636e83e4 100644 --- a/infer/src/backend/BuiltinDefn.ml +++ b/infer/src/backend/BuiltinDefn.ml @@ -1064,6 +1064,9 @@ let __get_hidden_field = Builtin.register BuiltinDecl.__get_hidden_field execute let __get_type_of = Builtin.register BuiltinDecl.__get_type_of execute___get_type_of +(* only used in Quandary, so ok to skip *) +let __global_access = Builtin.register BuiltinDecl.__global_access execute_skip + (* infer assume, diverging on inconsistencies *) let __infer_assume = Builtin.register BuiltinDecl.__infer_assume execute___infer_assume diff --git a/infer/src/checkers/SinkTrace.ml b/infer/src/checkers/SinkTrace.ml index 53e09dc48..73d5bf0cf 100644 --- a/infer/src/checkers/SinkTrace.ml +++ b/infer/src/checkers/SinkTrace.ml @@ -43,8 +43,6 @@ module Make (TraceElem : TraceElem.S) = struct module Sink = MakeSink (TraceElem) let should_report _ _ = true - - let should_report_footprint _ _ = false end) type sink_path = Passthroughs.t * (Sink.t * Passthroughs.t) list diff --git a/infer/src/checkers/Trace.ml b/infer/src/checkers/Trace.ml index 6cd300e94..4d66cc30d 100644 --- a/infer/src/checkers/Trace.ml +++ b/infer/src/checkers/Trace.ml @@ -18,8 +18,6 @@ module type Spec = sig val should_report : Source.t -> Sink.t -> bool (** should a flow originating at source and entering sink be reported? *) - - val should_report_footprint : AccessPath.Abs.t -> Sink.t -> bool end module type S = sig @@ -291,17 +289,8 @@ module Make (Spec : Spec) = struct in Sinks.fold report_one sinks acc0 in - let report_footprint acc0 footprint_access_path (is_mem, _) = - let report_one sink acc = - if is_mem && Spec.should_report_footprint footprint_access_path sink then - (Footprint footprint_access_path, sink, t.passthroughs) :: acc - else acc - in - Sinks.fold report_one t.sinks acc0 - in let report_sources source acc = report_source source t.sinks acc in Sources.Known.fold report_sources t.sources.known [] - |> Sources.Footprint.fold report_footprint t.sources.footprint let pp_path_source fmt = function | Known source diff --git a/infer/src/checkers/Trace.mli b/infer/src/checkers/Trace.mli index a2290875d..5d7b8354f 100644 --- a/infer/src/checkers/Trace.mli +++ b/infer/src/checkers/Trace.mli @@ -18,10 +18,6 @@ module type Spec = sig val should_report : Source.t -> Sink.t -> bool (** should a flow originating at source and entering sink be reported? *) - - val should_report_footprint : AccessPath.Abs.t -> Sink.t -> bool - (** should a flow read from the environment via the given access path and entering sink be - reported? *) end module type S = sig diff --git a/infer/src/quandary/ClangTrace.ml b/infer/src/quandary/ClangTrace.ml index 855d8a833..c2a731001 100644 --- a/infer/src/quandary/ClangTrace.ml +++ b/infer/src/quandary/ClangTrace.ml @@ -13,6 +13,7 @@ module L = Logging module SourceKind = struct type t = + | CommandLineFlag of Var.t (** source that was read from a command line flag *) | Endpoint of (Mangled.t * Typ.desc) (** source originating from formal of an endpoint *) | EnvironmentVariable (** source that was read from an environment variable *) | File (** source that was read from a file *) @@ -20,6 +21,8 @@ module SourceKind = struct [@@deriving compare] let of_string = function + | "CommandLineFlag" + -> L.die UserError "User-specified CommandLineFlag sources are not supported" | "Endpoint" -> Endpoint (Mangled.from_string "NONE", Typ.Tvoid) | "EnvironmentVariable" @@ -51,7 +54,7 @@ module SourceKind = struct else None) external_sources - let get pname _ _ = + let get pname actuals _ = let return = None in match pname with | Typ.Procname.ObjC_Cpp cpp_name @@ -67,6 +70,27 @@ module SourceKind = struct -> Some (File, Some 1) | _ -> get_external_source qualified_pname ) + | Typ.Procname.C _ when Typ.Procname.equal pname BuiltinDecl.__global_access + -> ( + (* is this var a command line flag created by the popular C++ gflags library for creating + command-line flags (https://github.com/gflags/gflags)? *) + let is_gflag access_path = + let pvar_is_gflag pvar = + String.is_substring ~substring:"FLAGS_" (Pvar.get_simplified_name pvar) + in + match access_path with + | (Var.ProgramVar pvar, _), _ + -> Pvar.is_global pvar && pvar_is_gflag pvar + | _ + -> false + in + (* accessed global will be passed to us as the only parameter *) + match actuals with + | [(HilExp.AccessPath access_path)] when is_gflag access_path + -> let (global_pvar, _), _ = access_path in + Some (CommandLineFlag global_pvar, None) + | _ + -> None ) | Typ.Procname.C _ -> ( match Typ.Procname.to_string pname with | "getenv" @@ -98,17 +122,17 @@ module SourceKind = struct | _ -> Source.all_formals_untainted pdesc - let pp fmt kind = - F.fprintf fmt "%s" - ( match kind with - | Endpoint (formal_name, _) - -> F.sprintf "Endpoint[%s]" (Mangled.to_string formal_name) - | EnvironmentVariable - -> "EnvironmentVariable" - | File - -> "File" - | Other - -> "Other" ) + let pp fmt = function + | Endpoint (formal_name, _) + -> F.fprintf fmt "Endpoint[%s]" (Mangled.to_string formal_name) + | EnvironmentVariable + -> F.fprintf fmt "EnvironmentVariable" + | File + -> F.fprintf fmt "File" + | CommandLineFlag var + -> F.fprintf fmt "CommandLineFlag[%a]" Var.pp var + | Other + -> F.fprintf fmt "Other" end module CppSource = Source.Make (SourceKind) @@ -254,27 +278,12 @@ include Trace.Make (struct | (Endpoint _ | EnvironmentVariable | File), Allocation -> (* untrusted data flowing to memory allocation *) true + | CommandLineFlag _, (Allocation | BufferAccess | Other | ShellExec | SQL) + -> (* data controlled by a command line flag flowing somewhere sensitive *) + true | Other, _ -> (* Other matches everything *) true | _, Other -> true - - let should_report_footprint footprint_access_path sink = - (* is this var a command line flag created by the popular C++ gflags library for creating - command-line flags (https://github.com/gflags/gflags)? *) - let is_gflag access_path = - let pvar_is_gflag pvar = - String.is_substring ~substring:"FLAGS_" (Pvar.get_simplified_name pvar) - in - match AccessPath.Abs.extract access_path with - | (Var.ProgramVar pvar, _), _ - -> Pvar.is_global pvar && pvar_is_gflag pvar - | _ - -> false - in - match Sink.kind sink - with Allocation | BufferAccess | Other | ShellExec | SQL -> - (* gflags globals come from the environment; treat them as sources for everything *) - is_gflag footprint_access_path end) diff --git a/infer/src/quandary/JavaTrace.ml b/infer/src/quandary/JavaTrace.ml index 04b55398c..8b94377fe 100644 --- a/infer/src/quandary/JavaTrace.ml +++ b/infer/src/quandary/JavaTrace.ml @@ -335,6 +335,4 @@ include Trace.Make (struct true | _ -> false - - let should_report_footprint _ _ = false end) diff --git a/infer/src/quandary/TaintAnalysis.ml b/infer/src/quandary/TaintAnalysis.ml index ced711497..a74829492 100644 --- a/infer/src/quandary/TaintAnalysis.ml +++ b/infer/src/quandary/TaintAnalysis.ml @@ -381,10 +381,30 @@ module Make (TaintSpecification : TaintSpec.S) = struct in List.fold ~f:add_sinks_for_access ~init:astate accesses in - let add_sinks_for_exp exp loc astate = + let add_sources_for_access_path ((var, _), _ as ap) loc astate = + L.d_strln "adding sources for access path" ; + if Var.is_global var then + let dummy_call_site = CallSite.make BuiltinDecl.__global_access loc in + match TraceDomain.Source.get dummy_call_site [HilExp.AccessPath ap] proc_data.tenv with + | Some {TraceDomain.Source.source} + -> L.d_strln "Got source to add" ; + let access_path = AccessPath.Abs.Exact ap in + let trace, subtree = + Option.value ~default:TaintDomain.empty_node + (TaintDomain.get_node access_path astate) + in + TaintDomain.add_node access_path (TraceDomain.add_source source trace, subtree) + astate + | None + -> astate + else astate + in + let add_sources_sinks_for_exp exp loc astate = + L.d_strln "adding source/sinks" ; match exp with | HilExp.AccessPath access_path -> add_sinks_for_access_path access_path loc astate + |> add_sources_for_access_path access_path loc | _ -> astate in @@ -408,13 +428,15 @@ module Make (TaintSpecification : TaintSpec.S) = struct `return null` in a void function *) astate | Assign (lhs_access_path, rhs_exp, loc) - -> add_sinks_for_exp rhs_exp loc astate |> add_sinks_for_access_path lhs_access_path loc - |> exec_write lhs_access_path rhs_exp + -> add_sources_sinks_for_exp rhs_exp loc astate + |> add_sinks_for_access_path lhs_access_path loc |> exec_write lhs_access_path rhs_exp | Assume (assume_exp, _, _, loc) - -> add_sinks_for_exp assume_exp loc astate + -> add_sources_sinks_for_exp assume_exp loc astate | Call (ret_opt, Direct called_pname, actuals, call_flags, callee_loc) -> let astate = - List.fold ~f:(fun acc exp -> add_sinks_for_exp exp callee_loc acc) actuals ~init:astate + List.fold + ~f:(fun acc exp -> add_sources_sinks_for_exp exp callee_loc acc) + actuals ~init:astate in let handle_model callee_pname access_tree model = let is_variadic = diff --git a/infer/src/unit/TaintTests.ml b/infer/src/unit/TaintTests.ml index 2c439ea19..91fe34eb6 100644 --- a/infer/src/unit/TaintTests.ml +++ b/infer/src/unit/TaintTests.ml @@ -34,8 +34,6 @@ module MockTrace = Trace.Make (struct end) let should_report _ _ = false - - let should_report_footprint _ _ = false end) module MockTaintAnalysis = TaintAnalysis.Make (struct diff --git a/infer/src/unit/TraceTests.ml b/infer/src/unit/TraceTests.ml index ab761cd34..e91da2997 100644 --- a/infer/src/unit/TraceTests.ml +++ b/infer/src/unit/TraceTests.ml @@ -77,8 +77,6 @@ module MockTrace = Trace.Make (struct let should_report source sink = [%compare.equal : MockTraceElem.t] (Source.kind source) (Sink.kind sink) - - let should_report_footprint _ _ = false end) let trace_equal t1 t2 = MockTrace.( <= ) ~lhs:t1 ~rhs:t2 && MockTrace.( <= ) ~lhs:t2 ~rhs:t1 diff --git a/infer/tests/codetoanalyze/cpp/quandary/execs.cpp b/infer/tests/codetoanalyze/cpp/quandary/execs.cpp index 90c835b9f..17c113220 100644 --- a/infer/tests/codetoanalyze/cpp/quandary/execs.cpp +++ b/infer/tests/codetoanalyze/cpp/quandary/execs.cpp @@ -101,6 +101,16 @@ void customGetEnvOk() { void exec_flag_bad() { execl(FLAGS_cli_string, NULL); } +char* return_global() { + char* local = FLAGS_cli_string; + return local; +} + +void exec_flag_interproc_bad() { + char* flag = return_global(); + execl(flag, NULL); +} + void sql_on_env_var_bad() { std::string source = (std::string)std::getenv("ENV_VAR"); __infer_sql_sink(source, 0); diff --git a/infer/tests/codetoanalyze/cpp/quandary/issues.exp b/infer/tests/codetoanalyze/cpp/quandary/issues.exp index 9a8907af6..ec6b2c83d 100644 --- a/infer/tests/codetoanalyze/cpp/quandary/issues.exp +++ b/infer/tests/codetoanalyze/cpp/quandary/issues.exp @@ -54,7 +54,8 @@ codetoanalyze/cpp/quandary/execs.cpp, execs::callExecBad, 29, QUANDARY_TAINT_ERR codetoanalyze/cpp/quandary/execs.cpp, execs::callExecBad, 31, QUANDARY_TAINT_ERROR, [Return from getenv,Call to execve] codetoanalyze/cpp/quandary/execs.cpp, execs::callExecBad, 33, QUANDARY_TAINT_ERROR, [Return from getenv,Call to system] codetoanalyze/cpp/quandary/execs.cpp, execs::callExecBad, 35, QUANDARY_TAINT_ERROR, [Return from getenv,Call to popen] -codetoanalyze/cpp/quandary/execs.cpp, execs::exec_flag_bad, 0, QUANDARY_TAINT_ERROR, [Read from &#GB$execs::FLAGS_cli_string*,Call to execl] +codetoanalyze/cpp/quandary/execs.cpp, execs::exec_flag_bad, 0, QUANDARY_TAINT_ERROR, [Return from __global_access,Call to execl] +codetoanalyze/cpp/quandary/execs.cpp, execs::exec_flag_interproc_bad, 2, QUANDARY_TAINT_ERROR, [Return from __global_access with tainted data &return,Return from execs::return_global,Call to execl] codetoanalyze/cpp/quandary/execs.cpp, execs::sql_on_env_var_bad, 2, QUANDARY_TAINT_ERROR, [Return from getenv,Call to __infer_sql_sink] codetoanalyze/cpp/quandary/files.cpp, files::read_file_call_exec_bad1, 5, QUANDARY_TAINT_ERROR, [Return from std::basic_istream>_read,Call to execle] codetoanalyze/cpp/quandary/files.cpp, files::read_file_call_exec_bad2, 5, QUANDARY_TAINT_ERROR, [Return from std::basic_istream>_readsome,Call to execle]