diff --git a/infer/src/base/Config.ml b/infer/src/base/Config.ml index 3dafe94e9..9a4081eb2 100644 --- a/infer/src/base/Config.ml +++ b/infer/src/base/Config.ml @@ -1255,6 +1255,11 @@ and quandary_endpoints = ~in_help:CLOpt.[Analyze, manual_quandary] "Specify endpoint classes for Quandary" +and quandary_sanitizers = + CLOpt.mk_json ~long:"quandary-sanitizers" + ~in_help:CLOpt.[Analyze, manual_quandary] + "Specify custom sanitizers for Quandary" + and quandary_sources = CLOpt.mk_json ~long:"quandary-sources" ~in_help:CLOpt.[Analyze, manual_quandary] @@ -1847,6 +1852,7 @@ and procs_xml = !procs_xml and project_root = !project_root and quandary = !quandary and quandary_endpoints = !quandary_endpoints +and quandary_sanitizers = !quandary_sanitizers and quandary_sources = !quandary_sources and quandary_sinks = !quandary_sinks and quiet = !quiet diff --git a/infer/src/base/Config.mli b/infer/src/base/Config.mli index 13f7f7697..1052578b6 100644 --- a/infer/src/base/Config.mli +++ b/infer/src/base/Config.mli @@ -300,6 +300,7 @@ val procs_xml : string option val project_root : string val quandary : bool val quandary_endpoints : Yojson.Basic.json +val quandary_sanitizers : Yojson.Basic.json val quandary_sources : Yojson.Basic.json val quandary_sinks : Yojson.Basic.json val quiet : bool diff --git a/infer/src/quandary/ClangTaintAnalysis.ml b/infer/src/quandary/ClangTaintAnalysis.ml index 1921fadb5..397d0ce4b 100644 --- a/infer/src/quandary/ClangTaintAnalysis.ml +++ b/infer/src/quandary/ClangTaintAnalysis.ml @@ -64,9 +64,23 @@ include [TaintSpec.Propagate_to_receiver; TaintSpec.Propagate_to_return] | "sprintf" -> [TaintSpec.Propagate_to_receiver] - | other -> - L.d_strln ("generic unknown " ^ other); + | _ -> handle_generic_unknown ret_typ_opt actuals + let external_sanitizers = + List.map + ~f:(fun { QuandaryConfig.Sanitizer.procedure; } -> + QualifiedCppName.Match.of_fuzzy_qual_names [procedure]) + (QuandaryConfig.Sanitizer.of_json Config.quandary_sanitizers) + + let get_sanitizer pname = + let qualified_pname = Typ.Procname.get_qualifiers pname in + List.find_map + ~f:(fun qualifiers -> + if QualifiedCppName.Match.match_qualifiers qualifiers qualified_pname + then Some TaintSpec.Return + else None) + external_sanitizers + let is_taintable_type _ = true end) diff --git a/infer/src/quandary/JavaTaintAnalysis.ml b/infer/src/quandary/JavaTaintAnalysis.ml index ce750057d..b3d9dd758 100644 --- a/infer/src/quandary/JavaTaintAnalysis.ml +++ b/infer/src/quandary/JavaTaintAnalysis.ml @@ -74,7 +74,27 @@ include | pname -> failwithf "Non-Java procname %a in Java analysis@." Typ.Procname.pp pname - let is_taintable_type typ= + let external_sanitizers = + List.map + ~f:(fun { QuandaryConfig.Sanitizer.procedure; } -> Str.regexp procedure) + (QuandaryConfig.Sanitizer.of_json Config.quandary_sanitizers) + + let get_sanitizer = function + | Typ.Procname.Java java_pname -> + let procedure_string = + Printf.sprintf "%s.%s" + (Typ.Procname.java_get_class_name java_pname) + (Typ.Procname.java_get_method java_pname) in + List.find_map + ~f:(fun procedure_regex -> + if Str.string_match procedure_regex procedure_string 0 + then Some TaintSpec.Return + else None) + external_sanitizers + | _ -> + None + + let is_taintable_type typ = match typ.Typ.desc with | Typ.Tptr ({desc=Tstruct (JavaClass typename)}, _) | Tstruct (JavaClass typename) -> begin diff --git a/infer/src/quandary/QuandaryConfig.ml b/infer/src/quandary/QuandaryConfig.ml index 978fc25c9..816d7d87d 100644 --- a/infer/src/quandary/QuandaryConfig.ml +++ b/infer/src/quandary/QuandaryConfig.ml @@ -31,7 +31,7 @@ module Source = struct end module Sink = struct - type t = { procedure : string; kind : string; index : string} + type t = { procedure : string; kind : string; index : string; } let of_json = function | `List sinks -> @@ -47,6 +47,20 @@ module Sink = struct [] end +module Sanitizer = struct + type t = { procedure : string; } + + let of_json = function + | `List sinks -> + let parse_sanitizer json = + let open Yojson.Basic in + let procedure = Util.member "procedure" json |> Util.to_string in + { procedure; } in + List.map ~f:parse_sanitizer sinks + | _ -> + [] +end + module Endpoint = struct type t = string diff --git a/infer/src/quandary/QuandaryConfig.mli b/infer/src/quandary/QuandaryConfig.mli index 949a68e8c..5b2a5c579 100644 --- a/infer/src/quandary/QuandaryConfig.mli +++ b/infer/src/quandary/QuandaryConfig.mli @@ -23,6 +23,12 @@ module Sink : sig val of_json : [> `List of Yojson.Basic.json list ] -> t list end +module Sanitizer : sig + type t = { procedure : string; } + + val of_json : [> `List of Yojson.Basic.json list ] -> t list +end + module Endpoint : sig type t = string (** name of endpoint class *) diff --git a/infer/src/quandary/TaintAnalysis.ml b/infer/src/quandary/TaintAnalysis.ml index 15fdb4ca1..d690923f7 100644 --- a/infer/src/quandary/TaintAnalysis.ml +++ b/infer/src/quandary/TaintAnalysis.ml @@ -17,7 +17,6 @@ module Make (TaintSpecification : TaintSpec.S) = struct module TraceDomain = TaintSpecification.Trace module TaintDomain = TaintSpecification.AccessTree - module IdMapDomain = IdAccessPathMapDomain module Summary = Summary.Make(struct type payload = QuandarySummary.t @@ -377,6 +376,21 @@ module Make (TaintSpecification : TaintSpec.S) = struct proc_data.tenv in List.fold ~f:handle_unknown_call_ ~init:access_tree propagations in + let dummy_ret_opt = match ret_opt with + | None when not (Typ.Procname.is_java called_pname) -> + (* the C++ frontend handles returns of non-pointers by adding a dummy + pass-by-reference variable as the last actual, then returning the value by + assigning to it. understand this pattern by pretending it's the return value *) + begin + match List.last actuals with + | Some (HilExp.AccessPath ((Var.ProgramVar pvar, _) as ret_base, [])) + when Pvar.is_frontend_tmp pvar -> + Some ret_base + | _ -> None + end + | _ -> + ret_opt in + let analyze_call astate_acc callee_pname = let call_site = CallSite.make callee_pname callee_loc in @@ -384,34 +398,17 @@ module Make (TaintSpecification : TaintSpec.S) = struct let astate_with_sink = match sinks with | [] -> astate | sinks -> add_sinks sinks actuals astate proc_data call_site in - let source = TraceDomain.Source.get call_site proc_data.tenv in let astate_with_source = - match source, ret_opt with - | Some { TraceDomain.Source.source; index=None; }, Some ret_base -> - add_return_source source ret_base astate_with_sink - | Some { TraceDomain.Source.source; index=Some index; }, _ -> + match source with + | Some { TraceDomain.Source.source; index=None; } -> + Option.value_map + ~f:(fun ret_base -> add_return_source source ret_base astate_with_sink) + ~default:astate_with_sink + dummy_ret_opt + | Some { TraceDomain.Source.source; index=Some index; } -> add_actual_source source index actuals astate_with_sink proc_data - | Some { TraceDomain.Source.source; index=None; }, None -> - let warn_invalid_source () = - L.stderr - "Warning: %a is marked as a source, but has no return value" - Typ.Procname.pp callee_pname; - astate_with_sink in - if not (Typ.Procname.is_java callee_pname) - then - (* the C++ frontend handles returns of non-pointers by adding a dummy - pass-by-reference variable as the last actual, then returning the value - by assigning to it. make sure we understand this pattern *) - match List.last actuals with - | Some (HilExp.AccessPath ((Var.ProgramVar pvar, _) as ret_base, [])) - when Pvar.is_frontend_tmp pvar -> - add_return_source source ret_base astate_with_sink - | _ -> - warn_invalid_source () - else - warn_invalid_source () - | None, _ -> + | None -> astate_with_sink in let astate_with_summary = @@ -433,7 +430,22 @@ module Make (TaintSpecification : TaintSpec.S) = struct apply_summary ret_opt actuals summary astate_with_source proc_data call_site | _ -> handle_unknown_call callee_pname astate_with_source in - Domain.join astate_acc astate_with_summary in + + let astate_with_sanitizer = + match dummy_ret_opt with + | None -> astate_with_summary + | Some ret_base -> + match TaintSpecification.get_sanitizer callee_pname with + | Some Return -> + (* clear the trace associated with the return value. ideally, we would + associate a kind with the sanitizer and only clear the trace when its + kind matches the source. but this gets complicated to do properly with + footprint sources, since we don't know their kind. so do the simple + thing for now. *) + TaintDomain.BaseMap.remove ret_base astate_with_summary + | None -> astate_with_summary in + + Domain.join astate_acc astate_with_sanitizer in (* highly polymorphic call sites stress reactive mode too much by using too much memory. here, we choose an arbitrary call limit that allows us to finish the analysis in diff --git a/infer/src/quandary/TaintSpec.ml b/infer/src/quandary/TaintSpec.ml index e23bb6854..ef2700fa7 100644 --- a/infer/src/quandary/TaintSpec.ml +++ b/infer/src/quandary/TaintSpec.ml @@ -20,6 +20,8 @@ type handle_unknown = | Propagate_to_return (** Propagate taint from all actuals to the return value *) +type sanitizer = + | Return (** a sanitizer that removes taint from its return value *) module type S = sig module Trace : Trace.S @@ -33,6 +35,9 @@ module type S = sig (** return true if the given typ can be tainted *) val is_taintable_type : Typ.t -> bool + (** get the sanitizer associated with the given type, if any *) + val get_sanitizer : Typ.Procname.t -> sanitizer option + val to_summary_access_tree : AccessTree.t -> QuandarySummary.AccessTree.t val of_summary_access_tree : QuandarySummary.AccessTree.t -> AccessTree.t diff --git a/infer/src/unit/TaintTests.ml b/infer/src/unit/TaintTests.ml index 19a15d5b0..24c3763bd 100644 --- a/infer/src/unit/TaintTests.ml +++ b/infer/src/unit/TaintTests.ml @@ -47,6 +47,7 @@ module MockTaintAnalysis = TaintAnalysis.Make(struct let to_summary_access_tree _ = assert false let handle_unknown_call _ _ _ _ = [] let is_taintable_type _ = true + let get_sanitizer _ = None end) module TestInterpreter = diff --git a/infer/tests/codetoanalyze/cpp/quandary/.inferconfig b/infer/tests/codetoanalyze/cpp/quandary/.inferconfig index 378189339..77f04f417 100644 --- a/infer/tests/codetoanalyze/cpp/quandary/.inferconfig +++ b/infer/tests/codetoanalyze/cpp/quandary/.inferconfig @@ -57,6 +57,14 @@ "index": "1" } ], + "quandary-sanitizers": [ + { + "procedure": "basics::Obj::sanitizer1" + }, + { + "procedure": "basics::Obj::sanitizer2" + } + ], "quandary-endpoints": [ "basics::Obj::endpoint" ] diff --git a/infer/tests/codetoanalyze/cpp/quandary/basics.cpp b/infer/tests/codetoanalyze/cpp/quandary/basics.cpp index a2eb779c3..b04e31202 100644 --- a/infer/tests/codetoanalyze/cpp/quandary/basics.cpp +++ b/infer/tests/codetoanalyze/cpp/quandary/basics.cpp @@ -23,6 +23,9 @@ class Obj { std::string string_source(int i) { return ""; } static int taint_arg_source(int* arg) { return 1; } void string_sink(std::string) {} + static std::string* sanitizer1(std::string* input) { return input; } + static std::string sanitizer2(const std::string& input) { return input; } + std::string field1; std::string field2; @@ -147,4 +150,23 @@ void taint_arg_source_ok() { int ret = Obj::taint_arg_source(&source); __infer_taint_sink((void*)ret); // return value is not a source } + +void via_sanitizer_ok1(Obj* obj) { + std::string* source = &obj->string_source(0); + std::string* sanitized = Obj::sanitizer1(source); + obj->string_sink(*sanitized); +} + +void via_sanitizer_ok2(Obj* obj) { + std::string source = obj->string_source(0); + std::string sanitized = obj->sanitizer2(source); + obj->string_sink(sanitized); +} + +std::string* unsanitized_bad(Obj* obj) { + std::string* source = &obj->string_source(0); + std::string* sanitized = Obj::sanitizer1(source); + obj->string_sink(*source); + return sanitized; +} } diff --git a/infer/tests/codetoanalyze/cpp/quandary/issues.exp b/infer/tests/codetoanalyze/cpp/quandary/issues.exp index c041a38a8..692aa1e40 100644 --- a/infer/tests/codetoanalyze/cpp/quandary/issues.exp +++ b/infer/tests/codetoanalyze/cpp/quandary/issues.exp @@ -16,6 +16,7 @@ codetoanalyze/cpp/quandary/basics.cpp, basics::static_source_sink_bad, 2, QUANDA codetoanalyze/cpp/quandary/basics.cpp, basics::string_source_bad, 2, QUANDARY_TAINT_ERROR, [return from basics::Obj_string_source,call to basics::Obj_string_sink] codetoanalyze/cpp/quandary/basics.cpp, basics::taint_arg_source_bad, 3, QUANDARY_TAINT_ERROR, [return from basics::Obj_taint_arg_source,call to __infer_taint_sink] codetoanalyze/cpp/quandary/basics.cpp, basics::template_source_bad, 2, QUANDARY_TAINT_ERROR, [return from basics::template_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/basics.cpp, basics::unsanitized_bad, 3, QUANDARY_TAINT_ERROR, [return from basics::Obj_string_source,call to basics::Obj_string_sink] codetoanalyze/cpp/quandary/basics.cpp, basics::via_field_bad1, 3, QUANDARY_TAINT_ERROR, [return from basics::template_source_>,call to basics::template_sink_>] codetoanalyze/cpp/quandary/basics.cpp, basics::via_field_bad2, 2, QUANDARY_TAINT_ERROR, [return from basics::template_source_>,call to basics::template_sink_>] codetoanalyze/cpp/quandary/basics.cpp, basics::via_passthrough_bad1, 4, QUANDARY_TAINT_ERROR, [return from basics::Obj_string_source,flow through basics::id1_>,call to basics::Obj_string_sink] diff --git a/infer/tests/codetoanalyze/java/quandary/.inferconfig b/infer/tests/codetoanalyze/java/quandary/.inferconfig index 0ce166ecf..0f9e37d9b 100644 --- a/infer/tests/codetoanalyze/java/quandary/.inferconfig +++ b/infer/tests/codetoanalyze/java/quandary/.inferconfig @@ -16,6 +16,11 @@ "kind": "Logging" } ], + "quandary-sanitizers": [ + { + "procedure": "codetoanalyze.java.quandary.ExternalSpecs.sanitizer" + } + ], "quandary-endpoints": [ "codetoanalyze.java.quandary.MyService" ] diff --git a/infer/tests/codetoanalyze/java/quandary/ExternalSpecs.java b/infer/tests/codetoanalyze/java/quandary/ExternalSpecs.java index 409a27116..11247a98a 100644 --- a/infer/tests/codetoanalyze/java/quandary/ExternalSpecs.java +++ b/infer/tests/codetoanalyze/java/quandary/ExternalSpecs.java @@ -67,4 +67,40 @@ public class ExternalSpecs { loggingSink2(activity.getIntent(), activity.getIntent()); } + static Object sanitizer(Object o) { + return o; + } + + void viaSanitizerOk() { + Object source = InferTaint.inferSecretSource(); + Object sanitized = sanitizer(source); + InferTaint.inferSensitiveSink(sanitized); + } + + void sanitizeFootprint(Object o) { + Object sanitized = sanitizer(o); + InferTaint.inferSensitiveSink(sanitized); + } + + void callSanitizeFootprintOk() { + sanitizeFootprint(InferTaint.inferSecretSource()); + } + + Object returnSanitizedSource() { + Object source = InferTaint.inferSecretSource(); + return sanitizer(source); + } + + void callSinkOnSanitizedSourceOk() { + InferTaint.inferSensitiveSink(returnSanitizedSource()); + } + + Object missedSanitizerBad() { + Object source = InferTaint.inferSecretSource(); + Object sanitized = sanitizer(source); + InferTaint.inferSensitiveSink(source); + return sanitized; + } + + } diff --git a/infer/tests/codetoanalyze/java/quandary/issues.exp b/infer/tests/codetoanalyze/java/quandary/issues.exp index 99ff338ed..aa628764c 100644 --- a/infer/tests/codetoanalyze/java/quandary/issues.exp +++ b/infer/tests/codetoanalyze/java/quandary/issues.exp @@ -58,6 +58,7 @@ codetoanalyze/java/quandary/Exceptions.java, void Exceptions.sinkInCatchBad2(), codetoanalyze/java/quandary/Exceptions.java, void Exceptions.sinkInFinallyBad1(), 5, QUANDARY_TAINT_ERROR, [return from Object InferTaint.inferSecretSource(),call to void InferTaint.inferSensitiveSink(Object)] codetoanalyze/java/quandary/Exceptions.java, void Exceptions.sinkInFinallyBad2(), 6, QUANDARY_TAINT_ERROR, [return from Object InferTaint.inferSecretSource(),call to void InferTaint.inferSensitiveSink(Object)] codetoanalyze/java/quandary/Exceptions.java, void Exceptions.sinkInFinallyBad3(), 7, QUANDARY_TAINT_ERROR, [return from Object InferTaint.inferSecretSource(),call to void InferTaint.inferSensitiveSink(Object)] +codetoanalyze/java/quandary/ExternalSpecs.java, Object ExternalSpecs.missedSanitizerBad(), 3, QUANDARY_TAINT_ERROR, [return from Object InferTaint.inferSecretSource(),call to void InferTaint.inferSensitiveSink(Object)] codetoanalyze/java/quandary/ExternalSpecs.java, void ExternalSpecs.callExternalSink2Bad1(), 1, QUANDARY_TAINT_ERROR, [return from Object ExternalSpecs.privateDataSource(),call to void ExternalSpecs.loggingSink2(Object,Object)] codetoanalyze/java/quandary/ExternalSpecs.java, void ExternalSpecs.callExternalSink2Bad2(), 1, QUANDARY_TAINT_ERROR, [return from Object ExternalSpecs.privateDataSource(),call to void ExternalSpecs.loggingSink2(Object,Object)] codetoanalyze/java/quandary/ExternalSpecs.java, void ExternalSpecs.callExternalSinkBad(), 1, QUANDARY_TAINT_ERROR, [return from Object ExternalSpecs.privateDataSource(),call to void ExternalSpecs.loggingSink1(Object,Object)]