From abc5642c8303f33ebbe62db4196de96656fdfaa3 Mon Sep 17 00:00:00 2001 From: Sam Blackshear Date: Tue, 16 May 2017 11:19:09 -0700 Subject: [PATCH] [quandary] tests for string functionality Summary: String are very important for taint analysis, have to make sure that we have the right models/the right behaviors for unknown code. Reviewed By: jvillard Differential Revision: D5054832 fbshipit-source-id: 7e7ee07 --- infer/src/quandary/ClangTaintAnalysis.ml | 60 +++-- infer/src/quandary/TaintAnalysis.ml | 12 +- .../codetoanalyze/cpp/quandary/issues.exp | 28 +++ .../codetoanalyze/cpp/quandary/strings.cpp | 214 ++++++++++++++++++ 4 files changed, 295 insertions(+), 19 deletions(-) create mode 100644 infer/tests/codetoanalyze/cpp/quandary/strings.cpp diff --git a/infer/src/quandary/ClangTaintAnalysis.ml b/infer/src/quandary/ClangTaintAnalysis.ml index d1ddb9d1c..1921fadb5 100644 --- a/infer/src/quandary/ClangTaintAnalysis.ml +++ b/infer/src/quandary/ClangTaintAnalysis.ml @@ -23,24 +23,50 @@ include | QuandarySummary.AccessTree.Clang tree -> tree | _ -> assert false - let handle_unknown_call _ ret_typ_opt actuals _ = - match ret_typ_opt, List.rev actuals with - | Some _, _ -> - (* propagate taint from actuals to return value *) - [TaintSpec.Propagate_to_return] - | None, [] -> - [] - | None, - HilExp.AccessPath ((Var.ProgramVar pvar, { desc=Typ.Tptr (_, Typ.Pk_pointer) }), []) :: _ - when Pvar.is_frontend_tmp pvar -> - (* no return value, but the frontend has introduced a dummy return variable and will - assign the return value to this variable. So propagate taint to the dummy return - variable *) - let actual_index = List.length actuals - 1 in - [TaintSpec.Propagate_to_actual actual_index] - | None, _ -> - (* no return value; propagate taint from actuals to receiver *) + let handle_unknown_call pname ret_typ_opt actuals _ = + let handle_generic_unknown ret_typ_opt actuals = + match ret_typ_opt, List.rev actuals with + | Some _, _ -> + (* propagate taint from actuals to return value *) + [TaintSpec.Propagate_to_return] + | None, [] -> + [] + | None, _ when Typ.Procname.is_constructor pname -> + (* "this" is always the first arg of a constructor; propagate taint there *) + [TaintSpec.Propagate_to_receiver] + | None, + HilExp.AccessPath ((Var.ProgramVar pvar, { desc=Typ.Tptr (_, Typ.Pk_pointer) }), []) :: _ + when Pvar.is_frontend_tmp pvar -> + (* no return value, but the frontend has introduced a dummy return variable and will + assign the return value to this variable. So propagate taint to the dummy return + variable *) + let actual_index = List.length actuals - 1 in + [TaintSpec.Propagate_to_actual actual_index] + | None, _ -> + (* no return value; propagate taint from actuals to receiver *) + [TaintSpec.Propagate_to_receiver] in + + (* if we have a specific model for a procedure, use that. otherwise, use the generic + heuristics for dealing with unknown code *) + match Typ.Procname.get_method pname with + | "operator+=" + | "operator-=" + | "operator*=" + | "operator/=" + | "operator%=" + | "operator<<=" + | "operator>>=" + | "operator&=" + | "operator^=" + | "operator|=" -> + [TaintSpec.Propagate_to_receiver; TaintSpec.Propagate_to_return] + | "memcpy" | "memmove" | "strcpy" | "strncpy" -> + [TaintSpec.Propagate_to_receiver; TaintSpec.Propagate_to_return] + | "sprintf" -> [TaintSpec.Propagate_to_receiver] + | other -> + L.d_strln ("generic unknown " ^ other); + handle_generic_unknown ret_typ_opt actuals let is_taintable_type _ = true end) diff --git a/infer/src/quandary/TaintAnalysis.ml b/infer/src/quandary/TaintAnalysis.ml index e3d497004..ab742e802 100644 --- a/infer/src/quandary/TaintAnalysis.ml +++ b/infer/src/quandary/TaintAnalysis.ml @@ -414,15 +414,23 @@ module Make (TaintSpecification : TaintSpec.S) = struct astate_with_sink in let astate_with_summary = + (* hack for default C++ constructors, which get translated as an empty body (and + will thus have an empty summary). We don't want that because we want to be able to + propagate taint from comstructor parameters to the constructed object. so we treat + the empty constructor as a skip function instead *) + let is_dummy_cpp_constructor summary pname = + Typ.Procname.is_c_method pname && + Typ.Procname.is_constructor pname && + TaintDomain.BaseMap.is_empty (TaintSpecification.of_summary_access_tree summary) in if sinks <> [] || Option.is_some source then (* don't use a summary for a procedure that is a direct source or sink *) astate_with_source else match Summary.read_summary proc_data.pdesc callee_pname with - | Some summary -> + | Some summary when not (is_dummy_cpp_constructor summary callee_pname) -> apply_summary ret_opt actuals summary astate_with_source proc_data call_site - | None -> + | _ -> handle_unknown_call callee_pname astate_with_source in Domain.join astate_acc astate_with_summary in diff --git a/infer/tests/codetoanalyze/cpp/quandary/issues.exp b/infer/tests/codetoanalyze/cpp/quandary/issues.exp index cb7854c99..0deab8bd6 100644 --- a/infer/tests/codetoanalyze/cpp/quandary/issues.exp +++ b/infer/tests/codetoanalyze/cpp/quandary/issues.exp @@ -42,6 +42,34 @@ codetoanalyze/cpp/quandary/pointers.cpp, pointers::assign_pointer_pass_to_sink_b codetoanalyze/cpp/quandary/pointers.cpp, pointers::assign_source_by_reference_bad1, 3, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,return from pointers::assign_source_by_reference,call to __infer_taint_sink] codetoanalyze/cpp/quandary/pointers.cpp, pointers::assign_source_by_reference_bad2, 2, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,return from pointers::assign_source_by_reference,call to __infer_taint_sink] codetoanalyze/cpp/quandary/pointers.cpp, pointers::assign_source_by_reference_bad3, 3, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,return from pointers::assign_source_by_reference,return from pointers::call_assign_source_by_reference,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::append_bad1, 2, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::append_bad2, 3, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::assign_bad1, 2, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::assign_bad2, 3, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::concat_bad1, 3, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::concat_bad2, 4, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::concat_bad3, 2, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::constructor_bad1, 3, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::constructor_bad2, 3, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::constructor_bad3, 3, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,flow through std::__wrap_iter_operator+,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::format_bad1, 3, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,flow through strings::format1_&>,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::format_bad2, 3, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,flow through strings::format2_&>,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::format_bad3, 3, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::format_bad4, 3, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::format_varargs_bad, 3, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::insert_bad1, 2, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::insert_bad2, 3, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::memchr_bad, 3, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::memcpy_bad, 4, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::memmove_bad, 4, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::replace_bad1, 2, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::replace_bad2, 3, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::sprintf_bad1, 4, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::sprintf_bad2, 4, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::strcpy_bad1, 4, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::strcpy_bad2, 4, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::strncpy_bad, 4, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] +codetoanalyze/cpp/quandary/strings.cpp, strings::swap_bad, 4, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] codetoanalyze/cpp/quandary/unknown_code.cpp, unknown_code::direct_bad, 2, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] codetoanalyze/cpp/quandary/unknown_code.cpp, unknown_code::skip_indirect_bad, 3, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,flow through unknown_code::skip_indirect,call to __infer_taint_sink] codetoanalyze/cpp/quandary/unknown_code.cpp, unknown_code::skip_pointer_bad, 3, QUANDARY_TAINT_ERROR, [return from __infer_taint_source,call to __infer_taint_sink] diff --git a/infer/tests/codetoanalyze/cpp/quandary/strings.cpp b/infer/tests/codetoanalyze/cpp/quandary/strings.cpp new file mode 100644 index 000000000..0d871477e --- /dev/null +++ b/infer/tests/codetoanalyze/cpp/quandary/strings.cpp @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2016 - present Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#include +#include + +extern std::string __infer_taint_source(); +extern void __infer_taint_sink(std::string); + +// tests related to string manipulation, format strings, etc. +namespace strings { + +void sprintf_bad1() { + char laundered_source[50]; + auto source = __infer_taint_source().c_str(); + sprintf(laundered_source, "%s", source); + __infer_taint_sink(laundered_source); +} + +void sprintf_bad2() { + char laundered_source[50]; + auto source = __infer_taint_source().c_str(); + sprintf(laundered_source, "%s%s%d", "a", source, 1); + __infer_taint_sink(laundered_source); +} + +void strcpy_bad1() { + char laundered_source[50]; + auto source = __infer_taint_source().c_str(); + auto copy = strcpy(laundered_source, source); + __infer_taint_sink(copy); +} + +void strcpy_bad2() { + char laundered_source[50]; + auto source = __infer_taint_source().c_str(); + strcpy(laundered_source, source); + __infer_taint_sink(laundered_source); +} + +void strncpy_bad() { + char laundered_source[50]; + auto source = __infer_taint_source().c_str(); + strncpy(laundered_source, source, 50); + __infer_taint_sink(laundered_source); +} + +void memcpy_bad() { + char laundered_source[50]; + auto source = __infer_taint_source().c_str(); + memcpy(laundered_source, source, 50); + __infer_taint_sink(laundered_source); +} + +void memmove_bad() { + char laundered_source[50]; + auto source = __infer_taint_source().c_str(); + auto copy = (char*)memmove(laundered_source, source, 50); + __infer_taint_sink(copy); +} + +void memchr_bad() { + auto source = __infer_taint_source().c_str(); + auto laundered_source = (char*)memchr(source, 'a', 10); + __infer_taint_sink(laundered_source); +} + +void constructor_bad1() { + auto source = __infer_taint_source(); + auto laundered_source = std::string(source); + __infer_taint_sink(laundered_source); +} + +void constructor_bad2() { + auto source = __infer_taint_source(); + auto laundered_source = std::string(source, 0, 5); + __infer_taint_sink(laundered_source); +} + +void constructor_bad3() { + auto source = __infer_taint_source(); + auto laundered_source = std::string(source.begin(), source.begin() + 5); + __infer_taint_sink(laundered_source); +} + +void concat_bad1() { + auto source = __infer_taint_source(); + source += "other string"; + __infer_taint_sink(source); +} + +void concat_bad2() { + auto source = __infer_taint_source(); + auto laundered_source = std::string("string"); + laundered_source += source; + __infer_taint_sink(laundered_source); +} + +void concat_bad3() { + auto source = __infer_taint_source(); + __infer_taint_sink(source += "string"); +} + +void append_bad1() { + auto source = __infer_taint_source(); + __infer_taint_sink(std::string("string").append(source)); +} + +void append_bad2() { + auto source = __infer_taint_source(); + source.append("string"); + __infer_taint_sink(source); +} + +void assign_bad1() { + auto source = __infer_taint_source(); + __infer_taint_sink(std::string("string").assign(source)); +} + +void assign_bad2() { + auto source = __infer_taint_source(); + source.assign("string"); + __infer_taint_sink(source); +} + +void insert_bad1() { + auto source = __infer_taint_source(); + __infer_taint_sink(std::string("string").assign(source)); +} + +void insert_bad2() { + auto source = __infer_taint_source(); + source.insert(0, "string"); + __infer_taint_sink(source); +} + +void replace_bad1() { + auto source = __infer_taint_source(); + __infer_taint_sink(std::string("string").replace(0, 5, source)); +} + +void replace_bad2() { + auto source = __infer_taint_source(); + source.replace(0, 5, "string"); + __infer_taint_sink(source); +} + +void swap_bad() { + auto source = __infer_taint_source(); + auto laundered_source = std::string("string"); + laundered_source.swap(source); + __infer_taint_sink(laundered_source); +} + +template +class Formatter { + + public: + explicit Formatter(std::string str, Args&&... args); + std::string str(); +}; + +template +Formatter format1(std::string fmt, Args&&... args) { + return Formatter(fmt, std::forward(args)...); +} + +template +Formatter* format2(std::string fmt, Args&&... args) { + return new Formatter(fmt, std::forward(args)...); +} + +template +Formatter format3(std::string fmt, Args&&... args); + +template +Formatter* format4(std::string fmt, Args&&... args); + +void format_bad1() { + auto source = __infer_taint_source(); + auto laundered_source = format1("%s", source).str(); + __infer_taint_sink(laundered_source); +} + +void format_bad2() { + auto source = __infer_taint_source(); + auto laundered_source = format2("%s", source)->str(); + __infer_taint_sink(laundered_source); +} + +void format_bad3() { + auto source = __infer_taint_source(); + auto laundered_source = format3("%s", source).str(); + __infer_taint_sink(laundered_source); +} + +void format_bad4() { + auto source = __infer_taint_source(); + auto laundered_source = format4("%s", source)->str(); + __infer_taint_sink(laundered_source); +} + +void format_varargs_bad() { + auto source = __infer_taint_source(); + auto laundered_source = format3("%s%s", "a", source, "b").str(); + __infer_taint_sink(laundered_source); +} +}