From 10804588b224871412c83c9a8dc6974539ade333 Mon Sep 17 00:00:00 2001 From: Mehdi Bouaziz Date: Sun, 28 Oct 2018 23:47:25 -0700 Subject: [PATCH] New function pointer preanalysis without recursion Reviewed By: jvillard Differential Revision: D10146134 fbshipit-source-id: 3874a403c --- infer/man/man1/infer-full.txt | 4 + infer/src/IR/Cfg.ml | 5 +- infer/src/IR/Procdesc.ml | 13 ++- infer/src/IR/Procdesc.mli | 5 +- infer/src/backend/preanal.ml | 13 ++- infer/src/base/Config.ml | 7 ++ infer/src/base/Config.mli | 2 + infer/src/checkers/functionPointers.ml | 96 +++++++++++++++++++ .../codetoanalyze/c/bufferoverrun/Makefile | 2 +- .../c/bufferoverrun/function_call.c | 5 +- .../codetoanalyze/c/bufferoverrun/issues.exp | 5 +- .../tests/codetoanalyze/cpp/quandary/Makefile | 2 +- .../codetoanalyze/cpp/quandary/issues.exp | 1 + .../codetoanalyze/cpp/quandary/pointers.cpp | 10 +- 14 files changed, 154 insertions(+), 16 deletions(-) create mode 100644 infer/src/checkers/functionPointers.ml diff --git a/infer/man/man1/infer-full.txt b/infer/man/man1/infer-full.txt index 7fcd057f1..43f734922 100644 --- a/infer/man/man1/infer-full.txt +++ b/infer/man/man1/infer-full.txt @@ -1138,6 +1138,10 @@ INTERNAL OPTIONS Activates: Output statistics about the capture phase to *.o.astlog (clang only) (Conversely: --no-frontend-stats) + --function-pointer-specialization + Activates: Do function pointer preprocessing (clang only). + (Conversely: --no-function-pointer-specialization) + --gen-previous-build-command-script-reset Cancel the effect of --gen-previous-build-command-script. diff --git a/infer/src/IR/Cfg.ml b/infer/src/IR/Cfg.ml index 3bd8d2a72..09668092d 100644 --- a/infer/src/IR/Cfg.ml +++ b/infer/src/IR/Cfg.ml @@ -114,7 +114,7 @@ let inline_synthetic_method ((ret_id, _) as ret) etl pdesc loc_call : Sil.instr (** Find synthetic (access or bridge) Java methods in the procedure and inline them in the cfg. *) let proc_inline_synthetic_methods cfg pdesc : unit = - let instr_inline_synthetic_method instr = + let instr_inline_synthetic_method _node instr = match instr with | Sil.Call (ret_id_typ, Exp.Const (Const.Cfun (Typ.Procname.Java java_pn as pn)), etl, loc, _) -> ( @@ -132,7 +132,8 @@ let proc_inline_synthetic_methods cfg pdesc : unit = | _ -> instr in - Procdesc.replace_instrs pdesc ~f:instr_inline_synthetic_method + let (_updated : bool) = Procdesc.replace_instrs pdesc ~f:instr_inline_synthetic_method in + () let inline_java_synthetic_methods cfg = diff --git a/infer/src/IR/Procdesc.ml b/infer/src/IR/Procdesc.ml index 4c04cbea4..16c802ec5 100644 --- a/infer/src/IR/Procdesc.ml +++ b/infer/src/IR/Procdesc.ml @@ -210,8 +210,11 @@ module Node = struct (** Map and replace the instructions to be executed *) let replace_instrs node ~f = - let instrs' = Instrs.map_changed ~equal:phys_equal node.instrs ~f in - if not (phys_equal instrs' node.instrs) then node.instrs <- instrs' + let instrs' = Instrs.map_changed ~equal:phys_equal node.instrs ~f:(f node) in + if phys_equal instrs' node.instrs then false + else ( + node.instrs <- instrs' ; + true ) let pp_stmt fmt = function @@ -503,8 +506,10 @@ let find_map_instrs pdesc ~f = let replace_instrs pdesc ~f = - let do_node node = Node.replace_instrs ~f node in - iter_nodes do_node pdesc + let f updated node = + Node.replace_instrs ~f node || (* do not short-circuit [Node.replace_instrs] *) updated + in + fold_nodes pdesc ~init:false ~f (** fold between two nodes or until we reach a branching structure *) diff --git a/infer/src/IR/Procdesc.mli b/infer/src/IR/Procdesc.mli index 4c8d9ca7b..1111bea87 100644 --- a/infer/src/IR/Procdesc.mli +++ b/infer/src/IR/Procdesc.mli @@ -249,8 +249,9 @@ val is_java_synchronized : t -> bool val iter_instrs : (Node.t -> Sil.instr -> unit) -> t -> unit (** iterate over all nodes and their instructions *) -val replace_instrs : t -> f:(Sil.instr -> Sil.instr) -> unit -(** Map and replace the instructions to be executed *) +val replace_instrs : t -> f:(Node.t -> Sil.instr -> Sil.instr) -> bool +(** Map and replace the instructions to be executed. + Returns true if at least one substitution occured. *) val iter_nodes : (Node.t -> unit) -> t -> unit (** iterate over all the nodes of a procedure *) diff --git a/infer/src/backend/preanal.ml b/infer/src/backend/preanal.ml index 45b28fbbf..4d8512258 100644 --- a/infer/src/backend/preanal.ml +++ b/infer/src/backend/preanal.ml @@ -181,5 +181,16 @@ let do_abstraction pdesc = Procdesc.signal_did_preanalysis pdesc +let do_funptr_sub pdesc tenv = + let updated = FunctionPointers.substitute_function_pointers pdesc tenv in + if updated then Attributes.store ~proc_desc:(Some pdesc) (Procdesc.get_attributes pdesc) + + let do_preanalysis pdesc tenv = - if not (Procdesc.did_preanalysis pdesc) then ( do_liveness pdesc tenv ; do_abstraction pdesc ) + if not (Procdesc.did_preanalysis pdesc) then ( + if + Config.function_pointer_specialization + && not (Typ.Procname.is_java (Procdesc.get_proc_name pdesc)) + then do_funptr_sub pdesc tenv ; + do_liveness pdesc tenv ; + do_abstraction pdesc ) diff --git a/infer/src/base/Config.ml b/infer/src/base/Config.ml index 0687ce319..aa99bd93b 100644 --- a/infer/src/base/Config.ml +++ b/infer/src/base/Config.ml @@ -1339,6 +1339,11 @@ and frontend_stats = "Output statistics about the capture phase to *.o.astlog (clang only)" +and function_pointer_specialization = + CLOpt.mk_bool ~long:"function-pointer-specialization" ~default:false + "Do function pointer preprocessing (clang only)." + + and gen_previous_build_command_script = CLOpt.mk_string_opt ~long:"gen-previous-build-command-script" ~in_help:InferCommand.[(Diff, manual_generic)] @@ -2632,6 +2637,8 @@ and from_json_report = !from_json_report and frontend_stats = !frontend_stats +and function_pointer_specialization = !function_pointer_specialization + and frontend_tests = !frontend_tests and gen_previous_build_command_script = !gen_previous_build_command_script diff --git a/infer/src/base/Config.mli b/infer/src/base/Config.mli index b325d450a..e2ff0f215 100644 --- a/infer/src/base/Config.mli +++ b/infer/src/base/Config.mli @@ -378,6 +378,8 @@ val frontend_tests : bool val frontend_stats : bool +val function_pointer_specialization : bool + val gen_previous_build_command_script : string option val generated_classes : string option diff --git a/infer/src/checkers/functionPointers.ml b/infer/src/checkers/functionPointers.ml new file mode 100644 index 000000000..043d5faaa --- /dev/null +++ b/infer/src/checkers/functionPointers.ml @@ -0,0 +1,96 @@ +(* + * Copyright (c) 2018-present, Facebook, Inc. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +open! IStd +module F = Format + +module Procname = struct + type t = Typ.Procname.t [@@deriving compare] + + let pp = Typ.Procname.pp +end + +module ProcnameSet = AbstractDomain.FiniteSet (Procname) +module Domain = AbstractDomain.Map (String) (ProcnameSet) + +module TransferFunctions (CFG : ProcCfg.S) = struct + module CFG = CFG + module Domain = Domain + + type extras = ProcData.no_extras + + let exec_instr astate _ _ = function + | Sil.Load (lhs_id, _, _, _) when Ident.is_none lhs_id -> + astate + | Sil.Load (lhs_id, Exp.Lvar rhs_pvar, Typ.({desc= Tptr ({desc= Tfun _}, _)}), _) -> + let fun_ptr = + try Domain.find (Pvar.to_string rhs_pvar) astate with Caml.Not_found -> ProcnameSet.empty + in + Domain.add (Ident.to_string lhs_id) fun_ptr astate + | Sil.Store (Lvar lhs_pvar, _, Exp.Const (Const.Cfun pn), _) -> + (* strong update *) + Domain.add (Pvar.to_string lhs_pvar) (ProcnameSet.singleton pn) astate + | Sil.Abstract _ | Call _ | Load _ | Nullify _ | Prune _ | Remove_temps _ | Store _ -> + astate + + + let pp_session_name _node fmt = F.pp_print_string fmt "function pointers" +end + +module CFG = ProcCfg.Normal +module Analyzer = AbstractInterpreter.MakeRPO (TransferFunctions (CFG)) + +let find_procname var astate = + match Domain.find_opt (Ident.to_string var) astate with + | Some procnames -> + if ProcnameSet.is_empty procnames then None + else Some (ProcnameSet.min_elt procnames) + (* TODO: handle multiple procnames, e.g. with non-determinism branching *) + | None -> + None + + +let substitute_expr astate expr = + match expr with + | Exp.Var var -> ( + match find_procname var astate with Some pname -> Exp.Const (Const.Cfun pname) | None -> expr ) + | _ -> + expr + + +let substitute_arg astate arg = + let expr, typ = arg in + let expr' = substitute_expr astate expr in + if phys_equal expr' expr then arg else (expr', typ) + + +let substitute_function_ptrs ~function_pointers node instr = + match instr with + | Sil.Call (ret, e_fun, args, loc, cfs) -> ( + let node_id = CFG.Node.id node in + match Analyzer.extract_post node_id function_pointers with + | None -> + instr + | Some astate -> + let e_fun' = substitute_expr astate e_fun in + let args' = IList.map_changed args ~equal:phys_equal ~f:(substitute_arg astate) in + if phys_equal e_fun' e_fun && phys_equal args' args then instr + else Sil.Call (ret, e_fun', args', loc, cfs) ) + | _ -> + instr + + +let get_function_pointers proc_desc tenv = + let proc_data = ProcData.make_default proc_desc tenv in + let cfg = CFG.from_pdesc proc_desc in + Analyzer.exec_cfg cfg proc_data ~initial:Domain.empty + + +let substitute_function_pointers proc_desc tenv = + let function_pointers = get_function_pointers proc_desc tenv in + let f = substitute_function_ptrs ~function_pointers in + Procdesc.replace_instrs proc_desc ~f diff --git a/infer/tests/codetoanalyze/c/bufferoverrun/Makefile b/infer/tests/codetoanalyze/c/bufferoverrun/Makefile index 908e31361..6e5c8364a 100644 --- a/infer/tests/codetoanalyze/c/bufferoverrun/Makefile +++ b/infer/tests/codetoanalyze/c/bufferoverrun/Makefile @@ -8,7 +8,7 @@ TESTS_DIR = ../../.. CLANG_OPTIONS = -c -INFER_OPTIONS = -F --project-root $(TESTS_DIR) --bufferoverrun-only +INFER_OPTIONS = -F --project-root $(TESTS_DIR) --bufferoverrun-only --function-pointer-specialization INFERPRINT_OPTIONS = --issues-tests SOURCES = $(wildcard *.c) diff --git a/infer/tests/codetoanalyze/c/bufferoverrun/function_call.c b/infer/tests/codetoanalyze/c/bufferoverrun/function_call.c index 2f6becbbc..faf2727ef 100644 --- a/infer/tests/codetoanalyze/c/bufferoverrun/function_call.c +++ b/infer/tests/codetoanalyze/c/bufferoverrun/function_call.c @@ -74,15 +74,16 @@ void call_by_struct_ptr_bad() { int ret_zero() { return 0; } -void call_function_ptr_good_FP() { +void call_function_ptr_good() { int (*func_ptr)(void) = &ret_zero; int arr[10]; if ((*func_ptr)() != 0) { + // unreacheable arr[10] = 1; } } -void call_function_ptr_bad() { +void call_function_ptr_bad1() { int (*func_ptr)(void) = &ret_zero; int arr[10]; if ((*func_ptr)() == 0) { diff --git a/infer/tests/codetoanalyze/c/bufferoverrun/issues.exp b/infer/tests/codetoanalyze/c/bufferoverrun/issues.exp index 651a4d397..439d90f4a 100644 --- a/infer/tests/codetoanalyze/c/bufferoverrun/issues.exp +++ b/infer/tests/codetoanalyze/c/bufferoverrun/issues.exp @@ -53,8 +53,9 @@ codetoanalyze/c/bufferoverrun/for_loop.c, for_loop, 10, BUFFER_OVERRUN_L3, no_bu codetoanalyze/c/bufferoverrun/function_call.c, call_by_arr_bad, 3, BUFFER_OVERRUN_L1, no_bucket, ERROR, [ArrayDeclaration,Call,Assignment,ArrayAccess: Offset: -1 Size: 10] codetoanalyze/c/bufferoverrun/function_call.c, call_by_ptr_bad, 4, BUFFER_OVERRUN_L1, no_bucket, ERROR, [ArrayDeclaration,Call,Assignment,ArrayAccess: Offset: -1 Size: 10] codetoanalyze/c/bufferoverrun/function_call.c, call_by_struct_ptr_bad, 5, BUFFER_OVERRUN_L1, no_bucket, ERROR, [ArrayDeclaration,Call,Assignment,ArrayAccess: Offset: -1 Size: 10] -codetoanalyze/c/bufferoverrun/function_call.c, call_function_ptr_bad, 4, BUFFER_OVERRUN_L1, no_bucket, ERROR, [ArrayDeclaration,ArrayAccess: Offset: 10 Size: 10] -codetoanalyze/c/bufferoverrun/function_call.c, call_function_ptr_good_FP, 4, BUFFER_OVERRUN_L1, no_bucket, ERROR, [ArrayDeclaration,ArrayAccess: Offset: 10 Size: 10] +codetoanalyze/c/bufferoverrun/function_call.c, call_function_ptr_bad1, 3, CONDITION_ALWAYS_TRUE, no_bucket, WARNING, [] +codetoanalyze/c/bufferoverrun/function_call.c, call_function_ptr_bad1, 4, BUFFER_OVERRUN_L1, no_bucket, ERROR, [ArrayDeclaration,ArrayAccess: Offset: 10 Size: 10] +codetoanalyze/c/bufferoverrun/function_call.c, call_function_ptr_good, 3, CONDITION_ALWAYS_FALSE, no_bucket, WARNING, [] codetoanalyze/c/bufferoverrun/function_call.c, function_call, 4, BUFFER_OVERRUN_L1, no_bucket, ERROR, [ArrayDeclaration,Call,Parameter: arr,Assignment,ArrayAccess: Offset: 100 Size: 10 by call to `arr_access` ] codetoanalyze/c/bufferoverrun/get_field.c, FP_call_get_field_Good, 3, BUFFER_OVERRUN_L5, no_bucket, ERROR, [ArrayDeclaration,Call,Call,Assignment,Return,Assignment,Return,ArrayAccess: Offset: [-oo, +oo] Size: 5] codetoanalyze/c/bufferoverrun/get_field.c, call_get_field_Bad, 3, BUFFER_OVERRUN_L5, no_bucket, ERROR, [ArrayDeclaration,Call,Call,Assignment,Return,Assignment,Return,ArrayAccess: Offset: [-oo, +oo] Size: 5] diff --git a/infer/tests/codetoanalyze/cpp/quandary/Makefile b/infer/tests/codetoanalyze/cpp/quandary/Makefile index f99e8f9fa..c544dd8be 100644 --- a/infer/tests/codetoanalyze/cpp/quandary/Makefile +++ b/infer/tests/codetoanalyze/cpp/quandary/Makefile @@ -7,7 +7,7 @@ TESTS_DIR = ../../.. # see explanations in cpp/errors/Makefile for the custom isystem CLANG_OPTIONS = -x c++ -std=c++11 -nostdinc++ -isystem$(ROOT_DIR) -isystem$(CLANG_INCLUDES)/c++/v1/ -c -INFER_OPTIONS = \ +INFER_OPTIONS = --function-pointer-specialization \ --quandary-only --passthroughs --debug-exceptions \ --project-root $(TESTS_DIR) \ diff --git a/infer/tests/codetoanalyze/cpp/quandary/issues.exp b/infer/tests/codetoanalyze/cpp/quandary/issues.exp index b744b1b77..c4a39e0ed 100644 --- a/infer/tests/codetoanalyze/cpp/quandary/issues.exp +++ b/infer/tests/codetoanalyze/cpp/quandary/issues.exp @@ -101,6 +101,7 @@ codetoanalyze/cpp/quandary/pointers.cpp, pointers::assign_pointer_pass_to_sink_b codetoanalyze/cpp/quandary/pointers.cpp, pointers::assign_source_by_reference_bad1, 3, QUANDARY_TAINT_ERROR, no_bucket, ERROR, [Return from __infer_taint_source with tainted data @val$0*,Return from pointers::assign_source_by_reference,Call to __infer_taint_sink with tainted index 0] codetoanalyze/cpp/quandary/pointers.cpp, pointers::assign_source_by_reference_bad2, 2, QUANDARY_TAINT_ERROR, no_bucket, ERROR, [Return from __infer_taint_source with tainted data @val$0*,Return from pointers::assign_source_by_reference,Call to __infer_taint_sink with tainted index 0] codetoanalyze/cpp/quandary/pointers.cpp, pointers::assign_source_by_reference_bad3, 3, QUANDARY_TAINT_ERROR, no_bucket, ERROR, [Return from __infer_taint_source with tainted data @val$0*,Return from pointers::assign_source_by_reference with tainted data @val$0*,Return from pointers::call_assign_source_by_reference,Call to __infer_taint_sink with tainted index 0] +codetoanalyze/cpp/quandary/pointers.cpp, pointers::funptr_bad0, 2, QUANDARY_TAINT_ERROR, no_bucket, ERROR, [Return from __infer_taint_source,Call to __infer_taint_sink with tainted index 0] codetoanalyze/cpp/quandary/sanitizers.cpp, sanitizers::dead_sanitizer_bad, 3, QUANDARY_TAINT_ERROR, no_bucket, ERROR, [Return from __infer_taint_source,Call to __infer_taint_sink with tainted index 0] codetoanalyze/cpp/quandary/sanitizers.cpp, sanitizers::escape_shell_to_url_bad, 3, UNTRUSTED_URL_RISK, no_bucket, ERROR, [Return from __infer_taint_source,Call to __infer_url_sink with tainted index 0] codetoanalyze/cpp/quandary/sanitizers.cpp, sanitizers::escape_sql_to_shell_bad, 3, SHELL_INJECTION, no_bucket, ERROR, [Return from __infer_taint_source,Call to system with tainted index 0] diff --git a/infer/tests/codetoanalyze/cpp/quandary/pointers.cpp b/infer/tests/codetoanalyze/cpp/quandary/pointers.cpp index 6f2e5ae79..8710de46f 100644 --- a/infer/tests/codetoanalyze/cpp/quandary/pointers.cpp +++ b/infer/tests/codetoanalyze/cpp/quandary/pointers.cpp @@ -65,11 +65,19 @@ void FP_reuse_pointer_as_local_ok(std::string* pointer) { __infer_taint_sink(*pointer); } +void funptr_bad0() { + auto f = __infer_taint_sink; + f(*(__infer_taint_source())); +} + void funptr_helper_bad1(void (*sink)(std::string)) { sink(*(__infer_taint_source())); } -void funptr_bad1() { funptr_helper_bad1(__infer_taint_sink); } +void funptr_bad1_FN() { + auto f = __infer_taint_sink; + funptr_helper_bad1(f); +} void funptr_helper_bad2(std::string* (*source)()) { __infer_taint_sink(*(source()));