From f0028669eea31e622d160501b876d0e3b3252ed2 Mon Sep 17 00:00:00 2001 From: Sam Blackshear Date: Sat, 20 Aug 2016 08:16:06 -0700 Subject: [PATCH] propagate tainted sources Reviewed By: mbouaziz Differential Revision: D3723878 fbshipit-source-id: 6774e0b --- infer/src/checkers/accessPath.ml | 7 ++- infer/src/checkers/accessPath.mli | 5 +- infer/src/quandary/TaintAnalysis.ml | 94 +++++++++++++++++++++++++++-- infer/src/unit/TaintTests.ml | 84 ++++++++++++++++++++++++++ infer/src/unit/accessPathTests.ml | 4 +- infer/src/unit/analyzerTester.ml | 2 + 6 files changed, 183 insertions(+), 13 deletions(-) diff --git a/infer/src/checkers/accessPath.ml b/infer/src/checkers/accessPath.ml index 7fc69816b..5e4b6bd84 100644 --- a/infer/src/checkers/accessPath.ml +++ b/infer/src/checkers/accessPath.ml @@ -103,8 +103,8 @@ let of_exp exp typ ~(f_resolve_id : Ident.t -> raw option) = None in of_exp_ exp typ [] -let append (base, accesses) access = - base, accesses @ [access] +let append (base, old_accesses) new_accesses = + base, old_accesses @ new_accesses let rec is_prefix_path path1 path2 = if path1 == path2 @@ -143,7 +143,8 @@ let pp_access fmt = function let pp_raw fmt (base, accesses) = let pp_access_list fmt accesses = - F.pp_print_list pp_access fmt accesses in + let pp_sep _ _ = () in + F.pp_print_list ~pp_sep pp_access fmt accesses in F.fprintf fmt "%a%a" pp_base base pp_access_list accesses let pp fmt = function diff --git a/infer/src/checkers/accessPath.mli b/infer/src/checkers/accessPath.mli index e0ffbf840..1b0c005c5 100644 --- a/infer/src/checkers/accessPath.mli +++ b/infer/src/checkers/accessPath.mli @@ -50,8 +50,9 @@ val of_id : Ident.t -> Typ.t -> raw (** convert [exp] to a raw access path, resolving identifiers using [f_resolve_id] *) val of_exp : Exp.t -> Typ.t -> f_resolve_id:(Ident.t -> raw option) -> raw option -(** append a new access to an existing access path; e.g., `append_access g x.f` produces `x.f.g` *) -val append : raw -> access -> raw +(** append new accesses to an existing access path; e.g., `append_access x.f [g, h]` produces + `x.f.g.h` *) +val append : raw -> access list -> raw (** return true if [ap1] is a prefix of [ap2]. returns true for equal access paths *) val is_prefix : raw -> raw -> bool diff --git a/infer/src/quandary/TaintAnalysis.ml b/infer/src/quandary/TaintAnalysis.ml index 3e04c35e7..2fea1ecf0 100644 --- a/infer/src/quandary/TaintAnalysis.ml +++ b/infer/src/quandary/TaintAnalysis.ml @@ -65,22 +65,106 @@ module Make (TraceDomain : Trace.S) = struct type formal_list = AccessPath.base list type extras = formal_list + let is_formal base proc_data = + IList.exists (AccessPath.base_equal base) proc_data.ProcData.extras + + let is_rooted_in_formal ap proc_data = + let root, _ = AccessPath.extract ap in + is_formal root proc_data + + let resolve_id id_map id = + try Some (IdMapDomain.find id id_map) + with Not_found -> None + + (* get the node associated with [access_path] in [access_tree] *) + let access_path_get_node access_path access_tree proc_data loc = + match TaintDomain.get_node access_path access_tree with + | Some _ as node_opt -> + node_opt + | None when is_rooted_in_formal access_path proc_data -> + let call_site = CallSite.make (Cfg.Procdesc.get_proc_name proc_data.ProcData.pdesc) loc in + let trace = + TraceDomain.of_source (TraceDomain.Source.make_footprint access_path call_site) in + Some (TaintDomain.make_normal_leaf trace) + | None -> + None + + (* get the node associated with [exp] in [access_tree] *) + let exp_get_node exp typ { Domain.access_tree; id_map; } proc_data loc = + let f_resolve_id = resolve_id id_map in + match AccessPath.of_exp exp typ ~f_resolve_id with + | Some access_path -> + access_path_get_node (AccessPath.Exact access_path) access_tree proc_data loc + | None -> + (* can't make an access path from [exp] *) + None + + let analyze_assignment lhs_access_path rhs_exp rhs_typ astate proc_data loc = + let rhs_node = + match exp_get_node rhs_exp rhs_typ astate proc_data loc with + | Some node -> node + | None -> TaintDomain.empty_node in + let access_tree = TaintDomain.add_node lhs_access_path rhs_node astate.Domain.access_tree in + { astate with Domain.access_tree; } + let add_source source ret_id ret_typ access_tree = let trace = TraceDomain.of_source source in let id_ap = AccessPath.Exact (AccessPath.of_id ret_id ret_typ) in TaintDomain.add_trace id_ap trace access_tree - let exec_instr ({ Domain.access_tree; } as astate) proc_data _ instr = + let exec_instr ({ Domain.access_tree; id_map; } as astate) proc_data _ instr = + let f_resolve_id = resolve_id id_map in match instr with + | Sil.Letderef (lhs_id, rhs_exp, rhs_typ, _loc) -> + begin + match AccessPath.of_exp rhs_exp rhs_typ ~f_resolve_id with + | Some rhs_access_path -> + let id_map' = IdMapDomain.add lhs_id rhs_access_path id_map in + { astate with Domain.id_map = id_map'; } + | None -> + astate + end + | Sil.Set (lhs_exp, lhs_typ, rhs_exp, loc) -> + let lhs_access_path = + match AccessPath.of_exp lhs_exp lhs_typ ~f_resolve_id with + | Some access_path -> + access_path + | None -> + failwithf + "Assignment to unexpected lhs expression %a in proc %a at loc %a" + (Sil.pp_exp pe_text) lhs_exp + Procname.pp (Cfg.Procdesc.get_proc_name (proc_data.ProcData.pdesc)) + Location.pp loc in + let astate' = + analyze_assignment + (AccessPath.Exact lhs_access_path) rhs_exp lhs_typ astate proc_data loc in + begin + (* direct `exp = id` assignments are treated specially; we update the id map too. this + is so future reads of `exp` will get the subtree associated with `id` (needed to + handle the `id = foo(); exp = id case` and similar). *) + match rhs_exp with + | Exp.Var rhs_id -> + let existing_accesses = + try snd (IdMapDomain.find rhs_id astate'.Domain.id_map) + with Not_found -> [] in + let lhs_ap' = AccessPath.append lhs_access_path existing_accesses in + let id_map' = IdMapDomain.add rhs_id lhs_ap' astate'.Domain.id_map in + { astate' with Domain.id_map = id_map'; } + | _ -> + astate' + end | Sil.Call (ret_ids, Const (Cfun callee_pname), _, callee_loc, _) -> let call_site = CallSite.make callee_pname callee_loc in let ret_typ = match callee_pname with | Procname.Java java_pname -> let ret_typ_str = Procname.java_get_return_type java_pname in - Option.default - Typ.Tvoid - (Tenv.lookup_java_typ_from_string (proc_data.ProcData.tenv) ret_typ_str) + begin + match Tenv.lookup_java_typ_from_string (proc_data.ProcData.tenv) ret_typ_str with + | Some (Typ.Tstruct _ as typ) -> Typ.Tptr (typ, Typ.Pk_pointer) + | Some typ -> typ + | None -> Typ.Tvoid + end | Procname.C _ -> Typ.Tvoid (* for tests only, since tests use C-style procnames *) | _ -> @@ -99,8 +183,6 @@ module Make (TraceDomain : Trace.S) = struct astate_with_source | Sil.Call _ -> failwith "Unimp: non-pname call expressions" - | Sil.Letderef _ | Set _ -> - failwith "Unimp: assignment, load, and store" | Sil.Prune _ | Remove_temps _ | Nullify _ | Abstract _ | Stackop _ | Declare_locals _ -> astate end diff --git a/infer/src/unit/TaintTests.ml b/infer/src/unit/TaintTests.ml index 95d5746ae..005e6fab3 100644 --- a/infer/src/unit/TaintTests.ml +++ b/infer/src/unit/TaintTests.ml @@ -112,6 +112,11 @@ let tests = let assign_to_non_source ret_str = let procname = Procname.from_string_c_fun "NON-SOURCE" in make_call ~procname [ident_of_str ret_str] [] in + let assign_id_to_field root_str fld_str rhs_id_str = + let rhs_exp = Exp.Var (ident_of_str rhs_id_str) in + make_store ~rhs_typ:Typ.Tvoid (Exp.Var (ident_of_str root_str)) fld_str ~rhs_exp in + let read_field_to_id lhs_id_str root_str fld_str = + make_load ~rhs_typ:Typ.Tvoid lhs_id_str fld_str (Exp.Var (ident_of_str root_str)) in let assert_empty = invariant "{ }" in let test_list = [ "source recorded", @@ -124,5 +129,84 @@ let tests = assign_to_non_source "ret_id"; assert_empty; ]; + "source flows to var", + [ + assign_to_source "ret_id"; + var_assign_id "var" "ret_id"; + invariant "{ ret_id$0 => (SOURCE -> ?), &var => (SOURCE -> ?) }"; + ]; + "source flows to field", + [ + assign_to_source "ret_id"; + assign_id_to_field "base_id" "f" "ret_id"; + invariant "{ base_id$0.f => (SOURCE -> ?), ret_id$0 => (SOURCE -> ?) }"; + ]; + "source flows to field then var", + [ + assign_to_source "ret_id"; + assign_id_to_field "base_id" "f" "ret_id"; + read_field_to_id "read_id" "base_id" "f"; + var_assign_id "var" "read_id"; + invariant + "{ base_id$0.f => (SOURCE -> ?), ret_id$0 => (SOURCE -> ?), &var => (SOURCE -> ?) }"; + ]; + "source flows to var then cleared", + [ + assign_to_source "ret_id"; + var_assign_id "var" "ret_id"; + invariant "{ ret_id$0 => (SOURCE -> ?), &var => (SOURCE -> ?) }"; + assign_to_non_source "non_source_id"; + var_assign_id "var" "non_source_id"; + invariant "{ ret_id$0 => (SOURCE -> ?) }"; + ]; + "source flows to field then cleared", + [ + assign_to_source "ret_id"; + assign_id_to_field "base_id" "f" "ret_id"; + invariant "{ base_id$0.f => (SOURCE -> ?), ret_id$0 => (SOURCE -> ?) }"; + assign_to_non_source "non_source_id"; + assign_id_to_field "base_id" "f" "non_source_id"; + invariant "{ ret_id$0 => (SOURCE -> ?) }"; + ]; + "var id alias test", + [ + assign_to_non_source "ret_id"; + var_assign_id "var1" "ret_id"; + assign_to_source "source_id"; + assign_id_to_field "ret_id" "f" "source_id"; + invariant "{ source_id$0 => (SOURCE -> ?), &var1.f => (SOURCE -> ?) }"; + read_field_to_id "read_id" "ret_id" "f"; + invariant "{ source_id$0 => (SOURCE -> ?), &var1.f => (SOURCE -> ?) }"; + var_assign_id "var2" "read_id"; + invariant + "{ source_id$0 => (SOURCE -> ?), &var1.f => (SOURCE -> ?), &var2 => (SOURCE -> ?) }"; + ]; + "field id alias test1", + [ + assign_to_non_source "ret_id"; + assign_to_source "source_id"; + assign_id_to_field "ret_id" "g" "source_id"; + assign_to_non_source "var_id"; + var_assign_id "var" "var_id"; + assign_id_to_field "var_id" "f" "ret_id"; + invariant + "{ ret_id$0.g => (SOURCE -> ?), source_id$0 => (SOURCE -> ?), &var.f.g => (SOURCE -> ?) }"; + ]; + "field id alias test2", + [ + assign_to_non_source "ret_id"; + read_field_to_id "g_id" "ret_id" "g"; + var_assign_id "var1" "g_id"; + assign_to_source "source_id"; + invariant "{ source_id$0 => (SOURCE -> ?) }"; + assign_id_to_field "g_id" "f" "source_id"; + invariant "{ source_id$0 => (SOURCE -> ?), &var1.g.f => (SOURCE -> ?) }"; + id_assign_var "var_id" "var1"; + read_field_to_id "var_g_id" "var_id" "g"; + read_field_to_id "var_g_f_id" "var_g_id" "f"; + var_assign_id "var2" "var_g_f_id"; + invariant + "{ source_id$0 => (SOURCE -> ?), &var1.g.f => (SOURCE -> ?), &var2 => (SOURCE -> ?) }"; + ]; ] |> TestInterpreter.create_tests ~pp_opt:pp_sparse [] in "taint_test_suite">:::test_list diff --git a/infer/src/unit/accessPathTests.ml b/infer/src/unit/accessPathTests.ml index 7f5c39a16..b9c31f3c5 100644 --- a/infer/src/unit/accessPathTests.ml +++ b/infer/src/unit/accessPathTests.ml @@ -55,8 +55,8 @@ let tests = let assert_eq input expected = assert_equal ~cmp:AccessPath.raw_equal ~pp_diff input expected in let append_test_ _ = - assert_eq xF (AccessPath.append x f_access); - assert_eq xFG (AccessPath.append xF g_access) in + assert_eq xF (AccessPath.append x [f_access]); + assert_eq xFG (AccessPath.append xF [g_access]) in "append">::append_test_ in let prefix_test = diff --git a/infer/src/unit/analyzerTester.ml b/infer/src/unit/analyzerTester.ml index 2af439330..b2468b486 100644 --- a/infer/src/unit/analyzerTester.ml +++ b/infer/src/unit/analyzerTester.ml @@ -248,6 +248,8 @@ module Make let post_str = try let state = M.find node_id inv_map in + (*L.stderr "post is %a@." I.A.pp state.post; + L.stderr "and with sparse pringer: %a@." pp_state state.post;*) pp_to_string pp_state state.post with Not_found -> "_|_" in if inv_str <> post_str then