From 58cdefc118c6565decf2d8fc7a4994fc42e7609e Mon Sep 17 00:00:00 2001 From: Sungkeun Cho Date: Sun, 20 Jan 2019 20:51:58 -0800 Subject: [PATCH] [inferbo] Add strlen model Summary: It extends the abstract location for C string length, i.e., the first index of the null character in character array. Reviewed By: mbouaziz Differential Revision: D13634241 fbshipit-source-id: d2727d5f5 --- infer/src/IR/Typ.ml | 4 ++ infer/src/IR/Typ.mli | 5 ++ infer/src/bufferoverrun/absLoc.ml | 18 +++++- .../src/bufferoverrun/bufferOverrunChecker.ml | 7 ++- .../src/bufferoverrun/bufferOverrunDomain.ml | 56 +++++++++++++++---- infer/src/bufferoverrun/bufferOverrunField.ml | 2 + .../src/bufferoverrun/bufferOverrunModels.ml | 10 +++- infer/src/bufferoverrun/bufferOverrunUtils.ml | 13 +++++ .../src/bufferoverrun/bufferOverrunUtils.mli | 2 + .../c/bufferoverrun/array_content.c | 48 ++++++++++++++++ .../codetoanalyze/c/bufferoverrun/issues.exp | 4 +- .../c/bufferoverrun/pointer_arith.c | 11 +++- 12 files changed, 163 insertions(+), 17 deletions(-) diff --git a/infer/src/IR/Typ.ml b/infer/src/IR/Typ.ml index 662f40d6c..182f72af2 100644 --- a/infer/src/IR/Typ.ml +++ b/infer/src/IR/Typ.ml @@ -260,6 +260,8 @@ let void = mk Tvoid let void_star = mk (Tptr (mk Tvoid, Pk_pointer)) +let uint = mk (Tint IUInt) + let get_ikind_opt {desc} = match desc with Tint ikind -> Some ikind | _ -> None (* TODO: size_t should be implementation-dependent. *) @@ -569,6 +571,8 @@ let is_int typ = match typ.desc with Tint _ -> true | _ -> false let is_unsigned_int typ = match typ.desc with Tint ikind -> ikind_is_unsigned ikind | _ -> false +let is_char typ = match typ.desc with Tint ikind -> ikind_is_char ikind | _ -> false + let has_block_prefix s = match Str.split_delim (Str.regexp_string Config.anonymous_block_prefix) s with | _ :: _ :: _ -> diff --git a/infer/src/IR/Typ.mli b/infer/src/IR/Typ.mli index 1ca6a43dd..37ead3360 100644 --- a/infer/src/IR/Typ.mli +++ b/infer/src/IR/Typ.mli @@ -136,6 +136,9 @@ val mk_array : ?default:t -> ?quals:type_quals -> ?length:IntLit.t -> ?stride:In val void : t (** void type *) +val uint : t +(** unsigned int type *) + val void_star : t (** void* type *) @@ -297,6 +300,8 @@ val is_int : t -> bool val is_unsigned_int : t -> bool +val is_char : t -> bool + val has_block_prefix : string -> bool val unsome : string -> t option -> t diff --git a/infer/src/bufferoverrun/absLoc.ml b/infer/src/bufferoverrun/absLoc.ml index 66cbb8d08..6a5d03817 100644 --- a/infer/src/bufferoverrun/absLoc.ml +++ b/infer/src/bufferoverrun/absLoc.ml @@ -162,6 +162,13 @@ module Loc = struct let is_var = function Var _ -> true | _ -> false + let is_c_strlen = function + | Field (_, fn) -> + Typ.Fieldname.equal fn BufferOverrunField.c_strlen + | _ -> + false + + let rec is_pretty = function | Var _ -> true @@ -175,6 +182,8 @@ module Loc = struct let of_allocsite a = Allocsite a + let of_c_strlen a = Field (of_allocsite a, BufferOverrunField.c_strlen) + let of_pvar pvar = Var (Var.of_pvar pvar) let of_id id = Var (Var.of_id id) @@ -202,6 +211,13 @@ module Loc = struct let is_literal_string = function Allocsite a -> Allocsite.is_literal_string a | _ -> None + let is_literal_string_strlen = function + | Field (l, fn) when Typ.Fieldname.equal BufferOverrunField.c_strlen fn -> + is_literal_string l + | _ -> + None + + let rec get_path = function | Var (LogicalVar _) -> None @@ -284,6 +300,6 @@ let can_strong_update : PowLoc.t -> bool = else match PowLoc.is_singleton_or_more ploc with | IContainer.Singleton loc -> - Loc.is_var loc + Loc.is_var loc || Loc.is_c_strlen loc | _ -> false diff --git a/infer/src/bufferoverrun/bufferOverrunChecker.ml b/infer/src/bufferoverrun/bufferOverrunChecker.ml index 7635567b3..a27c8ed12 100644 --- a/infer/src/bufferoverrun/bufferOverrunChecker.ml +++ b/infer/src/bufferoverrun/bufferOverrunChecker.ml @@ -177,7 +177,7 @@ module TransferFunctions = struct BoUtils.ModelEnv.mk_model_env pname ~node_hash location tenv integer_type_widths in BoUtils.Exec.decl_string model_env locs s mem - | Store (exp1, _, exp2, location) -> + | Store (exp1, typ, exp2, location) -> let locs = Sem.eval_locs exp1 mem in let v = Sem.eval integer_type_widths exp2 mem |> Dom.Val.add_assign_trace_elem location locs @@ -193,6 +193,11 @@ module TransferFunctions = struct Dom.Mem.store_relation locs sym_exps mem in let mem = Dom.Mem.update_mem locs v mem in + let mem = + if Language.curr_language_is Clang && Typ.is_char typ then + BoUtils.Exec.set_c_strlen ~tgt:(Sem.eval integer_type_widths exp1 mem) ~src:v mem + else mem + in let mem = if not v.represents_multiple_values then match PowLoc.is_singleton_or_more locs with diff --git a/infer/src/bufferoverrun/bufferOverrunDomain.ml b/infer/src/bufferoverrun/bufferOverrunDomain.ml index 2d3b884f2..4737de413 100644 --- a/infer/src/bufferoverrun/bufferOverrunDomain.ml +++ b/infer/src/bufferoverrun/bufferOverrunDomain.ml @@ -488,20 +488,24 @@ module Val = struct | Some s -> deref_of_literal_string s | None -> ( - match Loc.get_path l with - | None -> - L.d_printfln_escaped "Val.on_demand for %a -> no path" Loc.pp l ; - default - | Some path -> ( - match typ_of_param_path path with + match Loc.is_literal_string_strlen l with + | Some s -> + of_itv (Itv.of_int (String.length s)) + | None -> ( + match Loc.get_path l with | None -> - L.d_printfln_escaped "Val.on_demand for %a -> no type" Loc.pp l ; + L.d_printfln_escaped "Val.on_demand for %a -> no path" Loc.pp l ; default - | Some typ -> - L.d_printfln_escaped "Val.on_demand for %a" Loc.pp l ; - let may_last_field = may_last_field path in - let path = OndemandEnv.canonical_path typ_of_param_path path in - of_path tenv ~may_last_field integer_type_widths entry_location typ path ) ) + | Some path -> ( + match typ_of_param_path path with + | None -> + L.d_printfln_escaped "Val.on_demand for %a -> no type" Loc.pp l ; + default + | Some typ -> + L.d_printfln_escaped "Val.on_demand for %a" Loc.pp l ; + let may_last_field = may_last_field path in + let path = OndemandEnv.canonical_path typ_of_param_path path in + of_path tenv ~may_last_field integer_type_widths entry_location typ path ) ) ) module Itv = struct @@ -1174,6 +1178,22 @@ module MemReach = struct fun subst_map ~caller ~callee -> { caller with relation= Relation.instantiate subst_map ~caller:caller.relation ~callee:callee.relation } + + + (* unsound *) + let set_first_idx_of_null : Allocsite.t -> Val.t -> t -> t = + fun allocsite idx m -> update_mem (PowLoc.singleton (Loc.of_c_strlen allocsite)) idx m + + + (* unsound *) + let unset_first_idx_of_null : Allocsite.t -> Val.t -> t -> t = + fun allocsite idx m -> + let old_c_strlen = find_heap (Loc.of_c_strlen allocsite) m in + let idx_itv = Val.get_itv idx in + if Boolean.is_true (Itv.lt_sem idx_itv (Val.get_itv old_c_strlen)) then m + else + let new_c_strlen = Val.of_itv ~traces:(Val.get_traces idx) (Itv.incr idx_itv) in + set_first_idx_of_null allocsite new_c_strlen m end module Mem = struct @@ -1326,4 +1346,16 @@ module Mem = struct let unset_oenv = map ~f:MemReach.unset_oenv + + let set_first_idx_of_null allocsite idx = map ~f:(MemReach.set_first_idx_of_null allocsite idx) + + let unset_first_idx_of_null allocsite idx = + map ~f:(MemReach.unset_first_idx_of_null allocsite idx) + + + let get_c_strlen locs m = + let get_c_strlen' loc acc = + match loc with Loc.Allocsite a -> Val.join acc (find (Loc.of_c_strlen a) m) | _ -> acc + in + PowLoc.fold get_c_strlen' locs Val.bot end diff --git a/infer/src/bufferoverrun/bufferOverrunField.ml b/infer/src/bufferoverrun/bufferOverrunField.ml index e132e8b97..ab2523e8c 100644 --- a/infer/src/bufferoverrun/bufferOverrunField.ml +++ b/infer/src/bufferoverrun/bufferOverrunField.ml @@ -41,3 +41,5 @@ let mk, get_type = let java_collection_internal_array = mk "java.collection.$elements." Typ.(mk_array void) + +let c_strlen = mk "c.strlen" Typ.uint diff --git a/infer/src/bufferoverrun/bufferOverrunModels.ml b/infer/src/bufferoverrun/bufferOverrunModels.ml index a1999ffba..1196dc7b1 100644 --- a/infer/src/bufferoverrun/bufferOverrunModels.ml +++ b/infer/src/bufferoverrun/bufferOverrunModels.ml @@ -115,6 +115,14 @@ let memset arr_exp size_exp = {exec; check} +let strlen arr_exp = + let exec _ ~ret:(id, _) mem = + let v = Dom.Mem.get_c_strlen (Sem.eval_locs arr_exp mem) mem in + Dom.Mem.add_stack (Loc.of_id id) v mem + in + {exec; check= no_check} + + let realloc src_exp size_exp = let exec ({location; tenv; integer_type_widths} as model_env) ~ret:(id, _) mem = let size_exp = Prop.exp_normalize_noabs tenv Sil.sub_empty size_exp in @@ -548,7 +556,7 @@ module Call = struct ; -"realloc" <>$ capt_exp $+ capt_exp $+...$--> realloc ; -"__get_array_length" <>$ capt_exp $!--> get_array_length ; -"__set_array_length" <>$ capt_arg $+ capt_exp $!--> set_array_length - ; -"strlen" <>--> by_value Dom.Val.Itv.nat + ; -"strlen" <>$ capt_exp $!--> strlen ; -"memcpy" <>$ capt_exp $+ capt_exp $+ capt_exp $+...$--> memcpy ; -"memmove" <>$ capt_exp $+ capt_exp $+ capt_exp $+...$--> memcpy ; -"memset" <>$ capt_exp $+ any_arg $+ capt_exp $!--> memset diff --git a/infer/src/bufferoverrun/bufferOverrunUtils.ml b/infer/src/bufferoverrun/bufferOverrunUtils.ml index 85e7df5a7..ea78e25e5 100644 --- a/infer/src/bufferoverrun/bufferOverrunUtils.ml +++ b/infer/src/bufferoverrun/bufferOverrunUtils.ml @@ -187,8 +187,21 @@ module Exec = struct mem |> Dom.Mem.update_mem (PowLoc.singleton loc) v |> Dom.Mem.add_heap (Loc.of_allocsite allocsite) (Dom.Val.of_itv char_itv) + |> Dom.Mem.set_first_idx_of_null allocsite + (Dom.Val.of_itv ~traces (Itv.of_int (String.length s))) in PowLoc.fold decl locs mem + + + let set_c_strlen ~tgt ~src mem = + let traces = TraceSet.join (Dom.Val.get_traces tgt) (Dom.Val.get_traces src) in + let src_itv = Dom.Val.get_itv src in + let set_c_strlen1 allocsite arrinfo acc = + let idx = Dom.Val.of_itv ~traces (ArrayBlk.ArrInfo.offsetof arrinfo) in + if Itv.( <= ) ~lhs:Itv.zero ~rhs:src_itv then Dom.Mem.set_first_idx_of_null allocsite idx acc + else Dom.Mem.unset_first_idx_of_null allocsite idx acc + in + ArrayBlk.fold set_c_strlen1 (Dom.Val.get_array_blk tgt) mem end module Check = struct diff --git a/infer/src/bufferoverrun/bufferOverrunUtils.mli b/infer/src/bufferoverrun/bufferOverrunUtils.mli index 89c1dd025..7fde0affa 100644 --- a/infer/src/bufferoverrun/bufferOverrunUtils.mli +++ b/infer/src/bufferoverrun/bufferOverrunUtils.mli @@ -42,6 +42,8 @@ module Exec : sig val set_dyn_length : ModelEnv.model_env -> Typ.t -> PowLoc.t -> Itv.t -> Dom.Mem.t -> Dom.Mem.t val decl_string : ModelEnv.model_env -> PowLoc.t -> string -> Dom.Mem.t -> Dom.Mem.t + + val set_c_strlen : tgt:Dom.Val.t -> src:Dom.Val.t -> Dom.Mem.t -> Dom.Mem.t end module Check : sig diff --git a/infer/tests/codetoanalyze/c/bufferoverrun/array_content.c b/infer/tests/codetoanalyze/c/bufferoverrun/array_content.c index 11e30a3a0..9e8d09162 100644 --- a/infer/tests/codetoanalyze/c/bufferoverrun/array_content.c +++ b/infer/tests/codetoanalyze/c/bufferoverrun/array_content.c @@ -164,3 +164,51 @@ void call_literal_string_parameter2_Good() { } void call_literal_string_parameter2_Bad() { literal_string_parameter("hellp"); } + +void strlen_constant_Good() { + int a[10]; + char* s = "hello"; + a[strlen(s)] = 0; +} + +void strlen_constant_Bad() { + int a[5]; + char* s = "hello"; + a[strlen(s)] = 0; +} + +void strlen_malloc_Good() { + int a[10]; + char* s = (char*)malloc(sizeof(char) * 6); + s[0] = 'h'; + s[1] = 'e'; + s[2] = 'l'; + s[3] = 'l'; + s[4] = 'o'; + s[5] = '\0'; + a[strlen(s)] = 0; +} + +void strlen_malloc_Bad() { + int a[5]; + char* s = (char*)malloc(sizeof(char) * 6); + s[0] = 'h'; + s[1] = 'e'; + s[2] = 'l'; + s[3] = 'l'; + s[4] = 'o'; + s[5] = '\0'; + a[strlen(s)] = 0; +} + +void strlen_malloc_2_Good_FP() { + int a[5]; + char* s = (char*)malloc(sizeof(char) * 6); + s[0] = 'h'; + s[1] = 'e'; + s[2] = '\0'; + s[3] = 'l'; + s[4] = 'o'; + s[5] = '\0'; + a[strlen(s)] = 0; +} diff --git a/infer/tests/codetoanalyze/c/bufferoverrun/issues.exp b/infer/tests/codetoanalyze/c/bufferoverrun/issues.exp index cf21877f9..b9f144844 100644 --- a/infer/tests/codetoanalyze/c/bufferoverrun/issues.exp +++ b/infer/tests/codetoanalyze/c/bufferoverrun/issues.exp @@ -54,6 +54,9 @@ codetoanalyze/c/bufferoverrun/array_content.c, check_sorted_arr_good_FP, 2, COND codetoanalyze/c/bufferoverrun/array_content.c, check_sorted_ptr_good_FP, 2, CONDITION_ALWAYS_FALSE, no_bucket, WARNING, [Here] codetoanalyze/c/bufferoverrun/array_content.c, literal_string_Good, 4, CONDITION_ALWAYS_FALSE, no_bucket, WARNING, [Here] codetoanalyze/c/bufferoverrun/array_content.c, literal_string_bad, 5, BUFFER_OVERRUN_L2, no_bucket, ERROR, [,Array declaration,Array access: Offset: [0, 111] Size: 1] +codetoanalyze/c/bufferoverrun/array_content.c, strlen_constant_Bad, 3, BUFFER_OVERRUN_L1, no_bucket, ERROR, [,Array declaration,,Array declaration,Array access: Offset: 5 Size: 5] +codetoanalyze/c/bufferoverrun/array_content.c, strlen_malloc_2_Good_FP, 9, BUFFER_OVERRUN_L1, no_bucket, ERROR, [,Assignment,,Array declaration,Array access: Offset: 5 Size: 5] +codetoanalyze/c/bufferoverrun/array_content.c, strlen_malloc_Bad, 9, BUFFER_OVERRUN_L1, no_bucket, ERROR, [,Assignment,,Array declaration,Array access: Offset: 5 Size: 5] codetoanalyze/c/bufferoverrun/array_content.c, strong_update_malloc_Bad, 5, BUFFER_OVERRUN_L1, no_bucket, ERROR, [,Assignment,,Array declaration,Array access: Offset: 10 Size: 10] codetoanalyze/c/bufferoverrun/array_content.c, weak_update_Bad, 3, BUFFER_OVERRUN_L1, no_bucket, ERROR, [,Array declaration,Array access: Offset: 15 Size: 10] codetoanalyze/c/bufferoverrun/array_content.c, weak_update_Good_FP, 2, BUFFER_OVERRUN_L2, no_bucket, ERROR, [,Assignment,,Array declaration,Array access: Offset: [0, 15] Size: 10] @@ -200,7 +203,6 @@ codetoanalyze/c/bufferoverrun/nested_loop.c, nested_loop_narrowing_Good, 5, COND codetoanalyze/c/bufferoverrun/nested_loop_with_label.c, nested_loop_with_label, 4, CONDITION_ALWAYS_TRUE, no_bucket, WARNING, [Here] codetoanalyze/c/bufferoverrun/nested_loop_with_label.c, nested_loop_with_label, 6, BUFFER_OVERRUN_L2, no_bucket, ERROR, [,Assignment,,Array declaration,Array access: Offset: [0, 10] Size: 10] codetoanalyze/c/bufferoverrun/nested_loop_with_label.c, nested_loop_with_label, 7, CONDITION_ALWAYS_TRUE, no_bucket, WARNING, [Here] -codetoanalyze/c/bufferoverrun/pointer_arith.c, FP_pointer_arith5_Ok, 5, BUFFER_OVERRUN_L2, no_bucket, ERROR, [,Assignment,,Array declaration,Array access: Offset: [3, 2043] (⇐ [0, 1020] + [3, 1023]) Size: 1024] codetoanalyze/c/bufferoverrun/pointer_arith.c, array_pointer_arith_Bad, 3, BUFFER_OVERRUN_L1, no_bucket, ERROR, [,Array declaration,Assignment,Array access: Offset: 10 (⇐ 5 + 5) Size: 10] codetoanalyze/c/bufferoverrun/pointer_arith.c, call_pointer_arith3_Bad, 2, BUFFER_OVERRUN_L1, no_bucket, ERROR, [Array declaration,Call,,Parameter `x`,,Parameter `*p`,Array access: Offset: 10 (⇐ 100 + -90) Size: 5 by call to `pointer_arith3` ] codetoanalyze/c/bufferoverrun/pointer_arith.c, call_pointer_arith4_Bad, 2, BUFFER_OVERRUN_L1, no_bucket, ERROR, [Assignment,Call,,Parameter `*x`,,Array declaration,Array access: Offset: 10 (⇐ 100 + -90) Size: 5 by call to `FN_pointer_arith4_Bad` ] diff --git a/infer/tests/codetoanalyze/c/bufferoverrun/pointer_arith.c b/infer/tests/codetoanalyze/c/bufferoverrun/pointer_arith.c index 6ba2478af..b09fc733a 100644 --- a/infer/tests/codetoanalyze/c/bufferoverrun/pointer_arith.c +++ b/infer/tests/codetoanalyze/c/bufferoverrun/pointer_arith.c @@ -60,7 +60,7 @@ void call_pointer_arith4_Bad() { #include #include -void FP_pointer_arith5_Ok() { +void pointer_arith5_Ok() { char buf[1024]; fgets(buf, 1024, stdin); size_t len = strlen(buf); @@ -68,3 +68,12 @@ void FP_pointer_arith5_Ok() { (buf + len)[sizeof(buf) - len - 1] = '\0'; } } + +void FN_pointer_arith5_Bad() { + char buf[1024]; + fgets(buf, 1024, stdin); + size_t len = strlen(buf); + if (len < sizeof(buf) - 3) { + (buf + len)[sizeof(buf) - len] = '\0'; + } +}