diff --git a/infer/src/bufferoverrun/bufferOverrunDomain.ml b/infer/src/bufferoverrun/bufferOverrunDomain.ml index 9bc2a0037..3fd70ddb3 100644 --- a/infer/src/bufferoverrun/bufferOverrunDomain.ml +++ b/infer/src/bufferoverrun/bufferOverrunDomain.ml @@ -411,6 +411,10 @@ module Val = struct let prune_ge_one : t -> t = lift_prune1 Itv.prune_ge_one + let prune_length_lt : t -> Itv.t -> t = + fun x y -> lift_prune_length1 (fun x -> Itv.prune_lt x y) x + + let prune_length_eq : t -> Itv.t -> t = fun x y -> lift_prune_length1 (fun x -> Itv.prune_eq x y) x @@ -833,27 +837,36 @@ module AliasTarget = struct (* Relations between values of logical variables(registers) and program variables "Simple relation": Since Sil distinguishes logical and program variables, we need a relation for - pruning values of program variables. For example, a C statement "if(x){...}" is translated to - "%r=load(x); if(%r){...}" in Sil. At the load statement, we record the alias between the - values of %r and x, then we can prune not only the value of %r, but also that of x inside the - if branch. The java_tmp field is an additional slot for keeping one more alias of temporary - variable in Java. The i field is to express "%r=load(x)+i". + pruning values of program variables. For example, a C statement [if(x){...}] is translated to + [%r=load(x); if(%r){...}] in Sil. At the load statement, we record the alias between the + values of [%r] and [x], then we can prune not only the value of [%r], but also that of [x] + inside the if branch. The [java_tmp] field is an additional slot for keeping one more alias of + temporary variable in Java. The [i] field is to express [%r=load(x)+i]. - "Empty relation": For pruning vector.length with vector::empty() results, we adopt a specific - relation between %r and v->elements, where %r=v.empty(). So, if %r!=0, v's array length - ([v->elements->length]) is pruned by 0. On the other hand, if %r==0, v's array length is - pruned by >=1. + "Empty relation": For pruning [vector.length] with [vector::empty()] results, we adopt a specific + relation between [%r] and [v->elements], where [%r=v.empty()]. So, if [%r!=0], [v]'s array + length ([v->elements->length]) is pruned by [=0]. On the other hand, if [%r==0], [v]'s array + length is pruned by [>=1]. "Size relation": This is for pruning vector's length. When there is a function call, - %r=x.size(), the alias target for %r becomes AliasTarget.size {l=x.elements}. The java_tmp - field is an additional slot for keeping one more alias of temporary variable in Java. The i - field is to express "%r=x.size()+i", which is required to follow the semantics of `Array.add` - inside loops precisely. *) + [%r=x.size()], the alias target for [%r] becomes [AliasTarget.size {l=x.elements}]. The + [java_tmp] field is an additional slot for keeping one more alias of temporary variable in + Java. The [i] field is to express [%r=x.size()+i], which is required to follow the semantics + of [Array.add] inside loops precisely. + + "Iterator offset relation": This is for tracking a relation between an iterator offset and a + length of array. If [%r] has an alias to [IteratorOffset {l; i}], which means that [%r's + iterator offset] is same to [length(l)+i]. + + "HasNext relation": This is for tracking return values of the [hasNext] function. If [%r] has an + alias to [HasNext {l}], which means that [%r] is a [hasNext] results of the iterator [l]. *) type t = | Simple of {l: Loc.t; i: IntLit.t; java_tmp: Loc.t option} | Empty of Loc.t | Size of {alias_typ: alias_typ; l: Loc.t; i: IntLit.t; java_tmp: Loc.t option} | Fgets of Loc.t + | IteratorOffset of {l: Loc.t; i: IntLit.t; java_tmp: Loc.t option} + | IteratorHasNext of {l: Loc.t; java_tmp: Loc.t option} | Top [@@deriving compare] @@ -876,6 +889,11 @@ module AliasTarget = struct Loc.pp l pp_intlit i | Fgets l -> F.fprintf fmt "%t=fgets(%a)" pp_key Loc.pp l + | IteratorOffset {l; i; java_tmp} -> + F.fprintf fmt "iterator offset(%t%a)=length(%a)%a" pp_key pp_java_tmp java_tmp Loc.pp l + pp_intlit i + | IteratorHasNext {l; java_tmp} -> + F.fprintf fmt "%t%a=hasNext(%a)" pp_key pp_java_tmp java_tmp Loc.pp l | Top -> F.fprintf fmt "%t=?" pp_key @@ -885,9 +903,17 @@ module AliasTarget = struct let fgets l = Fgets l let get_locs = function - | Simple {l; java_tmp= Some tmp} | Size {l; java_tmp= Some tmp} -> + | Simple {l; java_tmp= Some tmp} + | Size {l; java_tmp= Some tmp} + | IteratorOffset {l; java_tmp= Some tmp} + | IteratorHasNext {l; java_tmp= Some tmp} -> PowLoc.singleton l |> PowLoc.add tmp - | Simple {l; java_tmp= None} | Size {l; java_tmp= None} | Empty l | Fgets l -> + | Simple {l; java_tmp= None} + | Size {l; java_tmp= None} + | Empty l + | Fgets l + | IteratorOffset {l; java_tmp= None} + | IteratorHasNext {l; java_tmp= None} -> PowLoc.singleton l | Top -> PowLoc.empty @@ -907,6 +933,12 @@ module AliasTarget = struct Option.map (f l) ~f:(fun l -> Size {alias_typ; l; i; java_tmp}) | Fgets l -> Option.map (f l) ~f:(fun l -> Fgets l) + | IteratorOffset {l; i; java_tmp} -> + let java_tmp = Option.bind java_tmp ~f in + Option.map (f l) ~f:(fun l -> IteratorOffset {l; i; java_tmp}) + | IteratorHasNext {l; java_tmp} -> + let java_tmp = Option.bind java_tmp ~f in + Option.map (f l) ~f:(fun l -> IteratorHasNext {l; java_tmp}) | Top -> Some Top @@ -956,6 +988,8 @@ module AliasTarget = struct match x with | Size {alias_typ; l; i} when Loc.equal l loc -> Size {alias_typ; l; i= IntLit.(add i minus_one); java_tmp= None} + | IteratorOffset {l; i; java_tmp} when Loc.equal l loc -> + IteratorOffset {l; i= IntLit.(add i minus_one); java_tmp} | _ -> x end @@ -1009,6 +1043,10 @@ module AliasMap = struct match find_opt (Key.LocKey loc) x with | Some (AliasTarget.Size a) -> Some (AliasTarget.Size {a with java_tmp= Some loc}) + | Some (AliasTarget.IteratorOffset a) -> + Some (AliasTarget.IteratorOffset {a with java_tmp= Some loc}) + | Some (AliasTarget.IteratorHasNext a) -> + Some (AliasTarget.IteratorHasNext {a with java_tmp= Some loc}) | _ as alias -> alias @@ -1061,6 +1099,30 @@ module AliasMap = struct add (Key.of_loc loc) (AliasTarget.Size {alias_typ; l; i= IntLit.add i n; java_tmp= None}) x | _ -> x + + + let add_iterator_offset_alias id arr x = + add (Key.of_id id) (AliasTarget.IteratorOffset {l= arr; i= IntLit.zero; java_tmp= None}) x + + + let incr_iterator_offset_alias id x = + match find_opt (Key.of_id id) x with + | Some (AliasTarget.IteratorOffset ({i; java_tmp} as tgt)) -> + let i = IntLit.(add i one) in + let x = add (Key.of_id id) (AliasTarget.IteratorOffset {tgt with i}) x in + Option.value_map java_tmp ~default:x ~f:(fun java_tmp -> + add (Key.of_loc java_tmp) (AliasTarget.IteratorOffset {tgt with i; java_tmp= None}) x + ) + | _ -> + x + + + let add_iterator_has_next_alias ~ret_id ~iterator x = + match find_opt (Key.of_id iterator) x with + | Some (AliasTarget.IteratorOffset {java_tmp= Some java_tmp}) -> + add (Key.of_id ret_id) (AliasTarget.IteratorHasNext {l= java_tmp; java_tmp= None}) x + | _ -> + x end module AliasRet = struct @@ -1102,6 +1164,8 @@ module Alias = struct let find_id : Ident.t -> t -> AliasTarget.t option = fun x -> bind_map (AliasMap.find_id x) + let find_loc : Loc.t -> t -> AliasTarget.t option = fun x -> bind_map (AliasMap.find_loc x) + let find_ret : t -> AliasTarget.t option = fun x -> AliasRet.get x.ret let load : Ident.t -> AliasTarget.t -> t -> t = fun id loc -> lift_map (AliasMap.load id loc) @@ -1159,6 +1223,28 @@ module Alias = struct a + let add_iterator_offset_alias : Ident.t -> PowLoc.t -> t -> t = + fun id arr_locs a -> + match PowLoc.is_singleton_or_more arr_locs with + | IContainer.Singleton arr_loc -> + lift_map (AliasMap.add_iterator_offset_alias id arr_loc) a + | More -> + (* NOTE: Keeping only one alias here is suboptimal, but current alias domain can keep one + alias for each ident, which will be extended later. *) + let arr_loc = PowLoc.min_elt arr_locs in + lift_map (AliasMap.add_iterator_offset_alias id arr_loc) a + | Empty -> + a + + + let incr_iterator_offset_alias : Ident.t -> t -> t = + fun id a -> lift_map (AliasMap.incr_iterator_offset_alias id) a + + + let add_iterator_has_next_alias : ret_id:Ident.t -> iterator:Ident.t -> t -> t = + fun ~ret_id ~iterator a -> lift_map (AliasMap.add_iterator_has_next_alias ~ret_id ~iterator) a + + let remove_temp : Ident.t -> t -> t = fun temp -> lift_map (AliasMap.remove (AliasMap.Key.of_id temp)) end @@ -1646,12 +1732,14 @@ module MemReach = struct let find_alias_id : Ident.t -> _ t0 -> AliasTarget.t option = fun k m -> Alias.find_id k m.alias + let find_alias_loc : Loc.t -> _ t0 -> AliasTarget.t option = fun k m -> Alias.find_loc k m.alias + let find_simple_alias : Ident.t -> _ t0 -> (Loc.t * IntLit.t option) option = fun k m -> match Alias.find_id k m.alias with | Some (AliasTarget.Simple {l; i}) -> Some (l, if IntLit.iszero i then None else Some i) - | Some (AliasTarget.Empty _ | AliasTarget.Size _ | AliasTarget.Fgets _ | AliasTarget.Top) + | Some AliasTarget.(Empty _ | Size _ | Fgets _ | IteratorOffset _ | IteratorHasNext _ | Top) | None -> None @@ -1692,6 +1780,20 @@ module MemReach = struct let incr_size_alias locs m = {m with alias= Alias.incr_size_alias locs m.alias} + let add_iterator_offset_alias id m = + let arr_locs = + let add_arr l v acc = if Itv.is_zero (Val.array_sizeof v) then PowLoc.add l acc else acc in + MemPure.fold add_arr m.mem_pure PowLoc.empty + in + {m with alias= Alias.add_iterator_offset_alias id arr_locs m.alias} + + + let incr_iterator_offset_alias id m = {m with alias= Alias.incr_iterator_offset_alias id m.alias} + + let add_iterator_has_next_alias ~ret_id ~iterator m = + {m with alias= Alias.add_iterator_has_next_alias ~ret_id ~iterator m.alias} + + let add_stack_loc : Loc.t -> t -> t = fun k m -> {m with stack_locs= StackLocs.add k m.stack_locs} let add_stack : ?represents_multiple_values:bool -> Loc.t -> Val.t -> t -> t = @@ -2032,6 +2134,10 @@ module Mem = struct fun k -> f_lift_default ~default:None (MemReach.find_alias_id k) + let find_alias_loc : Loc.t -> _ t0 -> AliasTarget.t option = + fun k -> f_lift_default ~default:None (MemReach.find_alias_loc k) + + let find_simple_alias : Ident.t -> _ t0 -> (Loc.t * IntLit.t option) option = fun k -> f_lift_default ~default:None (MemReach.find_simple_alias k) @@ -2071,6 +2177,24 @@ module Mem = struct let incr_size_alias locs = map ~f:(MemReach.incr_size_alias locs) + let add_iterator_offset_alias : Ident.t -> t -> t = + fun id -> map ~f:(MemReach.add_iterator_offset_alias id) + + + let incr_iterator_offset_alias : Exp.t -> t -> t = + fun iterator m -> + match iterator with Exp.Var id -> map ~f:(MemReach.incr_iterator_offset_alias id) m | _ -> m + + + let add_iterator_has_next_alias : Ident.t -> Exp.t -> t -> t = + fun ret_id iterator m -> + match iterator with + | Exp.Var iterator -> + map ~f:(MemReach.add_iterator_has_next_alias ~ret_id ~iterator) m + | _ -> + m + + let add_stack_loc : Loc.t -> t -> t = fun k -> map ~f:(MemReach.add_stack_loc k) let add_stack : ?represents_multiple_values:bool -> Loc.t -> Val.t -> t -> t = diff --git a/infer/src/bufferoverrun/bufferOverrunModels.ml b/infer/src/bufferoverrun/bufferOverrunModels.ml index a4850d591..ec5ead80b 100644 --- a/infer/src/bufferoverrun/bufferOverrunModels.ml +++ b/infer/src/bufferoverrun/bufferOverrunModels.ml @@ -889,7 +889,7 @@ module Collection = struct let iterator coll_exp = let exec {integer_type_widths} ~ret:(ret_id, _) mem = let itr = Sem.eval integer_type_widths coll_exp mem in - model_by_value itr ret_id mem + model_by_value itr ret_id mem |> Dom.Mem.add_iterator_offset_alias ret_id in {exec; check= no_check} @@ -918,6 +918,7 @@ module Collection = struct |> Dom.Val.get_iterator_itv |> Dom.Val.set_itv_updated_by_addition in model_by_value collection_size ret_id mem + |> Dom.Mem.add_iterator_has_next_alias ret_id iterator in {exec; check= no_check} @@ -927,6 +928,7 @@ module Collection = struct let traces = Sem.eval integer_type_widths iterator mem |> Dom.Val.get_traces in let locs = eval_collection_internal_array_locs iterator mem in model_by_value (Dom.Val.of_pow_loc ~traces locs) id mem + |> Dom.Mem.incr_iterator_offset_alias iterator in {exec; check= no_check} diff --git a/infer/src/bufferoverrun/bufferOverrunSemantics.ml b/infer/src/bufferoverrun/bufferOverrunSemantics.ml index b1189f9a0..06f9b91d8 100644 --- a/infer/src/bufferoverrun/bufferOverrunSemantics.ml +++ b/infer/src/bufferoverrun/bufferOverrunSemantics.ml @@ -317,11 +317,8 @@ let rec eval_arr : Typ.IntegerWidths.t -> Exp.t -> Mem.t -> Val.t = | Some (AliasTarget.Simple {l= loc; i}) when IntLit.iszero i -> Mem.find loc mem | Some - ( AliasTarget.Simple _ - | AliasTarget.Size _ - | AliasTarget.Empty _ - | AliasTarget.Fgets _ - | Top ) + AliasTarget.( + Simple _ | Size _ | Empty _ | Fgets _ | IteratorOffset _ | IteratorHasNext _ | Top) | None -> Val.bot ) | Exp.Lvar pvar -> @@ -514,12 +511,31 @@ let eval_array_locs_length arr_locs mem = module Prune = struct type t = {prune_pairs: PrunePairs.t; mem: Mem.t} + let collection_length_of_iterator loc mem = + let arr_locs = + Mem.find loc mem |> Val.get_all_locs + |> PowLoc.append_field ~fn:BufferOverrunField.java_collection_internal_array + in + eval_array_locs_length arr_locs mem + + let update_mem_in_prune lv v ?(pruning_exp = PruningExp.Unknown) {prune_pairs; mem} = let prune_pairs = PrunePairs.add lv (PrunedVal.make v pruning_exp) prune_pairs in let mem = Mem.update_mem (PowLoc.singleton lv) v mem in {prune_pairs; mem} + let prune_has_next ~true_branch iterator ({mem} as astate) = + match Mem.find_alias_loc iterator mem with + | Some (IteratorOffset {l= arr_loc; i}) when IntLit.(eq i zero) -> + let length = collection_length_of_iterator iterator mem |> Val.get_itv in + let v = Mem.find arr_loc mem in + let v' = (if true_branch then Val.prune_length_lt else Val.prune_length_eq) v length in + update_mem_in_prune arr_loc v' astate + | _ -> + astate + + let prune_unop : Exp.t -> t -> t = fun e ({mem} as astate) -> match e with @@ -537,7 +553,9 @@ module Prune = struct let strlen_loc = Loc.of_c_strlen lv in let v' = Val.prune_ge_one (Mem.find strlen_loc mem) in update_mem_in_prune strlen_loc v' astate - | Some (AliasTarget.Simple _ | AliasTarget.Size _ | Top) | None -> + | Some (IteratorHasNext {l= iterator}) -> + prune_has_next ~true_branch:true iterator astate + | Some AliasTarget.(Simple _ | Size _ | IteratorOffset _ | Top) | None -> astate ) | Exp.UnOp (Unop.LNot, Exp.Var x, _) -> ( match Mem.find_alias_id x mem with @@ -549,7 +567,9 @@ module Prune = struct let v = Mem.find lv mem in let v' = Val.prune_length_ge_one v in update_mem_in_prune lv v' astate - | Some (AliasTarget.Simple _ | AliasTarget.Size _ | AliasTarget.Fgets _ | Top) | None -> + | Some (IteratorHasNext {l= iterator}) -> + prune_has_next ~true_branch:false iterator astate + | Some AliasTarget.(Simple _ | Size _ | Fgets _ | IteratorOffset _ | Top) | None -> astate ) | _ -> astate diff --git a/infer/src/bufferoverrun/itv.ml b/infer/src/bufferoverrun/itv.ml index 3e0549120..a7c79239a 100644 --- a/infer/src/bufferoverrun/itv.ml +++ b/infer/src/bufferoverrun/itv.ml @@ -656,6 +656,8 @@ let prune_ge_one : t -> t = bind1 ItvPure.prune_ge_one let prune_comp : Binop.t -> t -> t -> t = fun comp -> bind2 (ItvPure.prune_comp comp) +let prune_lt : t -> t -> t = lift2 ItvPure.prune_lt + let prune_eq : t -> t -> t = bind2 ItvPure.prune_eq let prune_ne : t -> t -> t = bind2 ItvPure.prune_ne diff --git a/infer/src/bufferoverrun/itv.mli b/infer/src/bufferoverrun/itv.mli index 2354769a1..ebb2fab49 100644 --- a/infer/src/bufferoverrun/itv.mli +++ b/infer/src/bufferoverrun/itv.mli @@ -231,6 +231,8 @@ val prune_eq : t -> t -> t val prune_ne : t -> t -> t +val prune_lt : t -> t -> t + val subst : t -> Bound.eval_sym -> t val max_of_ikind : Typ.IntegerWidths.t -> Typ.ikind -> t diff --git a/infer/tests/codetoanalyze/java/bufferoverrun/ArrayListTest.java b/infer/tests/codetoanalyze/java/bufferoverrun/ArrayListTest.java index 37bf151ec..93175250c 100644 --- a/infer/tests/codetoanalyze/java/bufferoverrun/ArrayListTest.java +++ b/infer/tests/codetoanalyze/java/bufferoverrun/ArrayListTest.java @@ -73,4 +73,49 @@ class ArrayListTest { } // a.size should be [0, b.size] int j = b.get(a.size()); } + + void add_in_loop_iterator_ok(ArrayList b) { + ArrayList a = new ArrayList<>(); + for (Integer i : b) { + a.add(i); + } + int j = a.get(b.size() - 1); + } + + void add_in_loop_iterator_bad(ArrayList b) { + ArrayList a = new ArrayList<>(); + for (Integer i : b) { + a.add(i); + } + int j = a.get(b.size() + 1); + } + + void remove_in_loop_iterator_good_FP(ArrayList b) { + ArrayList a = new ArrayList<>(); + for (Integer i : b) { + a.add(i); + } + for (Integer i : b) { + a.remove(i); + } + /* a.size should be 0, but it is analyzed to [-oo, b.size] for now. + - array smashing: It abstracts all members as one abstract value, so cannot precisely analyze + the set of members in the array. + - imprecise remove model: Even with the array smashing, it should have been able to analyze + as [0, b.size], if the semantics of the model was preciser. */ + if (a.size() < 0) { + int j = b.get(b.size()); + } + } + + void remove_in_loop_iterator_bad(ArrayList b) { + ArrayList a = new ArrayList<>(); + for (Integer i : b) { + a.add(i); + } + for (Integer i : b) { + a.remove(i); + } // a.size should be 0 + int j = a.get(0); + } } diff --git a/infer/tests/codetoanalyze/java/bufferoverrun/issues.exp b/infer/tests/codetoanalyze/java/bufferoverrun/issues.exp index 5e069dd4c..df920dfe9 100644 --- a/infer/tests/codetoanalyze/java/bufferoverrun/issues.exp +++ b/infer/tests/codetoanalyze/java/bufferoverrun/issues.exp @@ -10,7 +10,9 @@ codetoanalyze/java/bufferoverrun/Array.java, codetoanalyze.java.bufferoverrun.Ar codetoanalyze/java/bufferoverrun/ArrayListTest.java, ArrayListTest.add_in_loop_bad():void, 5, BUFFER_OVERRUN_L1, no_bucket, ERROR, [,Array declaration,Array access: Offset: 6 Size: 5] codetoanalyze/java/bufferoverrun/ArrayListTest.java, ArrayListTest.add_in_loop_by_param2_bad(java.util.ArrayList):void, 7, BUFFER_OVERRUN_L2, no_bucket, ERROR, [,Array declaration,Assignment,,Assignment,Array declaration,Array access: Offset: [0, b.length] Size: b.length] codetoanalyze/java/bufferoverrun/ArrayListTest.java, ArrayListTest.add_in_loop_by_param_bad(java.util.ArrayList):void, 5, BUFFER_OVERRUN_L1, no_bucket, ERROR, [,Assignment,Array declaration,Assignment,,Parameter `b.elements[*]`,Assignment,Array declaration,Array access: Offset: b.length + 1 Size: b.length] +codetoanalyze/java/bufferoverrun/ArrayListTest.java, ArrayListTest.add_in_loop_iterator_bad(java.util.ArrayList):void, 5, BUFFER_OVERRUN_L1, no_bucket, ERROR, [,Parameter `b.elements[*]`,Assignment,,Array declaration,Array access: Offset: b.length + 1 Size: b.length] codetoanalyze/java/bufferoverrun/ArrayListTest.java, ArrayListTest.alloc_is_negative_bad():void, 2, INFERBO_ALLOC_IS_NEGATIVE, no_bucket, ERROR, [Allocation: Length: -1] +codetoanalyze/java/bufferoverrun/ArrayListTest.java, ArrayListTest.remove_in_loop_iterator_good_FP(java.util.ArrayList):void, 14, BUFFER_OVERRUN_L1, no_bucket, ERROR, [,Parameter `b.elements[*]`,Assignment,,Parameter `b.elements[*]`,Array access: Offset: b.length Size: b.length] codetoanalyze/java/bufferoverrun/ArrayMember.java, codetoanalyze.java.bufferoverrun.ArrayMember.load_array_member_Bad():void, 4, BUFFER_OVERRUN_L1, no_bucket, ERROR, [,Parameter `this.buf[*]`,Assignment,,Array declaration,Array access: Offset: [max(10, this.buf[*].lb), min(10, this.buf[*].ub)] Size: 10] codetoanalyze/java/bufferoverrun/CompressedData.java, codetoanalyze.java.bufferoverrun.CompressedData.decompressData(codetoanalyze.java.bufferoverrun.CompressedData$D):int, 9, INTEGER_OVERFLOW_L5, no_bucket, ERROR, [,Parameter `this.yy`,,Parameter `d.cci[*].s`,Assignment,Binary operation: ([0, this.yy - 1] × d.cci[*].s):signed32] codetoanalyze/java/bufferoverrun/External.java, External.external_function_Bad(external.library.SomeExternalClass):void, 1, INTEGER_OVERFLOW_L5, no_bucket, ERROR, [,Assignment,,Assignment,Binary operation: ([-oo, +oo] + [-oo, +oo]):signed32]