[inferbo] Extend alias for collection iteration loop

Summary:
This diff extends the alias domain to analyze loop with list comprehensions form in Java precisely.

```
list2 = new List();
for (Element e : list1) {
    list2.add(e);
}
```

1. `IteratorOffset` is a relation between a iterator offset and a length of another array.  For example, in the above example, after n-times of iterations, the offset of the iterator (if it exists) and the length of `list2` are the same as `n`.

2. `IteratorHasNext` is a relation between iterator and its `hasNext` result.

3. At the conditional nodes, it prunes the alias list length of `list2` by that of `list1`.

* if `hasNext(list1's iterator)` is true, `list2`'s length is pruned by `< list1's length`
* if `hasNext(list1's iterator)` is false, `list2`'s length is pruned by `= list1's length`

Reviewed By: ezgicicek

Differential Revision: D17667128

fbshipit-source-id: 41fb23a45
master
Sungkeun Cho 5 years ago committed by Facebook Github Bot
parent ec62fbefb2
commit 1468dcc1d9

@ -411,6 +411,10 @@ module Val = struct
let prune_ge_one : t -> t = lift_prune1 Itv.prune_ge_one
let prune_length_lt : t -> Itv.t -> t =
fun x y -> lift_prune_length1 (fun x -> Itv.prune_lt x y) x
let prune_length_eq : t -> Itv.t -> t =
fun x y -> lift_prune_length1 (fun x -> Itv.prune_eq x y) x
@ -833,27 +837,36 @@ module AliasTarget = struct
(* Relations between values of logical variables(registers) and program variables
"Simple relation": Since Sil distinguishes logical and program variables, we need a relation for
pruning values of program variables. For example, a C statement "if(x){...}" is translated to
"%r=load(x); if(%r){...}" in Sil. At the load statement, we record the alias between the
values of %r and x, then we can prune not only the value of %r, but also that of x inside the
if branch. The java_tmp field is an additional slot for keeping one more alias of temporary
variable in Java. The i field is to express "%r=load(x)+i".
pruning values of program variables. For example, a C statement [if(x){...}] is translated to
[%r=load(x); if(%r){...}] in Sil. At the load statement, we record the alias between the
values of [%r] and [x], then we can prune not only the value of [%r], but also that of [x]
inside the if branch. The [java_tmp] field is an additional slot for keeping one more alias of
temporary variable in Java. The [i] field is to express [%r=load(x)+i].
"Empty relation": For pruning vector.length with vector::empty() results, we adopt a specific
relation between %r and v->elements, where %r=v.empty(). So, if %r!=0, v's array length
([v->elements->length]) is pruned by 0. On the other hand, if %r==0, v's array length is
pruned by >=1.
"Empty relation": For pruning [vector.length] with [vector::empty()] results, we adopt a specific
relation between [%r] and [v->elements], where [%r=v.empty()]. So, if [%r!=0], [v]'s array
length ([v->elements->length]) is pruned by [=0]. On the other hand, if [%r==0], [v]'s array
length is pruned by [>=1].
"Size relation": This is for pruning vector's length. When there is a function call,
%r=x.size(), the alias target for %r becomes AliasTarget.size {l=x.elements}. The java_tmp
field is an additional slot for keeping one more alias of temporary variable in Java. The i
field is to express "%r=x.size()+i", which is required to follow the semantics of `Array.add`
inside loops precisely. *)
[%r=x.size()], the alias target for [%r] becomes [AliasTarget.size {l=x.elements}]. The
[java_tmp] field is an additional slot for keeping one more alias of temporary variable in
Java. The [i] field is to express [%r=x.size()+i], which is required to follow the semantics
of [Array.add] inside loops precisely.
"Iterator offset relation": This is for tracking a relation between an iterator offset and a
length of array. If [%r] has an alias to [IteratorOffset {l; i}], which means that [%r's
iterator offset] is same to [length(l)+i].
"HasNext relation": This is for tracking return values of the [hasNext] function. If [%r] has an
alias to [HasNext {l}], which means that [%r] is a [hasNext] results of the iterator [l]. *)
type t =
| Simple of {l: Loc.t; i: IntLit.t; java_tmp: Loc.t option}
| Empty of Loc.t
| Size of {alias_typ: alias_typ; l: Loc.t; i: IntLit.t; java_tmp: Loc.t option}
| Fgets of Loc.t
| IteratorOffset of {l: Loc.t; i: IntLit.t; java_tmp: Loc.t option}
| IteratorHasNext of {l: Loc.t; java_tmp: Loc.t option}
| Top
[@@deriving compare]
@ -876,6 +889,11 @@ module AliasTarget = struct
Loc.pp l pp_intlit i
| Fgets l ->
F.fprintf fmt "%t=fgets(%a)" pp_key Loc.pp l
| IteratorOffset {l; i; java_tmp} ->
F.fprintf fmt "iterator offset(%t%a)=length(%a)%a" pp_key pp_java_tmp java_tmp Loc.pp l
pp_intlit i
| IteratorHasNext {l; java_tmp} ->
F.fprintf fmt "%t%a=hasNext(%a)" pp_key pp_java_tmp java_tmp Loc.pp l
| Top ->
F.fprintf fmt "%t=?" pp_key
@ -885,9 +903,17 @@ module AliasTarget = struct
let fgets l = Fgets l
let get_locs = function
| Simple {l; java_tmp= Some tmp} | Size {l; java_tmp= Some tmp} ->
| Simple {l; java_tmp= Some tmp}
| Size {l; java_tmp= Some tmp}
| IteratorOffset {l; java_tmp= Some tmp}
| IteratorHasNext {l; java_tmp= Some tmp} ->
PowLoc.singleton l |> PowLoc.add tmp
| Simple {l; java_tmp= None} | Size {l; java_tmp= None} | Empty l | Fgets l ->
| Simple {l; java_tmp= None}
| Size {l; java_tmp= None}
| Empty l
| Fgets l
| IteratorOffset {l; java_tmp= None}
| IteratorHasNext {l; java_tmp= None} ->
PowLoc.singleton l
| Top ->
PowLoc.empty
@ -907,6 +933,12 @@ module AliasTarget = struct
Option.map (f l) ~f:(fun l -> Size {alias_typ; l; i; java_tmp})
| Fgets l ->
Option.map (f l) ~f:(fun l -> Fgets l)
| IteratorOffset {l; i; java_tmp} ->
let java_tmp = Option.bind java_tmp ~f in
Option.map (f l) ~f:(fun l -> IteratorOffset {l; i; java_tmp})
| IteratorHasNext {l; java_tmp} ->
let java_tmp = Option.bind java_tmp ~f in
Option.map (f l) ~f:(fun l -> IteratorHasNext {l; java_tmp})
| Top ->
Some Top
@ -956,6 +988,8 @@ module AliasTarget = struct
match x with
| Size {alias_typ; l; i} when Loc.equal l loc ->
Size {alias_typ; l; i= IntLit.(add i minus_one); java_tmp= None}
| IteratorOffset {l; i; java_tmp} when Loc.equal l loc ->
IteratorOffset {l; i= IntLit.(add i minus_one); java_tmp}
| _ ->
x
end
@ -1009,6 +1043,10 @@ module AliasMap = struct
match find_opt (Key.LocKey loc) x with
| Some (AliasTarget.Size a) ->
Some (AliasTarget.Size {a with java_tmp= Some loc})
| Some (AliasTarget.IteratorOffset a) ->
Some (AliasTarget.IteratorOffset {a with java_tmp= Some loc})
| Some (AliasTarget.IteratorHasNext a) ->
Some (AliasTarget.IteratorHasNext {a with java_tmp= Some loc})
| _ as alias ->
alias
@ -1061,6 +1099,30 @@ module AliasMap = struct
add (Key.of_loc loc) (AliasTarget.Size {alias_typ; l; i= IntLit.add i n; java_tmp= None}) x
| _ ->
x
let add_iterator_offset_alias id arr x =
add (Key.of_id id) (AliasTarget.IteratorOffset {l= arr; i= IntLit.zero; java_tmp= None}) x
let incr_iterator_offset_alias id x =
match find_opt (Key.of_id id) x with
| Some (AliasTarget.IteratorOffset ({i; java_tmp} as tgt)) ->
let i = IntLit.(add i one) in
let x = add (Key.of_id id) (AliasTarget.IteratorOffset {tgt with i}) x in
Option.value_map java_tmp ~default:x ~f:(fun java_tmp ->
add (Key.of_loc java_tmp) (AliasTarget.IteratorOffset {tgt with i; java_tmp= None}) x
)
| _ ->
x
let add_iterator_has_next_alias ~ret_id ~iterator x =
match find_opt (Key.of_id iterator) x with
| Some (AliasTarget.IteratorOffset {java_tmp= Some java_tmp}) ->
add (Key.of_id ret_id) (AliasTarget.IteratorHasNext {l= java_tmp; java_tmp= None}) x
| _ ->
x
end
module AliasRet = struct
@ -1102,6 +1164,8 @@ module Alias = struct
let find_id : Ident.t -> t -> AliasTarget.t option = fun x -> bind_map (AliasMap.find_id x)
let find_loc : Loc.t -> t -> AliasTarget.t option = fun x -> bind_map (AliasMap.find_loc x)
let find_ret : t -> AliasTarget.t option = fun x -> AliasRet.get x.ret
let load : Ident.t -> AliasTarget.t -> t -> t = fun id loc -> lift_map (AliasMap.load id loc)
@ -1159,6 +1223,28 @@ module Alias = struct
a
let add_iterator_offset_alias : Ident.t -> PowLoc.t -> t -> t =
fun id arr_locs a ->
match PowLoc.is_singleton_or_more arr_locs with
| IContainer.Singleton arr_loc ->
lift_map (AliasMap.add_iterator_offset_alias id arr_loc) a
| More ->
(* NOTE: Keeping only one alias here is suboptimal, but current alias domain can keep one
alias for each ident, which will be extended later. *)
let arr_loc = PowLoc.min_elt arr_locs in
lift_map (AliasMap.add_iterator_offset_alias id arr_loc) a
| Empty ->
a
let incr_iterator_offset_alias : Ident.t -> t -> t =
fun id a -> lift_map (AliasMap.incr_iterator_offset_alias id) a
let add_iterator_has_next_alias : ret_id:Ident.t -> iterator:Ident.t -> t -> t =
fun ~ret_id ~iterator a -> lift_map (AliasMap.add_iterator_has_next_alias ~ret_id ~iterator) a
let remove_temp : Ident.t -> t -> t =
fun temp -> lift_map (AliasMap.remove (AliasMap.Key.of_id temp))
end
@ -1646,12 +1732,14 @@ module MemReach = struct
let find_alias_id : Ident.t -> _ t0 -> AliasTarget.t option = fun k m -> Alias.find_id k m.alias
let find_alias_loc : Loc.t -> _ t0 -> AliasTarget.t option = fun k m -> Alias.find_loc k m.alias
let find_simple_alias : Ident.t -> _ t0 -> (Loc.t * IntLit.t option) option =
fun k m ->
match Alias.find_id k m.alias with
| Some (AliasTarget.Simple {l; i}) ->
Some (l, if IntLit.iszero i then None else Some i)
| Some (AliasTarget.Empty _ | AliasTarget.Size _ | AliasTarget.Fgets _ | AliasTarget.Top)
| Some AliasTarget.(Empty _ | Size _ | Fgets _ | IteratorOffset _ | IteratorHasNext _ | Top)
| None ->
None
@ -1692,6 +1780,20 @@ module MemReach = struct
let incr_size_alias locs m = {m with alias= Alias.incr_size_alias locs m.alias}
let add_iterator_offset_alias id m =
let arr_locs =
let add_arr l v acc = if Itv.is_zero (Val.array_sizeof v) then PowLoc.add l acc else acc in
MemPure.fold add_arr m.mem_pure PowLoc.empty
in
{m with alias= Alias.add_iterator_offset_alias id arr_locs m.alias}
let incr_iterator_offset_alias id m = {m with alias= Alias.incr_iterator_offset_alias id m.alias}
let add_iterator_has_next_alias ~ret_id ~iterator m =
{m with alias= Alias.add_iterator_has_next_alias ~ret_id ~iterator m.alias}
let add_stack_loc : Loc.t -> t -> t = fun k m -> {m with stack_locs= StackLocs.add k m.stack_locs}
let add_stack : ?represents_multiple_values:bool -> Loc.t -> Val.t -> t -> t =
@ -2032,6 +2134,10 @@ module Mem = struct
fun k -> f_lift_default ~default:None (MemReach.find_alias_id k)
let find_alias_loc : Loc.t -> _ t0 -> AliasTarget.t option =
fun k -> f_lift_default ~default:None (MemReach.find_alias_loc k)
let find_simple_alias : Ident.t -> _ t0 -> (Loc.t * IntLit.t option) option =
fun k -> f_lift_default ~default:None (MemReach.find_simple_alias k)
@ -2071,6 +2177,24 @@ module Mem = struct
let incr_size_alias locs = map ~f:(MemReach.incr_size_alias locs)
let add_iterator_offset_alias : Ident.t -> t -> t =
fun id -> map ~f:(MemReach.add_iterator_offset_alias id)
let incr_iterator_offset_alias : Exp.t -> t -> t =
fun iterator m ->
match iterator with Exp.Var id -> map ~f:(MemReach.incr_iterator_offset_alias id) m | _ -> m
let add_iterator_has_next_alias : Ident.t -> Exp.t -> t -> t =
fun ret_id iterator m ->
match iterator with
| Exp.Var iterator ->
map ~f:(MemReach.add_iterator_has_next_alias ~ret_id ~iterator) m
| _ ->
m
let add_stack_loc : Loc.t -> t -> t = fun k -> map ~f:(MemReach.add_stack_loc k)
let add_stack : ?represents_multiple_values:bool -> Loc.t -> Val.t -> t -> t =

@ -889,7 +889,7 @@ module Collection = struct
let iterator coll_exp =
let exec {integer_type_widths} ~ret:(ret_id, _) mem =
let itr = Sem.eval integer_type_widths coll_exp mem in
model_by_value itr ret_id mem
model_by_value itr ret_id mem |> Dom.Mem.add_iterator_offset_alias ret_id
in
{exec; check= no_check}
@ -918,6 +918,7 @@ module Collection = struct
|> Dom.Val.get_iterator_itv |> Dom.Val.set_itv_updated_by_addition
in
model_by_value collection_size ret_id mem
|> Dom.Mem.add_iterator_has_next_alias ret_id iterator
in
{exec; check= no_check}
@ -927,6 +928,7 @@ module Collection = struct
let traces = Sem.eval integer_type_widths iterator mem |> Dom.Val.get_traces in
let locs = eval_collection_internal_array_locs iterator mem in
model_by_value (Dom.Val.of_pow_loc ~traces locs) id mem
|> Dom.Mem.incr_iterator_offset_alias iterator
in
{exec; check= no_check}

@ -317,11 +317,8 @@ let rec eval_arr : Typ.IntegerWidths.t -> Exp.t -> Mem.t -> Val.t =
| Some (AliasTarget.Simple {l= loc; i}) when IntLit.iszero i ->
Mem.find loc mem
| Some
( AliasTarget.Simple _
| AliasTarget.Size _
| AliasTarget.Empty _
| AliasTarget.Fgets _
| Top )
AliasTarget.(
Simple _ | Size _ | Empty _ | Fgets _ | IteratorOffset _ | IteratorHasNext _ | Top)
| None ->
Val.bot )
| Exp.Lvar pvar ->
@ -514,12 +511,31 @@ let eval_array_locs_length arr_locs mem =
module Prune = struct
type t = {prune_pairs: PrunePairs.t; mem: Mem.t}
let collection_length_of_iterator loc mem =
let arr_locs =
Mem.find loc mem |> Val.get_all_locs
|> PowLoc.append_field ~fn:BufferOverrunField.java_collection_internal_array
in
eval_array_locs_length arr_locs mem
let update_mem_in_prune lv v ?(pruning_exp = PruningExp.Unknown) {prune_pairs; mem} =
let prune_pairs = PrunePairs.add lv (PrunedVal.make v pruning_exp) prune_pairs in
let mem = Mem.update_mem (PowLoc.singleton lv) v mem in
{prune_pairs; mem}
let prune_has_next ~true_branch iterator ({mem} as astate) =
match Mem.find_alias_loc iterator mem with
| Some (IteratorOffset {l= arr_loc; i}) when IntLit.(eq i zero) ->
let length = collection_length_of_iterator iterator mem |> Val.get_itv in
let v = Mem.find arr_loc mem in
let v' = (if true_branch then Val.prune_length_lt else Val.prune_length_eq) v length in
update_mem_in_prune arr_loc v' astate
| _ ->
astate
let prune_unop : Exp.t -> t -> t =
fun e ({mem} as astate) ->
match e with
@ -537,7 +553,9 @@ module Prune = struct
let strlen_loc = Loc.of_c_strlen lv in
let v' = Val.prune_ge_one (Mem.find strlen_loc mem) in
update_mem_in_prune strlen_loc v' astate
| Some (AliasTarget.Simple _ | AliasTarget.Size _ | Top) | None ->
| Some (IteratorHasNext {l= iterator}) ->
prune_has_next ~true_branch:true iterator astate
| Some AliasTarget.(Simple _ | Size _ | IteratorOffset _ | Top) | None ->
astate )
| Exp.UnOp (Unop.LNot, Exp.Var x, _) -> (
match Mem.find_alias_id x mem with
@ -549,7 +567,9 @@ module Prune = struct
let v = Mem.find lv mem in
let v' = Val.prune_length_ge_one v in
update_mem_in_prune lv v' astate
| Some (AliasTarget.Simple _ | AliasTarget.Size _ | AliasTarget.Fgets _ | Top) | None ->
| Some (IteratorHasNext {l= iterator}) ->
prune_has_next ~true_branch:false iterator astate
| Some AliasTarget.(Simple _ | Size _ | Fgets _ | IteratorOffset _ | Top) | None ->
astate )
| _ ->
astate

@ -656,6 +656,8 @@ let prune_ge_one : t -> t = bind1 ItvPure.prune_ge_one
let prune_comp : Binop.t -> t -> t -> t = fun comp -> bind2 (ItvPure.prune_comp comp)
let prune_lt : t -> t -> t = lift2 ItvPure.prune_lt
let prune_eq : t -> t -> t = bind2 ItvPure.prune_eq
let prune_ne : t -> t -> t = bind2 ItvPure.prune_ne

@ -231,6 +231,8 @@ val prune_eq : t -> t -> t
val prune_ne : t -> t -> t
val prune_lt : t -> t -> t
val subst : t -> Bound.eval_sym -> t
val max_of_ikind : Typ.IntegerWidths.t -> Typ.ikind -> t

@ -73,4 +73,49 @@ class ArrayListTest {
} // a.size should be [0, b.size]
int j = b.get(a.size());
}
void add_in_loop_iterator_ok(ArrayList<Integer> b) {
ArrayList<Integer> a = new ArrayList<>();
for (Integer i : b) {
a.add(i);
}
int j = a.get(b.size() - 1);
}
void add_in_loop_iterator_bad(ArrayList<Integer> b) {
ArrayList<Integer> a = new ArrayList<>();
for (Integer i : b) {
a.add(i);
}
int j = a.get(b.size() + 1);
}
void remove_in_loop_iterator_good_FP(ArrayList<Integer> b) {
ArrayList<Integer> a = new ArrayList<>();
for (Integer i : b) {
a.add(i);
}
for (Integer i : b) {
a.remove(i);
}
/* a.size should be 0, but it is analyzed to [-oo, b.size] for now.
- array smashing: It abstracts all members as one abstract value, so cannot precisely analyze
the set of members in the array.
- imprecise remove model: Even with the array smashing, it should have been able to analyze
as [0, b.size], if the semantics of the model was preciser. */
if (a.size() < 0) {
int j = b.get(b.size());
}
}
void remove_in_loop_iterator_bad(ArrayList<Integer> b) {
ArrayList<Integer> a = new ArrayList<>();
for (Integer i : b) {
a.add(i);
}
for (Integer i : b) {
a.remove(i);
} // a.size should be 0
int j = a.get(0);
}
}

@ -10,7 +10,9 @@ codetoanalyze/java/bufferoverrun/Array.java, codetoanalyze.java.bufferoverrun.Ar
codetoanalyze/java/bufferoverrun/ArrayListTest.java, ArrayListTest.add_in_loop_bad():void, 5, BUFFER_OVERRUN_L1, no_bucket, ERROR, [<Length trace>,Array declaration,Array access: Offset: 6 Size: 5]
codetoanalyze/java/bufferoverrun/ArrayListTest.java, ArrayListTest.add_in_loop_by_param2_bad(java.util.ArrayList):void, 7, BUFFER_OVERRUN_L2, no_bucket, ERROR, [<Offset trace>,Array declaration,Assignment,<Length trace>,Assignment,Array declaration,Array access: Offset: [0, b.length] Size: b.length]
codetoanalyze/java/bufferoverrun/ArrayListTest.java, ArrayListTest.add_in_loop_by_param_bad(java.util.ArrayList):void, 5, BUFFER_OVERRUN_L1, no_bucket, ERROR, [<Offset trace>,Assignment,Array declaration,Assignment,<Length trace>,Parameter `b.elements[*]`,Assignment,Array declaration,Array access: Offset: b.length + 1 Size: b.length]
codetoanalyze/java/bufferoverrun/ArrayListTest.java, ArrayListTest.add_in_loop_iterator_bad(java.util.ArrayList):void, 5, BUFFER_OVERRUN_L1, no_bucket, ERROR, [<Offset trace>,Parameter `b.elements[*]`,Assignment,<Length trace>,Array declaration,Array access: Offset: b.length + 1 Size: b.length]
codetoanalyze/java/bufferoverrun/ArrayListTest.java, ArrayListTest.alloc_is_negative_bad():void, 2, INFERBO_ALLOC_IS_NEGATIVE, no_bucket, ERROR, [Allocation: Length: -1]
codetoanalyze/java/bufferoverrun/ArrayListTest.java, ArrayListTest.remove_in_loop_iterator_good_FP(java.util.ArrayList):void, 14, BUFFER_OVERRUN_L1, no_bucket, ERROR, [<Offset trace>,Parameter `b.elements[*]`,Assignment,<Length trace>,Parameter `b.elements[*]`,Array access: Offset: b.length Size: b.length]
codetoanalyze/java/bufferoverrun/ArrayMember.java, codetoanalyze.java.bufferoverrun.ArrayMember.load_array_member_Bad():void, 4, BUFFER_OVERRUN_L1, no_bucket, ERROR, [<Offset trace>,Parameter `this.buf[*]`,Assignment,<Length trace>,Array declaration,Array access: Offset: [max(10, this.buf[*].lb), min(10, this.buf[*].ub)] Size: 10]
codetoanalyze/java/bufferoverrun/CompressedData.java, codetoanalyze.java.bufferoverrun.CompressedData.decompressData(codetoanalyze.java.bufferoverrun.CompressedData$D):int, 9, INTEGER_OVERFLOW_L5, no_bucket, ERROR, [<LHS trace>,Parameter `this.yy`,<RHS trace>,Parameter `d.cci[*].s`,Assignment,Binary operation: ([0, this.yy - 1] × d.cci[*].s):signed32]
codetoanalyze/java/bufferoverrun/External.java, External.external_function_Bad(external.library.SomeExternalClass):void, 1, INTEGER_OVERFLOW_L5, no_bucket, ERROR, [<LHS trace>,Assignment,<RHS trace>,Assignment,Binary operation: ([-oo, +oo] + [-oo, +oo]):signed32]

Loading…
Cancel
Save