From 402f3115ea9c448c9b19b213eed2879b347be2c0 Mon Sep 17 00:00:00 2001
From: Sungkeun Cho <scho@fb.com>
Date: Tue, 24 Sep 2019 02:08:31 -0700
Subject: [PATCH] [cost] Strengthen condition for collecting control variables

Summary:
In the cost checker, the range of selected control variables are used to estimate the number of loop iteration. However, sometimes the ranges of control variables are not related to how many times the loop iteration. This diff strengthens the condition for them as:

1. integers from `size` models
2. integers constructed from `+` or `-`
3. integers constructed from `*`

For the last one, the loop iteration is likely to be log scale of the range of the control variable:

```
while (i < c) {
  i *= 2;
}
```

We will address this in the future.

Reviewed By: ezgicicek

Differential Revision: D17365796

fbshipit-source-id: c1e709ae8
---
 .../src/bufferoverrun/bufferOverrunDomain.ml  | 123 +++++++++++++-----
 .../src/bufferoverrun/bufferOverrunModels.ml  |  11 +-
 .../bufferoverrun/bufferOverrunSemantics.ml   |  16 ++-
 .../java/performance/ArrayListTest.java       |  18 +++
 .../codetoanalyze/java/performance/issues.exp |   1 +
 5 files changed, 134 insertions(+), 35 deletions(-)

diff --git a/infer/src/bufferoverrun/bufferOverrunDomain.ml b/infer/src/bufferoverrun/bufferOverrunDomain.ml
index 181a8ba03..65b3cdfc3 100644
--- a/infer/src/bufferoverrun/bufferOverrunDomain.ml
+++ b/infer/src/bufferoverrun/bufferOverrunDomain.ml
@@ -30,6 +30,39 @@ module ItvThresholds = AbstractDomain.FiniteSet (struct
   let pp = pp_print
 end)
 
+module ItvUpdatedBy = struct
+  type t = Addition | Multiplication | Top
+
+  let ( <= ) ~lhs ~rhs =
+    match (lhs, rhs) with
+    | Addition, _ ->
+        true
+    | _, Addition ->
+        false
+    | Multiplication, _ ->
+        true
+    | _, Multiplication ->
+        false
+    | Top, Top ->
+        true
+
+
+  let join x y = if ( <= ) ~lhs:x ~rhs:y then y else x
+
+  let widen ~prev ~next ~num_iters:_ = join prev next
+
+  let pp fmt = function
+    | Addition ->
+        F.pp_print_string fmt "+"
+    | Multiplication ->
+        F.pp_print_string fmt "*"
+    | Top ->
+        F.pp_print_string fmt "?"
+
+
+  let bottom = Addition
+end
+
 (* ModeledRange represents how many times the interval value can be updated by modeled functions.
    This domain is to support the case where there are mismatches between value of a control variable
    and actual number of loop iterations.  For example,
@@ -55,6 +88,7 @@ module Val = struct
   type t =
     { itv: Itv.t
     ; itv_thresholds: ItvThresholds.t
+    ; itv_updated_by: ItvUpdatedBy.t
     ; modeled_range: ModeledRange.t
     ; sym: Relation.Sym.t
     ; powloc: PowLoc.t
@@ -66,6 +100,7 @@ module Val = struct
   let bot : t =
     { itv= Itv.bot
     ; itv_thresholds= ItvThresholds.empty
+    ; itv_updated_by= ItvUpdatedBy.bottom
     ; modeled_range= ModeledRange.bottom
     ; sym= Relation.Sym.bot
     ; powloc= PowLoc.bot
@@ -80,6 +115,9 @@ module Val = struct
       if Config.bo_debug >= 3 && not (ItvThresholds.is_empty itv_thresholds) then
         F.fprintf fmt " (thresholds:%a)" ItvThresholds.pp itv_thresholds
     in
+    let itv_updated_by_pp fmt itv_updated_by =
+      if Config.bo_debug >= 3 then F.fprintf fmt "(updated by %a)" ItvUpdatedBy.pp itv_updated_by
+    in
     let relation_sym_pp fmt sym =
       if Option.is_some Config.bo_relational_domain then F.fprintf fmt ", %a" Relation.Sym.pp sym
     in
@@ -90,9 +128,10 @@ module Val = struct
     let trace_pp fmt traces =
       if Config.bo_debug >= 1 then F.fprintf fmt ", %a" TraceSet.pp traces
     in
-    F.fprintf fmt "(%a%a%a%a, %a, %a%a%a%a)" Itv.pp x.itv itv_thresholds_pp x.itv_thresholds
-      relation_sym_pp x.sym modeled_range_pp x.modeled_range PowLoc.pp x.powloc ArrayBlk.pp
-      x.arrayblk relation_sym_pp x.offset_sym relation_sym_pp x.size_sym trace_pp x.traces
+    F.fprintf fmt "(%a%a%a%a%a, %a, %a%a%a%a)" Itv.pp x.itv itv_thresholds_pp x.itv_thresholds
+      relation_sym_pp x.sym itv_updated_by_pp x.itv_updated_by modeled_range_pp x.modeled_range
+      PowLoc.pp x.powloc ArrayBlk.pp x.arrayblk relation_sym_pp x.offset_sym relation_sym_pp
+      x.size_sym trace_pp x.traces
 
 
   let unknown_from : callee_pname:_ -> location:_ -> t =
@@ -100,6 +139,7 @@ module Val = struct
     let traces = Trace.(Set.singleton_final location (UnknownFrom callee_pname)) in
     { itv= Itv.top
     ; itv_thresholds= ItvThresholds.empty
+    ; itv_updated_by= ItvUpdatedBy.Top
     ; modeled_range= ModeledRange.bottom
     ; sym= Relation.Sym.top
     ; powloc= PowLoc.unknown
@@ -114,6 +154,7 @@ module Val = struct
     else
       Itv.( <= ) ~lhs:lhs.itv ~rhs:rhs.itv
       && ItvThresholds.( <= ) ~lhs:lhs.itv_thresholds ~rhs:rhs.itv_thresholds
+      && ItvUpdatedBy.( <= ) ~lhs:lhs.itv_updated_by ~rhs:rhs.itv_updated_by
       && ModeledRange.( <= ) ~lhs:lhs.modeled_range ~rhs:rhs.modeled_range
       && Relation.Sym.( <= ) ~lhs:lhs.sym ~rhs:rhs.sym
       && PowLoc.( <= ) ~lhs:lhs.powloc ~rhs:rhs.powloc
@@ -131,6 +172,8 @@ module Val = struct
             ~thresholds:(ItvThresholds.elements itv_thresholds)
             ~prev:prev.itv ~next:next.itv ~num_iters
       ; itv_thresholds
+      ; itv_updated_by=
+          ItvUpdatedBy.widen ~prev:prev.itv_updated_by ~next:next.itv_updated_by ~num_iters
       ; modeled_range=
           ModeledRange.widen ~prev:prev.modeled_range ~next:next.modeled_range ~num_iters
       ; sym= Relation.Sym.widen ~prev:prev.sym ~next:next.sym ~num_iters
@@ -147,6 +190,7 @@ module Val = struct
     else
       { itv= Itv.join x.itv y.itv
       ; itv_thresholds= ItvThresholds.join x.itv_thresholds y.itv_thresholds
+      ; itv_updated_by= ItvUpdatedBy.join x.itv_updated_by y.itv_updated_by
       ; modeled_range= ModeledRange.join x.modeled_range y.modeled_range
       ; sym= Relation.Sym.join x.sym y.sym
       ; powloc= PowLoc.join x.powloc y.powloc
@@ -158,6 +202,8 @@ module Val = struct
 
   let get_itv : t -> Itv.t = fun x -> x.itv
 
+  let get_itv_updated_by : t -> ItvUpdatedBy.t = fun x -> x.itv_updated_by
+
   let get_modeled_range : t -> ModeledRange.t = fun x -> x.modeled_range
 
   let get_sym : t -> Relation.Sym.t = fun x -> x.sym
@@ -223,6 +269,14 @@ module Val = struct
     of_itv (Itv.set_lb_zero (Itv.of_int max_char))
 
 
+  let set_itv_updated_by itv_updated_by x = {x with itv_updated_by}
+
+  let set_itv_updated_by_addition = set_itv_updated_by ItvUpdatedBy.Addition
+
+  let set_itv_updated_by_multiplication = set_itv_updated_by ItvUpdatedBy.Multiplication
+
+  let set_itv_updated_by_unknown = set_itv_updated_by ItvUpdatedBy.Top
+
   let set_modeled_range range x = {x with modeled_range= range}
 
   let unknown_bit : t -> t = fun x -> {x with itv= Itv.top; sym= Relation.Sym.top}
@@ -235,6 +289,7 @@ module Val = struct
    fun f ?f_trace x y ->
     let itv = f x.itv y.itv in
     let itv_thresholds = ItvThresholds.join x.itv_thresholds y.itv_thresholds in
+    let itv_updated_by = ItvUpdatedBy.join x.itv_updated_by y.itv_updated_by in
     let modeled_range = ModeledRange.join x.modeled_range y.modeled_range in
     let traces =
       match f_trace with
@@ -249,7 +304,7 @@ module Val = struct
         | true, true | false, false ->
             TraceSet.join x.traces y.traces )
     in
-    {bot with itv; itv_thresholds; modeled_range; traces}
+    {bot with itv; itv_thresholds; itv_updated_by; modeled_range; traces}
 
 
   let lift_cmp_itv : (Itv.t -> Itv.t -> Boolean.t) -> Boolean.EqualOrder.t -> t -> t -> t =
@@ -490,11 +545,13 @@ module Val = struct
       (if is_java then ", is_java" else "") ;
     match typ.Typ.desc with
     | Tint (IBool | IChar | ISChar | IUChar) ->
-        itv_val ~non_int:(Language.curr_language_is Java)
+        let v = itv_val ~non_int:(Language.curr_language_is Java) in
+        if Language.curr_language_is Java then set_itv_updated_by_unknown v
+        else set_itv_updated_by_addition v
     | Tfloat _ | Tfun _ | TVar _ ->
-        itv_val ~non_int:true
+        itv_val ~non_int:true |> set_itv_updated_by_unknown
     | Tint _ | Tvoid ->
-        itv_val ~non_int:false
+        itv_val ~non_int:false |> set_itv_updated_by_addition
     | Tptr (elt, _) ->
         if is_java || SPath.is_this path then
           let deref_kind =
@@ -665,29 +722,35 @@ module MemPure = struct
     fold
       (fun loc (_, v) acc ->
         match filter_loc loc with
-        | Some loop_head_loc ->
-            let itv = Val.get_itv v in
-            if Itv.has_only_non_int_symbols itv then acc
-            else
-              let range1 =
-                match Val.get_modeled_range v with
-                | NonBottom range ->
-                    Polynomials.NonNegativePolynomial.of_non_negative_bound range
-                | Bottom ->
-                    Itv.range loop_head_loc itv |> Itv.ItvRange.to_top_lifted_polynomial
-              in
-              if Polynomials.NonNegativePolynomial.is_top range1 then
-                L.d_printfln_escaped "Range of %a (loc:%a) became top at %a." Itv.pp itv Loc.pp loc
-                  ProcCfg.Normal.Node.pp_id node_id ;
-              let range = Polynomials.NonNegativePolynomial.mult acc range1 in
-              if
-                (not (Polynomials.NonNegativePolynomial.is_top acc))
-                && Polynomials.NonNegativePolynomial.is_top range
-              then
-                L.d_printfln_escaped "Multiplication of %a and %a (loc:%a) became top at %a."
-                  Polynomials.NonNegativePolynomial.pp acc Polynomials.NonNegativePolynomial.pp
-                  range1 Loc.pp loc ProcCfg.Normal.Node.pp_id node_id ;
-              range
+        | Some loop_head_loc -> (
+            let itv_updated_by = Val.get_itv_updated_by v in
+            match itv_updated_by with
+            | Addition | Multiplication ->
+                (* TODO take range of multiplied one with log scale *)
+                let itv = Val.get_itv v in
+                if Itv.has_only_non_int_symbols itv then acc
+                else
+                  let range1 =
+                    match Val.get_modeled_range v with
+                    | NonBottom range ->
+                        Polynomials.NonNegativePolynomial.of_non_negative_bound range
+                    | Bottom ->
+                        Itv.range loop_head_loc itv |> Itv.ItvRange.to_top_lifted_polynomial
+                  in
+                  if Polynomials.NonNegativePolynomial.is_top range1 then
+                    L.d_printfln_escaped "Range of %a (loc:%a) became top at %a." Itv.pp itv Loc.pp
+                      loc ProcCfg.Normal.Node.pp_id node_id ;
+                  let range = Polynomials.NonNegativePolynomial.mult acc range1 in
+                  if
+                    (not (Polynomials.NonNegativePolynomial.is_top acc))
+                    && Polynomials.NonNegativePolynomial.is_top range
+                  then
+                    L.d_printfln_escaped "Multiplication of %a and %a (loc:%a) became top at %a."
+                      Polynomials.NonNegativePolynomial.pp acc Polynomials.NonNegativePolynomial.pp
+                      range1 Loc.pp loc ProcCfg.Normal.Node.pp_id node_id ;
+                  range
+            | Top ->
+                acc )
         | None ->
             acc )
       mem Polynomials.NonNegativePolynomial.one
diff --git a/infer/src/bufferoverrun/bufferOverrunModels.ml b/infer/src/bufferoverrun/bufferOverrunModels.ml
index 0af4680d6..52fbc4522 100644
--- a/infer/src/bufferoverrun/bufferOverrunModels.ml
+++ b/infer/src/bufferoverrun/bufferOverrunModels.ml
@@ -880,9 +880,11 @@ module Collection = struct
     {exec= change_size_by ~size_f:(Itv.plus zero_one) coll_id; check= no_check}
 
 
+  (* The return value is set by [set_itv_updated_by_addition] in order to be sure that it can be
+     used as a control variable value in the cost checker. *)
   let size coll_exp =
     let exec _ ~ret:(ret_id, _) mem =
-      let result = eval_collection_length coll_exp mem in
+      let result = eval_collection_length coll_exp mem |> Dom.Val.set_itv_updated_by_addition in
       let mem = model_by_value result ret_id mem in
       load_size_alias ret_id (eval_collection_internal_array_locs coll_exp mem) mem
     in
@@ -910,11 +912,16 @@ module Collection = struct
     {exec; check}
 
 
+  (* The return value is set by [set_itv_updated_by_addition] in order to be sure that it can be
+     used as a control variable value in the cost checker. *)
   let hasNext iterator =
     let exec _ ~ret:(ret_id, _) mem =
       (* Set the size of the iterator to be [0, size], so that range
          will be size of the collection. *)
-      let collection_size = eval_collection_length iterator mem |> Dom.Val.get_iterator_itv in
+      let collection_size =
+        eval_collection_length iterator mem
+        |> Dom.Val.get_iterator_itv |> Dom.Val.set_itv_updated_by_addition
+      in
       model_by_value collection_size ret_id mem
     in
     {exec; check= no_check}
diff --git a/infer/src/bufferoverrun/bufferOverrunSemantics.ml b/infer/src/bufferoverrun/bufferOverrunSemantics.ml
index af256928b..dc56b163f 100644
--- a/infer/src/bufferoverrun/bufferOverrunSemantics.ml
+++ b/infer/src/bufferoverrun/bufferOverrunSemantics.ml
@@ -156,8 +156,15 @@ let rec eval : Typ.IntegerWidths.t -> Exp.t -> Mem.t -> Val.t =
         if Mem.is_stack_loc loc mem then Mem.find loc mem else Val.of_loc loc
     | Exp.UnOp (uop, e, _) ->
         eval_unop integer_type_widths uop e mem
-    | Exp.BinOp (bop, e1, e2) ->
-        eval_binop integer_type_widths bop e1 e2 mem
+    | Exp.BinOp (bop, e1, e2) -> (
+        let v = eval_binop integer_type_widths bop e1 e2 mem in
+        match bop with
+        | Binop.(PlusA _ | MinusA _ | MinusPP) ->
+            Val.set_itv_updated_by_addition v
+        | Binop.(Mult _ | Div | Mod | Shiftlt | Shiftrt | BAnd | BXor | BOr) ->
+            Val.set_itv_updated_by_multiplication v
+        | Binop.(PlusPI | MinusPI | Lt | Gt | Le | Ge | Eq | Ne | LAnd | LOr) ->
+            Val.set_itv_updated_by_unknown v )
     | Exp.Const c ->
         eval_const integer_type_widths c
     | Exp.Cast (t, e) ->
@@ -563,7 +570,10 @@ module Prune = struct
     let prune_alias_core ~val_prune ~make_pruning_exp integer_type_widths x e ({mem} as astate) =
       Option.value_map (Mem.find_size_alias x mem) ~default:astate ~f:(fun (lv, java_tmp) ->
           let array_v = Mem.find lv mem in
-          let size = Val.get_array_blk array_v |> ArrayBlk.sizeof |> Val.of_itv in
+          let size =
+            Val.get_array_blk array_v |> ArrayBlk.sizeof |> Val.of_itv
+            |> Val.set_itv_updated_by_addition
+          in
           let rhs = eval integer_type_widths e mem in
           let size' = val_prune size rhs in
           let array_v' = Val.set_array_length Location.dummy ~length:size' array_v in
diff --git a/infer/tests/codetoanalyze/java/performance/ArrayListTest.java b/infer/tests/codetoanalyze/java/performance/ArrayListTest.java
index 15b2c2295..08f853f04 100644
--- a/infer/tests/codetoanalyze/java/performance/ArrayListTest.java
+++ b/infer/tests/codetoanalyze/java/performance/ArrayListTest.java
@@ -280,6 +280,24 @@ public class ArrayListTest {
       a.remove(0);
     }
   }
+
+  class Elt {
+    boolean b;
+
+    public boolean get_boolean() {
+      return b;
+    }
+  }
+
+  ArrayList<Elt> arr = new ArrayList();
+
+  void boolean_control_var_linear() {
+    for (int i = 0; i < arr.size(); i++) {
+      if (!arr.get(i).get_boolean()) {
+        break;
+      }
+    }
+  }
 }
 
 class LexicographicComparator implements java.util.Comparator<Person> {
diff --git a/infer/tests/codetoanalyze/java/performance/issues.exp b/infer/tests/codetoanalyze/java/performance/issues.exp
index cdd8f17a4..8c76256e3 100644
--- a/infer/tests/codetoanalyze/java/performance/issues.exp
+++ b/infer/tests/codetoanalyze/java/performance/issues.exp
@@ -31,6 +31,7 @@ codetoanalyze/java/performance/ArrayListTest.java, ArrayListTest.arraylist_remov
 codetoanalyze/java/performance/ArrayListTest.java, ArrayListTest.arraylist_remove_overrun_bad():void, 3, BUFFER_OVERRUN_L1, no_bucket, ERROR, [<Length trace>,Array declaration,Through,Array access: Offset: 1 Size: 1]
 codetoanalyze/java/performance/ArrayListTest.java, ArrayListTest.arraylist_set_overrun_bad():void, 3, BUFFER_OVERRUN_L1, no_bucket, ERROR, [<Length trace>,Array declaration,Through,Array access: Offset: 1 Size: 1]
 codetoanalyze/java/performance/ArrayListTest.java, ArrayListTest.arraylist_set_underrun_bad():void, 2, BUFFER_OVERRUN_L1, no_bucket, ERROR, [<Length trace>,Array declaration,Array access: Offset: 0 Size: 0]
+codetoanalyze/java/performance/ArrayListTest.java, ArrayListTest.boolean_control_var_linear():void, 1, EXPENSIVE_EXECUTION_TIME, no_bucket, ERROR, [with estimated cost 2 + 19 ⋅ this.arr.length + 4 ⋅ (this.arr.length + 1), O(this.arr.length), degree = 1,{this.arr.length + 1},Loop at line 295,{this.arr.length},Loop at line 295]
 codetoanalyze/java/performance/ArrayListTest.java, ArrayListTest.call_sortArrayList(java.util.ArrayList):void, 1, EXPENSIVE_EXECUTION_TIME, no_bucket, ERROR, [with estimated cost 4 + list.length × log(list.length), O(list.length × log(list.length)), degree = 1 + 1⋅log,{list.length},call to void ArrayListTest.sortArrayList(ArrayList),Modeled call to Collections.sort,{list.length},call to void ArrayListTest.sortArrayList(ArrayList),Modeled call to Collections.sort]
 codetoanalyze/java/performance/ArrayListTest.java, ArrayListTest.constructor_add_all(java.util.ArrayList,java.util.ArrayList):void, 4, EXPENSIVE_EXECUTION_TIME, no_bucket, ERROR, [with estimated cost 10 + 5 ⋅ (l.length + list.length) + 3 ⋅ (l.length + list.length + 1), O((l.length + list.length)), degree = 1,{l.length + list.length + 1},Loop at line 245,{l.length + list.length},Loop at line 245]
 codetoanalyze/java/performance/ArrayListTest.java, ArrayListTest.constructor_add_all_sym(java.util.ArrayList,java.util.ArrayList):void, 4, EXPENSIVE_EXECUTION_TIME, no_bucket, ERROR, [with estimated cost 10 + 5 ⋅ (l.length + list.length) + 3 ⋅ (l.length + list.length + 1), O((l.length + list.length)), degree = 1,{l.length + list.length + 1},Loop at line 252,{l.length + list.length},Loop at line 252]