From 738a751d175b381cb0e9b39caf7b3f3a71a8e1c9 Mon Sep 17 00:00:00 2001
From: Sungkeun Cho <scho@fb.com>
Date: Thu, 26 Sep 2019 04:03:39 -0700
Subject: [PATCH] [cost] Add eval mode for cost substitution

Summary:
This diff adds an eval mode for the substitutions of the cost results, in order to avoid precision
loss by joining two symbols.

The usual join of two different symbolic values, `s1` and `s2`, becomes top due to the limitation of
our domain.  On the other hand, in the new eval mode, it returns an upperbound `s1+s2`, because the
cost values only care about the upperbounds.

Reviewed By: ezgicicek

Differential Revision: D17573400

fbshipit-source-id: 2c84743d5
---
 infer/src/bufferoverrun/arrayBlk.ml           | 10 +++---
 .../bufferoverrun/bufferOverrunSemantics.ml   | 31 +++++++++++++------
 infer/src/checkers/cost.ml                    |  3 +-
 infer/src/checkers/cost.mli                   |  2 +-
 .../java/performance/ListTest.java            | 24 ++++++++++++++
 .../codetoanalyze/java/performance/issues.exp |  8 +++--
 6 files changed, 59 insertions(+), 19 deletions(-)

diff --git a/infer/src/bufferoverrun/arrayBlk.ml b/infer/src/bufferoverrun/arrayBlk.ml
index a0566b64d..6ec345279 100644
--- a/infer/src/bufferoverrun/arrayBlk.ml
+++ b/infer/src/bufferoverrun/arrayBlk.ml
@@ -290,13 +290,15 @@ let make_java : Allocsite.t -> length:Itv.t -> t =
  fun a ~length -> singleton a (ArrInfo.make_java ~length)
 
 
-let join_itv : f:(ArrInfo.t -> Itv.t) -> t -> Itv.t =
- fun ~f a -> fold (fun _ arr -> Itv.join (f arr)) a Itv.bot
+let join_itv : cost_mode:bool -> f:(ArrInfo.t -> Itv.t) -> t -> Itv.t =
+ fun ~cost_mode ~f a ->
+  let join, init = if cost_mode then (Itv.plus, Itv.zero) else (Itv.join, Itv.bot) in
+  fold (fun _ arr -> join (f arr)) a init
 
 
-let offsetof = join_itv ~f:ArrInfo.offsetof
+let offsetof ?(cost_mode = false) = join_itv ~cost_mode ~f:ArrInfo.offsetof
 
-let sizeof = join_itv ~f:ArrInfo.sizeof
+let sizeof ?(cost_mode = false) = join_itv ~cost_mode ~f:ArrInfo.sizeof
 
 let plus_offset : t -> Itv.t -> t = fun arr i -> map (fun a -> ArrInfo.plus_offset a i) arr
 
diff --git a/infer/src/bufferoverrun/bufferOverrunSemantics.ml b/infer/src/bufferoverrun/bufferOverrunSemantics.ml
index dc56b163f..a8dc554b8 100644
--- a/infer/src/bufferoverrun/bufferOverrunSemantics.ml
+++ b/infer/src/bufferoverrun/bufferOverrunSemantics.ml
@@ -382,12 +382,20 @@ end
 
    EvalPOReachability: This is similar to [EvalPOCond], but it returns the bottom location, instead
    of the unknown location, when a location to substitute is not found.  This is used when
-   substituting reachabilities of proof obligations. *)
-type eval_mode = EvalNormal | EvalPOCond | EvalPOReachability
+   substituting reachabilities of proof obligations.
+
+   EvalCost: This is similar to [EvalNormal], but it is designed to be used in substitutions of
+   the cost results, avoiding precision loss by joining of symbolic values.  Normal join of two
+   different symbolic values, [s1] and [s2], becomes top due to the limitation of our domain.  On
+   the other hand, in this mode, it returns an upperbound [s1+s2] for the case, because the cost
+   values only care about the upperbounds. *)
+type eval_mode = EvalNormal | EvalPOCond | EvalPOReachability | EvalCost
+
+let is_cost_mode = function EvalCost -> true | _ -> false
 
 let eval_sympath_modeled_partial ~mode p =
   match (mode, p) with
-  | EvalNormal, Symb.SymbolPath.Callsite _ ->
+  | (EvalNormal | EvalCost), Symb.SymbolPath.Callsite _ ->
       Itv.of_modeled_path p |> Val.of_itv
   | _, _ ->
       (* We only have modeled modeled function calls created in costModels. *)
@@ -403,7 +411,7 @@ let rec eval_sympath_partial ~mode params p mem =
       Val.Itv.top )
   | Symb.SymbolPath.Callsite {cs} -> (
     match mode with
-    | EvalNormal ->
+    | EvalNormal | EvalCost ->
         L.d_printfln_escaped "Symbol for %a is not expected to be in parameters." Typ.Procname.pp
           (CallSite.pname cs) ;
         Mem.find (Loc.of_allocsite (Allocsite.make_symbol p)) mem
@@ -434,7 +442,7 @@ and eval_locpath ~mode params p mem =
     match mode with
     | EvalPOReachability ->
         res
-    | EvalNormal | EvalPOCond ->
+    | EvalNormal | EvalPOCond | EvalCost ->
         L.d_printfln_escaped "Location value for %a is not found." Symb.SymbolPath.pp_partial p ;
         PowLoc.unknown )
   else res
@@ -450,13 +458,14 @@ let eval_sympath ~mode params sympath mem =
       (Val.get_itv v, Val.get_traces v)
   | Symb.SymbolPath.Offset {p} ->
       let v = eval_sympath_partial ~mode params p mem in
-      (ArrayBlk.offsetof (Val.get_array_blk v), Val.get_traces v)
+      (ArrayBlk.offsetof ~cost_mode:(is_cost_mode mode) (Val.get_array_blk v), Val.get_traces v)
   | Symb.SymbolPath.Length {p} ->
       let v = eval_sympath_partial ~mode params p mem in
-      (ArrayBlk.sizeof (Val.get_array_blk v), Val.get_traces v)
+      (ArrayBlk.sizeof ~cost_mode:(is_cost_mode mode) (Val.get_array_blk v), Val.get_traces v)
 
 
-let mk_eval_sym_trace integer_type_widths callee_formals actual_exps caller_mem =
+let mk_eval_sym_trace integer_type_widths (callee_formals : (Pvar.t * Typ.t) list)
+    (actual_exps : (Exp.t * Typ.t) list) caller_mem =
   let params =
     let actuals = List.map ~f:(fun (a, _) -> eval integer_type_widths a caller_mem) actual_exps in
     ParamBindings.make callee_formals actuals
@@ -476,13 +485,15 @@ let mk_eval_sym_trace integer_type_widths callee_formals actual_exps caller_mem
   fun ~mode -> {eval_sym= eval_sym ~mode; trace_of_sym; eval_locpath= eval_locpath ~mode}
 
 
-let mk_eval_sym integer_type_widths callee_pdesc actual_exps caller_mem =
+let mk_eval_sym_mode ~mode integer_type_widths callee_formals actual_exps caller_mem =
   let eval_sym_trace =
-    mk_eval_sym_trace integer_type_widths callee_pdesc actual_exps caller_mem ~mode:EvalNormal
+    mk_eval_sym_trace integer_type_widths callee_formals actual_exps caller_mem ~mode
   in
   eval_sym_trace.eval_sym
 
 
+let mk_eval_sym_cost = mk_eval_sym_mode ~mode:EvalCost
+
 let get_sym_f integer_type_widths mem e = Val.get_sym (eval integer_type_widths e mem)
 
 let get_offset_sym_f integer_type_widths mem e =
diff --git a/infer/src/checkers/cost.ml b/infer/src/checkers/cost.ml
index 963d58f76..a778aa6e8 100644
--- a/infer/src/checkers/cost.ml
+++ b/infer/src/checkers/cost.ml
@@ -520,7 +520,8 @@ type extras_WorstCaseCost =
 let instantiate_cost integer_type_widths ~inferbo_caller_mem ~callee_pname ~callee_formals ~params
     ~callee_cost ~loc =
   let eval_sym =
-    BufferOverrunSemantics.mk_eval_sym integer_type_widths callee_formals params inferbo_caller_mem
+    BufferOverrunSemantics.mk_eval_sym_cost integer_type_widths callee_formals params
+      inferbo_caller_mem
   in
   BasicCost.subst callee_pname loc callee_cost eval_sym
 
diff --git a/infer/src/checkers/cost.mli b/infer/src/checkers/cost.mli
index 476ca8b23..191a4285a 100644
--- a/infer/src/checkers/cost.mli
+++ b/infer/src/checkers/cost.mli
@@ -14,7 +14,7 @@ val instantiate_cost :
   -> inferbo_caller_mem:BufferOverrunDomain.Mem.t
   -> callee_pname:Typ.Procname.t
   -> callee_formals:(Pvar.t * Typ.t) list
-  -> params:(Exp.t * 'a) list
+  -> params:(Exp.t * Typ.t) list
   -> callee_cost:CostDomain.BasicCost.t
   -> loc:Location.t
   -> CostDomain.BasicCost.t
diff --git a/infer/tests/codetoanalyze/java/performance/ListTest.java b/infer/tests/codetoanalyze/java/performance/ListTest.java
index e5af7d442..95c354fc3 100644
--- a/infer/tests/codetoanalyze/java/performance/ListTest.java
+++ b/infer/tests/codetoanalyze/java/performance/ListTest.java
@@ -6,6 +6,7 @@
  */
 import com.google.common.base.Objects;
 import java.util.Arrays;
+import java.util.Iterator;
 import java.util.List;
 import java.util.ListIterator;
 
@@ -39,4 +40,27 @@ class ListTest {
     List<String> list = Arrays.asList(array);
     for (String el : list) {}
   }
+
+  boolean unknown_bool;
+
+  List<Integer> two_lists(List<Integer> l1, List<Integer> l2) {
+    List<Integer> l;
+    if (unknown_bool) {
+      l = l1;
+    } else {
+      l = l2;
+    }
+    return l;
+  }
+
+  void iterate_elements_linear(List<Integer> l) {
+    Iterator iterator = l.iterator();
+    while (iterator.hasNext()) {
+      iterator.next();
+    }
+  }
+
+  void call_iterate_elements_linear(List<Integer> l1, List<Integer> l2) {
+    iterate_elements_linear(two_lists(l1, l2));
+  }
 }
diff --git a/infer/tests/codetoanalyze/java/performance/issues.exp b/infer/tests/codetoanalyze/java/performance/issues.exp
index 8c76256e3..dd7a17c74 100644
--- a/infer/tests/codetoanalyze/java/performance/issues.exp
+++ b/infer/tests/codetoanalyze/java/performance/issues.exp
@@ -147,10 +147,12 @@ codetoanalyze/java/performance/JsonUtils.java, libraries.marauder.analytics.util
 codetoanalyze/java/performance/JsonUtils.java, libraries.marauder.analytics.utils.json.JsonUtils.serialize(java.lang.String):java.lang.StringBuilder, 2, EXPENSIVE_EXECUTION_TIME, no_bucket, ERROR, [with estimated cost 33 + 65 ⋅ String.toCharArray().length.ub, O(String.toCharArray().length.ub), degree = 1,{String.toCharArray().length.ub},call to void JsonUtils.serialize(StringBuilder,String),call to void JsonUtils.escape(StringBuilder,String),Loop at line 13]
 codetoanalyze/java/performance/JsonUtils.java, libraries.marauder.analytics.utils.json.JsonUtils.serialize(java.lang.StringBuilder,java.lang.String):void, 5, EXPENSIVE_ALLOCATION, no_bucket, ERROR, [with estimated cost String.toCharArray().length.ub, O(String.toCharArray().length.ub), degree = 1,{String.toCharArray().length.ub},call to void JsonUtils.escape(StringBuilder,String),Loop at line 13]
 codetoanalyze/java/performance/JsonUtils.java, libraries.marauder.analytics.utils.json.JsonUtils.serialize(java.lang.StringBuilder,java.lang.String):void, 5, EXPENSIVE_EXECUTION_TIME, no_bucket, ERROR, [with estimated cost 24 + 65 ⋅ String.toCharArray().length.ub, O(String.toCharArray().length.ub), degree = 1,{String.toCharArray().length.ub},call to void JsonUtils.escape(StringBuilder,String),Loop at line 13]
-codetoanalyze/java/performance/ListTest.java, ListTest.asList_linear(java.lang.String[]):void, 2, EXPENSIVE_EXECUTION_TIME, no_bucket, ERROR, [with estimated cost 7 + 8 ⋅ array.length + 3 ⋅ (array.length + 1), O(array.length), degree = 1,{array.length + 1},Loop at line 40,{array.length},Loop at line 40]
-codetoanalyze/java/performance/ListTest.java, ListTest.indexOfImpl_linear(java.util.List,java.lang.Object):int, 2, EXPENSIVE_EXECUTION_TIME, no_bucket, ERROR, [with estimated cost 4 + 11 ⋅ list.length + 3 ⋅ (list.length + 1), O(list.length), degree = 1,{list.length + 1},Loop at line 16,{list.length},Loop at line 16]
+codetoanalyze/java/performance/ListTest.java, ListTest.asList_linear(java.lang.String[]):void, 2, EXPENSIVE_EXECUTION_TIME, no_bucket, ERROR, [with estimated cost 7 + 8 ⋅ array.length + 3 ⋅ (array.length + 1), O(array.length), degree = 1,{array.length + 1},Loop at line 41,{array.length},Loop at line 41]
+codetoanalyze/java/performance/ListTest.java, ListTest.call_iterate_elements_linear(java.util.List,java.util.List):void, 1, EXPENSIVE_EXECUTION_TIME, no_bucket, ERROR, [with estimated cost 26 + 5 ⋅ (l2.length + l1.length) + 3 ⋅ (l2.length + l1.length + 1), O((l2.length + l1.length)), degree = 1,{l2.length + l1.length + 1},call to void ListTest.iterate_elements_linear(List),Loop at line 58,{l2.length + l1.length},call to void ListTest.iterate_elements_linear(List),Loop at line 58]
+codetoanalyze/java/performance/ListTest.java, ListTest.indexOfImpl_linear(java.util.List,java.lang.Object):int, 2, EXPENSIVE_EXECUTION_TIME, no_bucket, ERROR, [with estimated cost 4 + 11 ⋅ list.length + 3 ⋅ (list.length + 1), O(list.length), degree = 1,{list.length + 1},Loop at line 17,{list.length},Loop at line 17]
+codetoanalyze/java/performance/ListTest.java, ListTest.iterate_elements_linear(java.util.List):void, 2, EXPENSIVE_EXECUTION_TIME, no_bucket, ERROR, [with estimated cost 4 + 5 ⋅ l.length + 3 ⋅ (l.length + 1), O(l.length), degree = 1,{l.length + 1},Loop at line 58,{l.length},Loop at line 58]
 codetoanalyze/java/performance/ListTest.java, ListTest.sort_comparator_nlogn(java.util.List):void, 1, EXPENSIVE_EXECUTION_TIME, no_bucket, ERROR, [with estimated cost 8 + people.length × log(people.length), O(people.length × log(people.length)), degree = 1 + 1⋅log,{people.length},Modeled call to List.sort,{people.length},Modeled call to List.sort]
-codetoanalyze/java/performance/ListTest.java, ListTest.sublist(java.util.List):void, 2, EXPENSIVE_EXECUTION_TIME, no_bucket, ERROR, [with estimated cost 11 + 8 ⋅ (filesList.length - 1) + 3 ⋅ filesList.length, O(filesList.length), degree = 1,{filesList.length},Loop at line 30,{filesList.length - 1},Loop at line 30]
+codetoanalyze/java/performance/ListTest.java, ListTest.sublist(java.util.List):void, 2, EXPENSIVE_EXECUTION_TIME, no_bucket, ERROR, [with estimated cost 11 + 8 ⋅ (filesList.length - 1) + 3 ⋅ filesList.length, O(filesList.length), degree = 1,{filesList.length},Loop at line 31,{filesList.length - 1},Loop at line 31]
 codetoanalyze/java/performance/Loops.java, codetoanalyze.java.performance.Loops.charsequence_length_linear(java.lang.CharSequence):void, 1, EXPENSIVE_EXECUTION_TIME, no_bucket, ERROR, [with estimated cost 2 + 5 ⋅ seq.length + 3 ⋅ (seq.length + 1), O(seq.length), degree = 1,{seq.length + 1},Loop at line 115,{seq.length},Loop at line 115]
 codetoanalyze/java/performance/Loops.java, codetoanalyze.java.performance.Loops.do_while_independent_of_p(int):int, 3, EXPENSIVE_EXECUTION_TIME, no_bucket, ERROR, [with estimated cost 250, O(1), degree = 0]
 codetoanalyze/java/performance/Loops.java, codetoanalyze.java.performance.Loops.dumb0(long[],int):void, 1, EXPENSIVE_EXECUTION_TIME, no_bucket, ERROR, [with estimated cost 2 + 25 ⋅ (length - 1), O(length), degree = 1,{length - 1},Loop at line 44]