From 8f0701a01c75fd3533583deb0da45e8d3831d59e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ezgi=20=C3=87i=C3=A7ek?= <ezgi@fb.com>
Date: Mon, 23 Apr 2018 01:58:21 -0700
Subject: [PATCH] Refine dependency analysis to remove vars at prune exit nodes

Reviewed By: ddino

Differential Revision: D7670952

fbshipit-source-id: 832a639
---
 infer/src/IR/Sil.ml                           |  2 +
 infer/src/IR/Sil.mli                          |  2 +
 infer/src/checkers/control.ml                 | 10 ++-
 .../c/performance/cost_test_deps.c            | 75 ++++++++++++++++---
 .../codetoanalyze/c/performance/issues.exp    | 12 ++-
 5 files changed, 86 insertions(+), 15 deletions(-)

diff --git a/infer/src/IR/Sil.ml b/infer/src/IR/Sil.ml
index 871531bdd..14e9522d0 100644
--- a/infer/src/IR/Sil.ml
+++ b/infer/src/IR/Sil.ml
@@ -55,6 +55,8 @@ type instr =
   | Declare_locals of (Pvar.t * Typ.t) list * Location.t  (** declare local variables *)
 [@@deriving compare]
 
+let is_loop = function Ik_dowhile | Ik_while | Ik_for -> true | _ -> false
+
 let equal_instr = [%compare.equal : instr]
 
 let skip_instr = Remove_temps ([], Location.dummy)
diff --git a/infer/src/IR/Sil.mli b/infer/src/IR/Sil.mli
index 3df12e492..53579184a 100644
--- a/infer/src/IR/Sil.mli
+++ b/infer/src/IR/Sil.mli
@@ -28,6 +28,8 @@ type if_kind =
   | Ik_switch
 [@@deriving compare]
 
+val is_loop : if_kind -> bool
+
 (** An instruction. *)
 type instr =
   (* Note for frontend writers:
diff --git a/infer/src/checkers/control.ml b/infer/src/checkers/control.ml
index dce8e07be..1341937ec 100644
--- a/infer/src/checkers/control.ml
+++ b/infer/src/checkers/control.ml
@@ -87,13 +87,17 @@ module TransferFunctionsControlDeps (CFG : ProcCfg.S) = struct
 
   let exec_instr astate _ _ instr =
     match instr with
-    | Sil.Prune (exp, _, _, _) ->
+    | Sil.Prune (exp, _, true_branch, if_kind) ->
+        (* Only keep track of control flow variables for loop prune nodes with the true branch *)
+        (* For false branches, remove the var from the set for fixpoint *)
         let astate' =
           Exp.free_vars exp |> Sequence.map ~f:Var.of_id |> Sequence.to_list
-          |> ControlDepSet.of_list |> Domain.union astate
+          |> ControlDepSet.of_list
+          |> if true_branch && Sil.is_loop if_kind then Domain.union astate else Domain.diff astate
         in
         Exp.program_vars exp |> Sequence.map ~f:Var.of_pvar |> Sequence.to_list
-        |> ControlDepSet.of_list |> Domain.union astate'
+        |> ControlDepSet.of_list
+        |> if true_branch && Sil.is_loop if_kind then Domain.union astate' else Domain.diff astate'
     | Sil.Load _
     | Sil.Store _
     | Sil.Call _
diff --git a/infer/tests/codetoanalyze/c/performance/cost_test_deps.c b/infer/tests/codetoanalyze/c/performance/cost_test_deps.c
index 85d0fa32f..f8a401c52 100644
--- a/infer/tests/codetoanalyze/c/performance/cost_test_deps.c
+++ b/infer/tests/codetoanalyze/c/performance/cost_test_deps.c
@@ -33,27 +33,46 @@ int loop_no_dep2(int k) {
   return p;
 }
 
-// -- Below examples should have worked, but due to the imprecision of CF
-// analysis, they don't
+// -- Below examples didn't work before, but enhancing CF analysis
+// makes the analysis much more precise and we can get proper bounds
 //
-// This example doesn't work since for the loop
-// control vars={p,j} and since j in [-oo.+oo], we get oo count.
+// This example works now because even though j in [-oo.+oo],
+// since control vars={k} (notice that we will remove {p,j} in the else branch),
+// we ignore j and find the right bound for the inner loop
 int if_bad(int j) {
   int p = 10;
-  int i = 0;
   if (p < 10 + j) {
     p++;
   } else {
     p = j + 3;
-    for (int k = 0; k < 100; k++) {
+    for (int k = 0; k < 10; k++) {
       j += 3;
     }
   }
   return p;
 }
 
-// We get +oo for this program, but if you take the first loop out,
-// fake dependency disappears and we can get a proper bound
+// Notice that removing {j,p} above doesn't create any problems if we are in a
+// loop that depends on them. E.g.: below we still depend on {j} but in the
+// conditional prune statement, we will remove the temp. var that map to inner
+// {j}, not the outer {j}
+int if_bad_loop() {
+  int p = 10;
+  for (int j = 0; j < 5; j++) {
+    if (j < 2) {
+      p++;
+    } else {
+      p = 3;
+      for (int k = 0; k < 10; k++) {
+        int m = 0;
+      }
+    }
+  }
+  return p;
+}
+
+// The fake dependency btw first and second loop disappeared and we can get a
+// proper bound
 //
 int two_loops() {
   int p = 10;
@@ -68,8 +87,25 @@ int two_loops() {
   return p;
 }
 
-// We can't get program point A's execution count as 5 due to the weakness in CF
-// analysis
+// We don't get a false dependency to m (hence p) since
+// for if statements, we don't add prune variables as dependency
+int loop_despite_inferbo(int p) {
+
+  int k = 100;
+  for (int i = 0; i < k; i++) {
+    int m = p + 3;
+    if (m < 14) {
+      p += 9;
+    }
+  }
+  return p;
+}
+
+// -- Below examples should have worked, but due to the imprecision/weakness
+//  in inferbo's relational analysis, they don't
+
+// We can get program point A's execution count as 5, however
+// due to the weakness in inferbo's relational analysis `i` is in [0, +oo]
 int nested_loop() {
   int k = 0;
   for (int i = 0; i < 5; i++) {
@@ -83,6 +119,7 @@ int nested_loop() {
 }
 
 // Unlike the above program, B will be inside the inner loop, hence executed
+// around 105 times
 int simulated_nested_loop(int p) {
   int k = 0;
   int t = 5;
@@ -96,3 +133,21 @@ int simulated_nested_loop(int p) {
   }
   return k;
 }
+
+// B will be inside the inner loop and executed ~500 times
+int simulated_nested_loop_more_expensive(int p) {
+  int k = 0;
+  int t = 5;
+  int j = 0;
+  for (int i = 0; i < 5; i++) {
+  B:
+    t = 3;
+    j++;
+    if (j < 100)
+      goto B; // continue;
+    else {
+      j = 0;
+    }
+  }
+  return k;
+}
diff --git a/infer/tests/codetoanalyze/c/performance/issues.exp b/infer/tests/codetoanalyze/c/performance/issues.exp
index 642113c72..5ba6be7e6 100644
--- a/infer/tests/codetoanalyze/c/performance/issues.exp
+++ b/infer/tests/codetoanalyze/c/performance/issues.exp
@@ -7,7 +7,12 @@ codetoanalyze/c/performance/cost_test.c, loop1_bad, 3, EXPENSIVE_EXECUTION_TIME_
 codetoanalyze/c/performance/cost_test.c, loop1_bad, 4, EXPENSIVE_EXECUTION_TIME_CALL, ERROR, [with estimated cost 1104]
 codetoanalyze/c/performance/cost_test.c, loop1_bad, 6, EXPENSIVE_EXECUTION_TIME_CALL, ERROR, [with estimated cost 1106]
 codetoanalyze/c/performance/cost_test.c, main_bad, 8, EXPENSIVE_EXECUTION_TIME_CALL, ERROR, [with estimated cost 212]
-codetoanalyze/c/performance/cost_test_deps.c, if_bad, 0, INFINITE_EXECUTION_TIME_CALL, ERROR, []
+codetoanalyze/c/performance/cost_test_deps.c, if_bad_loop, 12, EXPENSIVE_EXECUTION_TIME_CALL, ERROR, [with estimated cost 202]
+codetoanalyze/c/performance/cost_test_deps.c, loop_despite_inferbo, 3, EXPENSIVE_EXECUTION_TIME_CALL, ERROR, [with estimated cost 1204]
+codetoanalyze/c/performance/cost_test_deps.c, loop_despite_inferbo, 3, EXPENSIVE_EXECUTION_TIME_CALL, ERROR, [with estimated cost 1204]
+codetoanalyze/c/performance/cost_test_deps.c, loop_despite_inferbo, 4, EXPENSIVE_EXECUTION_TIME_CALL, ERROR, [with estimated cost 1204]
+codetoanalyze/c/performance/cost_test_deps.c, loop_despite_inferbo, 5, EXPENSIVE_EXECUTION_TIME_CALL, ERROR, [with estimated cost 1204]
+codetoanalyze/c/performance/cost_test_deps.c, loop_despite_inferbo, 9, EXPENSIVE_EXECUTION_TIME_CALL, ERROR, [with estimated cost 1207]
 codetoanalyze/c/performance/cost_test_deps.c, loop_no_dep1, 3, EXPENSIVE_EXECUTION_TIME_CALL, ERROR, [with estimated cost 605]
 codetoanalyze/c/performance/cost_test_deps.c, loop_no_dep1, 4, EXPENSIVE_EXECUTION_TIME_CALL, ERROR, [with estimated cost 605]
 codetoanalyze/c/performance/cost_test_deps.c, loop_no_dep1, 6, EXPENSIVE_EXECUTION_TIME_CALL, ERROR, [with estimated cost 608]
@@ -16,4 +21,7 @@ codetoanalyze/c/performance/cost_test_deps.c, loop_no_dep2, 4, EXPENSIVE_EXECUTI
 codetoanalyze/c/performance/cost_test_deps.c, loop_no_dep2, 6, EXPENSIVE_EXECUTION_TIME_CALL, ERROR, [with estimated cost 612]
 codetoanalyze/c/performance/cost_test_deps.c, nested_loop, 0, INFINITE_EXECUTION_TIME_CALL, ERROR, []
 codetoanalyze/c/performance/cost_test_deps.c, simulated_nested_loop, 0, INFINITE_EXECUTION_TIME_CALL, ERROR, []
-codetoanalyze/c/performance/cost_test_deps.c, two_loops, 0, INFINITE_EXECUTION_TIME_CALL, ERROR, []
+codetoanalyze/c/performance/cost_test_deps.c, simulated_nested_loop_more_expensive, 0, INFINITE_EXECUTION_TIME_CALL, ERROR, []
+codetoanalyze/c/performance/cost_test_deps.c, two_loops, 7, EXPENSIVE_EXECUTION_TIME_CALL, ERROR, [with estimated cost 545]
+codetoanalyze/c/performance/cost_test_deps.c, two_loops, 8, EXPENSIVE_EXECUTION_TIME_CALL, ERROR, [with estimated cost 545]
+codetoanalyze/c/performance/cost_test_deps.c, two_loops, 10, EXPENSIVE_EXECUTION_TIME_CALL, ERROR, [with estimated cost 548]