From 093bf285cc067092f5f6e8f5831a46b208350549 Mon Sep 17 00:00:00 2001
From: Mehdi Bouaziz <mbouaziz@fb.com>
Date: Mon, 9 Oct 2017 08:59:46 -0700
Subject: [PATCH] [inferbo] Do not report subsumed issues

Summary:
Next step to issue deduplication: do not keep safety conditions that are subsumed by others.
Only do it if they do not have infinite bound: replacing `0 < size` by `1 < size` is ok, but replacing it by `+oo < size` is not because it looks much more like a lack of precision.

Reviewed By: skcho

Differential Revision: D5978455

fbshipit-source-id: acc2384
---
 .../bufferOverrunProofObligations.ml          | 99 ++++++++++++++-----
 infer/src/bufferoverrun/itv.ml                | 56 +++++++++++
 .../c/bufferoverrun/duplicates.c              | 12 ++-
 .../codetoanalyze/c/bufferoverrun/issues.exp  |  6 +-
 .../cpp/bufferoverrun/issues.exp              |  3 -
 5 files changed, 147 insertions(+), 29 deletions(-)

diff --git a/infer/src/bufferoverrun/bufferOverrunProofObligations.ml b/infer/src/bufferoverrun/bufferOverrunProofObligations.ml
index 502d6d502..f52cead60 100644
--- a/infer/src/bufferoverrun/bufferOverrunProofObligations.ml
+++ b/infer/src/bufferoverrun/bufferOverrunProofObligations.ml
@@ -20,8 +20,6 @@ module Condition = struct
 
   let get_symbols c = ItvPure.get_symbols c.idx @ ItvPure.get_symbols c.size
 
-  let eq c1 c2 = ItvPure.equal c1.idx c2.idx && ItvPure.equal c1.size c2.size
-
   let set_size_pos : t -> t =
     fun c ->
       let size' = ItvPure.make_positive c.size in
@@ -41,6 +39,53 @@ module Condition = struct
     fun ~idx ~size ->
       if ItvPure.is_invalid idx || ItvPure.is_invalid size then None else Some {idx; size}
 
+  let have_similar_bounds {idx= lidx; size= lsiz} {idx= ridx; size= rsiz} =
+    ItvPure.have_similar_bounds lidx ridx && ItvPure.have_similar_bounds lsiz rsiz
+
+  let xcompare ~lhs:{idx= lidx; size= lsiz} ~rhs:{idx= ridx; size= rsiz} =
+    let idxcmp = ItvPure.xcompare ~lhs:lidx ~rhs:ridx in
+    let sizcmp = ItvPure.xcompare ~lhs:lsiz ~rhs:rsiz in
+    match (idxcmp, sizcmp) with
+    | `Equal, `Equal
+     -> `Equal
+    | `NotComparable, _
+     -> `NotComparable
+    | `Equal, (`LeftSmallerThanRight | `LeftSubsumesRight)
+     -> `LeftSubsumesRight
+    | `Equal, (`RightSmallerThanLeft | `RightSubsumesLeft)
+     -> `RightSubsumesLeft
+    | `LeftSubsumesRight, (`Equal | `LeftSubsumesRight)
+     -> `LeftSubsumesRight
+    | `RightSubsumesLeft, (`Equal | `RightSubsumesLeft)
+     -> `RightSubsumesLeft
+    | (`LeftSmallerThanRight | `RightSmallerThanLeft), _
+     -> let lidxpos = ItvPure.le_sem ItvPure.zero lidx in
+        let ridxpos = ItvPure.le_sem ItvPure.zero ridx in
+        if not (ItvPure.equal lidxpos ridxpos) then `NotComparable
+        else if ItvPure.is_true lidxpos then
+          (* both idx >= 0 *)
+          match (idxcmp, sizcmp) with
+          | `LeftSmallerThanRight, (`Equal | `RightSmallerThanLeft | `RightSubsumesLeft)
+           -> `RightSubsumesLeft
+          | `RightSmallerThanLeft, (`Equal | `LeftSmallerThanRight | `LeftSubsumesRight)
+           -> `LeftSubsumesRight
+          | _
+           -> `NotComparable
+        else if ItvPure.is_false lidxpos then
+          (* both idx < 0, size doesn't matter *)
+          match idxcmp with
+          | `LeftSmallerThanRight
+           -> `LeftSubsumesRight
+          | `RightSmallerThanLeft
+           -> `RightSubsumesLeft
+          | `Equal
+           -> `Equal
+          | _
+           -> `NotComparable
+        else `NotComparable
+    | _
+     -> `NotComparable
+
   let filter1 : t -> bool =
     fun c ->
       ItvPure.is_top c.idx || ItvPure.is_top c.size
@@ -173,26 +218,36 @@ module ConditionSet = struct
       (ConditionTrace.get_location cwt2.trace)
 
   let try_merge ~existing ~new_ =
-    if Condition.eq existing.cond new_.cond then
-      (* keep the first one in the code *)
-      if compare_by_location existing new_ <= 0 then `KeepExistingAndStop
-      else `RemoveExistingAddNewAndStop
-    else `KeepGoingFinallyAddNew
+    if Condition.have_similar_bounds existing.cond new_.cond then
+      match Condition.xcompare ~lhs:existing.cond ~rhs:new_.cond with
+      | `Equal
+       -> (* keep the first one in the code *)
+          if compare_by_location existing new_ <= 0 then `DoNotAddAndStop
+          else `RemoveExistingAndContinue
+      (* we don't want to remove issues that would end up in a higher bucket,
+         e.g. [a, b] < [c, d] is subsumed by [a, +oo] < [c, d] but the latter is less precise *)
+      | `LeftSubsumesRight
+       -> `DoNotAddAndStop
+      | `RightSubsumesLeft
+       -> `RemoveExistingAndContinue
+      | `NotComparable
+       -> `KeepExistingAndContinue
+    else `KeepExistingAndContinue
 
   let add_one condset new_ =
-    let rec aux ~new_ acc = function
+    let rec aux ~new_ acc ~same = function
       | []
-       -> new_ :: condset
-      | existing :: rest ->
+       -> if same then new_ :: condset else new_ :: acc
+      | existing :: rest as existings ->
         match try_merge ~existing ~new_ with
-        | `KeepExistingAndStop
-         -> condset
-        | `RemoveExistingAddNewAndStop
-         -> new_ :: List.rev_append acc rest
-        | `KeepGoingFinallyAddNew
-         -> aux ~new_ (existing :: acc) rest
+        | `DoNotAddAndStop
+         -> if same then condset else List.rev_append acc existings
+        | `RemoveExistingAndContinue
+         -> aux ~new_ acc ~same:false rest
+        | `KeepExistingAndContinue
+         -> aux ~new_ (existing :: acc) ~same rest
     in
-    aux ~new_ [] condset
+    aux ~new_ [] ~same:true condset
 
   let join condset1 condset2 = List.fold_left ~f:add_one condset1 ~init:condset2
 
@@ -206,14 +261,14 @@ module ConditionSet = struct
         join [cwt] condset
 
   let subst condset (bound_map, trace_map) caller_pname callee_pname loc =
-    let subst_cwt cwt =
+    let subst_add_cwt condset cwt =
       match Condition.get_symbols cwt.cond with
       | []
-       -> Some cwt
+       -> add_one condset cwt
       | symbols ->
         match Condition.subst cwt.cond bound_map with
         | None
-         -> None
+         -> condset
         | Some cond
          -> let traces_caller =
               List.fold symbols ~init:ValTraceSet.empty ~f:(fun val_traces symbol ->
@@ -228,9 +283,9 @@ module ConditionSet = struct
                 trace
             in
             let trace = make_call_and_subst cwt.trace in
-            Some {cond; trace}
+            add_one condset {cond; trace}
     in
-    List.filter_map condset ~f:subst_cwt
+    List.fold condset ~f:subst_add_cwt ~init:[]
 
   let iter ~f condset = List.iter condset ~f:(fun cwt -> f cwt.cond cwt.trace)
 
diff --git a/infer/src/bufferoverrun/itv.ml b/infer/src/bufferoverrun/itv.ml
index 12209a23c..1efb2101a 100644
--- a/infer/src/bufferoverrun/itv.ml
+++ b/infer/src/bufferoverrun/itv.ml
@@ -432,6 +432,19 @@ module Bound = struct
 
   let eq : t -> t -> bool = fun x y -> le x y && le y x
 
+  let xcompare ~lhs ~rhs =
+    let ller = le lhs rhs in
+    let rlel = le rhs lhs in
+    match (ller, rlel) with
+    | true, true
+     -> `Equal
+    | true, false
+     -> `LeftSmallerThanRight
+    | false, true
+     -> `RightSmallerThanLeft
+    | false, false
+     -> `NotComparable
+
   let remove_max_int : t -> t =
     fun x ->
       match x with
@@ -625,10 +638,22 @@ module Bound = struct
     | MinMax (_, _, _, _, s)
      -> [s]
 
+  let are_similar b1 b2 =
+    match (b1, b2) with
+    | MInf, MInf
+     -> true
+    | PInf, PInf
+     -> true
+    | (Linear _ | MinMax _), (Linear _ | MinMax _)
+     -> true
+    | _
+     -> false
+
   let is_not_infty : t -> bool = function MInf | PInf -> false | _ -> true
 end
 
 module ItvPure = struct
+  (** (l, u) represents the closed interval [l; u] (of course infinite bounds are open) *)
   type astate = Bound.t * Bound.t [@@deriving compare]
 
   type t = astate
@@ -645,6 +670,8 @@ module ItvPure = struct
     fun (l, u) ->
       match (Bound.is_const l, Bound.is_const u) with Some _, Some _ -> true | _, _ -> false
 
+  let have_similar_bounds (l1, u1) (l2, u2) = Bound.are_similar l1 l2 && Bound.are_similar u1 u2
+
   let make : Bound.t -> Bound.t -> t = fun l u -> (l, u)
 
   let subst : t -> Bound.t bottom_lifted SubstMap.t -> t bottom_lifted =
@@ -660,6 +687,35 @@ module ItvPure = struct
   let ( <= ) : lhs:t -> rhs:t -> bool =
     fun ~lhs:(l1, u1) ~rhs:(l2, u2) -> Bound.le l2 l1 && Bound.le u1 u2
 
+  let xcompare ~lhs:(l1, u1) ~rhs:(l2, u2) =
+    let lcmp = Bound.xcompare ~lhs:l1 ~rhs:l2 in
+    let ucmp = Bound.xcompare ~lhs:u1 ~rhs:u2 in
+    match (lcmp, ucmp) with
+    | `Equal, `Equal
+     -> `Equal
+    | `NotComparable, _ | _, `NotComparable -> (
+      match Bound.xcompare ~lhs:u1 ~rhs:l2 with
+      | `LeftSmallerThanRight
+       -> `LeftSmallerThanRight
+      | u1l2 ->
+        match (Bound.xcompare ~lhs:u2 ~rhs:l1, u1l2) with
+        | `LeftSmallerThanRight, _
+         -> `RightSmallerThanLeft
+        | `Equal, `Equal
+         -> `Equal (* weird, though *)
+        | _, `Equal
+         -> `LeftSmallerThanRight
+        | _
+         -> `NotComparable )
+    | (`LeftSmallerThanRight | `Equal), (`LeftSmallerThanRight | `Equal)
+     -> `LeftSmallerThanRight
+    | (`RightSmallerThanLeft | `Equal), (`RightSmallerThanLeft | `Equal)
+     -> `RightSmallerThanLeft
+    | `LeftSmallerThanRight, `RightSmallerThanLeft
+     -> `LeftSubsumesRight
+    | `RightSmallerThanLeft, `LeftSmallerThanRight
+     -> `RightSubsumesLeft
+
   let join : t -> t -> t = fun (l1, u1) (l2, u2) -> (Bound.lb l1 l2, Bound.ub u1 u2)
 
   let widen : prev:t -> next:t -> num_iters:int -> t =
diff --git a/infer/tests/codetoanalyze/c/bufferoverrun/duplicates.c b/infer/tests/codetoanalyze/c/bufferoverrun/duplicates.c
index fbfcc95bb..6d41fd17b 100644
--- a/infer/tests/codetoanalyze/c/bufferoverrun/duplicates.c
+++ b/infer/tests/codetoanalyze/c/bufferoverrun/duplicates.c
@@ -9,7 +9,7 @@
 
 void two_accesses(int* arr) {
   if (arr[1] < 0) {
-    arr[1] = 0;
+    arr[0] = 0;
   }
 }
 
@@ -17,3 +17,13 @@ void one_alarm_is_enough() {
   int arr[1];
   two_accesses(arr);
 }
+
+void two_symbolic_accesses(int n) {
+  int arr[1];
+  arr[n] = 0;
+  arr[n - 2] = 0; // Do not remove the associated condition
+}
+
+void tsa_one_alarm_Bad() { two_symbolic_accesses(3); }
+
+void tsa_two_alarms_Bad() { two_symbolic_accesses(1); }
diff --git a/infer/tests/codetoanalyze/c/bufferoverrun/issues.exp b/infer/tests/codetoanalyze/c/bufferoverrun/issues.exp
index a2947f587..4a1809eda 100644
--- a/infer/tests/codetoanalyze/c/bufferoverrun/issues.exp
+++ b/infer/tests/codetoanalyze/c/bufferoverrun/issues.exp
@@ -7,9 +7,10 @@ codetoanalyze/c/bufferoverrun/break_continue_return.c, break_continue_return, 16
 codetoanalyze/c/bufferoverrun/cast.c, cast2_Good_FP, 2, BUFFER_OVERRUN_L1, [ArrayDeclaration,ArrayAccess: Offset: [4, 4] Size: [4, 4]]
 codetoanalyze/c/bufferoverrun/do_while.c, do_while, 2, BUFFER_OVERRUN_L4, [ArrayDeclaration,Assignment,Call,Assignment,ArrayAccess: Offset: [0, +oo] Size: [10, 10] @ codetoanalyze/c/bufferoverrun/do_while.c:18:5 by call `do_while_sub()` ]
 codetoanalyze/c/bufferoverrun/duplicates.c, one_alarm_is_enough, 2, BUFFER_OVERRUN_L1, [ArrayDeclaration,Call,ArrayAccess: Offset: [1, 1] Size: [1, 1] @ codetoanalyze/c/bufferoverrun/duplicates.c:11:7 by call `two_accesses()` ]
-codetoanalyze/c/bufferoverrun/external.c, extern_bad, 4, BUFFER_OVERRUN_L1, [ArrayDeclaration,ArrayAccess: Offset: [10, 10] Size: [10, 10]]
+codetoanalyze/c/bufferoverrun/duplicates.c, tsa_one_alarm_Bad, 0, BUFFER_OVERRUN_L1, [Call,ArrayDeclaration,ArrayAccess: Offset: [3, 3] Size: [1, 1] @ codetoanalyze/c/bufferoverrun/duplicates.c:23:3 by call `two_symbolic_accesses()` ]
+codetoanalyze/c/bufferoverrun/duplicates.c, tsa_two_alarms_Bad, 0, BUFFER_OVERRUN_L1, [Call,ArrayDeclaration,ArrayAccess: Offset: [-1, -1] Size: [1, 1] @ codetoanalyze/c/bufferoverrun/duplicates.c:24:3 by call `two_symbolic_accesses()` ]
+codetoanalyze/c/bufferoverrun/duplicates.c, tsa_two_alarms_Bad, 0, BUFFER_OVERRUN_L1, [Call,ArrayDeclaration,ArrayAccess: Offset: [1, 1] Size: [1, 1] @ codetoanalyze/c/bufferoverrun/duplicates.c:23:3 by call `two_symbolic_accesses()` ]
 codetoanalyze/c/bufferoverrun/external.c, extern_bad, 5, BUFFER_OVERRUN_L5, [Assignment,ArrayAccess: Offset: [-oo, +oo] Size: [0, +oo]]
-codetoanalyze/c/bufferoverrun/external.c, extern_bad, 7, BUFFER_OVERRUN_L1, [ArrayDeclaration,ArrayAccess: Offset: [20, 20] Size: [10, 10]]
 codetoanalyze/c/bufferoverrun/external.c, extern_bad, 10, BUFFER_OVERRUN_L1, [ArrayDeclaration,ArrayAccess: Offset: [30, 30] Size: [10, 10]]
 codetoanalyze/c/bufferoverrun/for_loop.c, for_loop, 10, BUFFER_OVERRUN_L3, [Call,ArrayDeclaration,Assignment,Assignment,Return,Assignment,Assignment,ArrayAccess: Offset: [0, 9] Size: [5, 10]]
 codetoanalyze/c/bufferoverrun/for_loop.c, safealloc, 10, UNREACHABLE_CODE, []
@@ -17,7 +18,6 @@ codetoanalyze/c/bufferoverrun/function_call.c, call_by_arr_bad, 3, BUFFER_OVERRU
 codetoanalyze/c/bufferoverrun/function_call.c, call_by_ptr_bad, 4, BUFFER_OVERRUN_L1, [ArrayDeclaration,Call,Assignment,ArrayAccess: Offset: [-1, -1] Size: [10, 10]]
 codetoanalyze/c/bufferoverrun/function_call.c, call_by_struct_ptr_bad, 5, BUFFER_OVERRUN_L1, [ArrayDeclaration,Call,Assignment,ArrayAccess: Offset: [-1, -1] Size: [10, 10]]
 codetoanalyze/c/bufferoverrun/function_call.c, function_call, 4, BUFFER_OVERRUN_L1, [ArrayDeclaration,Call,Assignment,ArrayAccess: Offset: [100, 100] Size: [10, 10] @ codetoanalyze/c/bufferoverrun/function_call.c:21:3 by call `arr_access()` ]
-codetoanalyze/c/bufferoverrun/function_call.c, function_call, 4, BUFFER_OVERRUN_L1, [ArrayDeclaration,Assignment,Call,ArrayAccess: Offset: [20, 20] Size: [10, 10] @ codetoanalyze/c/bufferoverrun/function_call.c:22:3 by call `arr_access()` ]
 codetoanalyze/c/bufferoverrun/global.c, compare_global_variable_bad, 3, BUFFER_OVERRUN_L1, [ArrayDeclaration,ArrayAccess: Offset: [10, 10] Size: [10, 10]]
 codetoanalyze/c/bufferoverrun/goto_loop.c, goto_loop, 11, BUFFER_OVERRUN_L1, [ArrayDeclaration,Assignment,ArrayAccess: Offset: [10, +oo] Size: [10, 10]]
 codetoanalyze/c/bufferoverrun/inf_loop.c, inf_loop, 4, CONDITION_ALWAYS_TRUE, []
diff --git a/infer/tests/codetoanalyze/cpp/bufferoverrun/issues.exp b/infer/tests/codetoanalyze/cpp/bufferoverrun/issues.exp
index af33dd5ce..519f5209f 100644
--- a/infer/tests/codetoanalyze/cpp/bufferoverrun/issues.exp
+++ b/infer/tests/codetoanalyze/cpp/bufferoverrun/issues.exp
@@ -1,8 +1,6 @@
 codetoanalyze/cpp/bufferoverrun/class.cpp, my_class_access2_Bad, 2, BUFFER_OVERRUN_L1, [Call,Assignment,Call,Assignment,Return,ArrayAccess: Offset: [10, 10] Size: [10, 10]]
 codetoanalyze/cpp/bufferoverrun/class.cpp, my_class_access_Bad, 2, BUFFER_OVERRUN_L1, [Call,Call,Assignment,ArrayAccess: Offset: [10, 10] Size: [10, 10]]
-codetoanalyze/cpp/bufferoverrun/external.cpp, extern_bad, 4, BUFFER_OVERRUN_L1, [ArrayDeclaration,ArrayAccess: Offset: [10, 10] Size: [10, 10]]
 codetoanalyze/cpp/bufferoverrun/external.cpp, extern_bad, 5, BUFFER_OVERRUN_L5, [Assignment,ArrayAccess: Offset: [-oo, +oo] Size: [0, +oo]]
-codetoanalyze/cpp/bufferoverrun/external.cpp, extern_bad, 7, BUFFER_OVERRUN_L1, [ArrayDeclaration,ArrayAccess: Offset: [20, 20] Size: [10, 10]]
 codetoanalyze/cpp/bufferoverrun/external.cpp, extern_bad, 10, BUFFER_OVERRUN_L1, [ArrayDeclaration,ArrayAccess: Offset: [30, 30] Size: [10, 10]]
 codetoanalyze/cpp/bufferoverrun/function_call.cpp, call_by_ref_bad, 4, BUFFER_OVERRUN_L1, [ArrayDeclaration,Call,Assignment,ArrayAccess: Offset: [-1, -1] Size: [10, 10]]
 codetoanalyze/cpp/bufferoverrun/remove_temps.cpp, C_foo_Bad, 1, CONDITION_ALWAYS_TRUE, []
@@ -14,7 +12,6 @@ codetoanalyze/cpp/bufferoverrun/simple_vector.cpp, instantiate_my_vector_oob_Ok,
 codetoanalyze/cpp/bufferoverrun/simple_vector.cpp, my_vector_oob_Bad, 2, BUFFER_OVERRUN_L2, [Call,Call,ArrayDeclaration,Assignment,ArrayAccess: Offset: [max(0, s$4), s$5] Size: [max(0, s$4), s$5] @ codetoanalyze/cpp/bufferoverrun/simple_vector.cpp:21:23 by call `int_vector_access_at()` ]
 codetoanalyze/cpp/bufferoverrun/trivial.cpp, trivial, 2, BUFFER_OVERRUN_L1, [ArrayDeclaration,ArrayAccess: Offset: [10, 10] Size: [10, 10]]
 codetoanalyze/cpp/bufferoverrun/vector.cpp, call_safe_access4_Good_FP, 2, BUFFER_OVERRUN_L1, [Call,Call,Assignment,Call,Call,Assignment,Return,Call,Assignment,Call,Call,Call,Call,Call,ArrayDeclaration,Assignment,ArrayAccess: Offset: [0, 0] Size: [0, 0]]
-codetoanalyze/cpp/bufferoverrun/vector.cpp, just_test_model_FP, 11, BUFFER_OVERRUN_L4, [Call,Assignment,Call,Call,Call,Call,Call,Assignment,Call,Call,Call,Call,Call,Assignment,Call,Call,Call,Call,Call,Assignment,Call,Call,Call,Call,Call,Assignment,Call,Call,ArrayDeclaration,Assignment,ArrayAccess: Offset: [0, 0] Size: [0, +oo]]
 codetoanalyze/cpp/bufferoverrun/vector.cpp, just_test_model_FP, 16, BUFFER_OVERRUN_L5, [Call,ArrayAccess: Offset: [-oo, +oo] Size: [0, +oo]]
 codetoanalyze/cpp/bufferoverrun/vector.cpp, just_test_model_FP, 18, BUFFER_OVERRUN_L3, [Call,Call,Call,Assignment,Call,Call,Call,Call,Call,ArrayDeclaration,Assignment,ArrayAccess: Offset: [1, 1] Size: [0, +oo]]
 codetoanalyze/cpp/bufferoverrun/vector.cpp, out_of_bound_Bad, 2, BUFFER_OVERRUN_L2, [Call,Call,Call,ArrayDeclaration,Assignment,ArrayAccess: Offset: [max(0, s$12), s$13] Size: [max(0, s$12), s$13]]