[pulse] cap the size of literals in formulas

Summary: On some pathological examples of crypto primitives like libsodium, later diffs make pulse grind to a halt due to an explosion in the size of literals. This is at least partly due to the fact the arithmetic doesn't operate modulo 2^64. Due to the fact the arithmetic is confused in any case when we reach such large numbers, cap them, currently at 2^128. This removes pathological cases for now, even now on libsodium Pulse is ~5 times faster than before! Take this opportunity to put the modified Q/Z modules in the own files. Reviewed By: jberdine Differential Revision: D27463933 fbshipit-source-id: 342d941e2
4 years ago · d1b3e56574
parent 2d83dfdcb0
commit d1b3e56574
7 changed files with 250 additions and 86 deletions
--- a/infer/src/pulse/PulseFormula.ml
+++ b/infer/src/pulse/PulseFormula.ml
@ -10,96 +10,12 @@ module F = Format
 module L = Logging
 module SatUnsat = PulseSatUnsat
 module Var = PulseAbstractValue
+module Q = QSafeCapped
+module Z = ZSafe
 open SatUnsat

 type operand = LiteralOperand of IntLit.t | AbstractValueOperand of Var.t

-(** {!Z} from zarith with a few convenience functions added *)
-module Z = struct
-  include Z
-
-  let protect f x = try Some (f x) with Division_by_zero | Invalid_argument _ | Z.Overflow -> None
-
-  let protect2 f x y =
-    try Some (f x y) with Division_by_zero | Invalid_argument _ | Z.Overflow -> None
-
-
-  let yojson_of_t z = `String (Z.to_string z)
-
-  [@@@warning "-32"]
-
-  let div = protect2 Z.div
-
-  let rem = protect2 Z.rem
-
-  let div_rem = protect2 Z.div_rem
-
-  let cdiv = protect2 Z.cdiv
-
-  let fdiv = protect2 Z.fdiv
-
-  let ediv_rem = protect2 Z.ediv_rem
-
-  let ediv = protect2 Z.ediv
-
-  let erem = protect2 Z.erem
-
-  let divexact = protect2 Z.divexact
-
-  let gcd = protect2 Z.gcd
-
-  let gcdext = protect2 Z.gcdext
-
-  let lcm = protect2 Z.lcm
-
-  let powm = protect2 Z.powm
-
-  let powm_sec = protect2 Z.powm_sec
-
-  let invert = protect2 Z.invert
-
-  let ( / ) = protect2 Z.( / )
-
-  let ( /> ) = protect2 Z.( /> )
-
-  let ( /< ) = protect2 Z.( /< )
-
-  let ( /| ) = protect2 Z.( /| )
-
-  let ( mod ) = protect2 Z.( mod )
-end
-
-(** {!Q} from zarith with a few convenience functions added *)
-module Q = struct
-  include Q
-
-  type _q = t = {num: Z.t; den: Z.t} [@@deriving yojson_of]
-
-  let yojson_of_t = [%yojson_of: _q]
-
-  let not_equal q1 q2 = not (Q.equal q1 q2)
-
-  [@@@warning "-32"]
-
-  let is_one q = Q.equal q Q.one
-
-  let is_minus_one q = Q.equal q Q.minus_one
-
-  let is_zero q = Q.equal q Q.zero
-
-  let is_not_zero q = not (is_zero q)
-
-  let to_int q = Z.protect Q.to_int q
-
-  let to_int32 q = Z.protect Q.to_int32 q
-
-  let to_int64 q = Z.protect Q.to_int64 q
-
-  let to_bigint q = Z.protect Q.to_bigint q
-
-  let to_nativeint q = Z.protect Q.to_nativeint q
-end
-
 (** Linear Arithmetic *)
 module LinArith : sig
  (** linear combination of variables, eg [2·x + 3/4·y + 12] *)
--- a/infer/src/pulse/QSafeCapped.ml
+++ b/infer/src/pulse/QSafeCapped.ml
@ -0,0 +1,47 @@
+(*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *)
+
+open! IStd
+module Z = ZSafe
+include Q
+
+type _q = Q.t = {num: Z.t; den: Z.t} [@@deriving yojson_of]
+
+let yojson_of_t = [%yojson_of: _q]
+
+let not_equal q1 q2 = not (Q.equal q1 q2)
+
+let is_one q = Q.equal q Q.one
+
+let is_minus_one q = Q.equal q Q.minus_one
+
+let is_zero q = Q.equal q Q.zero
+
+let is_not_zero q = not (is_zero q)
+
+let to_int q = Z.protect Q.to_int q
+
+let to_int32 q = Z.protect Q.to_int32 q
+
+let to_int64 q = Z.protect Q.to_int64 q
+
+let to_bigint q = Z.protect Q.to_bigint q
+
+let to_nativeint q = Z.protect Q.to_nativeint q
+
+(** cap certain operations to prevent numerators and denominators from growing too large (>128 bits
+    on a 64-bit machine) *)
+let cap q = if Int.(Z.size q.num > 2 || Z.size q.den > 2) then Q.undef else q
+
+let mul q1 q2 =
+  (* {!Q.mul} does not optimise these cases *)
+  if is_one q1 then q2 else if is_one q2 then q1 else Q.mul q1 q2 |> cap
+
+
+let div q1 q2 =
+  (* {!Q.div} does not optimise these cases *)
+  if is_one q2 then q1 else Q.div q1 q2
--- a/infer/src/pulse/QSafeCapped.mli
+++ b/infer/src/pulse/QSafeCapped.mli
@ -0,0 +1,39 @@
+(*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *)
+
+open! IStd
+
+(* OCaml needs to know not only that this has the same interface as [Q] but also that the types it
+   defines are, in fact, the same as [Q] *)
+include module type of struct
+  include Q
+end
+
+val yojson_of_t : [%yojson_of: t]
+
+val is_zero : t -> bool
+
+val is_not_zero : t -> bool
+
+val is_one : t -> bool
+
+val is_minus_one : t -> bool
+
+val not_equal : t -> t -> bool
+
+(* the functions below shadow definitions in [Q] to give them safer types *)
+[@@@warning "-32"]
+
+val to_int : t -> int option
+
+val to_int32 : t -> int32 option
+
+val to_int64 : t -> int64 option
+
+val to_bigint : t -> Z.t option
+
+val to_nativeint : t -> nativeint option
--- a/infer/src/pulse/ZSafe.ml
+++ b/infer/src/pulse/ZSafe.ml
@ -0,0 +1,58 @@
+(*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *)
+
+open! IStd
+
+let protect f x = try Some (f x) with Division_by_zero | Invalid_argument _ | Z.Overflow -> None
+
+let protect2 f x y =
+  try Some (f x y) with Division_by_zero | Invalid_argument _ | Z.Overflow -> None
+
+
+let yojson_of_t z = `String (Z.to_string z)
+
+include Z
+
+let div = protect2 Z.div
+
+let rem = protect2 Z.rem
+
+let div_rem = protect2 Z.div_rem
+
+let cdiv = protect2 Z.cdiv
+
+let fdiv = protect2 Z.fdiv
+
+let ediv_rem = protect2 Z.ediv_rem
+
+let ediv = protect2 Z.ediv
+
+let erem = protect2 Z.erem
+
+let divexact = protect2 Z.divexact
+
+let gcd = protect2 Z.gcd
+
+let gcdext = protect2 Z.gcdext
+
+let lcm = protect2 Z.lcm
+
+let powm = protect2 Z.powm
+
+let powm_sec = protect2 Z.powm_sec
+
+let invert = protect2 Z.invert
+
+let ( / ) = protect2 Z.( / )
+
+let ( /> ) = protect2 Z.( /> )
+
+let ( /< ) = protect2 Z.( /< )
+
+let ( /| ) = protect2 Z.( /| )
+
+let ( mod ) = protect2 Z.( mod )
--- a/infer/src/pulse/ZSafe.mli
+++ b/infer/src/pulse/ZSafe.mli
@ -0,0 +1,62 @@
+(*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *)
+
+open! IStd
+
+(* OCaml needs to know not only that this has the same interface as [Z] but also that the types it
+   defines are, in fact, the same as [Z] *)
+include module type of struct
+  include Z
+end
+
+val protect : ('a -> 'b) -> 'a -> 'b option
+(** [None] instead of throwing [Division_by_zero | Invalid_argument _ | Z.Overflow] *)
+
+val yojson_of_t : [%yojson_of: t]
+
+(* the functions below shadow definitions in [Z] to give them safer types *)
+[@@@warning "-32"]
+
+val div : t -> t -> t option
+
+val rem : t -> t -> t option
+
+val div_rem : t -> t -> (t * t) option
+
+val cdiv : t -> t -> t option
+
+val fdiv : t -> t -> t option
+
+val ediv_rem : t -> t -> (t * t) option
+
+val ediv : t -> t -> t option
+
+val erem : t -> t -> t option
+
+val divexact : t -> t -> t option
+
+val gcd : t -> t -> t option
+
+val gcdext : t -> t -> (t * t * t) option
+
+val lcm : t -> t -> t option
+
+val powm : t -> t -> (t -> t) option
+
+val powm_sec : t -> t -> (t -> t) option
+
+val invert : t -> t -> t option
+
+val ( / ) : t -> t -> t option
+
+val ( /> ) : t -> t -> t option
+
+val ( /< ) : t -> t -> t option
+
+val ( /| ) : t -> t -> t option
+
+val ( mod ) : t -> t -> t option
--- a/infer/tests/codetoanalyze/cpp/pulse/issues.exp
+++ b/infer/tests/codetoanalyze/cpp/pulse/issues.exp
@ -133,6 +133,8 @@ codetoanalyze/cpp/pulse/use_after_free.cpp, trigger_assumed_aliasing3_bad, 0, US
 codetoanalyze/cpp/pulse/use_after_free.cpp, trigger_assumed_aliasing_bad, 0, USE_AFTER_FREE, no_bucket, ERROR, [calling context starts here,in call to `assumed_aliasing_latent`,invalidation part of the trace starts here,parameter `x` of assumed_aliasing_latent,was invalidated by call to `free()`,use-after-lifetime part of the trace starts here,parameter `y` of assumed_aliasing_latent,invalid access occurs here]
 codetoanalyze/cpp/pulse/use_after_free.cpp, use_after_free_simple_bad, 2, USE_AFTER_FREE, no_bucket, ERROR, [invalidation part of the trace starts here,parameter `x` of use_after_free_simple_bad,was invalidated by call to `free()`,use-after-lifetime part of the trace starts here,parameter `x` of use_after_free_simple_bad,invalid access occurs here]
 codetoanalyze/cpp/pulse/use_after_scope.cpp, access_out_of_scope_stack_ref_bad, 3, USE_AFTER_LIFETIME, no_bucket, ERROR, [invalidation part of the trace starts here,variable `p` declared here,when calling `invalidate_local_ok` here,variable `t` declared here,is the address of a stack variable `t` whose lifetime has ended,use-after-lifetime part of the trace starts here,variable `p` declared here,passed as argument to `invalidate_local_ok`,variable `t` declared here,assigned,return from call to `invalidate_local_ok`,invalid access occurs here]
+codetoanalyze/cpp/pulse/values.cpp, FP_int_over_cap_ok, 13, NULLPTR_DEREFERENCE, no_bucket, ERROR, [source of the null value part of the trace starts here,assigned,is the null pointer,null pointer dereference part of the trace starts here,assigned,invalid access occurs here]
+codetoanalyze/cpp/pulse/values.cpp, FP_ints_are_not_rationals_ok, 4, NULLPTR_DEREFERENCE, no_bucket, ERROR, [source of the null value part of the trace starts here,assigned,is the null pointer,null pointer dereference part of the trace starts here,assigned,invalid access occurs here]
 codetoanalyze/cpp/pulse/values.cpp, error_under_true_conditionals_bad, 5, USE_AFTER_FREE, no_bucket, ERROR, [invalidation part of the trace starts here,parameter `x` of error_under_true_conditionals_bad,was invalidated by call to `free()`,use-after-lifetime part of the trace starts here,parameter `x` of error_under_true_conditionals_bad,invalid access occurs here]
 codetoanalyze/cpp/pulse/values.cpp, free_if_deref_bad, 2, USE_AFTER_FREE, no_bucket, ERROR, [invalidation part of the trace starts here,parameter `x` of free_if_deref_bad,when calling `free_if` here,parameter `x` of free_if,was invalidated by call to `free()`,use-after-lifetime part of the trace starts here,parameter `x` of free_if_deref_bad,invalid access occurs here]
 codetoanalyze/cpp/pulse/vector.cpp, FP_init_fill_then_push_back_ok, 7, VECTOR_INVALIDATION, no_bucket, ERROR, [invalidation part of the trace starts here,variable `vec` declared here,was potentially invalidated by `std::vector::push_back()`,use-after-lifetime part of the trace starts here,variable `vec` declared here,passed as argument to `std::vector::at()` (modelled),return from call to `std::vector::at()` (modelled),assigned,invalid access occurs here]
--- a/infer/tests/codetoanalyze/cpp/pulse/values.cpp
+++ b/infer/tests/codetoanalyze/cpp/pulse/values.cpp
@ -115,3 +115,43 @@ void function_empty_range_interproc_ok() {
  StringRange x{};
  find_first_non_space(x);
 }
+
+// arithmetic on integers does not wrap around but ignores too-large values
+void FP_int_over_cap_ok() {
+  unsigned long one = 1;
+  // 2^(63+63+3) + 2*2^(63+3) + 1*8 = 2^129 + 2^67 + 8 = 8 mod 2^64
+  // this is convoluted to escape various simplifications from Z that would
+  // avoid the false positive
+  unsigned long x = ((one << 62) * 2 + 1) * ((one << 62) * 2 + 1) * 8;
+  unsigned long y = ((one << 62) * 2 + 1) * ((one << 62) * 2 + 1) * 8;
+  // - x == y+1 is true in "Formulas" because x = y = Q.undef, but not true in
+  //   inferbo intervals because they keep arbitrary precision integers
+  // - x != 8 is not true in Formulas but true in inferbo
+  // - In C both of these would be false, so overall we get a false positive
+  if (x == y + 1 || x != 8) {
+    int* p = nullptr;
+    *p = 42;
+  }
+}
+
+void int_under_cap_ok() {
+  unsigned long one = 1;
+  // 2^63
+  unsigned long x = (one << 62) * 2;
+  if (x != (unsigned long)9223372036854775808) {
+    int* p = nullptr;
+    *p = 42;
+  }
+}
+
+// used to confuse inferbo
+int mult(int x, int y) { return x * y; }
+
+// integers are internally represented as rationals
+void FP_ints_are_not_rationals_ok() {
+  int x = 5 / 2;
+  if (x != mult(2, 1)) {
+    int* p = nullptr;
+    *p = 42;
+  }
+}