[pulse] recency model for memory accesses

Summary:
Add a new data structure and use it for the map of memory accesses to
limit the number of destinations reachable from a given address. This
avoids remembering details of each index in large arrays, or even each
field in large structs.

Reviewed By: skcho

Differential Revision: D18246091

fbshipit-source-id: 5d3974d9c
Jules Villard, committed by Facebook GitHub Bot
parent d01ada643d
commit 5c453393ff
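
A minimal standalone sketch of the recency scheme described in the summary, assuming only the OCaml standard library; the module and field names here are illustrative and deliberately simpler than the RecencyMap functor added by this diff (no functors, int keys only):

(* writes go to [new_]; once [new_] holds [limit] keys, drop [old] wholesale and
   demote [new_] to [old], an O(1) cleanup that keeps the most recent bindings *)
module IntMap = Map.Make (Int)

type 'v recency = {count_new: int; new_: 'v IntMap.t; old: 'v IntMap.t}

let empty = {count_new= 0; new_= IntMap.empty; old= IntMap.empty}

let add ~limit key value m =
  let count_new = if IntMap.mem key m.new_ then m.count_new else m.count_new + 1 in
  if count_new > limit then {count_new= 1; new_= IntMap.singleton key value; old= m.new_}
  else {count_new; new_= IntMap.add key value m.new_; old= m.old}

(* reads consult the recent generation first, then fall back to the old one *)
let find_opt key m =
  match IntMap.find_opt key m.new_ with
  | Some _ as v -> v
  | None -> IntMap.find_opt key m.old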

@ -1608,6 +1608,10 @@ INTERNAL OPTIONS
--pulse-model-free-pattern-reset
Cancel the effect of --pulse-model-free-pattern.
--pulse-recency-limit int
Maximum number of array elements and structure fields to keep
track of for a given address.
--pulse-widen-threshold int
Under-approximate after int loop iterations

@ -1754,6 +1754,12 @@ and project_root =
~meta:"dir" "Specify the root directory of the project"
and pulse_recency_limit =
CLOpt.mk_int ~long:"pulse-recency-limit" ~default:32
"Maximum number of array elements and structure fields to keep track of for a given array \
address."
and pulse_intraprocedural_only =
CLOpt.mk_bool ~long:"pulse-intraprocedural-only"
"Disable inter-procedural analysis in Pulse. Used for experimentations only."
@ -2849,6 +2855,8 @@ and progress_bar =
and project_root = !project_root
and pulse_recency_limit = !pulse_recency_limit
and pulse_intraprocedural_only = !pulse_intraprocedural_only
and pulse_max_disjuncts = !pulse_max_disjuncts

@ -443,6 +443,8 @@ val progress_bar : [`MultiLine | `Plain | `Quiet]
val project_root : string
val pulse_recency_limit : int
val pulse_intraprocedural_only : bool
val pulse_max_disjuncts : int

@ -106,9 +106,8 @@ let get_modified_params pname post_stack pre_heap post formals =
| Some (addr, _) when Typ.is_pointer typ -> (
match BaseMemory.find_opt addr pre_heap with
| Some edges_pre ->
BaseMemory.Edges.fold
(fun access (addr, _) acc -> add_to_modified ~var ~access ~addr pre_heap post acc)
edges_pre acc
BaseMemory.Edges.fold edges_pre ~init:acc ~f:(fun acc access (addr, _) ->
add_to_modified ~var ~access ~addr pre_heap post acc )
| None ->
debug "The address is not materialized in in pre-heap." ;
acc )

@ -205,6 +205,8 @@ module MakePPMonoMap (Ord : PrintableOrderedType) (Val : PrintableType) =
module type PrintableRankedType = sig
include PrintableType
val compare : t -> t -> int
val equal : t -> t -> bool
val to_rank : t -> int

@ -155,6 +155,8 @@ module MakePPMonoMap (Ord : PrintableOrderedType) (Val : PrintableType) :
module type PrintableRankedType = sig
include PrintableType
val compare : t -> t -> int
val equal : t -> t -> bool
val to_rank : t -> int

@ -0,0 +1,204 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
module F = Format
module type Config = sig
val limit : int
end
module type S = sig
type t [@@deriving compare]
type key
type value
val equal : (value -> value -> bool) -> t -> t -> bool
val pp : F.formatter -> t -> unit
val empty : t
val add : key -> value -> t -> t
val find_opt : key -> t -> value option
val fold : t -> init:'acc -> f:('acc -> key -> value -> 'acc) -> 'acc
val fold_bindings : t -> init:'acc -> f:('acc -> key * value -> 'acc) -> 'acc
val fold_map : t -> init:'acc -> f:('acc -> value -> 'acc * value) -> 'acc * t
val is_empty : t -> bool
val bindings : t -> (key * value) list
val union : f:(key -> value -> value -> value option) -> t -> t -> t
val merge : f:(key -> value option -> value option -> value option) -> t -> t -> t
end
module Make
(Key : PrettyPrintable.PrintableOrderedType)
(Value : PrettyPrintable.PrintableOrderedType)
(Config : Config) : S with type key = Key.t and type value = Value.t = struct
type key = Key.t
type value = Value.t [@@deriving compare]
module M = Caml.Map.Make (Key)
(** [new_] and [old] together represent the map. Keys may be present in both [old] and [new_], in
which case bindings in [new_] take precedence.
The idea is to add to [new_] whenever an element is modified. When the size of [new_] passes
the limit, throw away [old] and move [new_] to [old] (an O(1) cleanup operation). This way the
[Config.limit] most recently modified elements will always be in the map, but we may
temporarily have up to [2*Config.limit] elements in memory.
Using [Caml.Map]s as our backing data-structure allows us to avoid having to scan the [new_]
collection before adding an element in order to avoid adding it multiple times (which would
force us to either go over the [2*Config.limit] max number of elements or risk us losing some
of the [N] most recently-used keys because of repeating keys). *)
type t =
{ count_new: int (** invariant: [count_new] is [M.length new_] *)
; new_: value M.t (** invariant: [M.length new_ ≤ Config.limit] *)
; old: value M.t
(** invariant: [M.length old ≤ Config.limit]. Actually, the length of [old] is always
either [0] or [N], except possibly after a call to [merge]. *) }
[@@deriving compare]
let empty = {count_new= 0; old= M.empty; new_= M.empty}
let edges map =
M.union
(fun _addr edge_new _edge_old -> (* bindings in [new_] take precedence *) Some edge_new)
map.new_ map.old
(** {2 External API} *)
let is_empty map = map.count_new = 0 && M.is_empty map.old
let find_opt key map =
match M.find_opt key map.new_ with
| Some _ as value_yes ->
value_yes
| None ->
(* Ideally we would want to put the binding in [new_] to reflect that it was recently
accessed but that would imply returning the new map, which is odd for a [find_opt]
function. Could be done in the future. *)
M.find_opt key map.old
let add key value map =
(* compute the final count first *)
let next_count_new = if M.mem key map.new_ then map.count_new else map.count_new + 1 in
(* shrink if we are about to go past the limit size of [new_] *)
if next_count_new > Config.limit then {count_new= 1; new_= M.singleton key value; old= map.new_}
else {count_new= next_count_new; new_= M.add key value map.new_; old= map.old}
let equal equal_value map1 map2 =
let edges1 = edges map1 in
let edges2 = edges map2 in
M.equal equal_value edges1 edges2
let fold map ~init ~f =
let acc = M.fold (fun key value acc -> f acc key value) map.new_ init in
M.fold (fun key value acc -> if M.mem key map.new_ then acc else f acc key value) map.old acc
let fold_bindings map ~init ~f = fold map ~init ~f:(fun acc k v -> f acc (k, v))
let pp fmt map =
let pp_item fmt (k, v) = F.fprintf fmt "%a -> %a" Key.pp k Value.pp v in
IContainer.pp_collection ~pp_item fmt map ~fold:fold_bindings
let map m ~f =
let new_ = M.map f m.new_ in
let old = M.mapi (fun key value -> if M.mem key m.new_ then value else f value) m.old in
{m with new_; old}
let fold_map m ~init ~f =
let acc_ref = ref init in
let m' =
map m ~f:(fun value ->
let acc, value' = f !acc_ref value in
acc_ref := acc ;
value' )
in
(!acc_ref, m')
let bindings map = M.bindings (edges map)
(** [merge ~f map1 map2] has at least all the elements [M.merge f map1.new_ map2.new_] but if
there are more than [N] then some will be arbitrarily demoted to [old]. Elements of
[M.merge f map1.old map2.old] may not make it into the final map if [old] then overflows too.
In other words: merge new first, overflow arbitrarily into old, then overflow old arbitrarily
into nothingness. *)
let merge ~f map1 map2 =
let count_new = ref 0 in
let count_old = ref 0 in
let old = ref M.empty in
let new_ =
M.merge
(fun addr val1_opt val2_opt ->
match f addr val1_opt val2_opt with
| None ->
None
| Some value as value_opt ->
incr count_new ;
if !count_new > Config.limit then (
incr count_old ;
old := M.add addr value !old ;
None )
else value_opt )
map1.new_ map2.new_
in
(* save [old] as it was after having added just the [new_] components *)
let old_from_new_ = !old in
let old =
M.merge
(fun addr val1_opt val2_opt ->
(* Check if we have already seen [addr] when merging the [new_] maps. As bindings in
[new_] override those in [old] we don't want to add it again in case it was spilled
into [old_from_new_], but it's fine to add it again if it made it to [new_].
Also if [!old] is already full then we cannot add any more elements so return [None].
*)
if !count_old >= Config.limit || M.mem addr old_from_new_ then None
else
match f addr val1_opt val2_opt with
| None ->
None
| Some _ as value_opt ->
incr count_old ; value_opt )
map1.old map2.old
in
{count_new= !count_new; old; new_}
(** standard-looking implementation of [union] based on [merge] *)
let union ~f map1 map2 =
merge
~f:(fun addr val1_opt val2_opt ->
match (val1_opt, val2_opt) with
| None, None ->
None
| (Some _ as val_), None | None, (Some _ as val_) ->
val_
| Some val1, Some val2 ->
f addr val1 val2 )
map1 map2
end
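
A hypothetical usage sketch (not part of this diff) exercising the module above: it instantiates the functor with a small limit to show the retention guarantee. [IntKey] is an ad-hoc key/value module assumed to satisfy PrettyPrintable.PrintableOrderedType, and the Core-style List/Option calls assume the usual [open! IStd] environment.

module IntKey = struct
  type t = int [@@deriving compare]

  let pp = Format.pp_print_int
end

module M3 = RecencyMap.Make (IntKey) (IntKey) (struct
  let limit = 3
end)

let () =
  (* add keys 1..7 in order: the 3 most recently added keys (5, 6, 7) are guaranteed to
     still be in the map, and at most 2*3 bindings are kept overall *)
  let m = List.fold [1; 2; 3; 4; 5; 6; 7] ~init:M3.empty ~f:(fun m i -> M3.add i i m) in
  assert (List.for_all [5; 6; 7] ~f:(fun i -> Option.is_some (M3.find_opt i m))) ;
  assert (List.length (M3.bindings m) <= 6)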

@ -0,0 +1,60 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
module F = Format
module type Config = sig
val limit : int
(** the maximum number [N] of bindings to keep around *)
end
(** A functional map interface where only the [N] most recently-accessed elements are guaranteed to
be persisted, similarly to an LRU cache. The map stores at most [2*N] elements.
Operations on the map have the same asymptotic complexity as {!Map.Make}. *)
module type S = sig
type t [@@deriving compare]
type key
type value
val equal : (value -> value -> bool) -> t -> t -> bool
val pp : F.formatter -> t -> unit
val empty : t
val add : key -> value -> t -> t
val find_opt : key -> t -> value option
val fold : t -> init:'acc -> f:('acc -> key -> value -> 'acc) -> 'acc
val fold_bindings : t -> init:'acc -> f:('acc -> key * value -> 'acc) -> 'acc
(** convenience function based on [fold] *)
val fold_map : t -> init:'acc -> f:('acc -> value -> 'acc * value) -> 'acc * t
val is_empty : t -> bool
val bindings : t -> (key * value) list
val union : f:(key -> value -> value -> value option) -> t -> t -> t
(** same caveat as {!merge} *)
val merge : f:(key -> value option -> value option -> value option) -> t -> t -> t
(** if the maps passed as arguments disagree over which keys are the most recent and there are
more than [2*N] different keys between the two maps then some bindings will be arbitrarily
lost *)
end
module Make
(Key : PrettyPrintable.PrintableOrderedType)
(Value : PrettyPrintable.PrintableOrderedType)
(Config : Config) : S with type key = Key.t and type value = Value.t
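
A second hypothetical sketch (also not part of this diff) of the merge/union caveat documented above, reusing the ad-hoc [IntKey] module from the previous sketch with a limit of 2: when the two inputs disagree on which keys are recent, the result keeps at most 2*limit bindings, so older keys can be dropped.

module M2 = RecencyMap.Make (IntKey) (IntKey) (struct
  let limit = 2
end)

let () =
  let of_list xs = List.fold xs ~init:M2.empty ~f:(fun m i -> M2.add i i m) in
  (* each input remembers its 2 most recent keys (3, 4 and 7, 8) plus up to 2 older ones *)
  let map1 = of_list [1; 2; 3; 4] and map2 = of_list [5; 6; 7; 8] in
  let merged = M2.union map1 map2 ~f:(fun _key v _ -> Some v) in
  (* the recent keys of both inputs fit (2 in the new generation, 2 spilled to the old one);
     the older keys 1, 2, 5, 6 no longer fit and are lost, as the caveat describes *)
  assert (List.for_all [3; 4; 7; 8] ~f:(fun i -> Option.is_some (M2.find_opt i merged))) ;
  assert (List.length (M2.bindings merged) <= 4)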

@ -231,9 +231,8 @@ end = struct
and visit_edges orig_var ~f rev_accesses ~edges astate visited_accum =
let finish visited_accum = Continue visited_accum in
Container.fold_until edges
~fold:(IContainer.fold_of_pervasives_map_fold ~fold:Memory.Edges.fold)
~finish ~init:visited_accum ~f:(fun visited_accum (access, (address, _trace)) ->
Container.fold_until edges ~fold:Memory.Edges.fold_bindings ~finish ~init:visited_accum
~f:(fun visited_accum (access, (address, _trace)) ->
match visit_address orig_var ~f (access :: rev_accesses) astate address visited_accum with
| Continue _ as cont ->
cont

@ -4,6 +4,7 @@
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
open PulseBasicInterface
@ -26,7 +27,12 @@ module AddrTrace = struct
else AbstractValue.pp fmt (fst addr_trace)
end
module Edges = PrettyPrintable.MakePPMonoMap (Access) (AddrTrace)
module Edges =
RecencyMap.Make (Access) (AddrTrace)
(struct
let limit = Config.pulse_recency_limit
end)
module Graph = PrettyPrintable.MakePPMonoMap (AbstractValue) (Edges)
let register_address addr memory =

@ -4,6 +4,7 @@
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
open PulseBasicInterface
@ -17,7 +18,7 @@ module AddrTrace : sig
type t = AbstractValue.t * ValueHistory.t
end
module Edges : PrettyPrintable.PPMonoMap with type key = Access.t and type value = AddrTrace.t
module Edges : RecencyMap.S with type key = Access.t and type value = AddrTrace.t
include PrettyPrintable.PPMonoMap with type key = AbstractValue.t and type value = Edges.t

@ -157,8 +157,8 @@ let rec materialize_pre_from_address callee_proc_name call_location ~pre ~addr_p
| None ->
Ok call_state
| Some edges_pre ->
Container.fold_result ~fold:(IContainer.fold_of_pervasives_map_fold ~fold:Memory.Edges.fold)
~init:call_state edges_pre ~f:(fun call_state (access, (addr_pre_dest, _)) ->
Container.fold_result ~fold:Memory.Edges.fold_bindings ~init:call_state edges_pre
~f:(fun call_state (access, (addr_pre_dest, _)) ->
let astate, addr_hist_dest_caller =
Memory.eval_edge addr_hist_caller access call_state.astate
in
@ -385,15 +385,13 @@ let delete_edges_in_callee_pre_from_caller ~addr_callee:_ ~edges_pre_opt ~addr_c
| None ->
old_post_edges
| Some edges_pre ->
BaseMemory.Edges.merge
(fun _access old_opt pre_opt ->
BaseMemory.Edges.merge old_post_edges edges_pre ~f:(fun _access old_opt pre_opt ->
(* TODO: should apply [call_state.subst] to [_access]! Actually, should rewrite the
whole [cell_pre] beforehand so that [Edges.merge] makes sense. *)
if Option.is_some pre_opt then
(* delete edge if some edge for the same access exists in the pre *)
None
else (* keep old edge if it exists *) old_opt )
old_post_edges edges_pre )
else (* keep old edge if it exists *) old_opt ) )
let record_post_cell callee_proc_name call_loc ~addr_callee ~edges_pre_opt
@ -423,9 +421,8 @@ let record_post_cell callee_proc_name call_loc ~addr_callee ~edges_pre_opt
delete_edges_in_callee_pre_from_caller ~addr_callee ~edges_pre_opt ~addr_caller call_state
in
let edges_post_caller =
BaseMemory.Edges.union
(fun _ _ post_cell -> Some post_cell)
post_edges_minus_pre translated_post_edges
BaseMemory.Edges.union post_edges_minus_pre translated_post_edges ~f:(fun _ _ post_cell ->
Some post_cell )
in
AbductiveDomain.set_post_edges addr_caller edges_post_caller call_state.astate
in
@ -450,8 +447,8 @@ let rec record_post_for_address callee_proc_name call_loc ({AbductiveDomain.pre;
record_post_cell callee_proc_name call_loc ~addr_callee ~edges_pre_opt ~addr_hist_caller
~cell_callee_post call_state
in
IContainer.fold_of_pervasives_map_fold ~fold:Memory.Edges.fold ~init:call_state_after_post
edges_post ~f:(fun call_state (_access, (addr_callee_dest, _)) ->
Memory.Edges.fold ~init:call_state_after_post edges_post
~f:(fun call_state _access (addr_callee_dest, _) ->
let call_state, addr_hist_curr_dest =
call_state_subst_find_or_new call_state addr_callee_dest
~default_hist_caller:(snd addr_hist_caller)

@ -46,11 +46,9 @@ module Closures = struct
let mk_capture_edges captured =
let fake_fields =
List.rev_mapi captured ~f:(fun id captured_addr_trace ->
(HilExp.Access.FieldAccess (mk_fake_field ~id), captured_addr_trace) )
in
Memory.Edges.of_seq (Caml.List.to_seq fake_fields)
List.foldi captured ~init:Memory.Edges.empty ~f:(fun id edges captured_addr_trace ->
Memory.Edges.add (HilExp.Access.FieldAccess (mk_fake_field ~id)) captured_addr_trace edges
)
let check_captured_addresses action lambda_addr (astate : t) =
@ -61,8 +59,7 @@ module Closures = struct
let+ () =
IContainer.iter_result ~fold:Attributes.fold attributes ~f:(function
| Attribute.Closure _ ->
IContainer.iter_result
~fold:(IContainer.fold_of_pervasives_map_fold ~fold:Memory.Edges.fold) edges
IContainer.iter_result ~fold:Memory.Edges.fold_bindings edges
~f:(fun (access, addr_trace) ->
if is_captured_fake_access access then
let+ _ = check_addr_access action addr_trace astate in
@ -241,14 +238,12 @@ let invalidate_array_elements location cause addr_trace astate =
| None ->
astate
| Some edges ->
Memory.Edges.fold
(fun access dest_addr_trace astate ->
Memory.Edges.fold edges ~init:astate ~f:(fun astate access dest_addr_trace ->
match (access : Memory.Access.t) with
| ArrayAccess _ ->
AddressAttributes.invalidate dest_addr_trace cause location astate
| _ ->
astate )
edges astate
let shallow_copy location addr_hist astate =

@ -2,3 +2,4 @@ codetoanalyze/c/pulse/memory_leak.c, malloc_interproc_no_free_bad, 0, PULSE_MEMO
codetoanalyze/c/pulse/memory_leak.c, malloc_interproc_no_free_bad2, 4, PULSE_MEMORY_LEAK, no_bucket, ERROR, [allocation part of the trace starts here,allocated by call to `malloc` (modelled),allocation part of the trace ends here,memory becomes unreachable here]
codetoanalyze/c/pulse/memory_leak.c, malloc_no_free_bad, 0, PULSE_MEMORY_LEAK, no_bucket, ERROR, [allocation part of the trace starts here,allocated by call to `malloc` (modelled),allocation part of the trace ends here,memory becomes unreachable here]
codetoanalyze/c/pulse/nullptr.c, malloc_no_check_bad, 2, NULLPTR_DEREFERENCE, no_bucket, ERROR, [invalidation part of the trace starts here,allocated by call to `malloc` (modelled),is the null pointer,use-after-lifetime part of the trace starts here,allocated by call to `malloc` (modelled),assigned,invalid access occurs here]
codetoanalyze/c/pulse/nullptr.c, nullptr_deref_young_bad, 5, NULLPTR_DEREFERENCE, no_bucket, ERROR, [invalidation part of the trace starts here,assigned,is the null pointer,use-after-lifetime part of the trace starts here,assigned,invalid access occurs here]

@ -8,7 +8,7 @@
#include <stdlib.h>
int* malloc_no_check_bad() {
int* p = malloc(sizeof(int));
int* p = (int*)malloc(sizeof(int));
*p = 42;
return p;
}
@ -37,7 +37,7 @@ void create_null_path2_ok(int* p) {
// combine several of the difficulties above
void malloc_then_call_create_null_path_then_deref_unconditionally_ok(int* p) {
int* x = malloc(sizeof(int));
int* x = (int*)malloc(sizeof(int));
if (p) {
*p = 32;
}
@ -45,3 +45,23 @@ void malloc_then_call_create_null_path_then_deref_unconditionally_ok(int* p) {
*p = 52;
free(x);
}
// pulse should remember the value of vec[64] because it was just written to
void nullptr_deref_young_bad(int* x) {
int* vec[65] = {x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, NULL};
int p = *vec[64];
}
// due to the recency model of memory accesses, vec[0] can get forgotten
// by the time we have processed the last element of the
// initialization so we don't report here
void nullptr_deref_old_bad_FP(int* x) {
int* vec[65] = {NULL, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x};
int p = *vec[0];
}
