[pulse] recency model for memory accesses

Summary:
Add a new data structure and use it for the map of memory accesses to
limit the number of destinations reachable from a given address. This
avoids remembering details of each index in large arrays, or even each
field in large structs.

Reviewed By: skcho

Differential Revision: D18246091

fbshipit-source-id: 5d3974d9c
Jules Villard, committed by Facebook GitHub Bot
parent d01ada643d
commit 5c453393ff
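
A minimal standalone sketch of the recency scheme described in the summary, assuming only the OCaml standard library; the module and field names here are illustrative and deliberately simpler than the RecencyMap functor added by this diff (no functors, int keys only):

(* writes go to [new_]; once [new_] holds [limit] keys, drop [old] wholesale and
   demote [new_] to [old], an O(1) cleanup that keeps the most recent bindings *)
module IntMap = Map.Make (Int)

type 'v recency = {count_new: int; new_: 'v IntMap.t; old: 'v IntMap.t}

let empty = {count_new= 0; new_= IntMap.empty; old= IntMap.empty}

let add ~limit key value m =
  let count_new = if IntMap.mem key m.new_ then m.count_new else m.count_new + 1 in
  if count_new > limit then {count_new= 1; new_= IntMap.singleton key value; old= m.new_}
  else {count_new; new_= IntMap.add key value m.new_; old= m.old}

(* reads consult the recent generation first, then fall back to the old one *)
let find_opt key m =
  match IntMap.find_opt key m.new_ with
  | Some _ as v -> v
  | None -> IntMap.find_opt key m.old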

@ -1608,6 +1608,10 @@ INTERNAL OPTIONS
--pulse-model-free-pattern-reset
Cancel the effect of --pulse-model-free-pattern.
--pulse-recency-limit int
Maximum number of array elements and structure fields to keep
track of for a given address.
--pulse-widen-threshold int
Under-approximate after int loop iterations

@ -1754,6 +1754,12 @@ and project_root =
~meta:"dir" "Specify the root directory of the project"
and pulse_recency_limit =
CLOpt.mk_int ~long:"pulse-recency-limit" ~default:32
"Maximum number of array elements and structure fields to keep track of for a given array \
address."
and pulse_intraprocedural_only =
CLOpt.mk_bool ~long:"pulse-intraprocedural-only"
"Disable inter-procedural analysis in Pulse. Used for experimentations only."
@ -2849,6 +2855,8 @@ and progress_bar =
and project_root = !project_root
and pulse_recency_limit = !pulse_recency_limit
and pulse_intraprocedural_only = !pulse_intraprocedural_only
and pulse_max_disjuncts = !pulse_max_disjuncts

@ -443,6 +443,8 @@ val progress_bar : [`MultiLine | `Plain | `Quiet]
val project_root : string
val pulse_recency_limit : int
val pulse_intraprocedural_only : bool
val pulse_max_disjuncts : int

@ -106,9 +106,8 @@ let get_modified_params pname post_stack pre_heap post formals =
| Some (addr, _) when Typ.is_pointer typ -> (
match BaseMemory.find_opt addr pre_heap with
| Some edges_pre ->
BaseMemory.Edges.fold
(fun access (addr, _) acc -> add_to_modified ~var ~access ~addr pre_heap post acc)
edges_pre acc
BaseMemory.Edges.fold edges_pre ~init:acc ~f:(fun acc access (addr, _) ->
add_to_modified ~var ~access ~addr pre_heap post acc )
| None ->
debug "The address is not materialized in in pre-heap." ;
acc )

@ -205,6 +205,8 @@ module MakePPMonoMap (Ord : PrintableOrderedType) (Val : PrintableType) =
module type PrintableRankedType = sig
include PrintableType
val compare : t -> t -> int
val equal : t -> t -> bool
val to_rank : t -> int

@ -155,6 +155,8 @@ module MakePPMonoMap (Ord : PrintableOrderedType) (Val : PrintableType) :
module type PrintableRankedType = sig
include PrintableType
val compare : t -> t -> int
val equal : t -> t -> bool
val to_rank : t -> int

@ -0,0 +1,204 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
module F = Format
module type Config = sig
val limit : int
end
module type S = sig
type t [@@deriving compare]
type key
type value
val equal : (value -> value -> bool) -> t -> t -> bool
val pp : F.formatter -> t -> unit
val empty : t
val add : key -> value -> t -> t
val find_opt : key -> t -> value option
val fold : t -> init:'acc -> f:('acc -> key -> value -> 'acc) -> 'acc
val fold_bindings : t -> init:'acc -> f:('acc -> key * value -> 'acc) -> 'acc
val fold_map : t -> init:'acc -> f:('acc -> value -> 'acc * value) -> 'acc * t
val is_empty : t -> bool
val bindings : t -> (key * value) list
val union : f:(key -> value -> value -> value option) -> t -> t -> t
val merge : f:(key -> value option -> value option -> value option) -> t -> t -> t
end
module Make
(Key : PrettyPrintable.PrintableOrderedType)
(Value : PrettyPrintable.PrintableOrderedType)
(Config : Config) : S with type key = Key.t and type value = Value.t = struct
type key = Key.t
type value = Value.t [@@deriving compare]
module M = Caml.Map.Make (Key)
(** [new_] and [old] together represent the map. Keys may be present in both [old] and [new_], in
which case bindings in [new_] take precedence.
The idea is to add to [new_] whenever an element is modified. When the size of [new_] passes
the limit, throw away [old] and move [new_] to [old] (an O(1) cleanup operation). This way the
[Config.limit] most recently modified elements will always be in the map, but we may
temporarily have up to [2*Config.limit] elements in memory.
Using [Caml.Map]s as our backing data-structure allows us to avoid having to scan the [new_]
collection before adding an element in order to avoid adding it multiple times (which would
force us to either go over the [2*Config.limit] max number of elements or risk us losing some
of the [N] most recently-used keys because of repeating keys). *)
type t =
{ count_new: int (** invariant: [count_new] is [M.length new_] *)
; new_: value M.t (** invariant: [M.length new_ ≤ Config.limit] *)
; old: value M.t
(** invariant: [M.length old ≤ Config.limit]. Actually, the length of [old] is always
either [0] or [N], except possibly after a call to [merge]. *) }
[@@deriving compare]
let empty = {count_new= 0; old= M.empty; new_= M.empty}
let edges map =
M.union
(fun _addr edge_new _edge_old -> (* bindings in [new_] take precedence *) Some edge_new)
map.new_ map.old
(** {2 External API} *)
let is_empty map = map.count_new = 0 && M.is_empty map.old
let find_opt key map =
match M.find_opt key map.new_ with
| Some _ as value_yes ->
value_yes
| None ->
(* Ideally we would want to put the binding in [new_] to reflect that it was recently
accessed but that would imply returning the new map, which is odd for a [find_opt]
function. Could be done in the future. *)
M.find_opt key map.old
let add key value map =
(* compute the final count first *)
let next_count_new = if M.mem key map.new_ then map.count_new else map.count_new + 1 in
(* shrink if we are about to go past the limit size of [new_] *)
if next_count_new > Config.limit then {count_new= 1; new_= M.singleton key value; old= map.new_}
else {count_new= next_count_new; new_= M.add key value map.new_; old= map.old}
let equal equal_value map1 map2 =
let edges1 = edges map1 in
let edges2 = edges map2 in
M.equal equal_value edges1 edges2
let fold map ~init ~f =
let acc = M.fold (fun key value acc -> f acc key value) map.new_ init in
M.fold (fun key value acc -> if M.mem key map.new_ then acc else f acc key value) map.old acc
let fold_bindings map ~init ~f = fold map ~init ~f:(fun acc k v -> f acc (k, v))
let pp fmt map =
let pp_item fmt (k, v) = F.fprintf fmt "%a -> %a" Key.pp k Value.pp v in
IContainer.pp_collection ~pp_item fmt map ~fold:fold_bindings
let map m ~f =
let new_ = M.map f m.new_ in
let old = M.mapi (fun key value -> if M.mem key m.new_ then value else f value) m.old in
{m with new_; old}
let fold_map m ~init ~f =
let acc_ref = ref init in
let m' =
map m ~f:(fun value ->
let acc, value' = f !acc_ref value in
acc_ref := acc ;
value' )
in
(!acc_ref, m')
let bindings map = M.bindings (edges map)
(** [merge ~f map1 map2] has at least all the elements [M.merge f map1.new_ map2.new_] but if
there are more than [N] then some will be arbitrarily demoted to [old]. Elements of
[M.merge f map1.old map2.old] may not make it into the final map if [old] then overflows too.
In other words: merge new first, overflow arbitrarily into old, then overflow old arbitrarily
into nothingness. *)
let merge ~f map1 map2 =
let count_new = ref 0 in
let count_old = ref 0 in
let old = ref M.empty in
let new_ =
M.merge
(fun addr val1_opt val2_opt ->
match f addr val1_opt val2_opt with
| None ->
None
| Some value as value_opt ->
incr count_new ;
if !count_new > Config.limit then (
incr count_old ;
old := M.add addr value !old ;
None )
else value_opt )
map1.new_ map2.new_
in
(* save [old] as it was after having added just the [new_] components *)
let old_from_new_ = !old in
let old =
M.merge
(fun addr val1_opt val2_opt ->
(* Check if we have already seen [addr] when merging the [new_] maps. As bindings in
[new_] override those in [old] we don't want to add it again in case it was spilled
into [old_from_new_], but it's fine to add it again if it made it to [new_].
Also if [!old] is already full then we cannot add any more elements so return [None].
*)
if !count_old >= Config.limit || M.mem addr old_from_new_ then None
else
match f addr val1_opt val2_opt with
| None ->
None
| Some _ as value_opt ->
incr count_old ; value_opt )
map1.old map2.old
in
{count_new= !count_new; old; new_}
(** standard-looking implementation of [union] based on [merge] *)
let union ~f map1 map2 =
merge
~f:(fun addr val1_opt val2_opt ->
match (val1_opt, val2_opt) with
| None, None ->
None
| (Some _ as val_), None | None, (Some _ as val_) ->
val_
| Some val1, Some val2 ->
f addr val1 val2 )
map1 map2
end
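
A hypothetical usage sketch (not part of this diff) exercising the module above: it instantiates the functor with a small limit to show the retention guarantee. [IntKey] is an ad-hoc key/value module assumed to satisfy PrettyPrintable.PrintableOrderedType, and the Core-style List/Option calls assume the usual [open! IStd] environment.

module IntKey = struct
  type t = int [@@deriving compare]

  let pp = Format.pp_print_int
end

module M3 = RecencyMap.Make (IntKey) (IntKey) (struct
  let limit = 3
end)

let () =
  (* add keys 1..7 in order: the 3 most recently added keys (5, 6, 7) are guaranteed to
     still be in the map, and at most 2*3 bindings are kept overall *)
  let m = List.fold [1; 2; 3; 4; 5; 6; 7] ~init:M3.empty ~f:(fun m i -> M3.add i i m) in
  assert (List.for_all [5; 6; 7] ~f:(fun i -> Option.is_some (M3.find_opt i m))) ;
  assert (List.length (M3.bindings m) <= 6)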

@ -0,0 +1,60 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
module F = Format
module type Config = sig
val limit : int
(** the maximum number [N] of bindings to keep around *)
end
(** A functional map interface where only the [N] most recently-accessed elements are guaranteed to
be persisted, similarly to an LRU cache. The map stores at most [2*N] elements.
Operations on the map have the same asymptotic complexity as {!Map.Make}. *)
module type S = sig
type t [@@deriving compare]
type key
type value
val equal : (value -> value -> bool) -> t -> t -> bool
val pp : F.formatter -> t -> unit
val empty : t
val add : key -> value -> t -> t
val find_opt : key -> t -> value option
val fold : t -> init:'acc -> f:('acc -> key -> value -> 'acc) -> 'acc
val fold_bindings : t -> init:'acc -> f:('acc -> key * value -> 'acc) -> 'acc
(** convenience function based on [fold] *)
val fold_map : t -> init:'acc -> f:('acc -> value -> 'acc * value) -> 'acc * t
val is_empty : t -> bool
val bindings : t -> (key * value) list
val union : f:(key -> value -> value -> value option) -> t -> t -> t
(** same caveat as {!merge} *)
val merge : f:(key -> value option -> value option -> value option) -> t -> t -> t
(** if the maps passed as arguments disagree over which keys are the most recent and there are
more than [2*N] different keys between the two maps then some bindings will be arbitrarily
lost *)
end
module Make
(Key : PrettyPrintable.PrintableOrderedType)
(Value : PrettyPrintable.PrintableOrderedType)
(Config : Config) : S with type key = Key.t and type value = Value.t
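
A second hypothetical sketch (also not part of this diff) of the merge/union caveat documented above, reusing the ad-hoc [IntKey] module from the previous sketch with a limit of 2: when the two inputs disagree on which keys are recent, the result keeps at most 2*limit bindings, so older keys can be dropped.

module M2 = RecencyMap.Make (IntKey) (IntKey) (struct
  let limit = 2
end)

let () =
  let of_list xs = List.fold xs ~init:M2.empty ~f:(fun m i -> M2.add i i m) in
  (* each input remembers its 2 most recent keys (3, 4 and 7, 8) plus up to 2 older ones *)
  let map1 = of_list [1; 2; 3; 4] and map2 = of_list [5; 6; 7; 8] in
  let merged = M2.union map1 map2 ~f:(fun _key v _ -> Some v) in
  (* the recent keys of both inputs fit (2 in the new generation, 2 spilled to the old one);
     the older keys 1, 2, 5, 6 no longer fit and are lost, as the caveat describes *)
  assert (List.for_all [3; 4; 7; 8] ~f:(fun i -> Option.is_some (M2.find_opt i merged))) ;
  assert (List.length (M2.bindings merged) <= 4)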

@ -231,9 +231,8 @@ end = struct
and visit_edges orig_var ~f rev_accesses ~edges astate visited_accum =
let finish visited_accum = Continue visited_accum in
Container.fold_until edges
~fold:(IContainer.fold_of_pervasives_map_fold ~fold:Memory.Edges.fold)
~finish ~init:visited_accum ~f:(fun visited_accum (access, (address, _trace)) ->
Container.fold_until edges ~fold:Memory.Edges.fold_bindings ~finish ~init:visited_accum
~f:(fun visited_accum (access, (address, _trace)) ->
match visit_address orig_var ~f (access :: rev_accesses) astate address visited_accum with
| Continue _ as cont ->
cont

@ -4,6 +4,7 @@
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
open PulseBasicInterface
@ -26,7 +27,12 @@ module AddrTrace = struct
else AbstractValue.pp fmt (fst addr_trace)
end
module Edges = PrettyPrintable.MakePPMonoMap (Access) (AddrTrace)
module Edges =
RecencyMap.Make (Access) (AddrTrace)
(struct
let limit = Config.pulse_recency_limit
end)
module Graph = PrettyPrintable.MakePPMonoMap (AbstractValue) (Edges)
let register_address addr memory =

@ -4,6 +4,7 @@
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
open PulseBasicInterface
@ -17,7 +18,7 @@ module AddrTrace : sig
type t = AbstractValue.t * ValueHistory.t
end
module Edges : PrettyPrintable.PPMonoMap with type key = Access.t and type value = AddrTrace.t
module Edges : RecencyMap.S with type key = Access.t and type value = AddrTrace.t
include PrettyPrintable.PPMonoMap with type key = AbstractValue.t and type value = Edges.t

@ -157,8 +157,8 @@ let rec materialize_pre_from_address callee_proc_name call_location ~pre ~addr_p
| None ->
Ok call_state
| Some edges_pre ->
Container.fold_result ~fold:(IContainer.fold_of_pervasives_map_fold ~fold:Memory.Edges.fold)
~init:call_state edges_pre ~f:(fun call_state (access, (addr_pre_dest, _)) ->
Container.fold_result ~fold:Memory.Edges.fold_bindings ~init:call_state edges_pre
~f:(fun call_state (access, (addr_pre_dest, _)) ->
let astate, addr_hist_dest_caller =
Memory.eval_edge addr_hist_caller access call_state.astate
in
@ -385,15 +385,13 @@ let delete_edges_in_callee_pre_from_caller ~addr_callee:_ ~edges_pre_opt ~addr_c
| None ->
old_post_edges
| Some edges_pre ->
BaseMemory.Edges.merge
(fun _access old_opt pre_opt ->
BaseMemory.Edges.merge old_post_edges edges_pre ~f:(fun _access old_opt pre_opt ->
(* TODO: should apply [call_state.subst] to [_access]! Actually, should rewrite the
whole [cell_pre] beforehand so that [Edges.merge] makes sense. *)
if Option.is_some pre_opt then
(* delete edge if some edge for the same access exists in the pre *)
None
else (* keep old edge if it exists *) old_opt )
old_post_edges edges_pre )
else (* keep old edge if it exists *) old_opt ) )
let record_post_cell callee_proc_name call_loc ~addr_callee ~edges_pre_opt
@ -423,9 +421,8 @@ let record_post_cell callee_proc_name call_loc ~addr_callee ~edges_pre_opt
delete_edges_in_callee_pre_from_caller ~addr_callee ~edges_pre_opt ~addr_caller call_state
in
let edges_post_caller =
BaseMemory.Edges.union
(fun _ _ post_cell -> Some post_cell)
post_edges_minus_pre translated_post_edges
BaseMemory.Edges.union post_edges_minus_pre translated_post_edges ~f:(fun _ _ post_cell ->
Some post_cell )
in
AbductiveDomain.set_post_edges addr_caller edges_post_caller call_state.astate
in
@ -450,8 +447,8 @@ let rec record_post_for_address callee_proc_name call_loc ({AbductiveDomain.pre;
record_post_cell callee_proc_name call_loc ~addr_callee ~edges_pre_opt ~addr_hist_caller
~cell_callee_post call_state
in
IContainer.fold_of_pervasives_map_fold ~fold:Memory.Edges.fold ~init:call_state_after_post
edges_post ~f:(fun call_state (_access, (addr_callee_dest, _)) ->
Memory.Edges.fold ~init:call_state_after_post edges_post
~f:(fun call_state _access (addr_callee_dest, _) ->
let call_state, addr_hist_curr_dest =
call_state_subst_find_or_new call_state addr_callee_dest
~default_hist_caller:(snd addr_hist_caller)

@ -46,11 +46,9 @@ module Closures = struct
let mk_capture_edges captured =
let fake_fields =
List.rev_mapi captured ~f:(fun id captured_addr_trace ->
(HilExp.Access.FieldAccess (mk_fake_field ~id), captured_addr_trace) )
in
Memory.Edges.of_seq (Caml.List.to_seq fake_fields)
List.foldi captured ~init:Memory.Edges.empty ~f:(fun id edges captured_addr_trace ->
Memory.Edges.add (HilExp.Access.FieldAccess (mk_fake_field ~id)) captured_addr_trace edges
)
let check_captured_addresses action lambda_addr (astate : t) =
@ -61,8 +59,7 @@ module Closures = struct
let+ () =
IContainer.iter_result ~fold:Attributes.fold attributes ~f:(function
| Attribute.Closure _ ->
IContainer.iter_result
~fold:(IContainer.fold_of_pervasives_map_fold ~fold:Memory.Edges.fold) edges
IContainer.iter_result ~fold:Memory.Edges.fold_bindings edges
~f:(fun (access, addr_trace) ->
if is_captured_fake_access access then
let+ _ = check_addr_access action addr_trace astate in
@ -241,14 +238,12 @@ let invalidate_array_elements location cause addr_trace astate =
| None ->
astate
| Some edges ->
Memory.Edges.fold
(fun access dest_addr_trace astate ->
Memory.Edges.fold edges ~init:astate ~f:(fun astate access dest_addr_trace ->
match (access : Memory.Access.t) with
| ArrayAccess _ ->
AddressAttributes.invalidate dest_addr_trace cause location astate
| _ ->
astate )
edges astate
let shallow_copy location addr_hist astate =

@ -2,3 +2,4 @@ codetoanalyze/c/pulse/memory_leak.c, malloc_interproc_no_free_bad, 0, PULSE_MEMO
codetoanalyze/c/pulse/memory_leak.c, malloc_interproc_no_free_bad2, 4, PULSE_MEMORY_LEAK, no_bucket, ERROR, [allocation part of the trace starts here,allocated by call to `malloc` (modelled),allocation part of the trace ends here,memory becomes unreachable here]
codetoanalyze/c/pulse/memory_leak.c, malloc_no_free_bad, 0, PULSE_MEMORY_LEAK, no_bucket, ERROR, [allocation part of the trace starts here,allocated by call to `malloc` (modelled),allocation part of the trace ends here,memory becomes unreachable here]
codetoanalyze/c/pulse/nullptr.c, malloc_no_check_bad, 2, NULLPTR_DEREFERENCE, no_bucket, ERROR, [invalidation part of the trace starts here,allocated by call to `malloc` (modelled),is the null pointer,use-after-lifetime part of the trace starts here,allocated by call to `malloc` (modelled),assigned,invalid access occurs here]
codetoanalyze/c/pulse/nullptr.c, nullptr_deref_young_bad, 5, NULLPTR_DEREFERENCE, no_bucket, ERROR, [invalidation part of the trace starts here,assigned,is the null pointer,use-after-lifetime part of the trace starts here,assigned,invalid access occurs here]

@ -8,7 +8,7 @@
#include <stdlib.h>
int* malloc_no_check_bad() {
int* p = malloc(sizeof(int));
int* p = (int*)malloc(sizeof(int));
*p = 42;
return p;
}
@ -37,7 +37,7 @@ void create_null_path2_ok(int* p) {
// combine several of the difficulties above
void malloc_then_call_create_null_path_then_deref_unconditionally_ok(int* p) {
int* x = malloc(sizeof(int));
int* x = (int*)malloc(sizeof(int));
if (p) {
*p = 32;
}
@ -45,3 +45,23 @@ void malloc_then_call_create_null_path_then_deref_unconditionally_ok(int* p) {
*p = 52;
free(x);
}
// pulse should remember the value of vec[64] because it was just written to
void nullptr_deref_young_bad(int* x) {
int* vec[65] = {x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, NULL};
int p = *vec[64];
}
// due to the recency model of memory accesses, vec[0] can get forgotten
// by the time we have processed the last element of the
// initialization so we don't report here
void nullptr_deref_old_bad_FP(int* x) {
int* vec[65] = {NULL, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x};
int p = *vec[0];
}
