From 3220804ddbfd48d0ea43198d3f1af224d1a6da23 Mon Sep 17 00:00:00 2001 From: Jules Villard Date: Mon, 20 Apr 2020 11:00:06 -0700 Subject: [PATCH] [pulse] add a cache of constants to equate them Summary: When encountering a constant, pulse creates an abstract value (a variable) to represent it, and remembers that it's equal to it. The problem is that pulse doesn't yet know how to deal with the fact that some variables are going to be equal to each other. This hacks around this issue in the case of constants, within the same procedure, by remembering which constants have been assigned to which place-holder variables, and serving those variables again when the same constant is translated again. Limitation: this doesn't work across procedure calls as the "constant maps" are not saved in summaries. Something to look out for: we don't want to make `if (p == NULL)` create a path where `p` is invalid (we only make null invalid when we see an assignment from 0, i.e. `p = NULL;`). Reviewed By: ezgicicek Differential Revision: D21089961 fbshipit-source-id: 5ebb85d0a --- infer/src/backend/ondemand.ml | 6 +-- infer/src/pulse/Pulse.ml | 2 +- infer/src/pulse/PulseAbstractValue.ml | 46 ++++++++++++++---- infer/src/pulse/PulseAbstractValue.mli | 31 ++++++++---- infer/src/pulse/PulseModels.ml | 22 ++++----- infer/src/pulse/PulseOperations.ml | 9 ++-- infer/tests/codetoanalyze/c/pulse/issues.exp | 1 + infer/tests/codetoanalyze/c/pulse/nullptr.c | 47 +++++++++++++++++++ .../tests/codetoanalyze/cpp/pulse/issues.exp | 2 - .../codetoanalyze/cpp/pulse/memory_leak.cpp | 15 +++--- .../tests/codetoanalyze/cpp/pulse/values.cpp | 2 +- 11 files changed, 133 insertions(+), 50 deletions(-) create mode 100644 infer/tests/codetoanalyze/c/pulse/nullptr.c diff --git a/infer/src/backend/ondemand.ml b/infer/src/backend/ondemand.ml index 6d9916a82..51d65d3de 100644 --- a/infer/src/backend/ondemand.ml +++ b/infer/src/backend/ondemand.ml @@ -100,7 +100,7 @@ type global_state = ; name_generator: Ident.NameGenerator.t ; proc_analysis_time: (Mtime.Span.t * string) option (** the time elapsed doing [status] so far *) - ; pulse_address_generator: PulseAbstractValue.state + ; pulse_address_generator: PulseAbstractValue.State.t ; symexec_state: State.t } let save_global_state () = @@ -115,7 +115,7 @@ let save_global_state () = ; proc_analysis_time= Option.map !current_taskbar_status ~f:(fun (t0, status) -> (Mtime.span t0 (Mtime_clock.now ()), status) ) - ; pulse_address_generator= PulseAbstractValue.get_state () + ; pulse_address_generator= PulseAbstractValue.State.get () ; symexec_state= State.save_state () } @@ -126,7 +126,7 @@ let restore_global_state st = BiabductionConfig.footprint := st.footprint_mode ; Printer.curr_html_formatter := st.html_formatter ; Ident.NameGenerator.set_current st.name_generator ; - PulseAbstractValue.set_state st.pulse_address_generator ; + PulseAbstractValue.State.set st.pulse_address_generator ; State.restore_state st.symexec_state ; current_taskbar_status := Option.map st.proc_analysis_time ~f:(fun (suspended_span, status) -> diff --git a/infer/src/pulse/Pulse.ml b/infer/src/pulse/Pulse.ml index da9f5c528..01ffc0a95 100644 --- a/infer/src/pulse/Pulse.ml +++ b/infer/src/pulse/Pulse.ml @@ -230,7 +230,7 @@ let sledge_test_fmt = let checker {Callbacks.exe_env; summary} = let tenv = Exe_env.get_tenv exe_env (Summary.get_proc_name summary) in - AbstractValue.init () ; + AbstractValue.State.reset () ; let pdesc = Summary.get_proc_desc summary in let initial = DisjunctiveTransferFunctions.Disjuncts.singleton (ExecutionDomain.mk_initial pdesc) diff --git a/infer/src/pulse/PulseAbstractValue.ml b/infer/src/pulse/PulseAbstractValue.ml index 0f4e474b5..2ec946c94 100644 --- a/infer/src/pulse/PulseAbstractValue.ml +++ b/infer/src/pulse/PulseAbstractValue.ml @@ -11,7 +11,9 @@ type t = int [@@deriving compare] let equal = [%compare.equal: t] -let next_fresh = ref 1 +let initial_next_fresh = 1 + +let next_fresh = ref initial_next_fresh let mk_fresh () = let l = !next_fresh in @@ -20,16 +22,8 @@ let mk_fresh () = let pp f l = F.fprintf f "v%d" l -let init () = next_fresh := 1 - let of_id v = v -type state = int - -let get_state () = !next_fresh - -let set_state counter = next_fresh := counter - module PPKey = struct type nonrec t = t @@ -40,3 +34,37 @@ end module Set = PrettyPrintable.MakePPSet (PPKey) module Map = PrettyPrintable.MakePPMap (PPKey) + +module Constants = struct + module M = Caml.Map.Make (IntLit) + + type nonrec t = t M.t + + let initial_cache = M.empty + + let cache = ref initial_cache + + let get_int i = + match M.find_opt i !cache with + | Some v -> + v + | None -> + let v = mk_fresh () in + cache := M.add i v !cache ; + v +end + +module State = struct + type t = int * Constants.t + + let get () = (!next_fresh, !Constants.cache) + + let set (counter, cache) = + next_fresh := counter ; + Constants.cache := cache + + + let reset () = + next_fresh := initial_next_fresh ; + Constants.cache := Constants.initial_cache +end diff --git a/infer/src/pulse/PulseAbstractValue.mli b/infer/src/pulse/PulseAbstractValue.mli index 998105a8e..fbf204ec7 100644 --- a/infer/src/pulse/PulseAbstractValue.mli +++ b/infer/src/pulse/PulseAbstractValue.mli @@ -8,7 +8,6 @@ open! IStd module F = Format (** An abstract value, eg an address in memory. *) - type t = private int [@@deriving compare] val equal : t -> t -> bool @@ -17,16 +16,32 @@ val mk_fresh : unit -> t val pp : F.formatter -> t -> unit -val init : unit -> unit - val of_id : int -> t -type state - -val get_state : unit -> state - -val set_state : state -> unit +module Constants : sig + val get_int : IntLit.t -> t + (** Get or create an abstract value associated with a constant {!IntLit.t}. The idea is that + clients will record in the abstract state that the returned [t] is equal to the given integer. + If the same integer is queried later on then this module will return the same abstract + variable. *) +end module Set : PrettyPrintable.PPSet with type elt = t module Map : PrettyPrintable.PPMap with type key = t + +(** internal state of the module + + Under the hood a "next fresh" reference counter is maintained to produce fresh [t]. The + [Constants] module also remembers a mapping from certain constants to their corresponding [t]. + Both of these should be per-procedure only so internal state bookkeeping has to be performed by + the interprocedural analysis. *) +module State : sig + type t + + val get : unit -> t + + val set : t -> unit + + val reset : unit -> unit +end diff --git a/infer/src/pulse/PulseModels.ml b/infer/src/pulse/PulseModels.ml index 6c4bc01f9..3e6ddeca1 100644 --- a/infer/src/pulse/PulseModels.ml +++ b/infer/src/pulse/PulseModels.ml @@ -41,11 +41,9 @@ module Misc = struct let return_int : Int64.t -> model = fun i64 ~caller_summary:_ ~callee_procname:_ location ~ret:(ret_id, _) astate -> - let ret_addr = AbstractValue.mk_fresh () in - let astate = - let i = IntLit.of_int64 i64 in - PulseArithmetic.and_eq_int (Immediate {location; history= []}) ret_addr i astate - in + let i = IntLit.of_int64 i64 in + let ret_addr = AbstractValue.Constants.get_int i in + let astate = PulseArithmetic.and_eq_int (Immediate {location; history= []}) ret_addr i astate in PulseOperations.write_id ret_id (ret_addr, []) astate |> PulseOperations.ok_continue @@ -88,24 +86,22 @@ module C = struct let malloc _ : model = fun ~caller_summary:_ ~callee_procname location ~ret:(ret_id, _) astate -> - let ret_addr = AbstractValue.mk_fresh () in let hist = [ValueHistory.Allocation {f= Model (Procname.to_string callee_procname); location}] in + let immediate_hist = Trace.Immediate {location; history= hist} in + let ret_addr = AbstractValue.mk_fresh () in let ret_value = (ret_addr, hist) in let astate = PulseOperations.write_id ret_id ret_value astate in - let immediate_hist = Trace.Immediate {location; history= hist} in let astate_alloc = - PulseOperations.allocate callee_procname location ret_value astate - |> PulseArithmetic.and_positive immediate_hist ret_addr - |> ExecutionDomain.continue + PulseArithmetic.and_positive immediate_hist ret_addr astate + |> PulseOperations.allocate callee_procname location ret_value in let+ astate_null = PulseArithmetic.and_eq_int immediate_hist ret_addr IntLit.zero astate - |> PulseOperations.invalidate location (Invalidation.ConstantDereference IntLit.zero) - ret_value + |> PulseOperations.invalidate location (ConstantDereference IntLit.zero) ret_value in - [astate_alloc; ExecutionDomain.ContinueProgram astate_null] + [ExecutionDomain.ContinueProgram astate_alloc; ExecutionDomain.ContinueProgram astate_null] let malloc_not_null _ : model = diff --git a/infer/src/pulse/PulseOperations.ml b/infer/src/pulse/PulseOperations.ml index 9ed11e3f7..e755907e6 100644 --- a/infer/src/pulse/PulseOperations.ml +++ b/infer/src/pulse/PulseOperations.ml @@ -133,15 +133,14 @@ let eval location exp0 astate = | Cast (_, exp') -> eval exp' astate | Const (Cint i) -> - (* TODO: make identical const the same address *) - let addr = AbstractValue.mk_fresh () in + let v = AbstractValue.Constants.get_int i in let astate = - PulseArithmetic.and_eq_int (Immediate {location; history= []}) addr i astate + PulseArithmetic.and_eq_int (Immediate {location; history= []}) v i astate |> AddressAttributes.invalidate - (addr, [ValueHistory.Assignment location]) + (v, [ValueHistory.Assignment location]) (ConstantDereference i) location in - Ok (astate, (addr, [])) + Ok (astate, (v, [])) | UnOp (unop, exp, _typ) -> let+ astate, (addr, hist) = eval exp astate in PulseArithmetic.eval_unop location unop addr hist astate diff --git a/infer/tests/codetoanalyze/c/pulse/issues.exp b/infer/tests/codetoanalyze/c/pulse/issues.exp index 7877269a5..0c7366b4a 100644 --- a/infer/tests/codetoanalyze/c/pulse/issues.exp +++ b/infer/tests/codetoanalyze/c/pulse/issues.exp @@ -1,3 +1,4 @@ codetoanalyze/c/pulse/memory_leak.c, malloc_interproc_no_free_bad, 0, PULSE_MEMORY_LEAK, no_bucket, ERROR, [allocation part of the trace starts here,when calling `create_p` here,allocated by call to `malloc` (modelled),allocation part of the trace ends here,memory becomes unreachable here] codetoanalyze/c/pulse/memory_leak.c, malloc_interproc_no_free_bad2, 4, PULSE_MEMORY_LEAK, no_bucket, ERROR, [allocation part of the trace starts here,allocated by call to `malloc` (modelled),allocation part of the trace ends here,memory becomes unreachable here] codetoanalyze/c/pulse/memory_leak.c, malloc_no_free_bad, 0, PULSE_MEMORY_LEAK, no_bucket, ERROR, [allocation part of the trace starts here,allocated by call to `malloc` (modelled),allocation part of the trace ends here,memory becomes unreachable here] +codetoanalyze/c/pulse/nullptr.c, malloc_no_check_bad, 2, NULLPTR_DEREFERENCE, no_bucket, ERROR, [invalidation part of the trace starts here,allocated by call to `malloc` (modelled),is the null pointer,use-after-lifetime part of the trace starts here,allocated by call to `malloc` (modelled),assigned,invalid access occurs here] diff --git a/infer/tests/codetoanalyze/c/pulse/nullptr.c b/infer/tests/codetoanalyze/c/pulse/nullptr.c new file mode 100644 index 000000000..2359906cb --- /dev/null +++ b/infer/tests/codetoanalyze/c/pulse/nullptr.c @@ -0,0 +1,47 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +int* malloc_no_check_bad() { + int* p = malloc(sizeof(int)); + *p = 42; + return p; +} + +void create_null_path_ok(int* p) { + if (p) { + *p = 32; + } +} + +void call_create_null_path_then_deref_unconditionally_ok(int* p) { + create_null_path_ok(p); + *p = 52; +} + +void create_null_path2_ok(int* p) { + int* q = NULL; + if (p) { + *p = 32; + } + // arguably bogus to check p above but not here, but the above could + // also be macro-generated code so both reporting and not reporting + // are sort of justifiable + *p = 52; +} + +// combine several of the difficulties above +void malloc_then_call_create_null_path_then_deref_unconditionally_ok(int* p) { + int* x = malloc(sizeof(int)); + if (p) { + *p = 32; + } + create_null_path_ok(p); + *p = 52; + free(x); +} diff --git a/infer/tests/codetoanalyze/cpp/pulse/issues.exp b/infer/tests/codetoanalyze/cpp/pulse/issues.exp index 64fecf6f1..5a8206eb9 100644 --- a/infer/tests/codetoanalyze/cpp/pulse/issues.exp +++ b/infer/tests/codetoanalyze/cpp/pulse/issues.exp @@ -25,7 +25,6 @@ codetoanalyze/cpp/pulse/interprocedural.cpp, delete_then_write_bad, 2, USE_AFTER codetoanalyze/cpp/pulse/interprocedural.cpp, feed_invalid_into_access_bad, 2, USE_AFTER_DELETE, no_bucket, ERROR, [invalidation part of the trace starts here,when calling `may_return_invalid_ptr_ok` here,assigned,was invalidated by `delete`,use-after-lifetime part of the trace starts here,passed as argument to `may_return_invalid_ptr_ok`,return from call to `may_return_invalid_ptr_ok`,assigned,when calling `call_store` here,parameter `y` of call_store,when calling `store` here,parameter `y` of store,invalid access occurs here] codetoanalyze/cpp/pulse/join.cpp, invalidate_node_alias_bad, 12, USE_AFTER_DELETE, no_bucket, ERROR, [invalidation part of the trace starts here,parameter `head` of invalidate_node_alias_bad,assigned,assigned,was invalidated by `delete`,use-after-lifetime part of the trace starts here,parameter `head` of invalidate_node_alias_bad,assigned,assigned,invalid access occurs here] codetoanalyze/cpp/pulse/join.cpp, invalidate_node_alias_bad, 12, USE_AFTER_DELETE, no_bucket, ERROR, [invalidation part of the trace starts here,parameter `head` of invalidate_node_alias_bad,assigned,assigned,assigned,was invalidated by `delete`,use-after-lifetime part of the trace starts here,parameter `head` of invalidate_node_alias_bad,assigned,assigned,assigned,invalid access occurs here] -codetoanalyze/cpp/pulse/memory_leak.cpp, constant_index_no_leak_FP, 3, PULSE_MEMORY_LEAK, no_bucket, ERROR, [allocation part of the trace starts here,allocated by call to `malloc` (modelled),allocation part of the trace ends here,memory becomes unreachable here] codetoanalyze/cpp/pulse/nullptr.cpp, deref_nullptr_bad, 2, NULLPTR_DEREFERENCE, no_bucket, ERROR, [invalidation part of the trace starts here,assigned,is the null pointer,use-after-lifetime part of the trace starts here,assigned,invalid access occurs here] codetoanalyze/cpp/pulse/nullptr.cpp, no_check_return_bad, 2, NULLPTR_DEREFERENCE, no_bucket, ERROR, [invalidation part of the trace starts here,when calling `may_return_nullptr` here,assigned,is the null pointer,use-after-lifetime part of the trace starts here,passed as argument to `may_return_nullptr`,return from call to `may_return_nullptr`,assigned,invalid access occurs here] codetoanalyze/cpp/pulse/nullptr.cpp, std_false_type_deref_bad, 3, NULLPTR_DEREFERENCE, no_bucket, ERROR, [invalidation part of the trace starts here,assigned,is the null pointer,use-after-lifetime part of the trace starts here,assigned,invalid access occurs here] @@ -74,7 +73,6 @@ codetoanalyze/cpp/pulse/use_after_free.cpp, use_after_free_simple_bad, 2, USE_AF codetoanalyze/cpp/pulse/use_after_scope.cpp, access_out_of_scope_stack_ref_bad, 3, USE_AFTER_LIFETIME, no_bucket, ERROR, [invalidation part of the trace starts here,variable `p` declared here,when calling `invalidate_local_ok` here,variable `t` declared here,is the address of a stack variable `t` whose lifetime has ended,use-after-lifetime part of the trace starts here,variable `p` declared here,passed as argument to `invalidate_local_ok`,variable `t` declared here,assigned,return from call to `invalidate_local_ok`,invalid access occurs here] codetoanalyze/cpp/pulse/values.cpp, error_under_true_conditionals_bad, 5, USE_AFTER_FREE, no_bucket, ERROR, [invalidation part of the trace starts here,parameter `x` of error_under_true_conditionals_bad,was invalidated by call to `free()`,use-after-lifetime part of the trace starts here,parameter `x` of error_under_true_conditionals_bad,invalid access occurs here] codetoanalyze/cpp/pulse/values.cpp, free_if_deref_bad, 2, USE_AFTER_FREE, no_bucket, ERROR, [invalidation part of the trace starts here,parameter `x` of free_if_deref_bad,when calling `free_if` here,parameter `x` of free_if,was invalidated by call to `free()`,use-after-lifetime part of the trace starts here,parameter `x` of free_if_deref_bad,invalid access occurs here] -codetoanalyze/cpp/pulse/values.cpp, function_empty_range_interproc_ok_FP, 2, NULLPTR_DEREFERENCE, no_bucket, ERROR, [invalidation part of the trace starts here,variable `x` declared here,when calling `StringRange::StringRange` here,assigned,is the null pointer,use-after-lifetime part of the trace starts here,variable `x` declared here,passed as argument to `StringRange::StringRange`,assigned,return from call to `StringRange::StringRange`,when calling `find_first_non_space` here,parameter `x` of find_first_non_space,passed as argument to `StringRange::data`,return from call to `StringRange::data`,assigned,invalid access occurs here] codetoanalyze/cpp/pulse/vector.cpp, FP_init_fill_then_push_back_loop_ok, 6, VECTOR_INVALIDATION, no_bucket, ERROR, [invalidation part of the trace starts here,variable `vec` declared here,was potentially invalidated by `std::vector::push_back()`,use-after-lifetime part of the trace starts here,variable `vec` declared here,passed as argument to `std::vector::at()` (modelled),return from call to `std::vector::at()` (modelled),assigned,invalid access occurs here] codetoanalyze/cpp/pulse/vector.cpp, assign_bad, 3, VECTOR_INVALIDATION, no_bucket, ERROR, [invalidation part of the trace starts here,parameter `vec` of assign_bad,was potentially invalidated by `std::vector::assign()`,use-after-lifetime part of the trace starts here,parameter `vec` of assign_bad,passed as argument to `std::vector::at()` (modelled),return from call to `std::vector::at()` (modelled),assigned,invalid access occurs here] codetoanalyze/cpp/pulse/vector.cpp, clear_bad, 3, VECTOR_INVALIDATION, no_bucket, ERROR, [invalidation part of the trace starts here,parameter `vec` of clear_bad,was potentially invalidated by `std::vector::clear()`,use-after-lifetime part of the trace starts here,parameter `vec` of clear_bad,passed as argument to `std::vector::at()` (modelled),return from call to `std::vector::at()` (modelled),assigned,invalid access occurs here] diff --git a/infer/tests/codetoanalyze/cpp/pulse/memory_leak.cpp b/infer/tests/codetoanalyze/cpp/pulse/memory_leak.cpp index 578a47ef5..b494a7c24 100644 --- a/infer/tests/codetoanalyze/cpp/pulse/memory_leak.cpp +++ b/infer/tests/codetoanalyze/cpp/pulse/memory_leak.cpp @@ -4,24 +4,23 @@ * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ + #include -void malloc_free_no_leak(int count) { + +void malloc_free_ok(int count) { int* e = (int*)malloc(count); int* res[] = {e}; free(e); } -void constant_index_no_leak_FP(int count) { +void constant_index_ok(int count) { int* e = (int*)malloc(count); int* res[] = {e}; - free(res[0]); // pulse creates a new abstract value each time so we - // don't know that res[0] == e. Needs to be fixed. + free(res[0]); } -void symbolic_index_no_leak(int x, int** res) { +void symbolic_index_ok(int x, int** res) { int* e = (int*)malloc(10); res[x] = e; - free(res[x]); // it will know that res[x] == res[x], because the - // abstract value involved in both array accesses will - // be the same + free(res[x]); } diff --git a/infer/tests/codetoanalyze/cpp/pulse/values.cpp b/infer/tests/codetoanalyze/cpp/pulse/values.cpp index 2494917f0..3db75d310 100644 --- a/infer/tests/codetoanalyze/cpp/pulse/values.cpp +++ b/infer/tests/codetoanalyze/cpp/pulse/values.cpp @@ -111,7 +111,7 @@ void find_first_non_space(StringRange& x) { } } -void function_empty_range_interproc_ok_FP() { +void function_empty_range_interproc_ok() { StringRange x{}; find_first_non_space(x); }