From f1dbf2548d4c171a29677da0610d49d6b5a82b1c Mon Sep 17 00:00:00 2001 From: Josh Berdine Date: Fri, 4 Jun 2021 13:10:12 -0700 Subject: [PATCH] [sledge] Add scheduler strategy for random path sampling Summary: Add a mode which executes randomly-chosen paths depth-first, without duplicating the work of executing shared prefixes. This mode is to be used when hoping to get lucky and quickly choose a path that exhibits a bug, but is slower to achieve coverage than the default. Differential Revision: D27996963 fbshipit-source-id: 20a7a32c5 --- sledge/cli/sledge_cli.ml | 14 +++- sledge/sledge-help.txt | 4 + sledge/src/control.ml | 168 +++++++++++++++++++++++++++++++++++++-- sledge/src/control.mli | 1 + 4 files changed, 178 insertions(+), 9 deletions(-) diff --git a/sledge/cli/sledge_cli.ml b/sledge/cli/sledge_cli.ml index 727e3ac71..9ac6d176f 100644 --- a/sledge/cli/sledge_cli.ml +++ b/sledge/cli/sledge_cli.ml @@ -118,6 +118,10 @@ let analyze = " select abstract domain; must be one of \"sh\" (default, \ symbolic heap domain), \"globals\" (used-globals domain), or \ \"unit\" (unit domain)" + and sample = flag "sample" no_arg ~doc:" randomly sample execution paths" + and seed = + flag "seed" (optional int) + ~doc:" specify random number generator seed" and no_simplify_states = flag "no-simplify-states" no_arg ~doc:"do not simplify states during symbolic execution" @@ -144,10 +148,14 @@ let analyze = | `itv -> (module Domain_itv) | `unit -> (module Domain_unit) in - let module Dom = (val dom) in - let module Analysis = - Control.Make (Config) (Dom) (Control.PriorityQueue) + let module Domain = (val dom) in + let queue : (module Control.Queue) = + if sample then (module Control.RandomQueue) + else (module Control.PriorityQueue) in + let module Queue = (val queue) in + let module Analysis = Control.Make (Config) (Domain) (Queue) in + (match seed with None -> Random.self_init () | Some n -> Random.init n) ; Domain_sh.simplify_states := not no_simplify_states ; Option.iter dump_query ~f:(fun n -> Solver.dump_query := n) ; at_exit (fun () -> Report.coverage pgm) ; diff --git a/sledge/sledge-help.txt b/sledge/sledge-help.txt index 9823090bb..2c5c30bc0 100644 --- a/sledge/sledge-help.txt +++ b/sledge/sledge-help.txt @@ -123,6 +123,8 @@ Analyze code in one or more LLVM bitcode files. This is a convenience wrapper fo [-preanalyze-globals] pre-analyze global variables used by each function [-report ] write report sexp to , or to standard error if "-" + [-sample] randomly sample execution paths + [-seed ] specify random number generator seed [-stats] output performance statistics to stderr [-trace ] enable debug tracing [-help] print this help text and exit @@ -200,6 +202,8 @@ The file must be binary LLAIR, such as produced by `sledge translate`. [-preanalyze-globals] pre-analyze global variables used by each function [-report ] write report sexp to , or to standard error if "-" + [-sample] randomly sample execution paths + [-seed ] specify random number generator seed [-stats] output performance statistics to stderr [-trace ] enable debug tracing [-help] print this help text and exit diff --git a/sledge/src/control.ml b/sledge/src/control.ml index 0963f8872..85dd41a92 100644 --- a/sledge/src/control.ml +++ b/sledge/src/control.ml @@ -5,20 +5,25 @@ * LICENSE file in the root directory of this source tree. *) -(** Iterative Breadth-First Bounded Exploration - - The analysis' semantics of control flow. *) +(** The analysis' semantics of control flow. *) open Domain_intf open Control_intf +(** An element of work to be scheduled. The scheduling strategies have very + few dependencies on elements, mainly some just need to test if two + elements can be joined. An example element instance is a pair of a + control-flow edge with a symbolic state that has reached the source of + the edge and has yet to be propagated to the destination. *) module type Elt = sig type t [@@deriving compare, equal, sexp_of] val pp : t pp val joinable : t -> t -> bool + val dnf : t -> t list end +(** Interface of analysis control scheduler "queues". *) module type QueueS = sig type elt @@ -35,13 +40,16 @@ module type QueueS = sig val pop : t -> (elt * elt list * t) option (** [pop q] is [None] if [q] is empty and otherwise is [Some (e, es, q')] - where [e] is the selected element in [q], [es] are other elements in - [q] with the same destination as [e], and [q'] is [q] without [e] and - [es]. *) + where [e] is the selected element in [q], any elements [es] have the + same destination as [e], and [q'] is [q] without [e] and [es]. *) end +(** Type of a queue implementation, which is parameterized over elements. *) module type Queue = functor (Elt : Elt) -> QueueS with type elt = Elt.t +(** A strategy that implements an iterative breadth-first exploration that + joins scheduled elements whenever possible, thereby DAGifying the + execution tree. *) module PriorityQueue (Elt : Elt) : QueueS with type elt = Elt.t = struct type elt = Elt.t @@ -77,6 +85,153 @@ module PriorityQueue (Elt : Elt) : QueueS with type elt = Elt.t = struct Some (top, elts, {queue; removed}) end +module RandomQueue (Elt : Elt) : QueueS with type elt = Elt.t = struct + type elt = Elt.t + + module M = Int.Map + + (** The analyzer, after calling [create], performs a sequence of [add] and + [pop] operations. Implicitly, each [add] is for an element that is a + successor of the element that was returned by the last [pop]. This + module assumes this implicit protocol in order to infer the structure + of the execution tree, and uses it to assign weights aiming to + implement fair random sampling of paths. + + Each edge of an execution tree conceptually has a "weight" [1/w] + (represented by just the denominator [w]) indicating that there is a + [1] in [w] chance of making the sequence of branching choices leading + to the edge. An execution tree starts with a single edge to the + initial control point with weight [1]. For an edge with weight [1/w] + that has [n] successors, the edge to each of the successors has weight + [1 / (w * n)]. + + The scheduling "frontier" is a set of edges with weights (represented + as a map from weights to lists of edges) that have been reached but + not followed. + + Edges are selected from the frontier randomly using the weights to + simulate fair sampling of paths as follows. Let [{eᵢ}] be a sequence + of the edges of the frontier in decreasing weight order. The weight of + edge [eᵢ] is written [wᵢ]. The sum of the weights of the frontier + is [s = Σᵢ wᵢ]. Now choose a random number [0 ≤ n ≤ s]. This + determines an edge [eᵣ] where [r] is least such that + [Σᵢ₌₀ʳ wᵢ ≥ n]. + + The inferred structure of the execution tree is also used to schedule + the analysis to proceed depth-first where a random successor is chosen + at each point until no further progress is possible, at which point a + new path is sampled. Successors are added by the analyzer prior to + knowing whether they are feasible. For example executing a conditional + branch results in two [add] operations where the next instruction on + each is to assume the condition and the negation of the condition. To + avoid depth-first execution from being thwarted by choosing infeasible + branches in such cases, a [recent] list is maintained that contains + the successors of the last popped element. When an element is popped + from the recent list, it is not known whether or not it is immediately + infeasible. If it is, the next operation will be another [pop], and + this is also taken from the [recent] list. If the element was not + immediately infeasible, the next operation is an [add] (of a + successor), at which point the recent list is flushed to the + "frontier". In this way, each [pop] that requests the next branch to + explore is chosen from the successors of the last control point, + effecting depth-first exploration. Only when the recent list is empty, + is an element chosen from the "frontier" of untaken branches. *) + + type t = + { recent: Elt.t RAL.t (** elements added since last pop; add *) + ; recent_weight: int (** combined weight of recent *) + ; frontier: Elt.t RAL.t M.t (** weight-keyed elements to be explored *) + ; frontier_weight: float (** combined weight of frontier *) + ; last: last_operation (** single step of execution history *) } + + and last_operation = + | Add_or_pop_frontier + (** last operation was either [add] or [pop] where [recent] was + empty *) + | Pop_recent of int + (** last operation was [pop] where [recent] was not empty, and the + returned element had given weight *) + + let pp ppf {recent; frontier} = + Format.fprintf ppf "@[%a @@@ %a@]" (List.pp " ::@ " Elt.pp) + (RAL.to_list recent) + (M.pp Int.pp (RAL.pp " ::@ " Elt.pp)) + frontier + + let create () = + { recent= RAL.empty + ; recent_weight= 1 + ; frontier= M.empty + ; frontier_weight= 0. + ; last= Add_or_pop_frontier } + + let add elt q = + let add_elt l = List.fold ~f:RAL.cons (Elt.dnf elt) l in + match q.last with + | Add_or_pop_frontier -> + (* elt is a sibling of the elements of recent, so extend recent *) + {q with recent= add_elt q.recent} + | Pop_recent elt_weight -> + (* elt is a successor of the last popped element (which is itself a + sibling of the elements of recent), so flush recent to frontier + and reset recent to the singleton elt with a combined weight + equal to that of the previously popped element *) + { recent= add_elt RAL.empty + ; recent_weight= elt_weight + ; frontier= + ( if RAL.is_empty q.recent then q.frontier + else + M.update elt_weight q.frontier ~f:(function + | Some data -> Some (RAL.append q.recent data) + | None -> Some q.recent ) ) + ; frontier_weight= + q.frontier_weight + +. Float.of_int (RAL.length q.recent) + /. Float.of_int elt_weight + ; last= Add_or_pop_frontier } + + let pop q = + let num_recent = RAL.length q.recent in + if num_recent > 0 then + let elt, recent = + RAL.get_and_remove_exn q.recent (Random.int num_recent) + in + match q.last with + | Pop_recent _ -> + (* elt is sibling to last popped element, with same elt_weight *) + Some (elt, [], {q with recent}) + | Add_or_pop_frontier -> + (* recent is now complete, and weight of each element can be + computed from combined weight and length *) + let elt_weight = q.recent_weight * num_recent in + Some (elt, [], {q with recent; last= Pop_recent elt_weight}) + else + let random_weight = Random.float q.frontier_weight in + M.fold_until q.frontier 0. + ~f:(fun ~key ~data prefix_weight -> + let len = RAL.length data in + let w = Float.of_int len /. Float.of_int key in + let prefix_weight = prefix_weight +. w in + if Float.(prefix_weight < random_weight) then + `Continue prefix_weight + else + let elt, data = RAL.get_and_remove_exn data (Random.int len) in + `Stop + (Some + ( elt + , [] + , { recent= RAL.empty + ; recent_weight= key + ; frontier= + ( if RAL.is_empty data then M.remove key q.frontier + else M.add ~key ~data q.frontier ) + ; frontier_weight= q.frontier_weight -. w + ; last= Add_or_pop_frontier } )) ) + ~finish:(fun _ -> + assert (M.is_empty q.frontier) ; + None ) +end + module Make (Config : Config) (D : Domain) (Queue : Queue) = struct module Stack : sig type t @@ -251,6 +406,7 @@ module Make (Config : Config) (D : Domain) (Queue : Queue) = struct Format.fprintf ppf "%i: %a" depth Edge.pp edge let joinable x y = Llair.Block.equal x.edge.dst y.edge.dst + let dnf x = List.map ~f:(fun state -> {x with state}) (D.dnf x.state) end module Queue = Queue (Elt) diff --git a/sledge/src/control.mli b/sledge/src/control.mli index 13bce7192..dd5720ab0 100644 --- a/sledge/src/control.mli +++ b/sledge/src/control.mli @@ -13,6 +13,7 @@ open Control_intf module type Queue module PriorityQueue : Queue +module RandomQueue : Queue module Make (_ : Config) (Domain : Domain) (_ : Queue) : sig val exec_pgm : Llair.program -> unit