[scheduler] bottom-up callgraph scheduling

Reviewed By: mbouaziz

Differential Revision: D15079846

fbshipit-source-id: c30a26f62
master
Nikos Gorogiannis 6 years ago committed by Facebook Github Bot
parent 0ad15356c2
commit 4bdc6efd2e

@ -1155,6 +1155,10 @@ INTERNAL OPTIONS
--buck-targets-blacklist-reset --buck-targets-blacklist-reset
Set --buck-targets-blacklist to the empty list. Set --buck-targets-blacklist to the empty list.
--call-graph-schedule
Activates: use call graph for scheduling analysis (Conversely:
--no-call-graph-schedule)
--no-capture --no-capture
Deactivates: capture and translate source files into infer's Deactivates: capture and translate source files into infer's
intermediate language for analysis (Conversely: --capture) intermediate language for analysis (Conversely: --capture)

@ -0,0 +1,173 @@
(*
* Copyright (c) 2019-present, Facebook, Inc.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
module F = Format
module L = Logging
module type NodeSig = sig
type t = private {id: int; pname: Typ.Procname.t; successors: int list; mutable flag: bool}
val make : int -> Typ.Procname.t -> int list -> t
val set_flag : t -> unit
val unset_flag : t -> unit
val pp_dot : F.formatter -> t -> unit
end
module Node : NodeSig = struct
type t = {id: int; pname: Typ.Procname.t; successors: int list; mutable flag: bool}
let make id pname successors = {id; pname; successors; flag= false}
let set_flag n = n.flag <- true
let unset_flag n = n.flag <- false
let pp_dot fmt {id; pname; successors} =
let pp_id fmt id = F.fprintf fmt "N%d" id in
let pp_edge fmt src dst = F.fprintf fmt " %a -> %a ;@\n" pp_id src pp_id dst in
F.fprintf fmt " %a [ label = %S ];@\n" pp_id id (F.asprintf "%a" Typ.Procname.pp pname) ;
List.iter successors ~f:(pp_edge fmt id) ;
F.pp_print_newline fmt ()
end
module IdMap = Typ.Procname.Hash
module NodeMap = Caml.Hashtbl.Make (Int)
(** [node_map] is a map from ids (unique ints) to nodes corresponding to defined procedures.
[id_map] is a map from all encountered (not necessarily defined) procnames to their ids,
and thus its image is a superset of the domain of [node_map], and usually a strict superset.
[trim_id_map] makes the image equal to the domain of [node_map]. *)
type t = {id_map: int IdMap.t; node_map: Node.t NodeMap.t}
let create initial_capacity =
{id_map= IdMap.create initial_capacity; node_map= NodeMap.create initial_capacity}
let id_of_procname {id_map} pname = IdMap.find_opt id_map pname
let node_of_id {node_map} id = NodeMap.find_opt node_map id
let mem {node_map} id = NodeMap.mem node_map id
(** [id_map] may contain undefined procedures, so use [node_map] for actual size *)
let length {node_map} = NodeMap.length node_map
let node_of_procname g pname = id_of_procname g pname |> Option.bind ~f:(node_of_id g)
let remove (g : t) pname id = IdMap.remove g.id_map pname ; NodeMap.remove g.node_map id
let add ({id_map; node_map} as graph) pname successor_pnames =
let get_or_set_id procname =
match id_of_procname graph procname with
| None ->
let id = IdMap.length id_map in
IdMap.replace id_map procname id ; id
| Some id ->
id
in
let id = get_or_set_id pname in
let successors = List.map successor_pnames ~f:get_or_set_id in
let node = Node.make id pname successors in
NodeMap.replace node_map id node
let remove_reachable g start_pname =
let add_live_successors_and_remove_self init (n : Node.t) =
remove g n.pname n.id ;
List.fold n.successors ~init ~f:(fun init succ_id ->
node_of_id g succ_id |> Option.fold ~init ~f:(fun acc s -> s :: acc) )
in
let rec remove_list frontier =
if not (List.is_empty frontier) then
remove_list (List.fold frontier ~init:[] ~f:add_live_successors_and_remove_self)
in
node_of_procname g start_pname |> Option.iter ~f:(fun start_node -> remove_list [start_node])
let flag_reachable g start_pname =
let process_node init (n : Node.t) =
if n.flag then init
else (
Node.set_flag n ;
List.fold n.successors ~init ~f:(fun acc id ->
match node_of_id g id with Some n' when not n'.flag -> n' :: acc | _ -> acc ) )
in
let rec flag_list frontier =
if not (List.is_empty frontier) then flag_list (List.fold frontier ~init:[] ~f:process_node)
in
node_of_procname g start_pname |> Option.iter ~f:(fun start_node -> flag_list [start_node])
let remove_unflagged_and_unflag_all {id_map; node_map} =
NodeMap.filter_map_inplace
(fun _id (n : Node.t) ->
if n.flag then ( Node.unset_flag n ; Some n ) else ( IdMap.remove id_map n.pname ; None ) )
node_map
(** remove pnames for all undefined procedures *)
let trim_id_map (g : t) =
IdMap.filter_map_inplace (fun _pname id -> Option.some_if (mem g id) id) g.id_map
let pp_dot fmt {node_map} =
F.fprintf fmt "@\ndigraph callgraph {@\n" ;
NodeMap.iter (fun _id n -> Node.pp_dot fmt n) node_map ;
F.fprintf fmt "}@."
let to_dotty g =
let outc = Filename.concat Config.results_dir "callgraph.dot" |> Out_channel.create in
let fmt = F.formatter_of_out_channel outc in
pp_dot fmt g ; Out_channel.close outc
let build_from_captured_procs g =
let hashcons_pname =
let pname_tbl : Typ.Procname.t IdMap.t = IdMap.create 1001 in
fun pname ->
match IdMap.find_opt pname_tbl pname with
| Some pname' ->
pname'
| None ->
IdMap.add pname_tbl pname pname ; pname
in
let db = ResultsDatabase.get_database () in
let stmt = Sqlite3.prepare db "SELECT proc_name, callees FROM procedures" in
SqliteUtils.result_fold_rows db ~log:"creating call graph" stmt ~init:() ~f:(fun () stmt ->
let proc_name = Sqlite3.column stmt 0 |> Typ.Procname.SQLite.deserialize |> hashcons_pname in
let callees =
Sqlite3.column stmt 1 |> Typ.Procname.SQLiteList.deserialize |> List.map ~f:hashcons_pname
in
add g proc_name callees )
let build_from_sources g sources =
let time0 = Mtime_clock.counter () in
L.progress "Building call graph...@\n%!" ;
build_from_captured_procs g ;
let captured_length = length g in
List.iter sources ~f:(fun sf ->
SourceFiles.proc_names_of_source sf |> List.iter ~f:(flag_reachable g) ) ;
remove_unflagged_and_unflag_all g ;
trim_id_map g ;
if Config.debug_level_analysis > 0 then to_dotty g ;
L.progress "Building call graph took %a@\n" Mtime.Span.pp (Mtime_clock.count time0) ;
L.progress
"Constructed call graph from %d total procs, %d reachable defined procs, and takes %d bytes@."
captured_length (length g)
(Obj.(reachable_words (repr g)) * (Sys.word_size / 8))
let get_unflagged_leaves g =
NodeMap.fold
(fun _id (n : Node.t) acc ->
if n.flag || List.exists n.successors ~f:(mem g) then acc else n :: acc )
g.node_map []

@ -0,0 +1,41 @@
(*
* Copyright (c) 2019-present, Facebook, Inc.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
module F = Format
module type NodeSig = sig
type t = private {id: int; pname: Typ.Procname.t; successors: int list; mutable flag: bool}
val make : int -> Typ.Procname.t -> int list -> t
val set_flag : t -> unit
val unset_flag : t -> unit
val pp_dot : F.formatter -> t -> unit
end
module Node : NodeSig
type t
val create : int -> t
(** [create n] makes an empty graph with initial capacity [n] which grows as required *)
val build_from_sources : t -> SourceFile.t list -> unit
(** build restriction of call graph to procedures reachable from provided sources *)
val mem : t -> int -> bool
(** is an int [id] the index of a node in the graph? *)
val flag_reachable : t -> Typ.Procname.t -> unit
(** flag all nodes reachable from the node of the given procname, if it exists *)
val remove_reachable : t -> Typ.Procname.t -> unit
(** remove all nodes reachable from procname *)
val get_unflagged_leaves : t -> Node.t list

@ -17,17 +17,27 @@ let clear_caches () =
Typ.Procname.SQLite.clear_cache () Typ.Procname.SQLite.clear_cache ()
(** Create tasks to analyze an execution environment *) let analyze_target : TaskScheduler.target Tasks.doer =
let analyze_source_file : SourceFile.t Tasks.doer = let analyze_source_file exe_env source_file =
fun source_file ->
if Config.memcached then Memcached.connect () ;
DB.Results_dir.init source_file ; DB.Results_dir.init source_file ;
let exe_env = Exe_env.mk () in
L.task_progress SourceFile.pp source_file ~f:(fun () -> L.task_progress SourceFile.pp source_file ~f:(fun () ->
Callbacks.analyze_file exe_env source_file ;
if Config.write_html then Printer.write_all_html_files source_file )
in
let analyze_proc_name exe_env proc_name =
L.task_progress Typ.Procname.pp proc_name ~f:(fun () ->
Callbacks.analyze_proc_name exe_env proc_name )
in
fun target ->
if Config.memcached then Memcached.connect () ;
let exe_env = Exe_env.mk () in
(* clear cache for each source file to avoid it growing unboundedly *) (* clear cache for each source file to avoid it growing unboundedly *)
clear_caches () ; clear_caches () ;
Callbacks.analyze_file exe_env source_file ; ( match target with
if Config.write_html then Printer.write_all_html_files source_file ) ; | Procname procname ->
analyze_proc_name exe_env procname
| File source_file ->
analyze_source_file exe_env source_file ) ;
if Config.memcached then Memcached.disconnect () if Config.memcached then Memcached.disconnect ()
@ -95,17 +105,14 @@ let main ~changed_files =
Config.results_dir ; Config.results_dir ;
(* empty all caches to minimize the process heap to have less work to do when forking *) (* empty all caches to minimize the process heap to have less work to do when forking *)
clear_caches () ; clear_caches () ;
( if Int.equal Config.jobs 1 then ( if Int.equal Config.jobs 1 then (
Tasks.run_sequentially ~f:analyze_source_file source_files_to_analyze ; let target_files = List.rev_map source_files_to_analyze ~f:(fun sf -> TaskScheduler.File sf) in
Tasks.run_sequentially ~f:analyze_target target_files ;
L.progress "@\nAnalysis finished in %as@." Pp.elapsed_time () ) L.progress "@\nAnalysis finished in %as@." Pp.elapsed_time () )
else else (
let source_files_to_analyze =
List.permute source_files_to_analyze
~random_state:(Random.State.make (Array.create ~len:1 0))
in
L.environment_info "Parallel jobs: %d@." Config.jobs ; L.environment_info "Parallel jobs: %d@." Config.jobs ;
let tasks = TaskScheduler.schedule source_files_to_analyze in
(* Prepare tasks one cluster at a time while executing in parallel *) (* Prepare tasks one cluster at a time while executing in parallel *)
let tasks = Tasks.gen_of_list source_files_to_analyze in let runner = Tasks.Runner.create ~jobs:Config.jobs ~f:analyze_target ~tasks in
let runner = Tasks.Runner.create ~jobs:Config.jobs ~f:analyze_source_file ~tasks in
Tasks.Runner.run runner ) ; Tasks.Runner.run runner ) ;
output_json_makefile_stats source_files_to_analyze output_json_makefile_stats source_files_to_analyze

@ -0,0 +1,100 @@
(*
* Copyright (c) 2019-present, Facebook, Inc.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
module L = Logging
type target = Procname of Typ.Procname.t | File of SourceFile.t
type 'a task_generator = 'a Tasks.task_generator
let chain (gen1 : 'a task_generator) (gen2 : 'a task_generator) : 'a task_generator =
let n_tasks = gen1.n_tasks + gen2.n_tasks in
let gen1_returned_empty = ref false in
let gen1_is_empty () =
gen1_returned_empty := !gen1_returned_empty || gen1.is_empty () ;
!gen1_returned_empty
in
let is_empty () = gen1_is_empty () && gen2.is_empty () in
let next x = if gen1_is_empty () then gen2.next x else gen1.next x in
{n_tasks; is_empty; next}
let count_procedures () =
let db = ResultsDatabase.get_database () in
let stmt = Sqlite3.prepare db "SELECT COUNT(rowid) FROM procedures" in
let count =
match SqliteUtils.result_single_column_option db ~log:"counting procedures" stmt with
| Some (Sqlite3.Data.INT i64) ->
Int64.to_int i64 |> Option.value ~default:Int.max_value
| _ ->
L.die InternalError "Got no result trying to count procedures"
in
L.debug Analysis Quiet "Found %d procedures in procedures table.@." count ;
count
(** choose some reasonable minimum capacity that also is a prime number *)
let initial_call_graph_capacity = 1009
let bottom_up sources : target task_generator =
(* this will potentially grossly overapproximate the tasks *)
let n_tasks = count_procedures () in
let g = CallGraph.create initial_call_graph_capacity in
let initialized = ref false in
let pending : CallGraph.Node.t list ref = ref [] in
let scheduled = ref Typ.Procname.Set.empty in
let is_empty () =
!initialized && List.is_empty !pending && Typ.Procname.Set.is_empty !scheduled
in
let rec next_aux () =
match !pending with
| [] ->
pending := CallGraph.get_unflagged_leaves g ;
if List.is_empty !pending then None else next_aux ()
| n :: ns when n.flag || not (CallGraph.mem g n.id) ->
pending := ns ;
next_aux ()
| n :: ns ->
pending := ns ;
scheduled := Typ.Procname.Set.add n.pname !scheduled ;
CallGraph.flag_reachable g n.pname ;
Some (Procname n.pname)
in
let next target_opt =
(* do construction here, to avoid having the call graph into forked workers *)
if not !initialized then (
CallGraph.build_from_sources g sources ;
initialized := true ) ;
( match target_opt with
| None ->
()
| Some (File _) ->
assert false
| Some (Procname pname) ->
scheduled := Typ.Procname.Set.remove pname !scheduled ;
CallGraph.remove_reachable g pname ) ;
next_aux ()
in
{n_tasks; is_empty; next}
let of_sources sources =
let gen =
List.rev_map sources ~f:(fun sf -> File sf)
|> List.permute ~random_state:(Random.State.make (Array.create ~len:1 0))
|> Tasks.gen_of_list
in
let next x =
let res = gen.next x in
match res with None -> None | Some (Procname _) -> assert false | Some (File _) as v -> v
in
{gen with next}
let schedule sources =
if Config.call_graph_schedule then chain (bottom_up sources) (of_sources sources)
else of_sources sources

@ -0,0 +1,11 @@
(*
* Copyright (c) 2019-present, Facebook, Inc.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
type target = Procname of Typ.Procname.t | File of SourceFile.t
val schedule : SourceFile.t list -> target Tasks.task_generator

@ -140,23 +140,26 @@ let create_perf_stats_report source_file =
PerfStats.get_reporter (PerfStats.Backend source_file) () PerfStats.get_reporter (PerfStats.Backend source_file) ()
(** Invoke all procedure and cluster callbacks on a given environment. *) let analyze_procedures exe_env procs_to_analyze source_file_opt =
let analyze_file (exe_env : Exe_env.t) source_file =
let saved_language = !Language.curr_language in let saved_language = !Language.curr_language in
Option.iter source_file_opt ~f:(fun source_file ->
if Config.dump_duplicate_symbols then dump_duplicate_procs source_file procs_to_analyze ) ;
let analyze_ondemand summary proc_desc = iterate_procedure_callbacks exe_env summary proc_desc in let analyze_ondemand summary proc_desc = iterate_procedure_callbacks exe_env summary proc_desc in
(* Invoke procedure callbacks using on-demand analysis schedulling *)
Ondemand.set_callbacks {Ondemand.exe_env; analyze_ondemand} ; Ondemand.set_callbacks {Ondemand.exe_env; analyze_ondemand} ;
let procs_to_analyze =
(* analyze all the currently defined procedures *)
SourceFiles.proc_names_of_source source_file
in
if Config.dump_duplicate_symbols then dump_duplicate_procs source_file procs_to_analyze ;
let analyze_proc_name pname = ignore (Ondemand.analyze_proc_name pname : Summary.t option) in let analyze_proc_name pname = ignore (Ondemand.analyze_proc_name pname : Summary.t option) in
List.iter ~f:analyze_proc_name procs_to_analyze ; List.iter ~f:analyze_proc_name procs_to_analyze ;
(* Invoke cluster callbacks. *) Option.iter source_file_opt ~f:(fun source_file ->
iterate_cluster_callbacks procs_to_analyze exe_env source_file ; iterate_cluster_callbacks procs_to_analyze exe_env source_file ;
(* Perf logging needs to remain at the end - after analysis work is complete *) create_perf_stats_report source_file ) ;
create_perf_stats_report source_file ;
(* Unregister callbacks *)
Ondemand.unset_callbacks () ; Ondemand.unset_callbacks () ;
Language.curr_language := saved_language Language.curr_language := saved_language
(** Invoke all procedure and cluster callbacks on a given environment. *)
let analyze_file (exe_env : Exe_env.t) source_file =
let procs_to_analyze = SourceFiles.proc_names_of_source source_file in
analyze_procedures exe_env procs_to_analyze (Some source_file)
(** Invoke procedure callbacks on a given environment. *)
let analyze_proc_name (exe_env : Exe_env.t) proc_name = analyze_procedures exe_env [proc_name] None

@ -38,3 +38,6 @@ val register_cluster_callback : name:string -> Language.t -> cluster_callback_t
val analyze_file : Exe_env.t -> SourceFile.t -> unit val analyze_file : Exe_env.t -> SourceFile.t -> unit
(** Invoke all the registered callbacks on the given file. *) (** Invoke all the registered callbacks on the given file. *)
val analyze_proc_name : Exe_env.t -> Typ.Procname.t -> unit
(** Invoke all the registered callbacks on the given procedure. *)

@ -866,6 +866,10 @@ and buck_targets_blacklist =
~meta:"regex" "Skip capture of buck targets matched by the specified regular expression." ~meta:"regex" "Skip capture of buck targets matched by the specified regular expression."
and call_graph_schedule =
CLOpt.mk_bool ~long:"call-graph-schedule" ~default:false "use call graph for scheduling analysis"
and capture = and capture =
CLOpt.mk_bool ~long:"capture" ~default:true CLOpt.mk_bool ~long:"capture" ~default:true
"capture and translate source files into infer's intermediate language for analysis" "capture and translate source files into infer's intermediate language for analysis"
@ -2611,6 +2615,8 @@ and buck_targets_blacklist = !buck_targets_blacklist
and bufferoverrun = !bufferoverrun and bufferoverrun = !bufferoverrun
and call_graph_schedule = !call_graph_schedule
and capture = and capture =
(* take `--clang-frontend-action` as the source of truth as long as that option exists *) (* take `--clang-frontend-action` as the source of truth as long as that option exists *)
match !clang_frontend_action with match !clang_frontend_action with

@ -261,6 +261,8 @@ val buck_targets_blacklist : string list
val bufferoverrun : bool val bufferoverrun : bool
val call_graph_schedule : bool
val capture : bool val capture : bool
val capture_blacklist : string option val capture_blacklist : string option

@ -1,3 +1,2 @@
build_systems/codetoanalyze/objc_missing_fld/A.m, badOnlyOneNDA, 5, NULL_DEREFERENCE, no_bucket, ERROR, [start of procedure badOnlyOneNDA(),start of procedure predA(),start of procedure implOnlyFn:,return from a call to A::implOnlyFn:,Executing synthesized getter delegate,Condition is true,return from a call to predA,Taking false branch] build_systems/codetoanalyze/objc_missing_fld/A.m, badOnlyOneNDA, 5, NULL_DEREFERENCE, no_bucket, ERROR, [start of procedure badOnlyOneNDA(),start of procedure predA(),start of procedure implOnlyFn:,return from a call to A::implOnlyFn:,Executing synthesized getter delegate,Condition is true,return from a call to predA,Taking false branch]
build_systems/codetoanalyze/objc_missing_fld/B.m, badOnlyOneNDB, 3, NULL_DEREFERENCE, no_bucket, ERROR, [start of procedure badOnlyOneNDB(),Taking true branch] build_systems/codetoanalyze/objc_missing_fld/B.m, badOnlyOneNDB, 5, NULL_DEREFERENCE, no_bucket, ERROR, [start of procedure badOnlyOneNDB(),start of procedure predB(),start of procedure implOnlyFn:,return from a call to A::implOnlyFn:,Executing synthesized getter delegate,Condition is true,return from a call to predB,Taking false branch]
build_systems/codetoanalyze/objc_missing_fld/B.m, badOnlyOneNDB, 5, NULL_DEREFERENCE, no_bucket, ERROR, [start of procedure badOnlyOneNDB(),Taking false branch]

Loading…
Cancel
Save