[processpool] collect results from children

Summary:
Modify the scheduler to collect results from children at the end of the
parallel execution. Use this to collect backend stats and log their
aggregated sum.

Reviewed By: ezgicicek

Differential Revision: D16358867

fbshipit-source-id: 775792ef7
master
Jules Villard 6 years ago committed by Facebook Github Bot
parent deb9afe121
commit 4fde351565

@ -60,6 +60,15 @@ let copy from ~into =
into.summary_has_model_queries <- summary_has_model_queries into.summary_has_model_queries <- summary_has_model_queries
let merge stats1 stats2 =
{ summary_file_try_load= stats1.summary_file_try_load + stats2.summary_file_try_load
; summary_read_from_disk= stats1.summary_read_from_disk + stats2.summary_read_from_disk
; summary_cache_hits= stats1.summary_cache_hits + stats2.summary_cache_hits
; summary_cache_misses= stats1.summary_cache_misses + stats2.summary_cache_misses
; summary_has_model_queries= stats1.summary_has_model_queries + stats2.summary_has_model_queries
}
let initial = let initial =
{ summary_file_try_load= 0 { summary_file_try_load= 0
; summary_read_from_disk= 0 ; summary_read_from_disk= 0

@ -10,6 +10,8 @@ open! IStd
type t type t
val initial : t
val incr_summary_file_try_load : unit -> unit val incr_summary_file_try_load : unit -> unit
(** a query to the filesystem attempting to load a summary file *) (** a query to the filesystem attempting to load a summary file *)
@ -30,3 +32,6 @@ val get : unit -> t
(** get the stats so far *) (** get the stats so far *)
val pp : Format.formatter -> t -> unit val pp : Format.formatter -> t -> unit
val merge : t -> t -> t
(** return a new value that adds up the stats in both arguments *)

@ -123,6 +123,19 @@ let main ~changed_files =
L.environment_info "Parallel jobs: %d@." Config.jobs ; L.environment_info "Parallel jobs: %d@." Config.jobs ;
let tasks = TaskScheduler.schedule source_files_to_analyze in let tasks = TaskScheduler.schedule source_files_to_analyze in
(* Prepare tasks one cluster at a time while executing in parallel *) (* Prepare tasks one cluster at a time while executing in parallel *)
let runner = Tasks.Runner.create ~jobs:Config.jobs ~f:analyze_target ~tasks in let runner =
Tasks.Runner.run runner ) ; Tasks.Runner.create ~jobs:Config.jobs ~f:analyze_target ~child_epilogue:BackendStats.get
~tasks
in
let all_stats = Tasks.Runner.run runner in
let stats =
Array.fold all_stats ~init:BackendStats.initial ~f:(fun collated_stats stats_opt ->
match stats_opt with
| None ->
collated_stats
| Some stats ->
L.debug Analysis Quiet "gotstats:@\n%a@." BackendStats.pp stats ;
BackendStats.merge stats collated_stats )
in
L.debug Analysis Quiet "collected stats:@\n%a@." BackendStats.pp stats ) ;
output_json_makefile_stats source_files_to_analyze output_json_makefile_stats source_files_to_analyze

@ -22,21 +22,18 @@ let fork_protect ~f x =
(* get different streams of random numbers in each fork, in particular to lessen contention in (* get different streams of random numbers in each fork, in particular to lessen contention in
`Filename.mk_temp` *) `Filename.mk_temp` *)
Random.self_init () ; Random.self_init () ;
Epilogues.register
~f:(fun () -> L.debug Analysis Quiet "%a@." BackendStats.pp (BackendStats.get ()))
~description:"dumping summaries stats" ;
f x f x
module Runner = struct module Runner = struct
type 'a t = 'a ProcessPool.t type ('work, 'final) t = ('work, 'final) ProcessPool.t
let create ~jobs ~f ~tasks = let create ~jobs ~f ~child_epilogue ~tasks =
PerfEvent.( PerfEvent.(
log (fun logger -> log_begin_event logger ~categories:["sys"] ~name:"fork prepare" ())) ; log (fun logger -> log_begin_event logger ~categories:["sys"] ~name:"fork prepare" ())) ;
ResultsDatabase.db_close () ; ResultsDatabase.db_close () ;
let pool = let pool =
ProcessPool.create ~jobs ~f ~tasks ProcessPool.create ~jobs ~f ~child_epilogue ~tasks
~child_prelude: ~child_prelude:
((* hack: run post-fork bookkeeping stuff by passing a dummy function to [fork_protect] *) ((* hack: run post-fork bookkeeping stuff by passing a dummy function to [fork_protect] *)
fork_protect ~f:(fun () -> ())) fork_protect ~f:(fun () -> ()))

@ -21,11 +21,16 @@ val fork_protect : f:('a -> 'b) -> 'a -> 'b
(** A runner accepts new tasks repeatedly for parallel execution *) (** A runner accepts new tasks repeatedly for parallel execution *)
module Runner : sig module Runner : sig
type 'a t type ('work, 'final) t
val create : jobs:int -> f:'a doer -> tasks:'a task_generator -> 'a t val create :
jobs:int
-> f:'work doer
-> child_epilogue:(unit -> 'final)
-> tasks:'work task_generator
-> ('work, 'final) t
(** Create a runner running [jobs] jobs in parallel *) (** Create a runner running [jobs] jobs in parallel *)
val run : 'a t -> unit val run : (_, 'final) t -> 'final option Array.t
(** Start the given tasks with the runner and wait until completion *) (** Start the given tasks with the runner and wait until completion *)
end end

@ -9,6 +9,8 @@ open! IStd
module F = Format module F = Format
module L = Logging module L = Logging
let log_or_die fmt = if Config.keep_going then L.internal_error fmt else L.die InternalError fmt
type child_info = {pid: Pid.t; down_pipe: Out_channel.t} type child_info = {pid: Pid.t; down_pipe: Out_channel.t}
(** The master's abstraction of state for workers. (** The master's abstraction of state for workers.
@ -26,17 +28,17 @@ type 'a task_generator =
; next: unit -> 'a option } ; next: unit -> 'a option }
(** the state of the pool *) (** the state of the pool *)
type 'a t = type ('work, 'final) t =
{ jobs: int { jobs: int
(** number of jobs running in parallel, i.e. number of children we are responsible for *) (** number of jobs running in parallel, i.e. number of children we are responsible for *)
; slots: child_info Array.t ; slots: child_info Array.t
(** array of child processes with their pids and channels we can use to send work down to (** array of child processes with their pids and channels we can use to send work down to
each child *) each child *)
; children_states: 'a child_state Array.t (** array tracking the state of each worker *) ; children_states: 'work child_state Array.t (** array tracking the state of each worker *)
; children_updates: Unix.File_descr.t ; children_updates: Unix.File_descr.t
(** all the children send updates up the same pipe to the pool *) (** all the children send updates up the same pipe to the pool *)
; task_bar: TaskBar.t ; task_bar: TaskBar.t
; tasks: 'a task_generator (** generator for work remaining to be done *) } ; tasks: 'work task_generator (** generator for work remaining to be done *) }
(** {2 Constants} *) (** {2 Constants} *)
@ -232,6 +234,8 @@ let process_updates pool buffer =
| UpdateStatus (slot, t, status) -> | UpdateStatus (slot, t, status) ->
TaskBar.update_status pool.task_bar ~slot t status TaskBar.update_status pool.task_bar ~slot t status
| Crash slot -> | Crash slot ->
(* NOTE: the workers only send this message if {!Config.keep_going} is not [true] so if
we receive it we know we should fail hard *)
let {pid} = pool.slots.(slot) in let {pid} = pool.slots.(slot) in
(* clean crash, give the child process a chance to cleanup *) (* clean crash, give the child process a chance to cleanup *)
Unix.wait (`Pid pid) |> ignore ; Unix.wait (`Pid pid) |> ignore ;
@ -254,14 +258,39 @@ let process_updates pool buffer =
) )
type 'a final_worker_message = Finished of int * 'a option | FinalCrash of int
let collect_results (pool : (_, 'final) t) =
let failed = ref false in
let updates_in = Unix.in_channel_of_descr pool.children_updates in
(* use [Array.init] just to collect n messages, the order in the array will not be the same as the
slots of the workers but that's ok *)
Array.init pool.jobs ~f:(fun i ->
if !failed then None
else
match (Marshal.from_channel updates_in : 'final final_worker_message) with
| exception (End_of_file | Failure _) ->
failed := true ;
log_or_die "@[<v>error reading %dth final values from children@]%!" i ;
None
| FinalCrash slot ->
(* NOTE: the workers only send this message if {!Config.keep_going} is not [true] so if
we receive it we know we should fail hard *)
killall pool ~slot "see backtrace above"
| Finished (_slot, data) ->
data )
(** terminate all worker processes *) (** terminate all worker processes *)
let wait_all pool = let wait_all pool =
(* tell each alive worker to go home and wait(2) them, one by one; the order doesn't matter since (* tell each alive worker to go home *)
we want to wait for all of them eventually anyway. *) Array.iter pool.slots ~f:(fun {down_pipe} ->
marshal_to_pipe down_pipe GoHome ; Out_channel.close down_pipe ) ;
let results = collect_results pool in
(* wait(2) workers one by one; the order doesn't matter since we want to wait for all of them
eventually anyway. *)
let errors = let errors =
Array.foldi ~init:[] pool.slots ~f:(fun slot errors {pid; down_pipe} -> Array.foldi ~init:[] pool.slots ~f:(fun slot errors {pid} ->
marshal_to_pipe down_pipe GoHome ;
Out_channel.close down_pipe ;
match Unix.wait (`Pid pid) with match Unix.wait (`Pid pid) with
| _pid, Ok () -> | _pid, Ok () ->
errors errors
@ -269,21 +298,33 @@ let wait_all pool =
(* Collect all children errors and die only at the end to avoid creating zombies. *) (* Collect all children errors and die only at the end to avoid creating zombies. *)
(slot, status) :: errors ) (slot, status) :: errors )
in in
if not (List.is_empty errors) then ( if not (List.is_empty errors) then
let log_or_die = if Config.keep_going then L.internal_error else L.die InternalError in
let pp_error f (slot, status) = let pp_error f (slot, status) =
F.fprintf f "Error in infer subprocess %d: %s@." slot F.fprintf f "Error in infer subprocess %d: %s@." slot
(Unix.Exit_or_signal.to_string_hum status) (Unix.Exit_or_signal.to_string_hum status)
in in
log_or_die "@[<v>%a@]%!" (Pp.seq ~print_env:Pp.text_break ~sep:"" pp_error) errors log_or_die "@[<v>%a@]%!" (Pp.seq ~print_env:Pp.text_break ~sep:"" pp_error) errors ) ;
results
(** worker loop: wait for tasks and run [f] on them until we are told to go home *) (** worker loop: wait for tasks and run [f] on them until we are told to go home *)
let rec child_loop ~slot send_to_parent receive_from_parent ~f = let rec child_loop ~slot send_to_parent send_final receive_from_parent ~f ~epilogue =
send_to_parent (Ready slot) ; send_to_parent (Ready slot) ;
match receive_from_parent () with match receive_from_parent () with
| GoHome -> | GoHome -> (
() match epilogue () with
| data ->
send_final (Finished (slot, Some data))
| exception e ->
IExn.reraise_if e ~f:(fun () ->
if Config.keep_going then (
L.internal_error "Error running epilogue in subprocess %d: %a@." slot Exn.pp e ;
send_final (Finished (slot, None)) ;
false )
else (
(* crash hard, but first let the master know that we have crashed *)
send_final (FinalCrash slot) ;
true ) ) )
| Do stuff -> | Do stuff ->
( try f stuff ( try f stuff
with e -> with e ->
@ -296,14 +337,14 @@ let rec child_loop ~slot send_to_parent receive_from_parent ~f =
(* crash hard, but first let the master know that we have crashed *) (* crash hard, but first let the master know that we have crashed *)
send_to_parent (Crash slot) ; send_to_parent (Crash slot) ;
true ) ) ) ; true ) ) ) ;
child_loop ~slot send_to_parent receive_from_parent ~f child_loop ~slot send_to_parent send_final receive_from_parent ~f ~epilogue
(** Fork a new child and start it so that it is ready for work. (** Fork a new child and start it so that it is ready for work.
The child inherits [updates_w] to send updates up to the parent, and a new pipe is set up for The child inherits [updates_w] to send updates up to the parent, and a new pipe is set up for
the parent to send instructions down to the child. *) the parent to send instructions down to the child. *)
let fork_child ~child_prelude ~slot (updates_r, updates_w) ~f = let fork_child ~child_prelude ~slot (updates_r, updates_w) ~f ~epilogue =
let to_child_r, to_child_w = Unix.pipe () in let to_child_r, to_child_w = Unix.pipe () in
match Unix.fork () with match Unix.fork () with
| `In_the_child -> | `In_the_child ->
@ -316,6 +357,9 @@ let fork_child ~child_prelude ~slot (updates_r, updates_w) ~f =
child_prelude () ; child_prelude () ;
let updates_oc = Unix.out_channel_of_descr updates_w in let updates_oc = Unix.out_channel_of_descr updates_w in
let send_to_parent (message : worker_message) = marshal_to_pipe updates_oc message in let send_to_parent (message : worker_message) = marshal_to_pipe updates_oc message in
let send_final (final_message : 'a final_worker_message) =
marshal_to_pipe updates_oc final_message
in
(* Function to send updates up the pipe to the parent instead of directly to the task (* Function to send updates up the pipe to the parent instead of directly to the task
bar. This is because only the parent knows about all the children, hence it's in charge of bar. This is because only the parent knows about all the children, hence it's in charge of
actually updating the task bar. *) actually updating the task bar. *)
@ -337,7 +381,7 @@ let fork_child ~child_prelude ~slot (updates_r, updates_w) ~f =
PerfEvent.(log (fun logger -> log_end_event logger ())) ; PerfEvent.(log (fun logger -> log_end_event logger ())) ;
x x
in in
child_loop ~slot send_to_parent receive_from_parent ~f ; child_loop ~slot send_to_parent send_final receive_from_parent ~f ~epilogue ;
Out_channel.close updates_oc ; Out_channel.close updates_oc ;
In_channel.close orders_ic ; In_channel.close orders_ic ;
Epilogues.run () ; Epilogues.run () ;
@ -348,14 +392,22 @@ let fork_child ~child_prelude ~slot (updates_r, updates_w) ~f =
let create : let create :
jobs:int -> child_prelude:(unit -> unit) -> f:('a -> unit) -> tasks:'a task_generator -> 'a t = jobs:int
fun ~jobs ~child_prelude ~f ~tasks -> -> child_prelude:(unit -> unit)
-> f:('work -> unit)
-> child_epilogue:(unit -> 'final)
-> tasks:'work task_generator
-> ('work, 'final) t =
fun ~jobs ~child_prelude ~f ~child_epilogue ~tasks ->
let task_bar = TaskBar.create ~jobs in let task_bar = TaskBar.create ~jobs in
(* Pipe to communicate from children to parent. Only one pipe is needed: the messages sent by (* Pipe to communicate from children to parent. Only one pipe is needed: the messages sent by
children include the identifier of the child sending the message (its [slot]). This way there children include the identifier of the child sending the message (its [slot]). This way there
is only one pipe to wait on for updates. *) is only one pipe to wait on for updates. *)
let ((pipe_child_r, pipe_child_w) as status_pipe) = Unix.pipe () in let ((pipe_child_r, pipe_child_w) as status_pipe) = Unix.pipe () in
let slots = Array.init jobs ~f:(fun slot -> fork_child ~child_prelude ~slot status_pipe ~f) in let slots =
Array.init jobs ~f:(fun slot ->
fork_child ~child_prelude ~slot status_pipe ~f ~epilogue:child_epilogue )
in
(* we have forked the child processes and are now in the parent *) (* we have forked the child processes and are now in the parent *)
let[@warning "-26"] pipe_child_w = Unix.close pipe_child_w in let[@warning "-26"] pipe_child_w = Unix.close pipe_child_w in
let children_updates = pipe_child_r in let children_updates = pipe_child_r in
@ -373,11 +425,12 @@ let run pool =
while not (pool.tasks.is_empty () && all_children_idle pool) do while not (pool.tasks.is_empty () && all_children_idle pool) do
process_updates pool buffer ; TaskBar.refresh pool.task_bar process_updates pool buffer ; TaskBar.refresh pool.task_bar
done ; done ;
wait_all pool ; let results = wait_all pool in
TaskBar.finish pool.task_bar TaskBar.finish pool.task_bar ; results
let run pool = let run pool =
PerfEvent.(log (fun logger -> log_instant_event logger ~name:"start process pool" Global)) ; PerfEvent.(log (fun logger -> log_instant_event logger ~name:"start process pool" Global)) ;
run pool ; let results = run pool in
PerfEvent.(log (fun logger -> log_instant_event logger ~name:"end process pool" Global)) PerfEvent.(log (fun logger -> log_instant_event logger ~name:"end process pool" Global)) ;
results

@ -23,8 +23,9 @@ open! IStd
See also {!module-ProcessPoolState}. *) See also {!module-ProcessPoolState}. *)
(** A ['a t] process pool accepts tasks of type ['a]. ['a] will be marshalled over a Unix pipe.*) (** A [('work, 'final) t] process pool accepts tasks of type ['work] and produces an array of
type _ t results of type ['final]. ['work] and ['final] will be marshalled over a Unix pipe.*)
type (_, _) t
(** abstraction for generating jobs *) (** abstraction for generating jobs *)
type 'a task_generator = type 'a task_generator =
@ -43,8 +44,14 @@ type 'a task_generator =
} }
val create : val create :
jobs:int -> child_prelude:(unit -> unit) -> f:('a -> unit) -> tasks:'a task_generator -> 'a t jobs:int
-> child_prelude:(unit -> unit)
-> f:('work -> unit)
-> child_epilogue:(unit -> 'final)
-> tasks:'work task_generator
-> ('work, 'final) t
(** Create a new pool of processes running [jobs] jobs in parallel *) (** Create a new pool of processes running [jobs] jobs in parallel *)
val run : 'a t -> unit val run : (_, 'final) t -> 'final option Array.t
(** use the processes in the given process pool to run all the given tasks in parallel. *) (** use the processes in the given process pool to run all the given tasks in parallel and return
the results of the epilogues *)

@ -56,8 +56,12 @@ let run_compilation_database compilation_database should_capture_file =
L.progress "Starting %s %d files@\n%!" Config.clang_frontend_action_string number_of_jobs ; L.progress "Starting %s %d files@\n%!" Config.clang_frontend_action_string number_of_jobs ;
let compilation_commands = List.map ~f:create_cmd compilation_data in let compilation_commands = List.map ~f:create_cmd compilation_data in
let tasks = Tasks.gen_of_list compilation_commands in let tasks = Tasks.gen_of_list compilation_commands in
let runner = Tasks.Runner.create ~jobs:Config.jobs ~f:invoke_cmd ~tasks in (* no stats to record so [child_epilogue] does nothing and we ignore the return
Tasks.Runner.run runner ; {!Tasks.Runner.run} *)
let runner =
Tasks.Runner.create ~jobs:Config.jobs ~f:invoke_cmd ~child_epilogue:(fun () -> ()) ~tasks
in
Tasks.Runner.run runner |> ignore ;
L.progress "@." ; L.progress "@." ;
L.(debug Analysis Medium) "Ran %d jobs" number_of_jobs L.(debug Analysis Medium) "Ran %d jobs" number_of_jobs

Loading…
Cancel
Save