[merge] forked global tenv merge

Summary:
In Buck/Java, the global type environments of all captured Buck targets need to be merged, and so do the capture DBs. These two tasks can be done concurrently: both have a computation component and an I/O component, so interleaving them should improve performance.

Indeed, profiling the merge process with `offcputime.py` and `cpudist.py` (BPF tools) showed a significant amount of off-CPU time in tests (>40%), as well as a distribution of off-CPU interval timings that is consistent with I/O on a fast medium (SSD).

This diff forks a process to merge the type environments while doing the DB merge as normal. Initial results show an almost 2x improvement.

Reviewed By: skcho

Differential Revision: D28438808

fbshipit-source-id: 89c96f25b
master
Nikos Gorogiannis 4 years ago committed by Facebook GitHub Bot
parent b38cc767fa
commit 68ae6ee6f3

@ -12,22 +12,47 @@ module YBU = Yojson.Basic.Util
(** Module to merge the results of capture for different buck targets. *)
let merge_global_tenvs infer_deps_file =
let time0 = Mtime_clock.counter () in
let global_tenv = Tenv.create () in
let merge infer_out_src =
let global_tenv_path =
ResultsDirEntryName.get_path ~results_dir:infer_out_src JavaGlobalTypeEnvironment
|> DB.filename_from_string
module TenvMerger = struct
let merge_global_tenvs infer_deps_file =
let time0 = Mtime_clock.counter () in
let global_tenv = Tenv.create () in
let merge infer_out_src =
let global_tenv_path =
ResultsDirEntryName.get_path ~results_dir:infer_out_src JavaGlobalTypeEnvironment
|> DB.filename_from_string
in
Tenv.read global_tenv_path
|> Option.iter ~f:(fun tenv -> Tenv.merge ~src:tenv ~dst:global_tenv)
in
Tenv.read global_tenv_path |> Option.iter ~f:(fun tenv -> Tenv.merge ~src:tenv ~dst:global_tenv)
in
Utils.iter_infer_deps ~project_root:Config.project_root ~f:merge infer_deps_file ;
let time1 = Mtime_clock.counter () in
Tenv.store_global global_tenv ;
L.progress "Merging type environments took %a, of which %a were spent storing the global tenv@."
Mtime.Span.pp (Mtime_clock.count time0) Mtime.Span.pp (Mtime_clock.count time1)
Utils.iter_infer_deps ~project_root:Config.project_root ~f:merge infer_deps_file ;
let time1 = Mtime_clock.counter () in
Tenv.store_global global_tenv ;
L.progress "Merging type environments took %a, of which %a were spent storing the global tenv@."
Mtime.Span.pp (Mtime_clock.count time0) Mtime.Span.pp (Mtime_clock.count time1)
(** Timed variant of [merge_global_tenvs]. This binding is not recursive, so it
    shadows the earlier definition of the same name and delegates to it, running
    the merge under [ScubaLogging.execute_with_time_logging] with the label
    ["merge_captured_tenvs"]. *)
let merge_global_tenvs infer_deps_file =
  let run_merge () = merge_global_tenvs infer_deps_file in
  ScubaLogging.execute_with_time_logging "merge_captured_tenvs" run_merge
(** [start infer_deps_file] forks the current process.

    - In the child, [merge_global_tenvs infer_deps_file] is run through
      [ForkUtils.protect], after which the child terminates via [L.exit 0].
    - In the parent, the pid of the forked child is returned so that it can
      later be handed to [wait]. *)
let start infer_deps_file =
  let fork_outcome = Unix.fork () in
  match fork_outcome with
  | `In_the_parent pid ->
      pid
  | `In_the_child ->
      (* NOTE(review): presumably [ForkUtils.protect] guards the child against
         escaping exceptions; confirm against its definition. *)
      ForkUtils.protect ~f:merge_global_tenvs infer_deps_file ;
      L.exit 0
(** [wait child_pid] waits on [child_pid] with [Unix.waitpid].

    Returns [()] when the child's status is [Ok ()]. On any [Error _] status it
    aborts through [L.die InternalError], reporting the status rendered by
    [Unix.Exit_or_signal.to_string_hum]. *)
let wait child_pid =
  let status = Unix.waitpid child_pid in
  match status with
  | Ok () ->
      ()
  | Error _ ->
      L.die InternalError "Worker terminated abnormally: %s.@\n"
        (Unix.Exit_or_signal.to_string_hum status)
end
let merge_json_results infer_out_src json_entry =
let main_changed_fs_file = ResultsDir.get_path json_entry in
@ -62,9 +87,9 @@ let merge_captured_targets () =
let time0 = Mtime_clock.counter () in
L.progress "Merging captured Buck targets...@\n%!" ;
let infer_deps_file = ResultsDir.get_path CaptureDependencies in
let tenv_merger_child = TenvMerger.start infer_deps_file in
DBWriter.merge ~infer_deps_file ;
ScubaLogging.execute_with_time_logging "merge_captured_tenvs" (fun () ->
merge_global_tenvs infer_deps_file ) ;
TenvMerger.wait tenv_merger_child ;
let targets_num =
let counter = ref 0 in
let incr_counter _line = incr counter in

Loading…
Cancel
Save