From 68ae6ee6f35b4c8e8f108c5e61ee6163159e59a0 Mon Sep 17 00:00:00 2001 From: Nikos Gorogiannis Date: Mon, 17 May 2021 01:05:52 -0700 Subject: [PATCH] [merge] forked global tenv merge Summary: In Buck/Java, the global type environments of all captured Buck targets need to be merged, and so do the capture DBs. These two tasks can be done concurrently, as both have a computation and an I/O component, and interleaving them should improve performance. Indeed, profiling the merge process with `offcputime.py` and `cpudist.py` (BPF tools) showed a significant amount of off-CPU time in tests (>40%), as well as a distribution of off-CPU interval timings consistent with I/O on a fast medium (SSD). This diff forks a process to merge the type environments while the parent performs the DB merge as normal. Initial results show an almost 2x improvement. Reviewed By: skcho Differential Revision: D28438808 fbshipit-source-id: 89c96f25b --- infer/src/backend/mergeCapture.ml | 57 ++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/infer/src/backend/mergeCapture.ml b/infer/src/backend/mergeCapture.ml index 4dc57335b..697c9d559 100644 --- a/infer/src/backend/mergeCapture.ml +++ b/infer/src/backend/mergeCapture.ml @@ -12,22 +12,47 @@ module YBU = Yojson.Basic.Util (** Module to merge the results of capture for different buck targets. 
*) -let merge_global_tenvs infer_deps_file = - let time0 = Mtime_clock.counter () in - let global_tenv = Tenv.create () in - let merge infer_out_src = - let global_tenv_path = - ResultsDirEntryName.get_path ~results_dir:infer_out_src JavaGlobalTypeEnvironment - |> DB.filename_from_string +module TenvMerger = struct + let merge_global_tenvs infer_deps_file = + let time0 = Mtime_clock.counter () in + let global_tenv = Tenv.create () in + let merge infer_out_src = + let global_tenv_path = + ResultsDirEntryName.get_path ~results_dir:infer_out_src JavaGlobalTypeEnvironment + |> DB.filename_from_string + in + Tenv.read global_tenv_path + |> Option.iter ~f:(fun tenv -> Tenv.merge ~src:tenv ~dst:global_tenv) in - Tenv.read global_tenv_path |> Option.iter ~f:(fun tenv -> Tenv.merge ~src:tenv ~dst:global_tenv) - in - Utils.iter_infer_deps ~project_root:Config.project_root ~f:merge infer_deps_file ; - let time1 = Mtime_clock.counter () in - Tenv.store_global global_tenv ; - L.progress "Merging type environments took %a, of which %a were spent storing the global tenv@." - Mtime.Span.pp (Mtime_clock.count time0) Mtime.Span.pp (Mtime_clock.count time1) + Utils.iter_infer_deps ~project_root:Config.project_root ~f:merge infer_deps_file ; + let time1 = Mtime_clock.counter () in + Tenv.store_global global_tenv ; + L.progress "Merging type environments took %a, of which %a were spent storing the global tenv@." 
+ Mtime.Span.pp (Mtime_clock.count time0) Mtime.Span.pp (Mtime_clock.count time1) + + + let merge_global_tenvs infer_deps_file = + ScubaLogging.execute_with_time_logging "merge_captured_tenvs" (fun () -> + merge_global_tenvs infer_deps_file ) + + + let start infer_deps_file = + match Unix.fork () with + | `In_the_child -> + ForkUtils.protect ~f:merge_global_tenvs infer_deps_file ; + L.exit 0 + | `In_the_parent child_pid -> + child_pid + + let wait child_pid = + match Unix.waitpid child_pid with + | Error _ as err -> + L.die InternalError "Worker terminated abnormally: %s.@\n" + (Unix.Exit_or_signal.to_string_hum err) + | Ok () -> + () +end let merge_json_results infer_out_src json_entry = let main_changed_fs_file = ResultsDir.get_path json_entry in @@ -62,9 +87,9 @@ let merge_captured_targets () = let time0 = Mtime_clock.counter () in L.progress "Merging captured Buck targets...@\n%!" ; let infer_deps_file = ResultsDir.get_path CaptureDependencies in + let tenv_merger_child = TenvMerger.start infer_deps_file in DBWriter.merge ~infer_deps_file ; - ScubaLogging.execute_with_time_logging "merge_captured_tenvs" (fun () -> - merge_global_tenvs infer_deps_file ) ; + TenvMerger.wait tenv_merger_child ; let targets_num = let counter = ref 0 in let incr_counter _line = incr counter in