From b212c4e20d68d232f4f63881bffcb539d756a2aa Mon Sep 17 00:00:00 2001 From: Fernando Gasperi Jabalera Date: Fri, 7 Feb 2020 03:41:33 -0800 Subject: [PATCH] [schedulers] Lazy initialization to avoid fork copies Summary: The main job the schedulers do is building their work queues. That's being performed before the workers are forked which means they get copied into all of them. These changes push the initialization of the schedulers just after the forking takes place. Reviewed By: ngorogiannis Differential Revision: D19769741 fbshipit-source-id: 0b20ddd5c --- infer/src/backend/InferAnalyze.ml | 6 +++--- infer/src/backend/Tasks.ml | 8 ++++++-- infer/src/backend/Tasks.mli | 2 +- infer/src/base/ProcessPool.ml | 4 ++-- infer/src/base/ProcessPool.mli | 2 +- infer/src/integration/CaptureCompilationDatabase.ml | 2 +- infer/src/unit/RestartSchedulerTests.ml | 4 ++++ 7 files changed, 18 insertions(+), 10 deletions(-) diff --git a/infer/src/backend/InferAnalyze.ml b/infer/src/backend/InferAnalyze.ml index f217d57ec..6912ef705 100644 --- a/infer/src/backend/InferAnalyze.ml +++ b/infer/src/backend/InferAnalyze.ml @@ -116,7 +116,7 @@ let get_source_files_to_analyze ~changed_files = source_files_to_analyze -let schedule sources = +let tasks_generator_builder_for sources = if Config.call_graph_schedule then ( CLOpt.warnf "WARNING: '--call-graph-schedule' is deprecated. Use '--scheduler' instead.@." ; SyntacticCallGraph.make sources ) @@ -137,12 +137,12 @@ let analyze source_files_to_analyze = BackendStats.get () ) else ( L.environment_info "Parallel jobs: %d@." 
Config.jobs ; - let tasks = schedule source_files_to_analyze in + let build_tasks_generator () = tasks_generator_builder_for source_files_to_analyze in (* Prepare tasks one cluster at a time while executing in parallel *) RestartScheduler.setup () ; let runner = Tasks.Runner.create ~jobs:Config.jobs ~f:analyze_target ~child_epilogue:BackendStats.get - ~tasks + ~tasks:build_tasks_generator in let workers_stats = Tasks.Runner.run runner in RestartScheduler.clean () ; diff --git a/infer/src/backend/Tasks.ml b/infer/src/backend/Tasks.ml index 9ed9e8d20..12ebbfdbb 100644 --- a/infer/src/backend/Tasks.ml +++ b/infer/src/backend/Tasks.ml @@ -11,6 +11,11 @@ type 'a doer = 'a -> unit let fork_protect ~f x = BackendStats.reset () ; ForkUtils.protect ~f x +let with_new_db_connection ~f () = + ResultsDatabase.new_database_connection () ; + f () + + module Runner = struct type ('work, 'final) t = ('work, 'final) ProcessPool.t @@ -19,12 +24,11 @@ module Runner = struct log (fun logger -> log_begin_event logger ~categories:["sys"] ~name:"fork prepare" ())) ; ResultsDatabase.db_close () ; let pool = - ProcessPool.create ~jobs ~f ~child_epilogue ~tasks + ProcessPool.create ~jobs ~f ~child_epilogue ~tasks:(with_new_db_connection ~f:tasks) ~child_prelude: ((* hack: run post-fork bookkeeping stuff by passing a dummy function to [fork_protect] *) fork_protect ~f:(fun () -> ())) in - ResultsDatabase.new_database_connection () ; PerfEvent.(log (fun logger -> log_end_event logger ())) ; pool diff --git a/infer/src/backend/Tasks.mli b/infer/src/backend/Tasks.mli index b95ecc9a3..21ab557e2 100644 --- a/infer/src/backend/Tasks.mli +++ b/infer/src/backend/Tasks.mli @@ -23,7 +23,7 @@ module Runner : sig jobs:int -> f:'work doer -> child_epilogue:(unit -> 'final) - -> tasks:'work ProcessPool.TaskGenerator.t + -> tasks:(unit -> 'work ProcessPool.TaskGenerator.t) -> ('work, 'final) t (** Create a runner running [jobs] jobs in parallel *) diff --git a/infer/src/base/ProcessPool.ml 
b/infer/src/base/ProcessPool.ml index 6a336396f..27c1db83b 100644 --- a/infer/src/base/ProcessPool.ml +++ b/infer/src/base/ProcessPool.ml @@ -430,7 +430,7 @@ let create : -> child_prelude:(unit -> unit) -> f:('work -> unit) -> child_epilogue:(unit -> 'final) - -> tasks:'work TaskGenerator.t + -> tasks:(unit -> 'work TaskGenerator.t) -> ('work, 'final) t = fun ~jobs ~child_prelude ~f ~child_epilogue ~tasks -> let file_lock = Utils.create_file_lock () in @@ -447,7 +447,7 @@ let create : let[@warning "-26"] pipe_child_w = Unix.close pipe_child_w in let children_updates = pipe_child_r in let children_states = Array.create ~len:jobs Initializing in - {slots; children_updates; jobs; task_bar; tasks; children_states; file_lock} + {slots; children_updates; jobs; task_bar; tasks= tasks (); children_states; file_lock} let run pool = diff --git a/infer/src/base/ProcessPool.mli b/infer/src/base/ProcessPool.mli index 8fe7b8a86..3ab0bf84a 100644 --- a/infer/src/base/ProcessPool.mli +++ b/infer/src/base/ProcessPool.mli @@ -58,7 +58,7 @@ val create : -> child_prelude:(unit -> unit) -> f:('work -> unit) -> child_epilogue:(unit -> 'final) - -> tasks:'work TaskGenerator.t + -> tasks:(unit -> 'work TaskGenerator.t) -> ('work, 'final) t (** Create a new pool of processes running [jobs] jobs in parallel *) diff --git a/infer/src/integration/CaptureCompilationDatabase.ml b/infer/src/integration/CaptureCompilationDatabase.ml index aafec22a5..24ba09521 100644 --- a/infer/src/integration/CaptureCompilationDatabase.ml +++ b/infer/src/integration/CaptureCompilationDatabase.ml @@ -54,7 +54,7 @@ let run_compilation_database compilation_database should_capture_file = "Starting %s %d files@\n%!" Config.clang_frontend_action_string number_of_jobs ; L.progress "Starting %s %d files@\n%!" 
Config.clang_frontend_action_string number_of_jobs ; let compilation_commands = List.map ~f:create_cmd compilation_data in - let tasks = ProcessPool.TaskGenerator.of_list compilation_commands in + let tasks () = ProcessPool.TaskGenerator.of_list compilation_commands in (* no stats to record so [child_epilogue] does nothing and we ignore the return {!Tasks.Runner.run} *) let runner = diff --git a/infer/src/unit/RestartSchedulerTests.ml b/infer/src/unit/RestartSchedulerTests.ml index 35814afe4..e6b8bd3ba 100644 --- a/infer/src/unit/RestartSchedulerTests.ml +++ b/infer/src/unit/RestartSchedulerTests.ml @@ -10,6 +10,10 @@ open OUnit2 let a_pname = Procname.from_string_c_fun "a_c_fun_name" +(* Tests are organized like this instead of using one function per test because + OUnit runs tests in parallel and, since all tests use the same output directory + (infer-out-unit), the file locks would collide because they all live in a + directory called procnames_locks inside the output dir. *) let tests_wrapper _test_ctxt = ProcLocker.( setup () ;