[schedulers] Lazy initialization to avoid fork copies

Summary: The main job the schedulers do is building their work queues. That was being performed before the workers were forked, which meant the queues got copied into every worker process. These changes push the initialization of the schedulers to just after the forking takes place.

Reviewed By: ngorogiannis

Differential Revision: D19769741

fbshipit-source-id: 0b20ddd5c
master
Fernando Gasperi Jabalera 5 years ago committed by Facebook Github Bot
parent bda56adb74
commit b212c4e20d

@ -116,7 +116,7 @@ let get_source_files_to_analyze ~changed_files =
source_files_to_analyze
let schedule sources =
let tasks_generator_builder_for sources =
if Config.call_graph_schedule then (
CLOpt.warnf "WARNING: '--call-graph-schedule' is deprecated. Use '--scheduler' instead.@." ;
SyntacticCallGraph.make sources )
@ -137,12 +137,12 @@ let analyze source_files_to_analyze =
BackendStats.get () )
else (
L.environment_info "Parallel jobs: %d@." Config.jobs ;
let tasks = schedule source_files_to_analyze in
let build_tasks_generator () = tasks_generator_builder_for source_files_to_analyze in
(* Prepare tasks one cluster at a time while executing in parallel *)
RestartScheduler.setup () ;
let runner =
Tasks.Runner.create ~jobs:Config.jobs ~f:analyze_target ~child_epilogue:BackendStats.get
~tasks
~tasks:build_tasks_generator
in
let workers_stats = Tasks.Runner.run runner in
RestartScheduler.clean () ;

@ -11,6 +11,11 @@ type 'a doer = 'a -> unit
let fork_protect ~f x = BackendStats.reset () ; ForkUtils.protect ~f x
(* [with_new_db_connection ~f ()] opens a fresh results-database connection and
   then runs [f ()]. NOTE(review): it is used below to wrap the [tasks]
   generator passed to [ProcessPool.create], so the connection is only
   (re)established when the thunk is actually invoked — presumably after the
   worker processes have been forked; confirm against the call site. *)
let with_new_db_connection ~f () =
ResultsDatabase.new_database_connection () ;
f ()
module Runner = struct
type ('work, 'final) t = ('work, 'final) ProcessPool.t
@ -19,12 +24,11 @@ module Runner = struct
log (fun logger -> log_begin_event logger ~categories:["sys"] ~name:"fork prepare" ())) ;
ResultsDatabase.db_close () ;
let pool =
ProcessPool.create ~jobs ~f ~child_epilogue ~tasks
ProcessPool.create ~jobs ~f ~child_epilogue ~tasks:(with_new_db_connection ~f:tasks)
~child_prelude:
((* hack: run post-fork bookkeeping stuff by passing a dummy function to [fork_protect] *)
fork_protect ~f:(fun () -> ()))
in
ResultsDatabase.new_database_connection () ;
PerfEvent.(log (fun logger -> log_end_event logger ())) ;
pool

@ -23,7 +23,7 @@ module Runner : sig
jobs:int
-> f:'work doer
-> child_epilogue:(unit -> 'final)
-> tasks:'work ProcessPool.TaskGenerator.t
-> tasks:(unit -> 'work ProcessPool.TaskGenerator.t)
-> ('work, 'final) t
(** Create a runner running [jobs] jobs in parallel *)

@ -430,7 +430,7 @@ let create :
-> child_prelude:(unit -> unit)
-> f:('work -> unit)
-> child_epilogue:(unit -> 'final)
-> tasks:'work TaskGenerator.t
-> tasks:(unit -> 'work TaskGenerator.t)
-> ('work, 'final) t =
fun ~jobs ~child_prelude ~f ~child_epilogue ~tasks ->
let file_lock = Utils.create_file_lock () in
@ -447,7 +447,7 @@ let create :
let[@warning "-26"] pipe_child_w = Unix.close pipe_child_w in
let children_updates = pipe_child_r in
let children_states = Array.create ~len:jobs Initializing in
{slots; children_updates; jobs; task_bar; tasks; children_states; file_lock}
{slots; children_updates; jobs; task_bar; tasks= tasks (); children_states; file_lock}
let run pool =

@ -58,7 +58,7 @@ val create :
-> child_prelude:(unit -> unit)
-> f:('work -> unit)
-> child_epilogue:(unit -> 'final)
-> tasks:'work TaskGenerator.t
-> tasks:(unit -> 'work TaskGenerator.t)
-> ('work, 'final) t
(** Create a new pool of processes running [jobs] jobs in parallel *)

@ -54,7 +54,7 @@ let run_compilation_database compilation_database should_capture_file =
"Starting %s %d files@\n%!" Config.clang_frontend_action_string number_of_jobs ;
L.progress "Starting %s %d files@\n%!" Config.clang_frontend_action_string number_of_jobs ;
let compilation_commands = List.map ~f:create_cmd compilation_data in
let tasks = ProcessPool.TaskGenerator.of_list compilation_commands in
let tasks () = ProcessPool.TaskGenerator.of_list compilation_commands in
(* no stats to record so [child_epilogue] does nothing and we ignore the return
{!Tasks.Runner.run} *)
let runner =

@ -10,6 +10,10 @@ open OUnit2
let a_pname = Procname.from_string_c_fun "a_c_fun_name"
(* Tests are organized like this instead of using one function per test because
OUnit runs tests in parallel and, since all tests use the same output directory
(infer-out-unit), the file locks would collide because they all live in a
directory called procnames_locks inside the output dir. *)
let tests_wrapper _test_ctxt =
ProcLocker.(
setup () ;

Loading…
Cancel
Save