From 35ad7dd8bf4e43d11df3a60e96d7592d1e3be973 Mon Sep 17 00:00:00 2001 From: Martino Luca Date: Tue, 19 Sep 2017 02:24:55 -0700 Subject: [PATCH] [AL] Use Parmap to schedule parallel processes on compilation-database-based analyses Summary: Parmap delivers a better scheduling, which works as a pipeline, as opposed to what existed before, which schedules processes in batches. Reviewed By: jvillard Differential Revision: D5678661 fbshipit-source-id: a632c71 --- infer/src/base/Config.ml | 2 + infer/src/base/Config.mli | 2 + infer/src/base/Process.ml | 67 ------------- infer/src/base/Process.mli | 9 -- .../integration/CaptureCompilationDatabase.ml | 98 +++++++++++-------- infer/src/integration/CompilationDatabase.ml | 3 + infer/src/integration/CompilationDatabase.mli | 2 + 7 files changed, 66 insertions(+), 117 deletions(-) diff --git a/infer/src/base/Config.ml b/infer/src/base/Config.ml index bf49c3c1f..ed1430708 100644 --- a/infer/src/base/Config.ml +++ b/infer/src/base/Config.ml @@ -153,6 +153,8 @@ let lint_dotty_dir_name = "lint_dotty" let lint_issues_dir_name = "lint_issues" +let linters_failed_sentinel_filename = "linters_failed_sentinel" + (** letters used in the analysis output *) let log_analysis_file = "F" diff --git a/infer/src/base/Config.mli b/infer/src/base/Config.mli index 1d54715d5..990bfc79d 100644 --- a/infer/src/base/Config.mli +++ b/infer/src/base/Config.mli @@ -147,6 +147,8 @@ val lint_dotty_dir_name : string val lint_issues_dir_name : string +val linters_failed_sentinel_filename : string + val load_average : float option val log_analysis_crash : string diff --git a/infer/src/base/Process.ml b/infer/src/base/Process.ml index beb943aa3..1c0199c06 100644 --- a/infer/src/base/Process.ml +++ b/infer/src/base/Process.ml @@ -34,73 +34,6 @@ let create_process_and_wait ~prog ~args = (String.concat ~sep:" " (prog :: args)) (Unix.Exit_or_signal.to_string_hum status) -(** Given a process id and a function that describes the command that the process id - represents, prints a message explaining the command and its status, if in debug or stats mode. - It also prints a dot to show progress of jobs being finished. *) -let print_status ~fail_on_failed_job f pid status = - L.(debug Analysis Medium) - "%a%s@." - (fun fmt pid -> F.pp_print_string fmt (f pid)) - pid (Unix.Exit_or_signal.to_string_hum status) ; - L.progress ".%!" ; - match status with - | Error err when fail_on_failed_job - -> L.exit (match err with `Exit_non_zero i -> i | `Signal _ -> 1) - | _ - -> () - -let start_current_jobs_count () = ref 0 - -let waited_for_jobs = ref 0 - -module PidMap = Caml.Map.Make (Pid) - -(** [wait_for_son pid_child f jobs_count] wait for pid_child - and all the other children and update the current jobs count. - Use f to print the job status *) -let rec wait_for_child ~fail_on_failed_job f current_jobs_count jobs_map = - let pid, status = Unix.wait `Any in - Pervasives.decr current_jobs_count ; - Pervasives.incr waited_for_jobs ; - print_status ~fail_on_failed_job f pid status ; - jobs_map := PidMap.remove pid !jobs_map ; - if not (PidMap.is_empty !jobs_map) then - wait_for_child ~fail_on_failed_job f current_jobs_count jobs_map - -let pid_to_program jobsMap pid = - try PidMap.find pid jobsMap - with Not_found -> "" - -(** [run_jobs_in_parallel jobs_stack gen_prog prog_to_string] runs the jobs in the given stack, by - spawning the jobs in batches of n, where n is [Config.jobs]. It then waits for all those jobs - and starts a new batch and so on. [gen_prog] should return a tuple [(dir_opt, command, args, - env)] where [dir_opt] is an optional directory to chdir to before executing [command] with - [args] in [env]. [prog_to_string] is used for printing information about the job's status. *) -let run_jobs_in_parallel ?(fail_on_failed_job= false) jobs_stack gen_prog prog_to_string = - let run_job () = - let jobs_map = ref PidMap.empty in - let current_jobs_count = start_current_jobs_count () in - while not (Stack.is_empty jobs_stack) do - let job_prog = Stack.pop_exn jobs_stack in - let dir_opt, prog, args, env = gen_prog job_prog in - Pervasives.incr current_jobs_count ; - match Unix.fork () with - | `In_the_child - -> Option.iter dir_opt ~f:Unix.chdir ; - Unix.exec ~prog ~argv:(prog :: args) ~env ~use_path:false |> Unix.handle_unix_error - |> never_returns - | `In_the_parent pid_child - -> jobs_map := PidMap.add pid_child (prog_to_string job_prog) !jobs_map ; - if Int.equal (Stack.length jobs_stack) 0 || !current_jobs_count >= Config.jobs then - wait_for_child ~fail_on_failed_job - (pid_to_program !jobs_map) - current_jobs_count jobs_map - done - in - run_job () ; - L.progress ".@." ; - L.(debug Analysis Medium) "Waited for %d jobs" !waited_for_jobs - let pipeline ~producer_prog ~producer_args ~consumer_prog ~consumer_args = let pipe_in, pipe_out = Unix.pipe () in match Unix.fork () with diff --git a/infer/src/base/Process.mli b/infer/src/base/Process.mli index a86814e55..4deb4389b 100644 --- a/infer/src/base/Process.mli +++ b/infer/src/base/Process.mli @@ -18,15 +18,6 @@ val print_error_and_exit : ?exit_code:int -> ('a, Format.formatter, unit, 'b) fo (** Prints an error message to a log file, prints a message saying that the error can be found in that file, and exist, with default code 1 or a given code. *) -val run_jobs_in_parallel : - ?fail_on_failed_job:bool -> 'a Stack.t -> ('a -> string option * string * string list * Unix.env) - -> ('a -> string) -> unit -(** [run_jobs_in_parallel jobs_stack gen_prog prog_to_string] runs the jobs in the given stack, by - spawning the jobs in batches of n, where n is [Config.jobs]. It then waits for all those jobs - and starts a new batch and so on. [gen_prog] should return a tuple [(dir_opt, command, args, - env)] where [dir_opt] is an optional directory to chdir to before executing [command] with - [args] in [env]. [prog_to_string] is used for printing information about the job's status. *) - val pipeline : producer_prog:string -> producer_args:string list -> consumer_prog:string -> consumer_args:string list -> Unix.Exit_or_signal.t * Unix.Exit_or_signal.t diff --git a/infer/src/integration/CaptureCompilationDatabase.ml b/infer/src/integration/CaptureCompilationDatabase.ml index 6cac62851..d4faab4d4 100644 --- a/infer/src/integration/CaptureCompilationDatabase.ml +++ b/infer/src/integration/CaptureCompilationDatabase.ml @@ -12,57 +12,73 @@ module F = Format module CLOpt = CommandLineOption module L = Logging -let capture_text = - if Config.equal_analyzer Config.analyzer Config.Linters then "linting" else "translating" +type cmd = {cwd: string; prog: string; args: string} -let create_files_stack compilation_database should_capture_file = - let stack = Stack.create () in - let add_to_stack file _ = if should_capture_file file then Stack.push stack file in - CompilationDatabase.iter compilation_database add_to_stack ; stack +let create_cmd (compilation_data: CompilationDatabase.compilation_data) = + let swap_command cmd = + if String.is_suffix ~suffix:"++" cmd then Config.wrappers_dir ^/ "clang++" + else Config.wrappers_dir ^/ "clang" + in + let arg_file = + ClangQuotes.mk_arg_file "cdb_clang_args_" ClangQuotes.EscapedNoQuotes [compilation_data.args] + in + {cwd= compilation_data.dir; prog= swap_command compilation_data.command; args= arg_file} -let swap_command cmd = - let plusplus = "++" in - let clang = "clang" in - let clangplusplus = "clang++" in - if String.is_suffix ~suffix:plusplus cmd then Config.wrappers_dir ^/ clangplusplus - else Config.wrappers_dir ^/ clang +(* A sentinel is a file which indicates that a failure occurred in another infer process. + Because infer processes run in parallel but do not share any memory, we use the + filesystem to signal failures across processes. *) +let sentinel_exists sentinel_opt = + let file_exists sentinel = PVariant.( = ) (Sys.file_exists sentinel) `Yes in + Option.value_map ~default:false sentinel_opt ~f:file_exists -let run_compilation_file compilation_database file = - try - let compilation_data = CompilationDatabase.find compilation_database file in - let wrapper_cmd = swap_command compilation_data.command in - let arg_file = - ClangQuotes.mk_arg_file "cdb_clang_args_" ClangQuotes.EscapedNoQuotes [compilation_data.args] +let invoke_cmd ~fail_sentinel cmd = + if sentinel_exists fail_sentinel then L.progress "E%!" + else ( + Unix.chdir cmd.cwd ; + let pid = + Unix.fork_exec ~prog:cmd.prog ~argv:[cmd.prog; ("@" ^ cmd.args); "-fsyntax-only"] + ~use_path:false () in - let args = [("@" ^ arg_file)] in - let env = - `Extend - [ ( CLOpt.args_env_var - , String.concat ~sep:(String.of_char CLOpt.env_var_sep) - (Option.to_list (Sys.getenv CLOpt.args_env_var) @ ["--fcp-syntax-only"]) ) ] + let create_sentinel_if_needed () = + let create_empty_file fname = Utils.with_file_out ~f:(fun _ -> ()) fname in + Option.iter fail_sentinel ~f:create_empty_file in - (Some compilation_data.dir, wrapper_cmd, args, env) - with Not_found -> - Process.print_error_and_exit "Failed to find compilation data for %a@\n%!" SourceFile.pp file + match Unix.waitpid pid with + | Ok () + -> L.progress ".%!" + | Error _ + -> L.progress "!%!" ; create_sentinel_if_needed () ) let run_compilation_database compilation_database should_capture_file = - let number_of_files = CompilationDatabase.get_size compilation_database in - L.(debug Capture Quiet) "Starting %s %d files@\n%!" capture_text number_of_files ; - L.progress "Starting %s %d files@\n%!" capture_text number_of_files ; - let jobs_stack = create_files_stack compilation_database should_capture_file in - let capture_text_upper = String.capitalize capture_text in - let job_to_string file = Format.asprintf "%s %a" capture_text_upper SourceFile.pp file in - let fail_on_failed_job = - if Config.linters_ignore_clang_failures then false + let compilation_data = + CompilationDatabase.filter_compilation_data compilation_database ~f:should_capture_file + in + let number_of_jobs = List.length compilation_data in + let capture_text = + if Config.equal_analyzer Config.analyzer Config.Linters then "linting" else "translating" + in + L.(debug Capture Quiet) "Starting %s %d files@\n%!" capture_text number_of_jobs ; + L.progress "Starting %s %d files@\n%!" capture_text number_of_jobs ; + let sequence = Parmap.L (List.map ~f:create_cmd compilation_data) in + let fail_sentinel_fname = Config.results_dir ^/ Config.linters_failed_sentinel_filename in + let fail_sentinel = + if Config.linters_ignore_clang_failures then None else match Config.buck_compilation_database with - | Some NoDeps - -> Config.clang_frontend_do_lint - | _ - -> false + | Some NoDeps when Config.clang_frontend_do_lint + -> Some fail_sentinel_fname + | Some NoDeps | Some Deps _ | None + -> None in - Process.run_jobs_in_parallel ~fail_on_failed_job jobs_stack - (run_compilation_file compilation_database) job_to_string + Utils.rmtree fail_sentinel_fname ; + let chunksize = min (List.length compilation_data / Config.jobs + 1) 10 in + Parmap.pariter ~ncores:Config.jobs ~chunksize (invoke_cmd ~fail_sentinel) sequence ; + L.progress "@." ; + L.(debug Analysis Medium) "Ran %d jobs" number_of_jobs ; + if sentinel_exists fail_sentinel then ( + L.progress + "Failure detected, capture did not finish successfully. Use `--linters-ignore-clang-failures` to ignore compilation errors. Terminating@." ; + L.exit 1 ) (** Computes the compilation database files. *) let get_compilation_database_files_buck ~prog ~args = diff --git a/infer/src/integration/CompilationDatabase.ml b/infer/src/integration/CompilationDatabase.ml index b21cd2498..2ff752739 100644 --- a/infer/src/integration/CompilationDatabase.ml +++ b/infer/src/integration/CompilationDatabase.ml @@ -20,6 +20,9 @@ let get_size database = SourceFile.Map.cardinal !database let iter database f = SourceFile.Map.iter f !database +let filter_compilation_data database ~f = + SourceFile.Map.filter (fun s _ -> f s) !database |> SourceFile.Map.bindings |> List.map ~f:snd + let find database key = SourceFile.Map.find key !database let parse_command_and_arguments command_and_arguments = diff --git a/infer/src/integration/CompilationDatabase.mli b/infer/src/integration/CompilationDatabase.mli index 2dbf866d1..4917bf936 100644 --- a/infer/src/integration/CompilationDatabase.mli +++ b/infer/src/integration/CompilationDatabase.mli @@ -19,6 +19,8 @@ val get_size : t -> int val iter : t -> (SourceFile.t -> compilation_data -> unit) -> unit +val filter_compilation_data : t -> f:(SourceFile.t -> bool) -> compilation_data list + val find : t -> SourceFile.t -> compilation_data val decode_json_file : t -> [< `Escaped of string | `Raw of string] -> unit