From 5406fa3224de0d680d41cd4e537479e5cabe37af Mon Sep 17 00:00:00 2001 From: Nikos Gorogiannis Date: Mon, 7 Sep 2020 08:47:24 -0700 Subject: [PATCH] [scheduler][restart] use filenames instead of procnames for dependencies Summary: Limit communication bandwidth and serialisation burden by sending procedure filename strings (which are bounded at ~100 bytes) instead of serialising procnames through the socket to the scheduler (which are unbounded and have been seen to reach ~30kB in the worst case for templated procedures). Context: Under the restart scheduler, a worker working on a procedure X that discovers a race on a dependency Y it needs fails the computation of X and sends to the scheduler the procname Y. The next time X is about to be rescheduled, the scheduler checks whether Y is still being analysed, by checking if the lock for Y still exists. This check uses the procedure filename already, so we can send that instead. Reviewed By: jvillard Differential Revision: D23554995 fbshipit-source-id: 9828e71a2 --- infer/src/absint/TaskSchedulerTypes.ml | 2 +- infer/src/backend/FileScheduler.mli | 2 +- infer/src/backend/InferAnalyze.ml | 8 +++++--- infer/src/backend/ProcLocker.ml | 12 +++++++----- infer/src/backend/ProcLocker.mli | 2 +- infer/src/backend/RestartScheduler.ml | 21 ++++++++++++--------- infer/src/backend/RestartScheduler.mli | 2 +- infer/src/backend/SyntacticCallGraph.ml | 2 +- infer/src/backend/SyntacticCallGraph.mli | 2 +- 9 files changed, 30 insertions(+), 23 deletions(-) diff --git a/infer/src/absint/TaskSchedulerTypes.ml b/infer/src/absint/TaskSchedulerTypes.ml index 304da6d2a..9a5ecc0b6 100644 --- a/infer/src/absint/TaskSchedulerTypes.ml +++ b/infer/src/absint/TaskSchedulerTypes.ml @@ -8,6 +8,6 @@ open! IStd (** for the Restart scheduler: raise when a worker tries to analyze a procedure already being analyzed by another process *) -exception ProcnameAlreadyLocked of Procname.t +exception ProcnameAlreadyLocked of {dependency_filename: string} type target = Procname of Procname.t | File of SourceFile.t diff --git a/infer/src/backend/FileScheduler.mli b/infer/src/backend/FileScheduler.mli index 945815b23..db3d4ada8 100644 --- a/infer/src/backend/FileScheduler.mli +++ b/infer/src/backend/FileScheduler.mli @@ -6,4 +6,4 @@ *) open! IStd -val make : SourceFile.t list -> (TaskSchedulerTypes.target, Procname.t) ProcessPool.TaskGenerator.t +val make : SourceFile.t list -> (TaskSchedulerTypes.target, string) ProcessPool.TaskGenerator.t diff --git a/infer/src/backend/InferAnalyze.ml b/infer/src/backend/InferAnalyze.ml index cdfbb11af..c6277c5ef 100644 --- a/infer/src/backend/InferAnalyze.ml +++ b/infer/src/backend/InferAnalyze.ml @@ -24,7 +24,7 @@ let clear_caches () = clear_caches_except_lrus () -let analyze_target : (TaskSchedulerTypes.target, Procname.t) Tasks.doer = +let analyze_target : (TaskSchedulerTypes.target, string) Tasks.doer = let analyze_source_file exe_env source_file = if Topl.is_active () then DB.Results_dir.init (Topl.sourcefile ()) ; DB.Results_dir.init source_file ; @@ -35,7 +35,8 @@ let analyze_target : (TaskSchedulerTypes.target, Procname.t) Tasks.doer = DotCfg.emit_frontend_cfg (Topl.sourcefile ()) (Topl.cfg ()) ; if Config.write_html then Printer.write_all_html_files source_file ; None - with TaskSchedulerTypes.ProcnameAlreadyLocked pname -> Some pname ) + with TaskSchedulerTypes.ProcnameAlreadyLocked {dependency_filename} -> + Some dependency_filename ) in (* In call-graph scheduling, log progress every [per_procedure_logging_granularity] procedures. The default roughly reflects the average number of procedures in a C++ file. *) @@ -51,7 +52,8 @@ let analyze_target : (TaskSchedulerTypes.target, Procname.t) Tasks.doer = try Ondemand.analyze_proc_name_toplevel exe_env proc_name ; None - with TaskSchedulerTypes.ProcnameAlreadyLocked pname -> Some pname + with TaskSchedulerTypes.ProcnameAlreadyLocked {dependency_filename} -> + Some dependency_filename in fun target -> let exe_env = Exe_env.mk () in diff --git a/infer/src/backend/ProcLocker.ml b/infer/src/backend/ProcLocker.ml index cf6c19320..5aceb2faf 100644 --- a/infer/src/backend/ProcLocker.ml +++ b/infer/src/backend/ProcLocker.ml @@ -33,24 +33,26 @@ let setup () = let clean () = () -let filename_from pname = locks_dir ^/ Procname.to_filename pname +let lock_of_filename filename = locks_dir ^/ filename + +let lock_of_procname pname = lock_of_filename (Procname.to_filename pname) let unlock pname = record_time_of ~log_f:log_unlock_time ~f:(fun () -> - try Unix.unlink (filename_from pname) + try Unix.unlink (lock_of_procname pname) with Unix.Unix_error (Unix.ENOENT, _, _) -> raise (UnlockNotLocked pname) ) let try_lock pname = record_time_of ~log_f:log_lock_time ~f:(fun () -> try - Unix.symlink ~target:locks_target ~link_name:(filename_from pname) ; + Unix.symlink ~target:locks_target ~link_name:(lock_of_procname pname) ; true with Unix.Unix_error (Unix.EEXIST, _, _) -> false ) -let is_locked pname = +let is_locked ~proc_filename = try - ignore (Unix.lstat (filename_from pname)) ; + ignore (Unix.lstat (lock_of_filename proc_filename)) ; true with Unix.Unix_error (Unix.ENOENT, _, _) -> false diff --git a/infer/src/backend/ProcLocker.mli b/infer/src/backend/ProcLocker.mli index d0897d7d1..71c8e64e1 100644 --- a/infer/src/backend/ProcLocker.mli +++ b/infer/src/backend/ProcLocker.mli @@ -23,4 +23,4 @@ val clean : unit -> unit (** This should be called when locks will no longer be used to remove any files or state that's not necessary. *) -val is_locked : Procname.t -> bool +val is_locked : proc_filename:string -> bool diff --git a/infer/src/backend/RestartScheduler.ml b/infer/src/backend/RestartScheduler.ml index d21ebb589..a920a4d62 100644 --- a/infer/src/backend/RestartScheduler.ml +++ b/infer/src/backend/RestartScheduler.ml @@ -7,9 +7,9 @@ open! IStd module L = Logging -type work_with_dependency = {work: TaskSchedulerTypes.target; need_to_finish: Procname.t option} +type work_with_dependency = {work: TaskSchedulerTypes.target; dependency_filename_opt: string option} -let of_list (lst : work_with_dependency list) : ('a, Procname.t) ProcessPool.TaskGenerator.t = +let of_list (lst : work_with_dependency list) : ('a, string) ProcessPool.TaskGenerator.t = let content = Queue.of_list lst in let remaining = ref (Queue.length content) in let remaining_tasks () = !remaining in @@ -18,12 +18,12 @@ let of_list (lst : work_with_dependency list) : ('a, Procname.t) ProcessPool.Tas match result with | None -> decr remaining - | Some _ as need_to_finish -> - Queue.enqueue content {work; need_to_finish} + | Some _ as dependency_filename_opt -> + Queue.enqueue content {work; dependency_filename_opt} in let work_if_dependency_allows w = - match w.need_to_finish with - | Some pname when ProcLocker.is_locked pname -> + match w.dependency_filename_opt with + | Some dependency_filename when ProcLocker.is_locked ~proc_filename:dependency_filename -> Queue.enqueue content w ; None | None | Some _ -> @@ -38,10 +38,11 @@ let make sources = List.map sources ~f:SourceFiles.proc_names_of_source |> List.concat |> List.rev_map ~f:(fun procname -> - {work= TaskSchedulerTypes.Procname procname; need_to_finish= None} ) + {work= TaskSchedulerTypes.Procname procname; dependency_filename_opt= None} ) in let files = - List.map sources ~f:(fun file -> {work= TaskSchedulerTypes.File file; need_to_finish= None}) + List.map sources ~f:(fun file -> + {work= TaskSchedulerTypes.File file; dependency_filename_opt= None} ) in let permute = List.permute ~random_state:(Random.State.make (Array.create ~len:1 0)) in permute pnames @ permute files |> of_list @@ -90,7 +91,9 @@ let lock_exn pname = if ProcLocker.try_lock pname then record_locked_proc pname else ( unlock_all () ; - raise (TaskSchedulerTypes.ProcnameAlreadyLocked pname) ) ) + raise + (TaskSchedulerTypes.ProcnameAlreadyLocked {dependency_filename= Procname.to_filename pname}) + ) ) let unlock pname = diff --git a/infer/src/backend/RestartScheduler.mli b/infer/src/backend/RestartScheduler.mli index 79f08e1d3..4b7163c0a 100644 --- a/infer/src/backend/RestartScheduler.mli +++ b/infer/src/backend/RestartScheduler.mli @@ -14,4 +14,4 @@ val lock_exn : Procname.t -> unit val unlock : Procname.t -> unit -val make : SourceFile.t list -> (TaskSchedulerTypes.target, Procname.t) ProcessPool.TaskGenerator.t +val make : SourceFile.t list -> (TaskSchedulerTypes.target, string) ProcessPool.TaskGenerator.t diff --git a/infer/src/backend/SyntacticCallGraph.ml b/infer/src/backend/SyntacticCallGraph.ml index d9d75861c..5f7ad4ff1 100644 --- a/infer/src/backend/SyntacticCallGraph.ml +++ b/infer/src/backend/SyntacticCallGraph.ml @@ -96,7 +96,7 @@ let build_from_sources sources = g -let bottom_up sources : (TaskSchedulerTypes.target, Procname.t) ProcessPool.TaskGenerator.t = +let bottom_up sources : (TaskSchedulerTypes.target, string) ProcessPool.TaskGenerator.t = let open TaskSchedulerTypes in let syntactic_call_graph = build_from_sources sources in let remaining = ref (CallGraph.n_procs syntactic_call_graph) in diff --git a/infer/src/backend/SyntacticCallGraph.mli b/infer/src/backend/SyntacticCallGraph.mli index 8cc758d43..4262506b5 100644 --- a/infer/src/backend/SyntacticCallGraph.mli +++ b/infer/src/backend/SyntacticCallGraph.mli @@ -6,7 +6,7 @@ *) open! IStd -val make : SourceFile.t list -> (TaskSchedulerTypes.target, Procname.t) ProcessPool.TaskGenerator.t +val make : SourceFile.t list -> (TaskSchedulerTypes.target, string) ProcessPool.TaskGenerator.t (** task generator that works by - loading the syntactic call graph from the capture DB