From 386f303b1d1f060f068bcda3b5383891e0b7777a Mon Sep 17 00:00:00 2001 From: Nikos Gorogiannis Date: Thu, 10 Sep 2020 02:37:03 -0700 Subject: [PATCH] [scheduler][restart] use proc_uids instead of serialised procnames as targets Summary: Eliminate the need to serialise procnames when sending work from the restart scheduler to the workers, by sending the proc_uid instead. This is (much) shorter than the byte representation of the proc_name and it's the primary DB key of the procedures table, so it can be used by the worker to obtain the full procname. Also, reduce GC churn by using folds in the scheduler startup instead of copying lists over and over. Reviewed By: jberdine Differential Revision: D23566131 fbshipit-source-id: 1472aa990 --- infer/src/absint/TaskSchedulerTypes.ml | 6 ++++- infer/src/backend/FileScheduler.ml | 4 +-- infer/src/backend/InferAnalyze.ml | 21 +++++++++++++++ infer/src/backend/RestartScheduler.ml | 34 ++++++++++++++++--------- infer/src/backend/SyntacticCallGraph.ml | 4 +-- infer/src/base/ResultsDatabase.ml | 7 +++-- 6 files changed, 57 insertions(+), 19 deletions(-) diff --git a/infer/src/absint/TaskSchedulerTypes.ml b/infer/src/absint/TaskSchedulerTypes.ml index 9a5ecc0b6..836e87292 100644 --- a/infer/src/absint/TaskSchedulerTypes.ml +++ b/infer/src/absint/TaskSchedulerTypes.ml @@ -10,4 +10,8 @@ open! IStd analyzed by another process *) exception ProcnameAlreadyLocked of {dependency_filename: string} -type target = Procname of Procname.t | File of SourceFile.t +type target = + | Procname of Procname.t + | File of SourceFile.t + | ProcUID of string + (** matches primary key of [procedures] and [specs] tables; see [ResultsDatabase.ml] *) diff --git a/infer/src/backend/FileScheduler.ml b/infer/src/backend/FileScheduler.ml index ec6a0bc70..272a7ff74 100644 --- a/infer/src/backend/FileScheduler.ml +++ b/infer/src/backend/FileScheduler.ml @@ -14,8 +14,8 @@ let make sources = |> ProcessPool.TaskGenerator.of_list in let next x = - let res = gen.next x in + gen.next x (* see defn of gen above to see why res should never match Some (Procname _) *) - match res with None -> None | Some (Procname _) -> assert false | Some (File _) as v -> v + |> Option.map ~f:(function File _ as v -> v | Procname _ | ProcUID _ -> assert false) in {gen with next} diff --git a/infer/src/backend/InferAnalyze.ml b/infer/src/backend/InferAnalyze.ml index c6277c5ef..55274ab9c 100644 --- a/infer/src/backend/InferAnalyze.ml +++ b/infer/src/backend/InferAnalyze.ml @@ -24,6 +24,25 @@ let clear_caches () = clear_caches_except_lrus () +let proc_name_of_uid = + let statement = + ResultsDatabase.register_statement "SELECT proc_name FROM procedures WHERE proc_uid = :k" + in + fun proc_uid -> + ResultsDatabase.with_registered_statement statement ~f:(fun db stmt -> + Sqlite3.bind stmt 1 (Sqlite3.Data.TEXT proc_uid) + |> SqliteUtils.check_result_code db ~log:"proc_name of proc_uid bind proc_uid" ; + let result_option = + SqliteUtils.result_option ~finalize:false db ~log:"proc_name of proc_uid" stmt + ~read_row:(fun stmt -> Sqlite3.column stmt 0 |> Procname.SQLite.deserialize) + in + match result_option with + | Some proc_name -> + proc_name + | None -> + L.die InternalError "Requested non-existent proc_uid: %s@." proc_uid ) + + let analyze_target : (TaskSchedulerTypes.target, string) Tasks.doer = let analyze_source_file exe_env source_file = if Topl.is_active () then DB.Results_dir.init (Topl.sourcefile ()) ; @@ -62,6 +81,8 @@ let analyze_target : (TaskSchedulerTypes.target, string) Tasks.doer = match target with | Procname procname -> analyze_proc_name exe_env procname + | ProcUID proc_uid -> + proc_name_of_uid proc_uid |> analyze_proc_name exe_env | File source_file -> analyze_source_file exe_env source_file diff --git a/infer/src/backend/RestartScheduler.ml b/infer/src/backend/RestartScheduler.ml index a920a4d62..1e15d51a1 100644 --- a/infer/src/backend/RestartScheduler.ml +++ b/infer/src/backend/RestartScheduler.ml @@ -9,8 +9,7 @@ module L = Logging type work_with_dependency = {work: TaskSchedulerTypes.target; dependency_filename_opt: string option} -let of_list (lst : work_with_dependency list) : ('a, string) ProcessPool.TaskGenerator.t = - let content = Queue.of_list lst in +let of_queue content : ('a, string) ProcessPool.TaskGenerator.t = let remaining = ref (Queue.length content) in let remaining_tasks () = !remaining in let is_empty () = Int.equal !remaining 0 in @@ -34,18 +33,29 @@ let of_list (lst : work_with_dependency list) : ('a, string) ProcessPool.TaskGen let make sources = - let pnames = - List.map sources ~f:SourceFiles.proc_names_of_source - |> List.concat - |> List.rev_map ~f:(fun procname -> - {work= TaskSchedulerTypes.Procname procname; dependency_filename_opt= None} ) + let target_count = ref 0 in + let cons_proc_uid_work acc procname = + incr target_count ; + let proc_uid = Procname.to_unique_id procname in + {work= TaskSchedulerTypes.ProcUID proc_uid; dependency_filename_opt= None} :: acc in - let files = - List.map sources ~f:(fun file -> - {work= TaskSchedulerTypes.File file; dependency_filename_opt= None} ) + let pname_targets = + List.fold sources ~init:[] ~f:(fun init source -> + SourceFiles.proc_names_of_source source |> List.fold ~init ~f:cons_proc_uid_work ) in - let permute = List.permute ~random_state:(Random.State.make (Array.create ~len:1 0)) in - permute pnames @ permute files |> of_list + let make_file_work file = + incr target_count ; + {work= TaskSchedulerTypes.File file; dependency_filename_opt= None} + in + let file_targets = List.rev_map sources ~f:make_file_work in + let queue = Queue.create ~capacity:!target_count () in + let permute_and_enqueue targets = + List.permute targets ~random_state:(Random.State.make (Array.create ~len:1 0)) + |> List.iter ~f:(Queue.enqueue queue) + in + permute_and_enqueue pname_targets ; + permute_and_enqueue file_targets ; + of_queue queue let if_restart_scheduler f = diff --git a/infer/src/backend/SyntacticCallGraph.ml b/infer/src/backend/SyntacticCallGraph.ml index 5f7ad4ff1..15d69d407 100644 --- a/infer/src/backend/SyntacticCallGraph.ml +++ b/infer/src/backend/SyntacticCallGraph.ml @@ -138,8 +138,8 @@ let bottom_up sources : (TaskSchedulerTypes.target, string) ProcessPool.TaskGene decr remaining ; decr scheduled ; CallGraph.remove syntactic_call_graph pname - | File _ -> - L.die InternalError "Only Procnames are scheduled but File target was received" + | File _ | ProcUID _ -> + L.die InternalError "Only Procnames are scheduled but File/ProcUID target was received" in {remaining_tasks; is_empty; finished; next} diff --git a/infer/src/base/ResultsDatabase.ml b/infer/src/base/ResultsDatabase.ml index b52a8724d..b5c81de45 100644 --- a/infer/src/base/ResultsDatabase.ml +++ b/infer/src/base/ResultsDatabase.ml @@ -12,8 +12,8 @@ module L = Logging let results_dir_get_path entry = ResultsDirEntryName.get_path ~results_dir:Config.results_dir entry let procedures_schema prefix = - (* it would be nice to use "WITHOUT ROWID" here but ancient versions of sqlite do not support - it *) + (* [proc_uid] is meant to only be used with [Procname.to_unique_id] + [Marshal]ed values must never be used as keys. *) Printf.sprintf {| CREATE TABLE IF NOT EXISTS %sprocedures @@ -30,6 +30,7 @@ let procedures_schema prefix = let source_files_schema prefix = + (* [Marshal]ed values must never be used as keys. [source_file] has a custom serialiser *) Printf.sprintf {| CREATE TABLE IF NOT EXISTS %ssource_files @@ -43,6 +44,8 @@ let source_files_schema prefix = let specs_schema prefix = + (* [proc_uid] is meant to only be used with [Procname.to_unique_id] + [Marshal]ed values must never be used as keys. *) Printf.sprintf {| CREATE TABLE IF NOT EXISTS %sspecs