From 386f303b1d1f060f068bcda3b5383891e0b7777a Mon Sep 17 00:00:00 2001
From: Nikos Gorogiannis <nikosg@fb.com>
Date: Thu, 10 Sep 2020 02:37:03 -0700
Subject: [PATCH] [scheduler][restart] use proc_uids instead of serialised
 procnames as targets

Summary:
Eliminate the need to serialise procnames when sending work from the restart scheduler to the workers, by sending the proc_uid instead.  This is (much) shorter than the byte representation of the proc_name and it's the primary DB key of the procedures table, so it can be used by the worker to obtain the full procname.

Also, reduce GC churn by using folds in the scheduler startup instead of copying lists over and over.

Reviewed By: jberdine

Differential Revision: D23566131

fbshipit-source-id: 1472aa990
---
 infer/src/absint/TaskSchedulerTypes.ml  |  6 ++++-
 infer/src/backend/FileScheduler.ml      |  4 +--
 infer/src/backend/InferAnalyze.ml       | 21 +++++++++++++++
 infer/src/backend/RestartScheduler.ml   | 34 ++++++++++++++++---------
 infer/src/backend/SyntacticCallGraph.ml |  4 +--
 infer/src/base/ResultsDatabase.ml       |  7 +++--
 6 files changed, 57 insertions(+), 19 deletions(-)

diff --git a/infer/src/absint/TaskSchedulerTypes.ml b/infer/src/absint/TaskSchedulerTypes.ml
index 9a5ecc0b6..836e87292 100644
--- a/infer/src/absint/TaskSchedulerTypes.ml
+++ b/infer/src/absint/TaskSchedulerTypes.ml
@@ -10,4 +10,8 @@ open! IStd
     analyzed by another process *)
 exception ProcnameAlreadyLocked of {dependency_filename: string}
 
-type target = Procname of Procname.t | File of SourceFile.t
+type target =
+  | Procname of Procname.t
+  | File of SourceFile.t
+  | ProcUID of string
+      (** matches primary key of [procedures] and [specs] tables; see [ResultsDatabase.ml] *)
diff --git a/infer/src/backend/FileScheduler.ml b/infer/src/backend/FileScheduler.ml
index ec6a0bc70..272a7ff74 100644
--- a/infer/src/backend/FileScheduler.ml
+++ b/infer/src/backend/FileScheduler.ml
@@ -14,8 +14,8 @@ let make sources =
     |> ProcessPool.TaskGenerator.of_list
   in
   let next x =
-    let res = gen.next x in
+    gen.next x
     (* see defn of gen above to see why res should never match Some (Procname _) *)
-    match res with None -> None | Some (Procname _) -> assert false | Some (File _) as v -> v
+    |> Option.map ~f:(function File _ as v -> v | Procname _ | ProcUID _ -> assert false)
   in
   {gen with next}
diff --git a/infer/src/backend/InferAnalyze.ml b/infer/src/backend/InferAnalyze.ml
index c6277c5ef..55274ab9c 100644
--- a/infer/src/backend/InferAnalyze.ml
+++ b/infer/src/backend/InferAnalyze.ml
@@ -24,6 +24,25 @@ let clear_caches () =
   clear_caches_except_lrus ()
 
 
+let proc_name_of_uid =
+  let statement =
+    ResultsDatabase.register_statement "SELECT proc_name FROM procedures WHERE proc_uid = :k"
+  in
+  fun proc_uid ->
+    ResultsDatabase.with_registered_statement statement ~f:(fun db stmt ->
+        Sqlite3.bind stmt 1 (Sqlite3.Data.TEXT proc_uid)
+        |> SqliteUtils.check_result_code db ~log:"proc_name of proc_uid bind proc_uid" ;
+        let result_option =
+          SqliteUtils.result_option ~finalize:false db ~log:"proc_name of proc_uid" stmt
+            ~read_row:(fun stmt -> Sqlite3.column stmt 0 |> Procname.SQLite.deserialize)
+        in
+        match result_option with
+        | Some proc_name ->
+            proc_name
+        | None ->
+            L.die InternalError "Requested non-existent proc_uid: %s@." proc_uid )
+
+
 let analyze_target : (TaskSchedulerTypes.target, string) Tasks.doer =
   let analyze_source_file exe_env source_file =
     if Topl.is_active () then DB.Results_dir.init (Topl.sourcefile ()) ;
@@ -62,6 +81,8 @@ let analyze_target : (TaskSchedulerTypes.target, string) Tasks.doer =
     match target with
     | Procname procname ->
         analyze_proc_name exe_env procname
+    | ProcUID proc_uid ->
+        proc_name_of_uid proc_uid |> analyze_proc_name exe_env
     | File source_file ->
         analyze_source_file exe_env source_file
 
diff --git a/infer/src/backend/RestartScheduler.ml b/infer/src/backend/RestartScheduler.ml
index a920a4d62..1e15d51a1 100644
--- a/infer/src/backend/RestartScheduler.ml
+++ b/infer/src/backend/RestartScheduler.ml
@@ -9,8 +9,7 @@ module L = Logging
 
 type work_with_dependency = {work: TaskSchedulerTypes.target; dependency_filename_opt: string option}
 
-let of_list (lst : work_with_dependency list) : ('a, string) ProcessPool.TaskGenerator.t =
-  let content = Queue.of_list lst in
+let of_queue content : ('a, string) ProcessPool.TaskGenerator.t =
   let remaining = ref (Queue.length content) in
   let remaining_tasks () = !remaining in
   let is_empty () = Int.equal !remaining 0 in
@@ -34,18 +33,29 @@ let of_list (lst : work_with_dependency list) : ('a, string) ProcessPool.TaskGen
 
 
 let make sources =
-  let pnames =
-    List.map sources ~f:SourceFiles.proc_names_of_source
-    |> List.concat
-    |> List.rev_map ~f:(fun procname ->
-           {work= TaskSchedulerTypes.Procname procname; dependency_filename_opt= None} )
+  let target_count = ref 0 in
+  let cons_proc_uid_work acc procname =
+    incr target_count ;
+    let proc_uid = Procname.to_unique_id procname in
+    {work= TaskSchedulerTypes.ProcUID proc_uid; dependency_filename_opt= None} :: acc
   in
-  let files =
-    List.map sources ~f:(fun file ->
-        {work= TaskSchedulerTypes.File file; dependency_filename_opt= None} )
+  let pname_targets =
+    List.fold sources ~init:[] ~f:(fun init source ->
+        SourceFiles.proc_names_of_source source |> List.fold ~init ~f:cons_proc_uid_work )
   in
-  let permute = List.permute ~random_state:(Random.State.make (Array.create ~len:1 0)) in
-  permute pnames @ permute files |> of_list
+  let make_file_work file =
+    incr target_count ;
+    {work= TaskSchedulerTypes.File file; dependency_filename_opt= None}
+  in
+  let file_targets = List.rev_map sources ~f:make_file_work in
+  let queue = Queue.create ~capacity:!target_count () in
+  let permute_and_enqueue targets =
+    List.permute targets ~random_state:(Random.State.make (Array.create ~len:1 0))
+    |> List.iter ~f:(Queue.enqueue queue)
+  in
+  permute_and_enqueue pname_targets ;
+  permute_and_enqueue file_targets ;
+  of_queue queue
 
 
 let if_restart_scheduler f =
diff --git a/infer/src/backend/SyntacticCallGraph.ml b/infer/src/backend/SyntacticCallGraph.ml
index 5f7ad4ff1..15d69d407 100644
--- a/infer/src/backend/SyntacticCallGraph.ml
+++ b/infer/src/backend/SyntacticCallGraph.ml
@@ -138,8 +138,8 @@ let bottom_up sources : (TaskSchedulerTypes.target, string) ProcessPool.TaskGene
         decr remaining ;
         decr scheduled ;
         CallGraph.remove syntactic_call_graph pname
-    | File _ ->
-        L.die InternalError "Only Procnames are scheduled but File target was received"
+    | File _ | ProcUID _ ->
+        L.die InternalError "Only Procnames are scheduled but File/ProcUID target was received"
   in
   {remaining_tasks; is_empty; finished; next}
 
diff --git a/infer/src/base/ResultsDatabase.ml b/infer/src/base/ResultsDatabase.ml
index b52a8724d..b5c81de45 100644
--- a/infer/src/base/ResultsDatabase.ml
+++ b/infer/src/base/ResultsDatabase.ml
@@ -12,8 +12,8 @@ module L = Logging
 let results_dir_get_path entry = ResultsDirEntryName.get_path ~results_dir:Config.results_dir entry
 
 let procedures_schema prefix =
-  (* it would be nice to use "WITHOUT ROWID" here but ancient versions of sqlite do not support
-     it *)
+  (* [proc_uid] is meant to only be used with [Procname.to_unique_id]
+     [Marshal]ed values must never be used as keys. *)
   Printf.sprintf
     {|
       CREATE TABLE IF NOT EXISTS %sprocedures
@@ -30,6 +30,7 @@ let procedures_schema prefix =
 
 
 let source_files_schema prefix =
+  (* [Marshal]ed values must never be used as keys. [source_file] has a custom serialiser *)
   Printf.sprintf
     {|
       CREATE TABLE IF NOT EXISTS %ssource_files
@@ -43,6 +44,8 @@ let source_files_schema prefix =
 
 
 let specs_schema prefix =
+  (* [proc_uid] is meant to only be used with [Procname.to_unique_id]
+     [Marshal]ed values must never be used as keys. *)
   Printf.sprintf
     {|
       CREATE TABLE IF NOT EXISTS %sspecs