[scheduler][restart] use proc_uids instead of serialised procnames as targets

Summary: Eliminate the need to serialise procnames when sending work from the restart scheduler to the workers, by sending the proc_uid instead. This is (much) shorter than the byte representation of the proc_name and it's the primary DB key of the procedures table, so it can be used by the worker to obtain the full procname. Also, reduce GC churn by using folds in the scheduler startup instead of copying lists over and over. Reviewed By: jberdine Differential Revision: D23566131 fbshipit-source-id: 1472aa990
5 years ago · 386f303b1d
parent 18ac47210a
commit 386f303b1d
6 changed files with 57 additions and 19 deletions
--- a/infer/src/absint/TaskSchedulerTypes.ml
+++ b/infer/src/absint/TaskSchedulerTypes.ml
@ -10,4 +10,8 @@ open! IStd
    analyzed by another process *)
 exception ProcnameAlreadyLocked of {dependency_filename: string}

-type target = Procname of Procname.t | File of SourceFile.t
+type target =
+  | Procname of Procname.t
+  | File of SourceFile.t
+  | ProcUID of string
+      (** matches primary key of [procedures] and [specs] tables; see [ResultsDatabase.ml] *)
--- a/infer/src/backend/FileScheduler.ml
+++ b/infer/src/backend/FileScheduler.ml
@ -14,8 +14,8 @@ let make sources =
    |> ProcessPool.TaskGenerator.of_list
  in
  let next x =
-    let res = gen.next x in
+    gen.next x
    (* see defn of gen above to see why res should never match Some (Procname _) *)
-    match res with None -> None | Some (Procname _) -> assert false | Some (File _) as v -> v
+    |> Option.map ~f:(function File _ as v -> v | Procname _ | ProcUID _ -> assert false)
  in
  {gen with next}
--- a/infer/src/backend/InferAnalyze.ml
+++ b/infer/src/backend/InferAnalyze.ml
@ -24,6 +24,25 @@ let clear_caches () =
  clear_caches_except_lrus ()


+let proc_name_of_uid =
+  let statement =
+    ResultsDatabase.register_statement "SELECT proc_name FROM procedures WHERE proc_uid = :k"
+  in
+  fun proc_uid ->
+    ResultsDatabase.with_registered_statement statement ~f:(fun db stmt ->
+        Sqlite3.bind stmt 1 (Sqlite3.Data.TEXT proc_uid)
+        |> SqliteUtils.check_result_code db ~log:"proc_name of proc_uid bind proc_uid" ;
+        let result_option =
+          SqliteUtils.result_option ~finalize:false db ~log:"proc_name of proc_uid" stmt
+            ~read_row:(fun stmt -> Sqlite3.column stmt 0 |> Procname.SQLite.deserialize)
+        in
+        match result_option with
+        | Some proc_name ->
+            proc_name
+        | None ->
+            L.die InternalError "Requested non-existent proc_uid: %s@." proc_uid )
+
+
 let analyze_target : (TaskSchedulerTypes.target, string) Tasks.doer =
  let analyze_source_file exe_env source_file =
    if Topl.is_active () then DB.Results_dir.init (Topl.sourcefile ()) ;
@ -62,6 +81,8 @@ let analyze_target : (TaskSchedulerTypes.target, string) Tasks.doer =
    match target with
    | Procname procname ->
        analyze_proc_name exe_env procname
+    | ProcUID proc_uid ->
+        proc_name_of_uid proc_uid |> analyze_proc_name exe_env
    | File source_file ->
        analyze_source_file exe_env source_file

--- a/infer/src/backend/RestartScheduler.ml
+++ b/infer/src/backend/RestartScheduler.ml
@ -9,8 +9,7 @@ module L = Logging

 type work_with_dependency = {work: TaskSchedulerTypes.target; dependency_filename_opt: string option}

-let of_list (lst : work_with_dependency list) : ('a, string) ProcessPool.TaskGenerator.t =
-  let content = Queue.of_list lst in
+let of_queue content : ('a, string) ProcessPool.TaskGenerator.t =
  let remaining = ref (Queue.length content) in
  let remaining_tasks () = !remaining in
  let is_empty () = Int.equal !remaining 0 in
@ -34,18 +33,29 @@ let of_list (lst : work_with_dependency list) : ('a, string) ProcessPool.TaskGen


 let make sources =
-  let pnames =
-    List.map sources ~f:SourceFiles.proc_names_of_source
-    |> List.concat
-    |> List.rev_map ~f:(fun procname ->
-           {work= TaskSchedulerTypes.Procname procname; dependency_filename_opt= None} )
+  let target_count = ref 0 in
+  let cons_proc_uid_work acc procname =
+    incr target_count ;
+    let proc_uid = Procname.to_unique_id procname in
+    {work= TaskSchedulerTypes.ProcUID proc_uid; dependency_filename_opt= None} :: acc
  in
-  let files =
-    List.map sources ~f:(fun file ->
-        {work= TaskSchedulerTypes.File file; dependency_filename_opt= None} )
+  let pname_targets =
+    List.fold sources ~init:[] ~f:(fun init source ->
+        SourceFiles.proc_names_of_source source |> List.fold ~init ~f:cons_proc_uid_work )
  in
-  let permute = List.permute ~random_state:(Random.State.make (Array.create ~len:1 0)) in
-  permute pnames @ permute files |> of_list
+  let make_file_work file =
+    incr target_count ;
+    {work= TaskSchedulerTypes.File file; dependency_filename_opt= None}
+  in
+  let file_targets = List.rev_map sources ~f:make_file_work in
+  let queue = Queue.create ~capacity:!target_count () in
+  let permute_and_enqueue targets =
+    List.permute targets ~random_state:(Random.State.make (Array.create ~len:1 0))
+    |> List.iter ~f:(Queue.enqueue queue)
+  in
+  permute_and_enqueue pname_targets ;
+  permute_and_enqueue file_targets ;
+  of_queue queue


 let if_restart_scheduler f =
--- a/infer/src/backend/SyntacticCallGraph.ml
+++ b/infer/src/backend/SyntacticCallGraph.ml
@ -138,8 +138,8 @@ let bottom_up sources : (TaskSchedulerTypes.target, string) ProcessPool.TaskGene
        decr remaining ;
        decr scheduled ;
        CallGraph.remove syntactic_call_graph pname
-    | File _ ->
-        L.die InternalError "Only Procnames are scheduled but File target was received"
+    | File _ | ProcUID _ ->
+        L.die InternalError "Only Procnames are scheduled but File/ProcUID target was received"
  in
  {remaining_tasks; is_empty; finished; next}

--- a/infer/src/base/ResultsDatabase.ml
+++ b/infer/src/base/ResultsDatabase.ml
@ -12,8 +12,8 @@ module L = Logging
 let results_dir_get_path entry = ResultsDirEntryName.get_path ~results_dir:Config.results_dir entry

 let procedures_schema prefix =
-  (* it would be nice to use "WITHOUT ROWID" here but ancient versions of sqlite do not support
-     it *)
+  (* [proc_uid] is meant to only be used with [Procname.to_unique_id]
+     [Marshal]ed values must never be used as keys. *)
  Printf.sprintf
    {|
      CREATE TABLE IF NOT EXISTS %sprocedures
@ -30,6 +30,7 @@ let procedures_schema prefix =


 let source_files_schema prefix =
+  (* [Marshal]ed values must never be used as keys. [source_file] has a custom serialiser *)
  Printf.sprintf
    {|
      CREATE TABLE IF NOT EXISTS %ssource_files
@ -43,6 +44,8 @@ let source_files_schema prefix =


 let specs_schema prefix =
+  (* [proc_uid] is meant to only be used with [Procname.to_unique_id]
+     [Marshal]ed values must never be used as keys. *)
  Printf.sprintf
    {|
      CREATE TABLE IF NOT EXISTS %sspecs