[scheduler][restart] use proc_uids instead of serialised procnames as targets

Summary:
Eliminate the need to serialise procnames when sending work from the restart scheduler to the workers, by sending the proc_uid instead. The proc_uid is (much) shorter than the byte representation of the procname, and it is the primary DB key of the procedures table, so the worker can use it to obtain the full procname.

Also, reduce GC churn by using folds in the scheduler startup instead of copying lists over and over.

Reviewed By: jberdine

Differential Revision: D23566131

fbshipit-source-id: 1472aa990
master
Nikos Gorogiannis 4 years ago committed by Facebook GitHub Bot
parent 18ac47210a
commit 386f303b1d

@ -10,4 +10,8 @@ open! IStd
analyzed by another process *) analyzed by another process *)
exception ProcnameAlreadyLocked of {dependency_filename: string} exception ProcnameAlreadyLocked of {dependency_filename: string}
type target = Procname of Procname.t | File of SourceFile.t type target =
| Procname of Procname.t
| File of SourceFile.t
| ProcUID of string
(** matches primary key of [procedures] and [specs] tables; see [ResultsDatabase.ml] *)

@ -14,8 +14,8 @@ let make sources =
|> ProcessPool.TaskGenerator.of_list |> ProcessPool.TaskGenerator.of_list
in in
let next x = let next x =
let res = gen.next x in gen.next x
(* see defn of gen above to see why res should never match Some (Procname _) *) (* see defn of gen above to see why res should never match Some (Procname _) *)
match res with None -> None | Some (Procname _) -> assert false | Some (File _) as v -> v |> Option.map ~f:(function File _ as v -> v | Procname _ | ProcUID _ -> assert false)
in in
{gen with next} {gen with next}

@ -24,6 +24,25 @@ let clear_caches () =
clear_caches_except_lrus () clear_caches_except_lrus ()
(** [proc_name_of_uid proc_uid] queries the [procedures] table for the row whose [proc_uid]
    column equals [proc_uid] and returns its deserialized [proc_name] column.

    The SQL statement is registered once, when this value is defined; each call only binds the
    parameter and runs the query. Dies with [InternalError] if no row is returned. *)
let proc_name_of_uid =
  let lookup_stmt =
    ResultsDatabase.register_statement "SELECT proc_name FROM procedures WHERE proc_uid = :k"
  in
  (* column 0 is the only selected column, [proc_name] *)
  let read_proc_name stmt = Procname.SQLite.deserialize (Sqlite3.column stmt 0) in
  fun proc_uid ->
    ResultsDatabase.with_registered_statement lookup_stmt ~f:(fun db stmt ->
        (* bind the single parameter [:k] (index 1) to the requested uid *)
        let bind_rc = Sqlite3.bind stmt 1 (Sqlite3.Data.TEXT proc_uid) in
        SqliteUtils.check_result_code db ~log:"proc_name of proc_uid bind proc_uid" bind_rc ;
        (* NOTE(review): [~finalize:false] presumably keeps the registered statement alive for
           reuse across calls -- confirm against [SqliteUtils.result_option] *)
        match
          SqliteUtils.result_option ~finalize:false db ~log:"proc_name of proc_uid" stmt
            ~read_row:read_proc_name
        with
        | None ->
            L.die InternalError "Requested non-existent proc_uid: %s@." proc_uid
        | Some found_proc_name ->
            found_proc_name )
let analyze_target : (TaskSchedulerTypes.target, string) Tasks.doer = let analyze_target : (TaskSchedulerTypes.target, string) Tasks.doer =
let analyze_source_file exe_env source_file = let analyze_source_file exe_env source_file =
if Topl.is_active () then DB.Results_dir.init (Topl.sourcefile ()) ; if Topl.is_active () then DB.Results_dir.init (Topl.sourcefile ()) ;
@ -62,6 +81,8 @@ let analyze_target : (TaskSchedulerTypes.target, string) Tasks.doer =
match target with match target with
| Procname procname -> | Procname procname ->
analyze_proc_name exe_env procname analyze_proc_name exe_env procname
| ProcUID proc_uid ->
proc_name_of_uid proc_uid |> analyze_proc_name exe_env
| File source_file -> | File source_file ->
analyze_source_file exe_env source_file analyze_source_file exe_env source_file

@ -9,8 +9,7 @@ module L = Logging
type work_with_dependency = {work: TaskSchedulerTypes.target; dependency_filename_opt: string option} type work_with_dependency = {work: TaskSchedulerTypes.target; dependency_filename_opt: string option}
let of_list (lst : work_with_dependency list) : ('a, string) ProcessPool.TaskGenerator.t = let of_queue content : ('a, string) ProcessPool.TaskGenerator.t =
let content = Queue.of_list lst in
let remaining = ref (Queue.length content) in let remaining = ref (Queue.length content) in
let remaining_tasks () = !remaining in let remaining_tasks () = !remaining in
let is_empty () = Int.equal !remaining 0 in let is_empty () = Int.equal !remaining 0 in
@ -34,18 +33,29 @@ let of_list (lst : work_with_dependency list) : ('a, string) ProcessPool.TaskGen
let make sources = let make sources =
let pnames = let target_count = ref 0 in
List.map sources ~f:SourceFiles.proc_names_of_source let cons_proc_uid_work acc procname =
|> List.concat incr target_count ;
|> List.rev_map ~f:(fun procname -> let proc_uid = Procname.to_unique_id procname in
{work= TaskSchedulerTypes.Procname procname; dependency_filename_opt= None} ) {work= TaskSchedulerTypes.ProcUID proc_uid; dependency_filename_opt= None} :: acc
in in
let files = let pname_targets =
List.map sources ~f:(fun file -> List.fold sources ~init:[] ~f:(fun init source ->
{work= TaskSchedulerTypes.File file; dependency_filename_opt= None} ) SourceFiles.proc_names_of_source source |> List.fold ~init ~f:cons_proc_uid_work )
in in
let permute = List.permute ~random_state:(Random.State.make (Array.create ~len:1 0)) in let make_file_work file =
permute pnames @ permute files |> of_list incr target_count ;
{work= TaskSchedulerTypes.File file; dependency_filename_opt= None}
in
let file_targets = List.rev_map sources ~f:make_file_work in
let queue = Queue.create ~capacity:!target_count () in
let permute_and_enqueue targets =
List.permute targets ~random_state:(Random.State.make (Array.create ~len:1 0))
|> List.iter ~f:(Queue.enqueue queue)
in
permute_and_enqueue pname_targets ;
permute_and_enqueue file_targets ;
of_queue queue
let if_restart_scheduler f = let if_restart_scheduler f =

@ -138,8 +138,8 @@ let bottom_up sources : (TaskSchedulerTypes.target, string) ProcessPool.TaskGene
decr remaining ; decr remaining ;
decr scheduled ; decr scheduled ;
CallGraph.remove syntactic_call_graph pname CallGraph.remove syntactic_call_graph pname
| File _ -> | File _ | ProcUID _ ->
L.die InternalError "Only Procnames are scheduled but File target was received" L.die InternalError "Only Procnames are scheduled but File/ProcUID target was received"
in in
{remaining_tasks; is_empty; finished; next} {remaining_tasks; is_empty; finished; next}

@ -12,8 +12,8 @@ module L = Logging
let results_dir_get_path entry = ResultsDirEntryName.get_path ~results_dir:Config.results_dir entry let results_dir_get_path entry = ResultsDirEntryName.get_path ~results_dir:Config.results_dir entry
let procedures_schema prefix = let procedures_schema prefix =
(* it would be nice to use "WITHOUT ROWID" here but ancient versions of sqlite do not support (* [proc_uid] is meant to only be used with [Procname.to_unique_id]
it *) [Marshal]ed values must never be used as keys. *)
Printf.sprintf Printf.sprintf
{| {|
CREATE TABLE IF NOT EXISTS %sprocedures CREATE TABLE IF NOT EXISTS %sprocedures
@ -30,6 +30,7 @@ let procedures_schema prefix =
let source_files_schema prefix = let source_files_schema prefix =
(* [Marshal]ed values must never be used as keys. [source_file] has a custom serialiser *)
Printf.sprintf Printf.sprintf
{| {|
CREATE TABLE IF NOT EXISTS %ssource_files CREATE TABLE IF NOT EXISTS %ssource_files
@ -43,6 +44,8 @@ let source_files_schema prefix =
let specs_schema prefix = let specs_schema prefix =
(* [proc_uid] is meant to only be used with [Procname.to_unique_id].
[Marshal]ed values must never be used as keys. *)
Printf.sprintf Printf.sprintf
{| {|
CREATE TABLE IF NOT EXISTS %sspecs CREATE TABLE IF NOT EXISTS %sspecs

Loading…
Cancel
Save