[sqlite] do merge in-memory

Summary:
- Instead of merging one target DB into the main DB at a time, merge all target DBs into an in-memory DB (thus, no writing) and then dump it into the main DB at the end.  This makes merging faster.
- When using the sqlite write daemon, there is no reason to drive the merge process from the master, sending each individual target to merge down the socket and doing one DB merge at a time.  Here we move all the DB merging logic into the daemon, and expose a single function that does it all.
- Refactor some common functionality (notably the `iter_infer_deps` function is now in `Utils`) and remove dead files.

This could also be done using a temporary DB (which is not limited by available memory), but in tests that showed worse performance than both the in-memory solution and the current state of things (possibly SQLite-version related?).

Reviewed By: skcho

Differential Revision: D17182862

fbshipit-source-id: a6f81937d
master
Nikos Gorogiannis 5 years ago committed by Facebook Github Bot
parent c131e2e669
commit 16819fa1a4

@ -7,28 +7,46 @@
open! IStd
module L = Logging
module YB = Yojson.Basic
module YBU = Yojson.Basic.Util
(** Module to merge the results of capture for different buck targets. *)
let merge_global_tenvs infer_deps_file =
let time0 = Mtime_clock.counter () in
let global_tenv = Tenv.create () in
let merge ~infer_out_src =
let merge infer_out_src =
let global_tenv_path =
infer_out_src ^/ Config.global_tenv_filename |> DB.filename_from_string
in
Tenv.read global_tenv_path
|> Option.iter ~f:(fun tenv -> Tenv.merge ~src:tenv ~dst:global_tenv)
in
MergeResults.iter_infer_deps infer_deps_file ~f:merge ;
Utils.iter_infer_deps ~project_root:Config.project_root ~f:merge infer_deps_file ;
Tenv.store_global global_tenv ;
L.progress "Merging type environments took %a@." Mtime.Span.pp (Mtime_clock.count time0)
(** Merge the changed-functions JSON file found under [infer_out_src] into the main
    changed-functions file under [Config.results_dir]: read both files as JSON lists,
    append them, deduplicate, and write the union back to the main file. *)
let merge_changed_functions_json infer_out_src =
let main_changed_fs_file = Config.results_dir ^/ Config.export_changed_functions_output in
let changed_fs_file = infer_out_src ^/ Config.export_changed_functions_output in
(* Missing files are treated as empty lists: [YB.from_file] raises [Sys_error] when
   the file does not exist. NOTE(review): [Yojson.Json_error] on a malformed file is
   NOT caught here and would escape — confirm whether that is intended. *)
let main_json = try YB.from_file main_changed_fs_file |> YBU.to_list with Sys_error _ -> [] in
let changed_json = try YB.from_file changed_fs_file |> YBU.to_list with Sys_error _ -> [] in
let all_fs =
`List
(* Deduplicate on string contents; any pair that is not two [`String]s compares
   equal (0), so non-string elements may be collapsed together by dedup. *)
(List.dedup_and_sort
~compare:(fun s1 s2 ->
match (s1, s2) with `String s1, `String s2 -> String.compare s1 s2 | _ -> 0 )
(List.append main_json changed_json))
in
YB.to_file main_changed_fs_file all_fs
let merge_changed_functions () =
L.progress "Merging changed functions files...@." ;
let infer_deps_file = Config.(results_dir ^/ buck_infer_deps_file_name) in
MergeResults.merge_buck_changed_functions infer_deps_file ;
Utils.iter_infer_deps ~project_root:Config.project_root ~f:merge_changed_functions_json
infer_deps_file ;
L.progress "Done merging changed functions files@."
@ -36,7 +54,7 @@ let merge_captured_targets () =
let time0 = Mtime_clock.counter () in
L.progress "Merging captured Buck targets...@\n%!" ;
let infer_deps_file = Config.(results_dir ^/ buck_infer_deps_file_name) in
MergeResults.merge_buck_flavors_results infer_deps_file ;
DBWriter.merge ~infer_deps_file ;
if Config.genrule_master_mode then merge_global_tenvs infer_deps_file ;
L.progress "Merging captured Buck targets took %a@\n%!" Mtime.Span.pp (Mtime_clock.count time0)

@ -108,11 +108,11 @@ module Implementation = struct
NULL). All the rows that pass this filter are inserted/updated into the main table. *)
Sqlite3.exec db
{|
INSERT OR REPLACE INTO procedures
INSERT OR REPLACE INTO memdb.procedures
SELECT sub.proc_name, sub.proc_name_hum, sub.attr_kind, sub.source_file, sub.proc_attributes, sub.cfg, sub.callees
FROM (
attached.procedures AS sub
LEFT OUTER JOIN procedures AS main
LEFT OUTER JOIN memdb.procedures AS main
ON sub.proc_name = main.proc_name )
WHERE
main.attr_kind IS NULL
@ -127,7 +127,7 @@ module Implementation = struct
let db = ResultsDatabase.get_database () in
Sqlite3.exec db
{|
INSERT OR REPLACE INTO source_files
INSERT OR REPLACE INTO memdb.source_files
SELECT source_file, type_environment, integer_type_widths, procedure_names, 1
FROM attached.source_files
|}
@ -135,7 +135,14 @@ module Implementation = struct
~log:(Printf.sprintf "copying source_files of database '%s'" db_file)
let merge_dbs ~infer_out_src =
(* Dump the contents of the in-memory [memdb] schema into the main (on-disk) database:
   one bulk INSERT OR REPLACE per table, so rows accumulated from all merged targets
   land in the main DB in a single pass. *)
let copy_to_main db =
Sqlite3.exec db {| INSERT OR REPLACE INTO procedures SELECT * FROM memdb.procedures |}
|> SqliteUtils.check_result_code db ~log:"Copying procedures into main db" ;
Sqlite3.exec db {| INSERT OR REPLACE INTO source_files SELECT * FROM memdb.source_files |}
|> SqliteUtils.check_result_code db ~log:"Copying source_files into main db"
let merge_db infer_out_src =
let db_file = infer_out_src ^/ ResultsDatabase.database_filename in
let main_db = ResultsDatabase.get_database () in
Sqlite3.exec main_db (Printf.sprintf "ATTACH '%s' AS attached" db_file)
@ -145,8 +152,18 @@ module Implementation = struct
merge_source_files_table ~db_file ;
Sqlite3.exec main_db "DETACH attached"
|> SqliteUtils.check_result_code main_db
~log:(Printf.sprintf "detaching database '%s'" db_file) ;
()
~log:(Printf.sprintf "detaching database '%s'" db_file)
(** Merge all target databases listed in [infer_deps_file] into the main database.
    To avoid repeated disk writes, an in-memory database ([memdb]) is attached and
    each target DB is merged into it; the accumulated result is then copied into the
    main DB in one step before [memdb] is detached. *)
let merge infer_deps_file =
let main_db = ResultsDatabase.get_database () in
(* ':memory:' is SQLite's name for a fresh, private in-memory database *)
Sqlite3.exec main_db "ATTACH ':memory:' AS memdb"
|> SqliteUtils.check_result_code main_db ~log:"attaching memdb" ;
(* create empty procedures/source_files tables inside memdb *)
ResultsDatabase.create_tables ~prefix:"memdb." main_db ;
(* merge each target's DB (third column of each infer_deps line) into memdb *)
Utils.iter_infer_deps ~project_root:Config.project_root ~f:merge_db infer_deps_file ;
copy_to_main main_db ;
Sqlite3.exec main_db "DETACH memdb"
|> SqliteUtils.check_result_code main_db ~log:"detaching memdb"
let canonicalize () =
@ -157,9 +174,8 @@ module Implementation = struct
let reset_capture_tables () =
let db = ResultsDatabase.get_database () in
SqliteUtils.exec db ~log:"drop procedures table" ~stmt:"DROP TABLE procedures" ;
ResultsDatabase.create_procedures_table db ;
SqliteUtils.exec db ~log:"drop source_files table" ~stmt:"DROP TABLE source_files" ;
ResultsDatabase.create_source_files_table db
ResultsDatabase.create_tables db
end
module Command = struct
@ -178,7 +194,7 @@ module Command = struct
; integer_type_widths: Sqlite3.Data.t
; proc_names: Sqlite3.Data.t }
| MarkAllSourceFilesStale
| MergeDBs of {infer_out_src: string}
| Merge of {infer_deps_file: string}
| Vacuum
| ResetCaptureTables
| Handshake
@ -191,8 +207,8 @@ module Command = struct
"AddSourceFile"
| MarkAllSourceFilesStale ->
"MarkAllSourceFilesStale"
| MergeDBs _ ->
"MergeDBs"
| Merge _ ->
"Merge"
| Vacuum ->
"Vacuum"
| ResetCaptureTables ->
@ -213,8 +229,8 @@ module Command = struct
Implementation.add_source_file ~source_file ~tenv ~integer_type_widths ~proc_names
| MarkAllSourceFilesStale ->
Implementation.mark_all_source_files_stale ()
| MergeDBs {infer_out_src} ->
Implementation.merge_dbs ~infer_out_src
| Merge {infer_deps_file} ->
Implementation.merge infer_deps_file
| Vacuum ->
Implementation.canonicalize ()
| ResetCaptureTables ->
@ -325,7 +341,7 @@ let add_source_file ~source_file ~tenv ~integer_type_widths ~proc_names =
let mark_all_source_files_stale () = perform Command.MarkAllSourceFilesStale
let merge_dbs ~infer_out_src = Command.MergeDBs {infer_out_src} |> perform
let merge ~infer_deps_file = Command.Merge {infer_deps_file} |> perform
let canonicalize () = perform Command.Vacuum

@ -30,7 +30,7 @@ val add_source_file :
val mark_all_source_files_stale : unit -> unit
val merge_dbs : infer_out_src:string -> unit
val merge : infer_deps_file:string -> unit
val canonicalize : unit -> unit
(** put the database on disk in deterministic form *)

@ -1,52 +0,0 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
module L = Logging
module YB = Yojson.Basic
module YBU = Yojson.Basic.Util
let merge_changed_functions_json ~infer_out_src =
let main_changed_fs_file = Config.results_dir ^/ Config.export_changed_functions_output in
let changed_fs_file = infer_out_src ^/ Config.export_changed_functions_output in
let main_json = try YB.from_file main_changed_fs_file |> YBU.to_list with Sys_error _ -> [] in
let changed_json = try YB.from_file changed_fs_file |> YBU.to_list with Sys_error _ -> [] in
let all_fs =
`List
(List.dedup_and_sort
~compare:(fun s1 s2 ->
match (s1, s2) with `String s1, `String s2 -> String.compare s1 s2 | _ -> 0 )
(List.append main_json changed_json))
in
YB.to_file main_changed_fs_file all_fs
let iter_infer_deps infer_deps_file ~f =
let one_line line =
match String.split ~on:'\t' line with
| [_; _; target_results_dir] ->
let infer_out_src =
if Filename.is_relative target_results_dir then
Filename.dirname (Config.project_root ^/ "buck-out") ^/ target_results_dir
else target_results_dir
in
f ~infer_out_src
| _ ->
assert false
in
match Utils.read_file infer_deps_file with
| Ok lines ->
List.iter ~f:one_line lines
| Error error ->
L.internal_error "Couldn't read deps file '%s': %s" infer_deps_file error
let merge_buck_flavors_results infer_deps_file =
iter_infer_deps infer_deps_file ~f:DBWriter.merge_dbs
let merge_buck_changed_functions infer_deps_file =
iter_infer_deps infer_deps_file ~f:merge_changed_functions_json

@ -1,18 +0,0 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
val merge_buck_flavors_results : string -> unit
(** Merge the results from sub-invocations of infer inside buck-out/. Takes as argument the
infer_deps file. *)
val merge_buck_changed_functions : string -> unit
(** Merge the changed functions from sub-invocations of infer inside buck-out/. Takes as argument the
infer_deps file. *)
val iter_infer_deps : string -> f:(infer_out_src:string -> unit) -> unit

@ -12,8 +12,9 @@ let database_filename = "results.db"
let database_fullpath = Config.results_dir ^/ database_filename
let procedures_schema =
{|CREATE TABLE IF NOT EXISTS procedures
let procedures_schema prefix =
Printf.sprintf
{|CREATE TABLE IF NOT EXISTS %sprocedures
( proc_name TEXT PRIMARY KEY
, proc_name_hum TEXT
, attr_kind INTEGER NOT NULL
@ -22,27 +23,35 @@ let procedures_schema =
, cfg BLOB
, callees BLOB NOT NULL
)|}
prefix
let source_files_schema =
{|CREATE TABLE IF NOT EXISTS source_files
let source_files_schema prefix =
Printf.sprintf
{|CREATE TABLE IF NOT EXISTS %ssource_files
( source_file TEXT PRIMARY KEY
, type_environment BLOB NOT NULL
, integer_type_widths BLOB
, procedure_names BLOB NOT NULL
, freshly_captured INT NOT NULL )|}
prefix
let schema_hum = Printf.sprintf "%s;\n%s" procedures_schema source_files_schema
let schema_hum = Printf.sprintf "%s;\n%s" (procedures_schema "") (source_files_schema "")
let create_procedures_table db =
let create_procedures_table ~prefix db =
(* it would be nice to use "WITHOUT ROWID" here but ancient versions of sqlite do not support
it *)
SqliteUtils.exec db ~log:"creating procedures table" ~stmt:procedures_schema
SqliteUtils.exec db ~log:"creating procedures table" ~stmt:(procedures_schema prefix)
let create_source_files_table db =
SqliteUtils.exec db ~log:"creating source_files table" ~stmt:source_files_schema
let create_source_files_table ~prefix db =
SqliteUtils.exec db ~log:"creating source_files table" ~stmt:(source_files_schema prefix)
(** Create the procedures and source_files tables in [db]. [prefix] (default: none)
    is prepended to the table names, e.g. [~prefix:"memdb."] creates the tables in an
    attached database of that name. *)
let create_tables ?(prefix = "") db =
create_procedures_table ~prefix db ;
create_source_files_table ~prefix db
let create_db () =
@ -50,8 +59,7 @@ let create_db () =
let db = Sqlite3.db_open ~mutex:`FULL temp_db in
SqliteUtils.exec db ~log:"sqlite page size"
~stmt:(Printf.sprintf "PRAGMA page_size=%d" Config.sqlite_page_size) ;
create_procedures_table db ;
create_source_files_table db ;
create_tables db ;
(* This should be the default but better be sure, otherwise we cannot access the database concurrently. This has to happen before setting WAL mode. *)
SqliteUtils.exec db ~log:"locking mode=NORMAL" ~stmt:"PRAGMA locking_mode=NORMAL" ;
( match Config.sqlite_vfs with

@ -16,16 +16,12 @@ val database_fullpath : string
val schema_hum : string
(** some human-readable string describing the tables *)
val create_tables : ?prefix:string -> Sqlite3.db -> unit
val get_database : unit -> Sqlite3.db
(** The results database. You should always use this function to access the database, as the
connection to it may change during the execution (see [new_database_connection]). *)
val create_procedures_table : Sqlite3.db -> unit
(** create a procedures table in the database*)
val create_source_files_table : Sqlite3.db -> unit
(** create a source files table in the database*)
val new_database_connection : unit -> unit
(** Closes the previous connection to the database (if any), and opens a new one. Needed after calls to fork(2). *)

@ -418,3 +418,24 @@ let get_available_memory_MB () =
in
if Sys.file_exists proc_meminfo <> `Yes then None
else with_file_in proc_meminfo ~f:scan_for_expected_output
(** Parse [infer_deps_file] and apply [f] to the results directory named on each line.
    Each line is tab-separated with three fields; only the third (the target's results
    directory) is used. Relative paths are resolved against [project_root]/buck-out.
    Dies with [InternalError] on an unreadable file or a malformed line. *)
let iter_infer_deps ~project_root ~f infer_deps_file =
  let buck_root = project_root ^/ "buck-out" in
  let process_line line =
    match String.split ~on:'\t' line with
    | [_; _; target_results_dir] ->
        (* a relative path is anchored under buck-out; dirname strips the trailing
           component, as the recorded path points one level below the results dir *)
        if Filename.is_relative target_results_dir then
          f (Filename.dirname (buck_root ^/ target_results_dir))
        else f target_results_dir
    | _ ->
        Die.die InternalError "Couldn't parse deps file '%s', line: %s" infer_deps_file line
  in
  match read_file infer_deps_file with
  | Error error ->
      Die.die InternalError "Couldn't read deps file '%s': %s" infer_deps_file error
  | Ok lines ->
      List.iter ~f:process_line lines

@ -143,3 +143,7 @@ val do_in_dir : dir:string -> f:(unit -> 'a) -> 'a
val get_available_memory_MB : unit -> int option
(** On Linux systems, return [Some x] where [MemAvailable x] is in [/proc/meminfo].
Returns [None] in all other cases. *)
val iter_infer_deps : project_root:string -> f:(string -> unit) -> string -> unit
(** Parse each line of the given infer_deps.txt file (split on tabs, assume 3 elements per line)
and run [f] on the third element. [project_root] is an argument to avoid dependency cycles. *)

Loading…
Cancel
Save