From 16819fa1a481e3d2b238c109bcf55270ab2bd65e Mon Sep 17 00:00:00 2001 From: Nikos Gorogiannis Date: Fri, 20 Sep 2019 04:01:41 -0700 Subject: [PATCH] [sqlite] do merge in-memory Summary: - Instead of merging one target DB into the main DB at a time, merge all target DBs into an in-memory DB (thus, no writing) and then dump it into the main DB at the end. This makes merging faster. - When using the sqlite write daemon, there is no reason to drive the merge process from the master, sending each individual target to merge down the socket and doing one DB merge at a time. Here we move all the DB merging logic in the daemon, and expose a single function that does it all. - Refactor some common functionality (notably the `iter_infer_deps` function is now in `Utils`) and remove dead files. This can also be done using a temporary DB (which is not limited to memory) but this showed worse perf in tests than the in-memory solution as well as the current state of things (! possibly Sqlite-version related?). Reviewed By: skcho Differential Revision: D17182862 fbshipit-source-id: a6f81937d --- infer/src/backend/mergeCapture.ml | 26 ++++++++++++--- infer/src/base/DBWriter.ml | 44 +++++++++++++++++-------- infer/src/base/DBWriter.mli | 2 +- infer/src/base/MergeResults.ml | 52 ------------------------------ infer/src/base/MergeResults.mli | 18 ----------- infer/src/base/ResultsDatabase.ml | 30 ++++++++++------- infer/src/base/ResultsDatabase.mli | 8 ++--- infer/src/base/Utils.ml | 21 ++++++++++++ infer/src/base/Utils.mli | 4 +++ 9 files changed, 99 insertions(+), 106 deletions(-) delete mode 100644 infer/src/base/MergeResults.ml delete mode 100644 infer/src/base/MergeResults.mli diff --git a/infer/src/backend/mergeCapture.ml b/infer/src/backend/mergeCapture.ml index a4ed583b7..f6913e3ec 100644 --- a/infer/src/backend/mergeCapture.ml +++ b/infer/src/backend/mergeCapture.ml @@ -7,28 +7,46 @@ open! 
IStd module L = Logging +module YB = Yojson.Basic +module YBU = Yojson.Basic.Util (** Module to merge the results of capture for different buck targets. *) let merge_global_tenvs infer_deps_file = let time0 = Mtime_clock.counter () in let global_tenv = Tenv.create () in - let merge ~infer_out_src = + let merge infer_out_src = let global_tenv_path = infer_out_src ^/ Config.global_tenv_filename |> DB.filename_from_string in Tenv.read global_tenv_path |> Option.iter ~f:(fun tenv -> Tenv.merge ~src:tenv ~dst:global_tenv) in - MergeResults.iter_infer_deps infer_deps_file ~f:merge ; + Utils.iter_infer_deps ~project_root:Config.project_root ~f:merge infer_deps_file ; Tenv.store_global global_tenv ; L.progress "Merging type environments took %a@." Mtime.Span.pp (Mtime_clock.count time0) +let merge_changed_functions_json infer_out_src = + let main_changed_fs_file = Config.results_dir ^/ Config.export_changed_functions_output in + let changed_fs_file = infer_out_src ^/ Config.export_changed_functions_output in + let main_json = try YB.from_file main_changed_fs_file |> YBU.to_list with Sys_error _ -> [] in + let changed_json = try YB.from_file changed_fs_file |> YBU.to_list with Sys_error _ -> [] in + let all_fs = + `List + (List.dedup_and_sort + ~compare:(fun s1 s2 -> + match (s1, s2) with `String s1, `String s2 -> String.compare s1 s2 | _ -> 0 ) + (List.append main_json changed_json)) + in + YB.to_file main_changed_fs_file all_fs + + let merge_changed_functions () = L.progress "Merging changed functions files...@." ; let infer_deps_file = Config.(results_dir ^/ buck_infer_deps_file_name) in - MergeResults.merge_buck_changed_functions infer_deps_file ; + Utils.iter_infer_deps ~project_root:Config.project_root ~f:merge_changed_functions_json + infer_deps_file ; L.progress "Done merging changed functions files@." @@ -36,7 +54,7 @@ let merge_captured_targets () = let time0 = Mtime_clock.counter () in L.progress "Merging captured Buck targets...@\n%!" 
; let infer_deps_file = Config.(results_dir ^/ buck_infer_deps_file_name) in - MergeResults.merge_buck_flavors_results infer_deps_file ; + DBWriter.merge ~infer_deps_file ; if Config.genrule_master_mode then merge_global_tenvs infer_deps_file ; L.progress "Merging captured Buck targets took %a@\n%!" Mtime.Span.pp (Mtime_clock.count time0) diff --git a/infer/src/base/DBWriter.ml b/infer/src/base/DBWriter.ml index 1753d7b2d..1356c233f 100644 --- a/infer/src/base/DBWriter.ml +++ b/infer/src/base/DBWriter.ml @@ -108,11 +108,11 @@ module Implementation = struct NULL). All the rows that pass this filter are inserted/updated into the main table. *) Sqlite3.exec db {| - INSERT OR REPLACE INTO procedures + INSERT OR REPLACE INTO memdb.procedures SELECT sub.proc_name, sub.proc_name_hum, sub.attr_kind, sub.source_file, sub.proc_attributes, sub.cfg, sub.callees FROM ( attached.procedures AS sub - LEFT OUTER JOIN procedures AS main + LEFT OUTER JOIN memdb.procedures AS main ON sub.proc_name = main.proc_name ) WHERE main.attr_kind IS NULL @@ -127,7 +127,7 @@ module Implementation = struct let db = ResultsDatabase.get_database () in Sqlite3.exec db {| - INSERT OR REPLACE INTO source_files + INSERT OR REPLACE INTO memdb.source_files SELECT source_file, type_environment, integer_type_widths, procedure_names, 1 FROM attached.source_files |} @@ -135,7 +135,14 @@ module Implementation = struct ~log:(Printf.sprintf "copying source_files of database '%s'" db_file) - let merge_dbs ~infer_out_src = + let copy_to_main db = + Sqlite3.exec db {| INSERT OR REPLACE INTO procedures SELECT * FROM memdb.procedures |} + |> SqliteUtils.check_result_code db ~log:"Copying procedures into main db" ; + Sqlite3.exec db {| INSERT OR REPLACE INTO source_files SELECT * FROM memdb.source_files |} + |> SqliteUtils.check_result_code db ~log:"Copying source_files into main db" + + + let merge_db infer_out_src = let db_file = infer_out_src ^/ ResultsDatabase.database_filename in let main_db = 
ResultsDatabase.get_database () in Sqlite3.exec main_db (Printf.sprintf "ATTACH '%s' AS attached" db_file) @@ -145,8 +152,18 @@ module Implementation = struct merge_source_files_table ~db_file ; Sqlite3.exec main_db "DETACH attached" |> SqliteUtils.check_result_code main_db - ~log:(Printf.sprintf "detaching database '%s'" db_file) ; - () + ~log:(Printf.sprintf "detaching database '%s'" db_file) + + + let merge infer_deps_file = + let main_db = ResultsDatabase.get_database () in + Sqlite3.exec main_db "ATTACH ':memory:' AS memdb" + |> SqliteUtils.check_result_code main_db ~log:"attaching memdb" ; + ResultsDatabase.create_tables ~prefix:"memdb." main_db ; + Utils.iter_infer_deps ~project_root:Config.project_root ~f:merge_db infer_deps_file ; + copy_to_main main_db ; + Sqlite3.exec main_db "DETACH memdb" + |> SqliteUtils.check_result_code main_db ~log:"detaching memdb" let canonicalize () = @@ -157,9 +174,8 @@ module Implementation = struct let reset_capture_tables () = let db = ResultsDatabase.get_database () in SqliteUtils.exec db ~log:"drop procedures table" ~stmt:"DROP TABLE procedures" ; - ResultsDatabase.create_procedures_table db ; SqliteUtils.exec db ~log:"drop source_files table" ~stmt:"DROP TABLE source_files" ; - ResultsDatabase.create_source_files_table db + ResultsDatabase.create_tables db end module Command = struct @@ -178,7 +194,7 @@ module Command = struct ; integer_type_widths: Sqlite3.Data.t ; proc_names: Sqlite3.Data.t } | MarkAllSourceFilesStale - | MergeDBs of {infer_out_src: string} + | Merge of {infer_deps_file: string} | Vacuum | ResetCaptureTables | Handshake @@ -191,8 +207,8 @@ module Command = struct "AddSourceFile" | MarkAllSourceFilesStale -> "MarkAllSourceFilesStale" - | MergeDBs _ -> - "MergeDBs" + | Merge _ -> + "Merge" | Vacuum -> "Vacuum" | ResetCaptureTables -> @@ -213,8 +229,8 @@ module Command = struct Implementation.add_source_file ~source_file ~tenv ~integer_type_widths ~proc_names | MarkAllSourceFilesStale -> 
Implementation.mark_all_source_files_stale () - | MergeDBs {infer_out_src} -> - Implementation.merge_dbs ~infer_out_src + | Merge {infer_deps_file} -> + Implementation.merge infer_deps_file | Vacuum -> Implementation.canonicalize () | ResetCaptureTables -> @@ -325,7 +341,7 @@ let add_source_file ~source_file ~tenv ~integer_type_widths ~proc_names = let mark_all_source_files_stale () = perform Command.MarkAllSourceFilesStale -let merge_dbs ~infer_out_src = Command.MergeDBs {infer_out_src} |> perform +let merge ~infer_deps_file = Command.Merge {infer_deps_file} |> perform let canonicalize () = perform Command.Vacuum diff --git a/infer/src/base/DBWriter.mli b/infer/src/base/DBWriter.mli index a5798226f..6c50db234 100644 --- a/infer/src/base/DBWriter.mli +++ b/infer/src/base/DBWriter.mli @@ -30,7 +30,7 @@ val add_source_file : val mark_all_source_files_stale : unit -> unit -val merge_dbs : infer_out_src:string -> unit +val merge : infer_deps_file:string -> unit val canonicalize : unit -> unit (** put the database on disk in deterministic form *) diff --git a/infer/src/base/MergeResults.ml b/infer/src/base/MergeResults.ml deleted file mode 100644 index 18c9f482e..000000000 --- a/infer/src/base/MergeResults.ml +++ /dev/null @@ -1,52 +0,0 @@ -(* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - *) -open! 
IStd -module L = Logging -module YB = Yojson.Basic -module YBU = Yojson.Basic.Util - -let merge_changed_functions_json ~infer_out_src = - let main_changed_fs_file = Config.results_dir ^/ Config.export_changed_functions_output in - let changed_fs_file = infer_out_src ^/ Config.export_changed_functions_output in - let main_json = try YB.from_file main_changed_fs_file |> YBU.to_list with Sys_error _ -> [] in - let changed_json = try YB.from_file changed_fs_file |> YBU.to_list with Sys_error _ -> [] in - let all_fs = - `List - (List.dedup_and_sort - ~compare:(fun s1 s2 -> - match (s1, s2) with `String s1, `String s2 -> String.compare s1 s2 | _ -> 0 ) - (List.append main_json changed_json)) - in - YB.to_file main_changed_fs_file all_fs - - -let iter_infer_deps infer_deps_file ~f = - let one_line line = - match String.split ~on:'\t' line with - | [_; _; target_results_dir] -> - let infer_out_src = - if Filename.is_relative target_results_dir then - Filename.dirname (Config.project_root ^/ "buck-out") ^/ target_results_dir - else target_results_dir - in - f ~infer_out_src - | _ -> - assert false - in - match Utils.read_file infer_deps_file with - | Ok lines -> - List.iter ~f:one_line lines - | Error error -> - L.internal_error "Couldn't read deps file '%s': %s" infer_deps_file error - - -let merge_buck_flavors_results infer_deps_file = - iter_infer_deps infer_deps_file ~f:DBWriter.merge_dbs - - -let merge_buck_changed_functions infer_deps_file = - iter_infer_deps infer_deps_file ~f:merge_changed_functions_json diff --git a/infer/src/base/MergeResults.mli b/infer/src/base/MergeResults.mli deleted file mode 100644 index 5ca1f26f3..000000000 --- a/infer/src/base/MergeResults.mli +++ /dev/null @@ -1,18 +0,0 @@ -(* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - *) - -open! 
IStd - -val merge_buck_flavors_results : string -> unit -(** Merge the results from sub-invocations of infer inside buck-out/. Takes as argument the - infer_deps file. *) - -val merge_buck_changed_functions : string -> unit -(** Merge the changed functions from sub-invocations of infer inside buck-out/. Takes as argument the - infer_deps file. *) - -val iter_infer_deps : string -> f:(infer_out_src:string -> unit) -> unit diff --git a/infer/src/base/ResultsDatabase.ml b/infer/src/base/ResultsDatabase.ml index 7ec9194e0..f8361737d 100644 --- a/infer/src/base/ResultsDatabase.ml +++ b/infer/src/base/ResultsDatabase.ml @@ -12,8 +12,9 @@ let database_filename = "results.db" let database_fullpath = Config.results_dir ^/ database_filename -let procedures_schema = - {|CREATE TABLE IF NOT EXISTS procedures +let procedures_schema prefix = + Printf.sprintf + {|CREATE TABLE IF NOT EXISTS %sprocedures ( proc_name TEXT PRIMARY KEY , proc_name_hum TEXT , attr_kind INTEGER NOT NULL @@ -22,27 +23,35 @@ let procedures_schema = , cfg BLOB , callees BLOB NOT NULL )|} + prefix -let source_files_schema = - {|CREATE TABLE IF NOT EXISTS source_files +let source_files_schema prefix = + Printf.sprintf + {|CREATE TABLE IF NOT EXISTS %ssource_files ( source_file TEXT PRIMARY KEY , type_environment BLOB NOT NULL , integer_type_widths BLOB , procedure_names BLOB NOT NULL , freshly_captured INT NOT NULL )|} + prefix -let schema_hum = Printf.sprintf "%s;\n%s" procedures_schema source_files_schema +let schema_hum = Printf.sprintf "%s;\n%s" (procedures_schema "") (source_files_schema "") -let create_procedures_table db = +let create_procedures_table ~prefix db = (* it would be nice to use "WITHOUT ROWID" here but ancient versions of sqlite do not support it *) - SqliteUtils.exec db ~log:"creating procedures table" ~stmt:procedures_schema + SqliteUtils.exec db ~log:"creating procedures table" ~stmt:(procedures_schema prefix) -let create_source_files_table db = - SqliteUtils.exec db ~log:"creating 
source_files table" ~stmt:source_files_schema +let create_source_files_table ~prefix db = + SqliteUtils.exec db ~log:"creating source_files table" ~stmt:(source_files_schema prefix) + + +let create_tables ?(prefix = "") db = + create_procedures_table ~prefix db ; + create_source_files_table ~prefix db let create_db () = @@ -50,8 +59,7 @@ let create_db () = let db = Sqlite3.db_open ~mutex:`FULL temp_db in SqliteUtils.exec db ~log:"sqlite page size" ~stmt:(Printf.sprintf "PRAGMA page_size=%d" Config.sqlite_page_size) ; - create_procedures_table db ; - create_source_files_table db ; + create_tables db ; (* This should be the default but better be sure, otherwise we cannot access the database concurrently. This has to happen before setting WAL mode. *) SqliteUtils.exec db ~log:"locking mode=NORMAL" ~stmt:"PRAGMA locking_mode=NORMAL" ; ( match Config.sqlite_vfs with diff --git a/infer/src/base/ResultsDatabase.mli b/infer/src/base/ResultsDatabase.mli index a0d2a9b27..3d1aec1ff 100644 --- a/infer/src/base/ResultsDatabase.mli +++ b/infer/src/base/ResultsDatabase.mli @@ -16,16 +16,12 @@ val database_fullpath : string val schema_hum : string (** some human-readable string describing the tables *) +val create_tables : ?prefix:string -> Sqlite3.db -> unit + val get_database : unit -> Sqlite3.db (** The results database. You should always use this function to access the database, as the connection to it may change during the execution (see [new_database_connection]). *) -val create_procedures_table : Sqlite3.db -> unit -(** create a procedures table in the database*) - -val create_source_files_table : Sqlite3.db -> unit -(** create a source files table in the database*) - val new_database_connection : unit -> unit (** Closes the previous connection to the database (if any), and opens a new one. Needed after calls to fork(2). 
*) diff --git a/infer/src/base/Utils.ml b/infer/src/base/Utils.ml index 03f3562af..88f22ca19 100644 --- a/infer/src/base/Utils.ml +++ b/infer/src/base/Utils.ml @@ -418,3 +418,24 @@ let get_available_memory_MB () = in if Sys.file_exists proc_meminfo <> `Yes then None else with_file_in proc_meminfo ~f:scan_for_expected_output + + +let iter_infer_deps ~project_root ~f infer_deps_file = + let buck_root = project_root ^/ "buck-out" in + let one_line line = + match String.split ~on:'\t' line with + | [_; _; target_results_dir] -> + let infer_out_src = + if Filename.is_relative target_results_dir then + Filename.dirname (buck_root ^/ target_results_dir) + else target_results_dir + in + f infer_out_src + | _ -> + Die.die InternalError "Couldn't parse deps file '%s', line: %s" infer_deps_file line + in + match read_file infer_deps_file with + | Ok lines -> + List.iter ~f:one_line lines + | Error error -> + Die.die InternalError "Couldn't read deps file '%s': %s" infer_deps_file error diff --git a/infer/src/base/Utils.mli b/infer/src/base/Utils.mli index 7b95451fd..e4c91fa30 100644 --- a/infer/src/base/Utils.mli +++ b/infer/src/base/Utils.mli @@ -143,3 +143,7 @@ val do_in_dir : dir:string -> f:(unit -> 'a) -> 'a val get_available_memory_MB : unit -> int option (** On Linux systems, return [Some x] where [MemAvailable x] is in [/proc/meminfo]. Returns [None] in all other cases. *) + +val iter_infer_deps : project_root:string -> f:(string -> unit) -> string -> unit +(** Parse each line of the given infer_deps.txt file (split on tabs, assume 3 elements per line) + and run [f] on the third element. [project_root] is an argument to avoid dependency cycles. *)