From 83aea33c6870722a1d1565e490153d295326eba2 Mon Sep 17 00:00:00 2001 From: Nikos Gorogiannis Date: Mon, 2 Sep 2019 05:18:01 -0700 Subject: [PATCH] [sqlite] move all writes to one module Summary: Write contention is becoming a problem in parallel capture (e.g. when make runs with high parallelism) or when analysis writes CFGs to the DB in parallel (e.g. when analysing blocks in ObjC). This is believed to lead to BUSY errors in SQLite. This is step 1 of a process of cordoning off all writes in one module and fixing the interface for that module. Reviewed By: skcho Differential Revision: D16985034 fbshipit-source-id: 3d7ce381b --- infer/src/IR/Attributes.ml | 57 ++--------- infer/src/IR/SourceFiles.ml | 41 ++------ infer/src/backend/mergeCapture.ml | 2 +- infer/src/base/DBWriter.ml | 148 +++++++++++++++++++++++++++++ infer/src/base/DBWriter.mli | 33 +++++++ infer/src/base/MergeResults.ml | 55 +---------- infer/src/base/MergeResults.mli | 2 +- infer/src/base/ResultsDatabase.ml | 5 - infer/src/base/ResultsDatabase.mli | 3 - infer/src/integration/Driver.ml | 2 +- 10 files changed, 202 insertions(+), 146 deletions(-) create mode 100644 infer/src/base/DBWriter.ml create mode 100644 infer/src/base/DBWriter.mli diff --git a/infer/src/IR/Attributes.ml b/infer/src/IR/Attributes.ml index 13f9175e3..2f7b981b3 100644 --- a/infer/src/IR/Attributes.ml +++ b/infer/src/IR/Attributes.ml @@ -32,56 +32,13 @@ let proc_kind_of_attr (proc_attributes : ProcAttributes.t) = else ProcUndefined -let replace_statement = - (* The innermost SELECT returns either the current attributes_kind and source_file associated with - the given proc name, or default values of (-1,""). These default values have the property that - they are always "less than" any legit value. More precisely, MAX ensures that some value is - returned even if there is no row satisfying WHERE (we'll get NULL in that case, the value in - the row otherwise). 
COALESCE then returns the first non-NULL value, which will be either the - value of the row corresponding to that pname in the DB, or the default if no such row exists. - - The next (second-outermost) SELECT filters out that value if it is "more defined" than the ones - we would like to insert (which will never be the case if the default values are returned). If - not, it returns a trivial row (consisting solely of NULL since we don't use its values) and the - INSERT OR REPLACE will proceed and insert or update the values stored into the DB for that - pname. *) - (* TRICK: use the source file to be more deterministic in case the same procedure name is defined - in several files *) - (* TRICK: older versions of sqlite (prior to version 3.15.0 (2016-10-14)) do not support row - values so the lexicographic ordering for (:akind, :sfile) is done by hand *) - ResultsDatabase.register_statement - {| -INSERT OR REPLACE INTO procedures -SELECT :pname, :proc_name_hum, :akind, :sfile, :pattr, :cfg, :callees -FROM ( - SELECT NULL - FROM ( - SELECT COALESCE(MAX(attr_kind),-1) AS attr_kind, - COALESCE(MAX(source_file),"") AS source_file - FROM procedures - WHERE proc_name = :pname ) - WHERE attr_kind < :akind - OR (attr_kind = :akind AND source_file <= :sfile) )|} - - -let replace pname pname_blob akind loc_file attr_blob proc_desc callees = - ResultsDatabase.with_registered_statement replace_statement ~f:(fun db replace_stmt -> - Sqlite3.bind replace_stmt 1 (* :pname *) pname_blob - |> SqliteUtils.check_result_code db ~log:"replace bind pname" ; - Sqlite3.bind replace_stmt 2 - (* :proc_name_hum *) (Sqlite3.Data.TEXT (Typ.Procname.to_string pname)) - |> SqliteUtils.check_result_code db ~log:"replace bind proc_name_hum" ; - Sqlite3.bind replace_stmt 3 (* :akind *) (Sqlite3.Data.INT (int64_of_attributes_kind akind)) - |> SqliteUtils.check_result_code db ~log:"replace bind attribute kind" ; - Sqlite3.bind replace_stmt 4 (* :sfile *) loc_file - |> SqliteUtils.check_result_code db 
~log:"replace bind source file" ; - Sqlite3.bind replace_stmt 5 (* :pattr *) attr_blob - |> SqliteUtils.check_result_code db ~log:"replace bind proc attributes" ; - Sqlite3.bind replace_stmt 6 (* :cfg *) (Procdesc.SQLite.serialize proc_desc) - |> SqliteUtils.check_result_code db ~log:"replace bind cfg" ; - Sqlite3.bind replace_stmt 7 (* :callees *) (Typ.Procname.SQLiteList.serialize callees) - |> SqliteUtils.check_result_code db ~log:"replace bind callees" ; - SqliteUtils.result_unit db ~finalize:false ~log:"Attributes.replace" replace_stmt ) +let replace pname pname_blob akind source_file attributes proc_desc callees = + let pname_str = Typ.Procname.to_string pname in + let akind_int64 = int64_of_attributes_kind akind in + let proc_desc_blob = Procdesc.SQLite.serialize proc_desc in + let callees_blob = Typ.Procname.SQLiteList.serialize callees in + DBWriter.replace_attributes ~pname_str ~pname:pname_blob ~akind:akind_int64 ~source_file + ~attributes ~proc_desc:proc_desc_blob ~callees:callees_blob let find_more_defined_statement = diff --git a/infer/src/IR/SourceFiles.ml b/infer/src/IR/SourceFiles.ml index fa1ba1a2e..4c7194e72 100644 --- a/infer/src/IR/SourceFiles.ml +++ b/infer/src/IR/SourceFiles.ml @@ -8,13 +8,6 @@ open! 
IStd module F = Format module L = Logging -let store_statement = - ResultsDatabase.register_statement - {| - INSERT OR REPLACE INTO source_files - VALUES (:source, :tenv, :integer_type_widths, :proc_names, :freshly_captured) |} - - let select_existing_statement = ResultsDatabase.register_statement "SELECT type_environment, procedure_names FROM source_files WHERE source_file = :source AND \ @@ -65,26 +58,11 @@ let add source_file cfg tenv integer_type_widths = sure that all attributes were written to disk (but not necessarily flushed) *) SqliteUtils.with_transaction (ResultsDatabase.get_database ()) ~f:(fun () -> Cfg.store source_file cfg ) ; - ResultsDatabase.with_registered_statement store_statement ~f:(fun db store_stmt -> - SourceFile.SQLite.serialize source_file - |> Sqlite3.bind store_stmt 1 - (* :source *) - |> SqliteUtils.check_result_code db ~log:"store bind source file" ; - Tenv.SQLite.serialize tenv |> Sqlite3.bind store_stmt 2 - (* :tenv *) - |> SqliteUtils.check_result_code db ~log:"store bind type environment" ; - Typ.IntegerWidths.SQLite.serialize integer_type_widths - |> Sqlite3.bind store_stmt 3 - (* :integer_type_widths *) - |> SqliteUtils.check_result_code db ~log:"store bind integer type widths" ; - Typ.Procname.SQLiteList.serialize proc_names - |> Sqlite3.bind store_stmt 4 - (* :proc_names *) - |> SqliteUtils.check_result_code db ~log:"store bind proc names" ; - Sqlite3.bind store_stmt 5 (Sqlite3.Data.INT Int64.one) - (* :freshly_captured *) - |> SqliteUtils.check_result_code db ~log:"store freshness" ; - SqliteUtils.result_unit ~finalize:false ~log:"Cfg.store" db store_stmt ) + DBWriter.add_source_file + ~source_file:(SourceFile.SQLite.serialize source_file) + ~tenv:(Tenv.SQLite.serialize tenv) + ~integer_type_widths:(Typ.IntegerWidths.SQLite.serialize integer_type_widths) + ~proc_names:(Typ.Procname.SQLiteList.serialize proc_names) let get_all ~filter () = @@ -159,14 +137,7 @@ let is_freshly_captured source = |> Option.value_map ~default:false 
~f:deserialize_freshly_captured ) -let mark_all_stale_statement = - ResultsDatabase.register_statement "UPDATE source_files SET freshly_captured = 0" - - -let mark_all_stale () = - ResultsDatabase.with_registered_statement mark_all_stale_statement ~f:(fun db stmt -> - SqliteUtils.result_unit db ~finalize:false ~log:"mark_all_stale" stmt ) - +let mark_all_stale () = DBWriter.mark_all_source_files_stale () let select_all_source_files_statement = ResultsDatabase.register_statement diff --git a/infer/src/backend/mergeCapture.ml b/infer/src/backend/mergeCapture.ml index f7c150818..a4ed583b7 100644 --- a/infer/src/backend/mergeCapture.ml +++ b/infer/src/backend/mergeCapture.ml @@ -13,7 +13,7 @@ module L = Logging let merge_global_tenvs infer_deps_file = let time0 = Mtime_clock.counter () in let global_tenv = Tenv.create () in - let merge infer_out_src = + let merge ~infer_out_src = let global_tenv_path = infer_out_src ^/ Config.global_tenv_filename |> DB.filename_from_string in diff --git a/infer/src/base/DBWriter.ml b/infer/src/base/DBWriter.ml new file mode 100644 index 000000000..0ff5a6167 --- /dev/null +++ b/infer/src/base/DBWriter.ml @@ -0,0 +1,148 @@ +(* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +open! IStd + +let attribute_replace_statement = + (* The innermost SELECT returns either the current attributes_kind and source_file associated with + the given proc name, or default values of (-1,""). These default values have the property that + they are always "less than" any legit value. More precisely, MAX ensures that some value is + returned even if there is no row satisfying WHERE (we'll get NULL in that case, the value in + the row otherwise). COALESCE then returns the first non-NULL value, which will be either the + value of the row corresponding to that pname in the DB, or the default if no such row exists. 
+ + The next (second-outermost) SELECT filters out that value if it is "more defined" than the ones + we would like to insert (which will never be the case if the default values are returned). If + not, it returns a trivial row (consisting solely of NULL since we don't use its values) and the + INSERT OR REPLACE will proceed and insert or update the values stored into the DB for that + pname. *) + (* TRICK: use the source file to be more deterministic in case the same procedure name is defined + in several files *) + (* TRICK: older versions of sqlite (prior to version 3.15.0 (2016-10-14)) do not support row + values so the lexicographic ordering for (:akind, :sfile) is done by hand *) + ResultsDatabase.register_statement + {| + INSERT OR REPLACE INTO procedures + SELECT :pname, :proc_name_hum, :akind, :sfile, :pattr, :cfg, :callees + FROM ( + SELECT NULL + FROM ( + SELECT COALESCE(MAX(attr_kind),-1) AS attr_kind, + COALESCE(MAX(source_file),"") AS source_file + FROM procedures + WHERE proc_name = :pname ) + WHERE attr_kind < :akind + OR (attr_kind = :akind AND source_file <= :sfile) ) + |} + + +let replace_attributes ~pname_str ~pname ~akind ~source_file ~attributes ~proc_desc ~callees = + ResultsDatabase.with_registered_statement attribute_replace_statement ~f:(fun db replace_stmt -> + Sqlite3.bind replace_stmt 1 (* :pname *) pname + |> SqliteUtils.check_result_code db ~log:"replace bind pname" ; + Sqlite3.bind replace_stmt 2 (* :proc_name_hum *) (Sqlite3.Data.TEXT pname_str) + |> SqliteUtils.check_result_code db ~log:"replace bind proc_name_hum" ; + Sqlite3.bind replace_stmt 3 (* :akind *) (Sqlite3.Data.INT akind) + |> SqliteUtils.check_result_code db ~log:"replace bind attribute kind" ; + Sqlite3.bind replace_stmt 4 (* :sfile *) source_file + |> SqliteUtils.check_result_code db ~log:"replace bind source file" ; + Sqlite3.bind replace_stmt 5 (* :pattr *) attributes + |> SqliteUtils.check_result_code db ~log:"replace bind proc attributes" ; + Sqlite3.bind 
replace_stmt 6 (* :cfg *) proc_desc + |> SqliteUtils.check_result_code db ~log:"replace bind cfg" ; + Sqlite3.bind replace_stmt 7 (* :callees *) callees + |> SqliteUtils.check_result_code db ~log:"replace bind callees" ; + SqliteUtils.result_unit db ~finalize:false ~log:"Attributes.replace" replace_stmt ) + + +let source_file_store_statement = + ResultsDatabase.register_statement + {| + INSERT OR REPLACE INTO source_files + VALUES (:source, :tenv, :integer_type_widths, :proc_names, :freshly_captured) + |} + + +let add_source_file ~source_file ~tenv ~integer_type_widths ~proc_names = + ResultsDatabase.with_registered_statement source_file_store_statement ~f:(fun db store_stmt -> + Sqlite3.bind store_stmt 1 source_file + (* :source *) + |> SqliteUtils.check_result_code db ~log:"store bind source file" ; + Sqlite3.bind store_stmt 2 tenv + (* :tenv *) + |> SqliteUtils.check_result_code db ~log:"store bind type environment" ; + Sqlite3.bind store_stmt 3 integer_type_widths + (* :integer_type_widths *) + |> SqliteUtils.check_result_code db ~log:"store bind integer type widths" ; + Sqlite3.bind store_stmt 4 proc_names + (* :proc_names *) + |> SqliteUtils.check_result_code db ~log:"store bind proc names" ; + Sqlite3.bind store_stmt 5 (Sqlite3.Data.INT Int64.one) + (* :freshly_captured *) + |> SqliteUtils.check_result_code db ~log:"store freshness" ; + SqliteUtils.result_unit ~finalize:false ~log:"Cfg.store" db store_stmt ) + + +let mark_all_source_files_stale_statement = + ResultsDatabase.register_statement "UPDATE source_files SET freshly_captured = 0" + + +let mark_all_source_files_stale () = + ResultsDatabase.with_registered_statement mark_all_source_files_stale_statement + ~f:(fun db stmt -> SqliteUtils.result_unit db ~finalize:false ~log:"mark_all_stale" stmt) + + +let merge_procedures_table ~db_file = + let db = ResultsDatabase.get_database () in + (* Do the merge purely in SQL for great speed. 
The query works by doing a left join between the + sub-table and the main one, and applying the same "more defined" logic as in Attributes in the + cases where a proc_name is present in both the sub-table and the main one (main.attr_kind != + NULL). All the rows that pass this filter are inserted/updated into the main table. *) + Sqlite3.exec db + {| + INSERT OR REPLACE INTO procedures + SELECT sub.proc_name, sub.proc_name_hum, sub.attr_kind, sub.source_file, sub.proc_attributes, sub.cfg, sub.callees + FROM ( + attached.procedures AS sub + LEFT OUTER JOIN procedures AS main + ON sub.proc_name = main.proc_name ) + WHERE + main.attr_kind IS NULL + OR main.attr_kind < sub.attr_kind + OR (main.attr_kind = sub.attr_kind AND main.source_file < sub.source_file) + |} + |> SqliteUtils.check_result_code db + ~log:(Printf.sprintf "copying procedures of database '%s'" db_file) + + +let merge_source_files_table ~db_file = + let db = ResultsDatabase.get_database () in + Sqlite3.exec db + {| + INSERT OR REPLACE INTO source_files + SELECT source_file, type_environment, integer_type_widths, procedure_names, 1 + FROM attached.source_files + |} + |> SqliteUtils.check_result_code db + ~log:(Printf.sprintf "copying source_files of database '%s'" db_file) + + +let merge_dbs ~infer_out_src = + let db_file = infer_out_src ^/ ResultsDatabase.database_filename in + let main_db = ResultsDatabase.get_database () in + Sqlite3.exec main_db (Printf.sprintf "ATTACH '%s' AS attached" db_file) + |> SqliteUtils.check_result_code main_db ~log:(Printf.sprintf "attaching database '%s'" db_file) ; + merge_procedures_table ~db_file ; + merge_source_files_table ~db_file ; + Sqlite3.exec main_db "DETACH attached" + |> SqliteUtils.check_result_code main_db ~log:(Printf.sprintf "detaching database '%s'" db_file) ; + () + + +let canonicalize () = + let db = ResultsDatabase.get_database () in + SqliteUtils.exec db ~log:"running VACUUM" ~stmt:"VACUUM" diff --git a/infer/src/base/DBWriter.mli 
b/infer/src/base/DBWriter.mli new file mode 100644 index 000000000..e7fefc32f --- /dev/null +++ b/infer/src/base/DBWriter.mli @@ -0,0 +1,33 @@ +(* + * Copyright (c) 2009-2013, Monoidics ltd. + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +open! IStd + +val replace_attributes : + pname_str:string + -> pname:Sqlite3.Data.t + -> akind:int64 + -> source_file:Sqlite3.Data.t + -> attributes:Sqlite3.Data.t + -> proc_desc:Sqlite3.Data.t + -> callees:Sqlite3.Data.t + -> unit + +val add_source_file : + source_file:Sqlite3.Data.t + -> tenv:Sqlite3.Data.t + -> integer_type_widths:Sqlite3.Data.t + -> proc_names:Sqlite3.Data.t + -> unit + +val mark_all_source_files_stale : unit -> unit + +val merge_dbs : infer_out_src:string -> unit + +val canonicalize : unit -> unit +(** put the database on disk in deterministic form *) diff --git a/infer/src/base/MergeResults.ml b/infer/src/base/MergeResults.ml index da23a982c..18c9f482e 100644 --- a/infer/src/base/MergeResults.ml +++ b/infer/src/base/MergeResults.ml @@ -9,54 +9,7 @@ module L = Logging module YB = Yojson.Basic module YBU = Yojson.Basic.Util -let merge_procedures_table ~db_file = - let db = ResultsDatabase.get_database () in - (* Do the merge purely in SQL for great speed. The query works by doing a left join between the - sub-table and the main one, and applying the same "more defined" logic as in Attributes in the - cases where a proc_name is present in both the sub-table and the main one (main.attr_kind != - NULL). All the rows that pass this filter are inserted/updated into the main table. 
*) - Sqlite3.exec db - {| -INSERT OR REPLACE INTO procedures -SELECT sub.proc_name, sub.proc_name_hum, sub.attr_kind, sub.source_file, sub.proc_attributes, sub.cfg, sub.callees -FROM ( - attached.procedures AS sub - LEFT OUTER JOIN procedures AS main - ON sub.proc_name = main.proc_name ) -WHERE - main.attr_kind IS NULL - OR main.attr_kind < sub.attr_kind - OR (main.attr_kind = sub.attr_kind AND main.source_file < sub.source_file) -|} - |> SqliteUtils.check_result_code db - ~log:(Printf.sprintf "copying procedures of database '%s'" db_file) - - -let merge_source_files_table ~db_file = - let db = ResultsDatabase.get_database () in - Sqlite3.exec db - {| - INSERT OR REPLACE INTO source_files - SELECT source_file, type_environment, integer_type_widths, procedure_names, 1 - FROM attached.source_files -|} - |> SqliteUtils.check_result_code db - ~log:(Printf.sprintf "copying source_files of database '%s'" db_file) - - -let merge_dbs infer_out_src = - let db_file = infer_out_src ^/ ResultsDatabase.database_filename in - let main_db = ResultsDatabase.get_database () in - Sqlite3.exec main_db (Printf.sprintf "ATTACH '%s' AS attached" db_file) - |> SqliteUtils.check_result_code main_db ~log:(Printf.sprintf "attaching database '%s'" db_file) ; - merge_procedures_table ~db_file ; - merge_source_files_table ~db_file ; - Sqlite3.exec main_db "DETACH attached" - |> SqliteUtils.check_result_code main_db ~log:(Printf.sprintf "detaching database '%s'" db_file) ; - () - - -let merge_changed_functions_json infer_out_src = +let merge_changed_functions_json ~infer_out_src = let main_changed_fs_file = Config.results_dir ^/ Config.export_changed_functions_output in let changed_fs_file = infer_out_src ^/ Config.export_changed_functions_output in let main_json = try YB.from_file main_changed_fs_file |> YBU.to_list with Sys_error _ -> [] in @@ -80,7 +33,7 @@ let iter_infer_deps infer_deps_file ~f = Filename.dirname (Config.project_root ^/ "buck-out") ^/ target_results_dir else 
target_results_dir in - f infer_out_src + f ~infer_out_src | _ -> assert false in @@ -91,7 +44,9 @@ let iter_infer_deps infer_deps_file ~f = L.internal_error "Couldn't read deps file '%s': %s" infer_deps_file error -let merge_buck_flavors_results infer_deps_file = iter_infer_deps infer_deps_file ~f:merge_dbs +let merge_buck_flavors_results infer_deps_file = + iter_infer_deps infer_deps_file ~f:DBWriter.merge_dbs + let merge_buck_changed_functions infer_deps_file = iter_infer_deps infer_deps_file ~f:merge_changed_functions_json diff --git a/infer/src/base/MergeResults.mli b/infer/src/base/MergeResults.mli index e64dfbe6e..5ca1f26f3 100644 --- a/infer/src/base/MergeResults.mli +++ b/infer/src/base/MergeResults.mli @@ -15,4 +15,4 @@ val merge_buck_changed_functions : string -> unit (** Merge the changed functions from sub-invocations of infer inside buck-out/. Takes as argument the infer_deps file. *) -val iter_infer_deps : string -> f:(string -> unit) -> unit +val iter_infer_deps : string -> f:(infer_out_src:string -> unit) -> unit diff --git a/infer/src/base/ResultsDatabase.ml b/infer/src/base/ResultsDatabase.ml index 3418d8ad2..b34bbb30d 100644 --- a/infer/src/base/ResultsDatabase.ml +++ b/infer/src/base/ResultsDatabase.ml @@ -154,9 +154,4 @@ end include UnsafeDatabaseRef -let db_canonicalize () = - let db = get_database () in - SqliteUtils.exec db ~log:"running VACUUM" ~stmt:"VACUUM" - - let () = Epilogues.register_late ~f:db_close ~description:"closing database connection" diff --git a/infer/src/base/ResultsDatabase.mli b/infer/src/base/ResultsDatabase.mli index f3accf4b1..b2b5439c1 100644 --- a/infer/src/base/ResultsDatabase.mli +++ b/infer/src/base/ResultsDatabase.mli @@ -22,9 +22,6 @@ val get_database : unit -> Sqlite3.db val new_database_connection : unit -> unit (** Closes the previous connection to the database (if any), and opens a new one. Needed after calls to fork(2). 
*) -val db_canonicalize : unit -> unit -(** put the database on disk in deterministic form *) - val db_close : unit -> unit (** close the current connection to the database *) diff --git a/infer/src/integration/Driver.ml b/infer/src/integration/Driver.ml index 9bce5b9df..21206bbfb 100644 --- a/infer/src/integration/Driver.ml +++ b/infer/src/integration/Driver.ml @@ -76,7 +76,7 @@ let register_perf_stats_report stats_type = get rid of non-deterministic outputs.*) let clean_results_dir () = let cache_capture = Config.(flavors || genrule_mode) in - if cache_capture then ResultsDatabase.db_canonicalize () ; + if cache_capture then DBWriter.canonicalize () ; (* make sure we are done with the database *) ResultsDatabase.db_close () ; (* In Buck flavors mode we keep all capture data, but in Java mode we keep only the tenv *)