[sqlite] move all writes to one module

Summary:
Write contention is becoming a problem in parallel capture (e.g. when make runs with high parallelism) or when analysis writes CFGs to the DB in parallel (e.g. when analysing blocks in ObC). This is believed to lead to BUSY errors in SQLite.

This is step 1 of a process in which all writes are cordoned off in one module and the interface for that module is fixed.

Reviewed By: skcho

Differential Revision: D16985034

fbshipit-source-id: 3d7ce381b
master
Nikos Gorogiannis 5 years ago committed by Facebook Github Bot
parent c07555a768
commit 83aea33c68

@ -32,56 +32,13 @@ let proc_kind_of_attr (proc_attributes : ProcAttributes.t) =
else ProcUndefined else ProcUndefined
let replace_statement = let replace pname pname_blob akind source_file attributes proc_desc callees =
(* The innermost SELECT returns either the current attributes_kind and source_file associated with let pname_str = Typ.Procname.to_string pname in
the given proc name, or default values of (-1,""). These default values have the property that let akind_int64 = int64_of_attributes_kind akind in
they are always "less than" any legit value. More precisely, MAX ensures that some value is let proc_desc_blob = Procdesc.SQLite.serialize proc_desc in
returned even if there is no row satisfying WHERE (we'll get NULL in that case, the value in let callees_blob = Typ.Procname.SQLiteList.serialize callees in
the row otherwise). COALESCE then returns the first non-NULL value, which will be either the DBWriter.replace_attributes ~pname_str ~pname:pname_blob ~akind:akind_int64 ~source_file
value of the row corresponding to that pname in the DB, or the default if no such row exists. ~attributes ~proc_desc:proc_desc_blob ~callees:callees_blob
The next (second-outermost) SELECT filters out that value if it is "more defined" than the ones
we would like to insert (which will never be the case if the default values are returned). If
not, it returns a trivial row (consisting solely of NULL since we don't use its values) and the
INSERT OR REPLACE will proceed and insert or update the values stored into the DB for that
pname. *)
(* TRICK: use the source file to be more deterministic in case the same procedure name is defined
in several files *)
(* TRICK: older versions of sqlite (prior to version 3.15.0 (2016-10-14)) do not support row
values so the lexicographic ordering for (:akind, :sfile) is done by hand *)
ResultsDatabase.register_statement
{|
INSERT OR REPLACE INTO procedures
SELECT :pname, :proc_name_hum, :akind, :sfile, :pattr, :cfg, :callees
FROM (
SELECT NULL
FROM (
SELECT COALESCE(MAX(attr_kind),-1) AS attr_kind,
COALESCE(MAX(source_file),"") AS source_file
FROM procedures
WHERE proc_name = :pname )
WHERE attr_kind < :akind
OR (attr_kind = :akind AND source_file <= :sfile) )|}
let replace pname pname_blob akind loc_file attr_blob proc_desc callees =
ResultsDatabase.with_registered_statement replace_statement ~f:(fun db replace_stmt ->
Sqlite3.bind replace_stmt 1 (* :pname *) pname_blob
|> SqliteUtils.check_result_code db ~log:"replace bind pname" ;
Sqlite3.bind replace_stmt 2
(* :proc_name_hum *) (Sqlite3.Data.TEXT (Typ.Procname.to_string pname))
|> SqliteUtils.check_result_code db ~log:"replace bind proc_name_hum" ;
Sqlite3.bind replace_stmt 3 (* :akind *) (Sqlite3.Data.INT (int64_of_attributes_kind akind))
|> SqliteUtils.check_result_code db ~log:"replace bind attribute kind" ;
Sqlite3.bind replace_stmt 4 (* :sfile *) loc_file
|> SqliteUtils.check_result_code db ~log:"replace bind source file" ;
Sqlite3.bind replace_stmt 5 (* :pattr *) attr_blob
|> SqliteUtils.check_result_code db ~log:"replace bind proc attributes" ;
Sqlite3.bind replace_stmt 6 (* :cfg *) (Procdesc.SQLite.serialize proc_desc)
|> SqliteUtils.check_result_code db ~log:"replace bind cfg" ;
Sqlite3.bind replace_stmt 7 (* :callees *) (Typ.Procname.SQLiteList.serialize callees)
|> SqliteUtils.check_result_code db ~log:"replace bind callees" ;
SqliteUtils.result_unit db ~finalize:false ~log:"Attributes.replace" replace_stmt )
let find_more_defined_statement = let find_more_defined_statement =

@ -8,13 +8,6 @@ open! IStd
module F = Format module F = Format
module L = Logging module L = Logging
let store_statement =
ResultsDatabase.register_statement
{|
INSERT OR REPLACE INTO source_files
VALUES (:source, :tenv, :integer_type_widths, :proc_names, :freshly_captured) |}
let select_existing_statement = let select_existing_statement =
ResultsDatabase.register_statement ResultsDatabase.register_statement
"SELECT type_environment, procedure_names FROM source_files WHERE source_file = :source AND \ "SELECT type_environment, procedure_names FROM source_files WHERE source_file = :source AND \
@ -65,26 +58,11 @@ let add source_file cfg tenv integer_type_widths =
sure that all attributes were written to disk (but not necessarily flushed) *) sure that all attributes were written to disk (but not necessarily flushed) *)
SqliteUtils.with_transaction (ResultsDatabase.get_database ()) ~f:(fun () -> SqliteUtils.with_transaction (ResultsDatabase.get_database ()) ~f:(fun () ->
Cfg.store source_file cfg ) ; Cfg.store source_file cfg ) ;
ResultsDatabase.with_registered_statement store_statement ~f:(fun db store_stmt -> DBWriter.add_source_file
SourceFile.SQLite.serialize source_file ~source_file:(SourceFile.SQLite.serialize source_file)
|> Sqlite3.bind store_stmt 1 ~tenv:(Tenv.SQLite.serialize tenv)
(* :source *) ~integer_type_widths:(Typ.IntegerWidths.SQLite.serialize integer_type_widths)
|> SqliteUtils.check_result_code db ~log:"store bind source file" ; ~proc_names:(Typ.Procname.SQLiteList.serialize proc_names)
Tenv.SQLite.serialize tenv |> Sqlite3.bind store_stmt 2
(* :tenv *)
|> SqliteUtils.check_result_code db ~log:"store bind type environment" ;
Typ.IntegerWidths.SQLite.serialize integer_type_widths
|> Sqlite3.bind store_stmt 3
(* :integer_type_widths *)
|> SqliteUtils.check_result_code db ~log:"store bind integer type widths" ;
Typ.Procname.SQLiteList.serialize proc_names
|> Sqlite3.bind store_stmt 4
(* :proc_names *)
|> SqliteUtils.check_result_code db ~log:"store bind proc names" ;
Sqlite3.bind store_stmt 5 (Sqlite3.Data.INT Int64.one)
(* :freshly_captured *)
|> SqliteUtils.check_result_code db ~log:"store freshness" ;
SqliteUtils.result_unit ~finalize:false ~log:"Cfg.store" db store_stmt )
let get_all ~filter () = let get_all ~filter () =
@ -159,14 +137,7 @@ let is_freshly_captured source =
|> Option.value_map ~default:false ~f:deserialize_freshly_captured ) |> Option.value_map ~default:false ~f:deserialize_freshly_captured )
let mark_all_stale_statement = let mark_all_stale () = DBWriter.mark_all_source_files_stale ()
ResultsDatabase.register_statement "UPDATE source_files SET freshly_captured = 0"
let mark_all_stale () =
ResultsDatabase.with_registered_statement mark_all_stale_statement ~f:(fun db stmt ->
SqliteUtils.result_unit db ~finalize:false ~log:"mark_all_stale" stmt )
let select_all_source_files_statement = let select_all_source_files_statement =
ResultsDatabase.register_statement ResultsDatabase.register_statement

@ -13,7 +13,7 @@ module L = Logging
let merge_global_tenvs infer_deps_file = let merge_global_tenvs infer_deps_file =
let time0 = Mtime_clock.counter () in let time0 = Mtime_clock.counter () in
let global_tenv = Tenv.create () in let global_tenv = Tenv.create () in
let merge infer_out_src = let merge ~infer_out_src =
let global_tenv_path = let global_tenv_path =
infer_out_src ^/ Config.global_tenv_filename |> DB.filename_from_string infer_out_src ^/ Config.global_tenv_filename |> DB.filename_from_string
in in

@ -0,0 +1,148 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
(** Registered statement that writes one row of the [procedures] table, unless the row already
    stored for the same proc name is "more defined" than the values being written. *)
let attribute_replace_statement =
  (* The innermost SELECT returns either the current attributes_kind and source_file associated with
     the given proc name, or default values of (-1, empty string). These default values have the
     property that they are always "less than" any legit value. More precisely, MAX ensures that
     some value is returned even if there is no row satisfying WHERE (we'll get NULL in that case,
     the value in the row otherwise). COALESCE then returns the first non-NULL value, which will be
     either the value of the row corresponding to that pname in the DB, or the default if no such
     row exists.

     The next (second-outermost) SELECT filters out that value if it is "more defined" than the ones
     we would like to insert (which will never be the case if the default values are returned). If
     not, it returns a trivial row (consisting solely of NULL since we don't use its values) and the
     INSERT OR REPLACE will proceed and insert or update the values stored into the DB for that
     pname. *)
  (* TRICK: use the source file to be more deterministic in case the same procedure name is defined
     in several files *)
  (* TRICK: older versions of sqlite (prior to version 3.15.0 (2016-10-14)) do not support row
     values so the lexicographic ordering for (:akind, :sfile) is done by hand *)
  (* The empty-string default is written as a single-quoted literal: in SQL a double-quoted token
     is an identifier, and SQLite reads it as a string only as a compatibility fallback that can be
     disabled at build time. *)
  ResultsDatabase.register_statement
    {|
INSERT OR REPLACE INTO procedures
SELECT :pname, :proc_name_hum, :akind, :sfile, :pattr, :cfg, :callees
FROM (
SELECT NULL
FROM (
SELECT COALESCE(MAX(attr_kind),-1) AS attr_kind,
COALESCE(MAX(source_file),'') AS source_file
FROM procedures
WHERE proc_name = :pname )
WHERE attr_kind < :akind
OR (attr_kind = :akind AND source_file <= :sfile) )
|}
(** Bind the seven parameters of [attribute_replace_statement] (in order: [pname], [pname_str] as
    TEXT, [akind] as INT, [source_file], [attributes], [proc_desc], [callees]) and run it. Whether a
    row is actually written is decided by the WHERE clauses of that statement. *)
let replace_attributes ~pname_str ~pname ~akind ~source_file ~attributes ~proc_desc ~callees =
  ResultsDatabase.with_registered_statement attribute_replace_statement ~f:(fun db stmt ->
      (* bind the parameter at [position] and hand the result code to the checker *)
      let bind_param position value ~log =
        SqliteUtils.check_result_code db ~log (Sqlite3.bind stmt position value)
      in
      bind_param 1 (* :pname *) pname ~log:"replace bind pname" ;
      bind_param 2 (* :proc_name_hum *) (Sqlite3.Data.TEXT pname_str)
        ~log:"replace bind proc_name_hum" ;
      bind_param 3 (* :akind *) (Sqlite3.Data.INT akind) ~log:"replace bind attribute kind" ;
      bind_param 4 (* :sfile *) source_file ~log:"replace bind source file" ;
      bind_param 5 (* :pattr *) attributes ~log:"replace bind proc attributes" ;
      bind_param 6 (* :cfg *) proc_desc ~log:"replace bind cfg" ;
      bind_param 7 (* :callees *) callees ~log:"replace bind callees" ;
      SqliteUtils.result_unit db ~finalize:false ~log:"Attributes.replace" stmt )
(** Registered statement that inserts or replaces one row of the [source_files] table. The five
    values of the row come from the named parameters, in the order listed in VALUES. *)
let source_file_store_statement =
ResultsDatabase.register_statement
{|
INSERT OR REPLACE INTO source_files
VALUES (:source, :tenv, :integer_type_widths, :proc_names, :freshly_captured)
|}
(** Bind the five parameters of [source_file_store_statement] and run it. The first four come from
    the arguments; [:freshly_captured] is always bound to the integer 1. *)
let add_source_file ~source_file ~tenv ~integer_type_widths ~proc_names =
  ResultsDatabase.with_registered_statement source_file_store_statement ~f:(fun db stmt ->
      (* bind the parameter at [position] and hand the result code to the checker *)
      let bind_param position value ~log =
        SqliteUtils.check_result_code db ~log (Sqlite3.bind stmt position value)
      in
      bind_param 1 (* :source *) source_file ~log:"store bind source file" ;
      bind_param 2 (* :tenv *) tenv ~log:"store bind type environment" ;
      bind_param 3 (* :integer_type_widths *) integer_type_widths
        ~log:"store bind integer type widths" ;
      bind_param 4 (* :proc_names *) proc_names ~log:"store bind proc names" ;
      bind_param 5 (* :freshly_captured *) (Sqlite3.Data.INT Int64.one) ~log:"store freshness" ;
      SqliteUtils.result_unit ~finalize:false ~log:"Cfg.store" db stmt )
(** Registered statement that sets [freshly_captured] to 0 on every row of [source_files] (the
    UPDATE has no WHERE clause). *)
let mark_all_source_files_stale_statement =
ResultsDatabase.register_statement "UPDATE source_files SET freshly_captured = 0"
(** Run [mark_all_source_files_stale_statement], which takes no parameters. *)
let mark_all_source_files_stale () =
  let run_update db stmt =
    SqliteUtils.result_unit db ~finalize:false ~log:"mark_all_stale" stmt
  in
  ResultsDatabase.with_registered_statement mark_all_source_files_stale_statement ~f:run_update
(** Copy rows from [attached.procedures] into the main [procedures] table. Expects a database to be
    attached under the name [attached]; [db_file] is only used in the log message. *)
let merge_procedures_table ~db_file =
  let main_db = ResultsDatabase.get_database () in
  (* The merge is a single SQL statement for speed. It left-joins the attached table (sub) with the
     main one on proc_name and keeps a sub row when there is no matching main row
     (main.attr_kind IS NULL) or when the main row is less defined, using the same ordering on
     (attr_kind, source_file) as in Attributes. Every surviving row is inserted/updated into the
     main table. *)
  (* NOTE(review): on a tie of both attr_kind and source_file the main row is kept here (strict <),
     whereas attribute_replace_statement overwrites on a tie (<=) -- confirm this is intended. *)
  let merge_query =
    {|
INSERT OR REPLACE INTO procedures
SELECT sub.proc_name, sub.proc_name_hum, sub.attr_kind, sub.source_file, sub.proc_attributes, sub.cfg, sub.callees
FROM (
attached.procedures AS sub
LEFT OUTER JOIN procedures AS main
ON sub.proc_name = main.proc_name )
WHERE
main.attr_kind IS NULL
OR main.attr_kind < sub.attr_kind
OR (main.attr_kind = sub.attr_kind AND main.source_file < sub.source_file)
|}
  in
  let log = Printf.sprintf "copying procedures of database '%s'" db_file in
  SqliteUtils.check_result_code main_db ~log (Sqlite3.exec main_db merge_query)
(** Copy every row of [attached.source_files] into the main [source_files] table, replacing rows
    that already exist. The fifth column is set to the constant 1, the position that
    [source_file_store_statement] fills with [:freshly_captured]. Expects a database to be attached
    under the name [attached]; [db_file] is only used in the log message. *)
let merge_source_files_table ~db_file =
  let main_db = ResultsDatabase.get_database () in
  let copy_query =
    {|
INSERT OR REPLACE INTO source_files
SELECT source_file, type_environment, integer_type_widths, procedure_names, 1
FROM attached.source_files
|}
  in
  let log = Printf.sprintf "copying source_files of database '%s'" db_file in
  SqliteUtils.check_result_code main_db ~log (Sqlite3.exec main_db copy_query)
(** Merge the results database found under [infer_out_src] into the current one: attach it as
    [attached], merge its [procedures] and [source_files] tables, then detach it. *)
let merge_dbs ~infer_out_src =
  let db_file = infer_out_src ^/ ResultsDatabase.database_filename in
  let main_db = ResultsDatabase.get_database () in
  (* The path is spliced into a single-quoted SQL literal, so every quote it contains must be
     doubled; otherwise a results directory with a quote in its name yields a malformed ATTACH. *)
  let quoted_db_file = String.substr_replace_all db_file ~pattern:"'" ~with_:"''" in
  Sqlite3.exec main_db (Printf.sprintf "ATTACH '%s' AS attached" quoted_db_file)
  |> SqliteUtils.check_result_code main_db ~log:(Printf.sprintf "attaching database '%s'" db_file) ;
  merge_procedures_table ~db_file ;
  merge_source_files_table ~db_file ;
  Sqlite3.exec main_db "DETACH attached"
  |> SqliteUtils.check_result_code main_db ~log:(Printf.sprintf "detaching database '%s'" db_file)
(** Run VACUUM on the current results database. *)
let canonicalize () =
  ResultsDatabase.get_database () |> SqliteUtils.exec ~log:"running VACUUM" ~stmt:"VACUUM"

@ -0,0 +1,33 @@
(*
* Copyright (c) 2009-2013, Monoidics ltd.
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
val replace_attributes :
pname_str:string
-> pname:Sqlite3.Data.t
-> akind:int64
-> source_file:Sqlite3.Data.t
-> attributes:Sqlite3.Data.t
-> proc_desc:Sqlite3.Data.t
-> callees:Sqlite3.Data.t
-> unit
(** Insert or replace the row of the [procedures] table keyed by [pname]. The arguments are the
    already-serialized column values; [pname_str] is stored as the human-readable proc name. The
    write only happens if the row currently stored for [pname] (if any) has a smaller attribute
    kind than [akind], or the same kind and a [source_file] that is not greater than the given
    one. *)
val add_source_file :
source_file:Sqlite3.Data.t
-> tenv:Sqlite3.Data.t
-> integer_type_widths:Sqlite3.Data.t
-> proc_names:Sqlite3.Data.t
-> unit
(** Insert or replace a row of the [source_files] table from the given already-serialized values.
    The row is always stored with [freshly_captured] set to 1. *)
val mark_all_source_files_stale : unit -> unit
(** Set [freshly_captured] to 0 on every row of the [source_files] table. *)
val merge_dbs : infer_out_src:string -> unit
(** Merge the [procedures] and [source_files] tables of the results database located in the
    directory [infer_out_src] into the current results database. *)
val canonicalize : unit -> unit
(** Put the database on disk in deterministic form, by running VACUUM on it. *)

@ -9,54 +9,7 @@ module L = Logging
module YB = Yojson.Basic module YB = Yojson.Basic
module YBU = Yojson.Basic.Util module YBU = Yojson.Basic.Util
let merge_procedures_table ~db_file = let merge_changed_functions_json ~infer_out_src =
let db = ResultsDatabase.get_database () in
(* Do the merge purely in SQL for great speed. The query works by doing a left join between the
sub-table and the main one, and applying the same "more defined" logic as in Attributes in the
cases where a proc_name is present in both the sub-table and the main one (main.attr_kind !=
NULL). All the rows that pass this filter are inserted/updated into the main table. *)
Sqlite3.exec db
{|
INSERT OR REPLACE INTO procedures
SELECT sub.proc_name, sub.proc_name_hum, sub.attr_kind, sub.source_file, sub.proc_attributes, sub.cfg, sub.callees
FROM (
attached.procedures AS sub
LEFT OUTER JOIN procedures AS main
ON sub.proc_name = main.proc_name )
WHERE
main.attr_kind IS NULL
OR main.attr_kind < sub.attr_kind
OR (main.attr_kind = sub.attr_kind AND main.source_file < sub.source_file)
|}
|> SqliteUtils.check_result_code db
~log:(Printf.sprintf "copying procedures of database '%s'" db_file)
let merge_source_files_table ~db_file =
let db = ResultsDatabase.get_database () in
Sqlite3.exec db
{|
INSERT OR REPLACE INTO source_files
SELECT source_file, type_environment, integer_type_widths, procedure_names, 1
FROM attached.source_files
|}
|> SqliteUtils.check_result_code db
~log:(Printf.sprintf "copying source_files of database '%s'" db_file)
let merge_dbs infer_out_src =
let db_file = infer_out_src ^/ ResultsDatabase.database_filename in
let main_db = ResultsDatabase.get_database () in
Sqlite3.exec main_db (Printf.sprintf "ATTACH '%s' AS attached" db_file)
|> SqliteUtils.check_result_code main_db ~log:(Printf.sprintf "attaching database '%s'" db_file) ;
merge_procedures_table ~db_file ;
merge_source_files_table ~db_file ;
Sqlite3.exec main_db "DETACH attached"
|> SqliteUtils.check_result_code main_db ~log:(Printf.sprintf "detaching database '%s'" db_file) ;
()
let merge_changed_functions_json infer_out_src =
let main_changed_fs_file = Config.results_dir ^/ Config.export_changed_functions_output in let main_changed_fs_file = Config.results_dir ^/ Config.export_changed_functions_output in
let changed_fs_file = infer_out_src ^/ Config.export_changed_functions_output in let changed_fs_file = infer_out_src ^/ Config.export_changed_functions_output in
let main_json = try YB.from_file main_changed_fs_file |> YBU.to_list with Sys_error _ -> [] in let main_json = try YB.from_file main_changed_fs_file |> YBU.to_list with Sys_error _ -> [] in
@ -80,7 +33,7 @@ let iter_infer_deps infer_deps_file ~f =
Filename.dirname (Config.project_root ^/ "buck-out") ^/ target_results_dir Filename.dirname (Config.project_root ^/ "buck-out") ^/ target_results_dir
else target_results_dir else target_results_dir
in in
f infer_out_src f ~infer_out_src
| _ -> | _ ->
assert false assert false
in in
@ -91,7 +44,9 @@ let iter_infer_deps infer_deps_file ~f =
L.internal_error "Couldn't read deps file '%s': %s" infer_deps_file error L.internal_error "Couldn't read deps file '%s': %s" infer_deps_file error
let merge_buck_flavors_results infer_deps_file = iter_infer_deps infer_deps_file ~f:merge_dbs let merge_buck_flavors_results infer_deps_file =
iter_infer_deps infer_deps_file ~f:DBWriter.merge_dbs
let merge_buck_changed_functions infer_deps_file = let merge_buck_changed_functions infer_deps_file =
iter_infer_deps infer_deps_file ~f:merge_changed_functions_json iter_infer_deps infer_deps_file ~f:merge_changed_functions_json

@ -15,4 +15,4 @@ val merge_buck_changed_functions : string -> unit
(** Merge the changed functions from sub-invocations of infer inside buck-out/. Takes as argument the (** Merge the changed functions from sub-invocations of infer inside buck-out/. Takes as argument the
infer_deps file. *) infer_deps file. *)
val iter_infer_deps : string -> f:(string -> unit) -> unit val iter_infer_deps : string -> f:(infer_out_src:string -> unit) -> unit

@ -154,9 +154,4 @@ end
include UnsafeDatabaseRef include UnsafeDatabaseRef
let db_canonicalize () =
let db = get_database () in
SqliteUtils.exec db ~log:"running VACUUM" ~stmt:"VACUUM"
let () = Epilogues.register_late ~f:db_close ~description:"closing database connection" let () = Epilogues.register_late ~f:db_close ~description:"closing database connection"

@ -22,9 +22,6 @@ val get_database : unit -> Sqlite3.db
val new_database_connection : unit -> unit val new_database_connection : unit -> unit
(** Closes the previous connection to the database (if any), and opens a new one. Needed after calls to fork(2). *) (** Closes the previous connection to the database (if any), and opens a new one. Needed after calls to fork(2). *)
val db_canonicalize : unit -> unit
(** put the database on disk in deterministic form *)
val db_close : unit -> unit val db_close : unit -> unit
(** close the current connection to the database *) (** close the current connection to the database *)

@ -76,7 +76,7 @@ let register_perf_stats_report stats_type =
get rid of non-deterministic outputs.*) get rid of non-deterministic outputs.*)
let clean_results_dir () = let clean_results_dir () =
let cache_capture = Config.(flavors || genrule_mode) in let cache_capture = Config.(flavors || genrule_mode) in
if cache_capture then ResultsDatabase.db_canonicalize () ; if cache_capture then DBWriter.canonicalize () ;
(* make sure we are done with the database *) (* make sure we are done with the database *)
ResultsDatabase.db_close () ; ResultsDatabase.db_close () ;
(* In Buck flavors mode we keep all capture data, but in Java mode we keep only the tenv *) (* In Buck flavors mode we keep all capture data, but in Java mode we keep only the tenv *)

Loading…
Cancel
Save