From b00451d23ef2275549aaa717e81ac25325be812e Mon Sep 17 00:00:00 2001 From: Jules Villard Date: Fri, 29 Sep 2017 08:46:34 -0700 Subject: [PATCH] [sql] Cache key blobs Summary: - use a similar key as for specs in the attributes table - cache blob computations - this improves memory usage a lot Reviewed By: mbouaziz Differential Revision: D5824177 fbshipit-source-id: c318577 --- infer/src/IR/Attributes.ml | 36 +++++++++++++++---- infer/src/IR/Attributes.mli | 3 +- infer/src/IR/Typ.ml | 20 ++++++----- infer/src/IR/Typ.mli | 5 +-- infer/src/base/DB.ml | 4 +-- infer/src/base/DB.mli | 5 +-- infer/src/base/KeyValue.ml | 63 ++++++++++++++++++++++------------ infer/src/base/KeyValue.mli | 22 +++++++----- infer/src/base/MergeResults.ml | 33 ++++++++++-------- 9 files changed, 122 insertions(+), 69 deletions(-) diff --git a/infer/src/IR/Attributes.ml b/infer/src/IR/Attributes.ml index c99428e5a..4de264884 100644 --- a/infer/src/IR/Attributes.ml +++ b/infer/src/IR/Attributes.ml @@ -40,7 +40,7 @@ let should_override_attr attr1 attr2 = > 0 module Table = struct - type key = Typ.Procname.t * attributes_kind + type key = string type value = ProcAttributes.t @@ -49,20 +49,44 @@ end module Store = KeyValue.Make (Table) +let string_of_pkind = function + | ProcUndefined + -> "U" + | ProcObjCAccessor + -> "O" + | ProcDefined + -> "D" + +module KeyHashtbl = Caml.Hashtbl.Make (struct + type t = Typ.Procname.t * attributes_kind + + let equal = [%compare.equal : Typ.Procname.t * attributes_kind] + + let hash = Hashtbl.hash +end) + +let pname_to_key = KeyHashtbl.create 16 + +let key_of_pname_pkind (pname, pkind as p) = + try KeyHashtbl.find pname_to_key p + with Not_found -> + let key = Typ.Procname.to_filename pname ^ string_of_pkind pkind |> Store.blob_of_key in + KeyHashtbl.replace pname_to_key p key ; key + let load_aux ?(min_kind= ProcUndefined) pname = List.find_map (most_relevant_down_to_proc_kind_inclusive min_kind) ~f:(fun pkind -> - Store.find (pname, pkind) ) + 
key_of_pname_pkind (pname, pkind) |> Store.find ) let load pname : ProcAttributes.t option = load_aux pname let store (attr: ProcAttributes.t) = let pkind = proc_kind_of_attr attr in - let key = (attr.proc_name, pkind) in - if load attr.proc_name |> Option.value_map ~default:true ~f:(should_override_attr attr) then ( + if load attr.proc_name |> Option.value_map ~default:true ~f:(should_override_attr attr) then (* NOTE: We need to do this dance of adding the proc_kind to the key because there's a race condition between the time we load the attributes from the db and the time we write possibly better ones. We could avoid this by making the db schema richer than just key/value and turning the SELECT + REPLACE into an atomic transaction. *) - Store.replace key attr ; + let key = key_of_pname_pkind (attr.proc_name, pkind) in + Store.replace key (Store.blob_of_value attr) ; least_relevant_up_to_proc_kind_exclusive pkind - |> List.iter ~f:(fun k -> Store.delete (attr.proc_name, k)) ) + |> List.iter ~f:(fun k -> key_of_pname_pkind (attr.proc_name, k) |> Store.delete) let load_defined pname = load_aux ~min_kind:ProcDefined pname diff --git a/infer/src/IR/Attributes.mli b/infer/src/IR/Attributes.mli index 091dbb198..528518546 100644 --- a/infer/src/IR/Attributes.mli +++ b/infer/src/IR/Attributes.mli @@ -13,8 +13,7 @@ open! 
IStd type attributes_kind -module Table : - KeyValue.Table with type key = Typ.Procname.t * attributes_kind and type value = ProcAttributes.t +module Table : KeyValue.Table with type key = string and type value = ProcAttributes.t module Store : KeyValue.S with module Table = Table diff --git a/infer/src/IR/Typ.ml b/infer/src/IR/Typ.ml index b93c4c0ac..d5b5b903c 100644 --- a/infer/src/IR/Typ.ml +++ b/infer/src/IR/Typ.ml @@ -969,13 +969,15 @@ module Procname = struct (** hash function for procname *) let hash_pname = Hashtbl.hash - module Hash = Hashtbl.Make (struct + module Hashable = struct type nonrec t = t let equal = equal let hash = hash_pname - end) + end + + module Hash = Hashtbl.Make (Hashable) module Map = PrettyPrintable.MakePPMap (struct type nonrec t = t @@ -1009,7 +1011,7 @@ module Procname = struct -> QualifiedCppName.empty (** Convert a proc name to a filename *) - let to_concrete_filename pname = + let to_concrete_filename ?crc_only pname = (* filenames for clang procs are REVERSED qualifiers with '#' as separator *) let get_qual_name_str pname = get_qualifiers pname |> QualifiedCppName.to_rev_list |> String.concat ~sep:"#" @@ -1023,21 +1025,21 @@ module Procname = struct | _ -> to_unique_id pname in - Escape.escape_filename @@ DB.append_crc_cutoff proc_id + Escape.escape_filename @@ DB.append_crc_cutoff ?crc_only proc_id - let to_generic_filename pname = + let to_generic_filename ?crc_only pname = let proc_id = get_qualifiers pname |> QualifiedCppName.strip_template_args |> QualifiedCppName.to_rev_list |> String.concat ~sep:"#" in - Escape.escape_filename @@ DB.append_crc_cutoff proc_id + Escape.escape_filename @@ DB.append_crc_cutoff ?crc_only proc_id - let to_filename pname = + let to_filename ?crc_only pname = match pname with | (C {is_generic_model} | ObjC_Cpp {is_generic_model}) when Bool.equal is_generic_model true - -> to_generic_filename pname + -> to_generic_filename ?crc_only pname | _ - -> to_concrete_filename pname + -> 
to_concrete_filename ?crc_only pname (** given two template arguments, try to generate mapping from generic ones to concrete ones. *) let get_template_args_mapping generic_procname concrete_procname = diff --git a/infer/src/IR/Typ.mli b/infer/src/IR/Typ.mli index 94915ae73..4d076ae13 100644 --- a/infer/src/IR/Typ.mli +++ b/infer/src/IR/Typ.mli @@ -299,6 +299,7 @@ module Procname : sig | ObjCInternalMethod (** Hash tables with proc names as keys. *) + module Hashable : Caml.Hashtbl.HashedType with type t = t module Hash : Caml.Hashtbl.S with type key = t @@ -479,8 +480,8 @@ module Procname : sig val to_unique_id : t -> string (** Convert a proc name into a unique identifier. *) - val to_filename : t -> string - (** Convert a proc name to a filename. *) + val to_filename : ?crc_only:bool -> t -> string + (** Convert a proc name to a filename or only to its crc. *) val get_qualifiers : t -> QualifiedCppName.t (** get qualifiers of C/objc/C++ method/function *) diff --git a/infer/src/base/DB.ml b/infer/src/base/DB.ml index ebe593127..738c6ff51 100644 --- a/infer/src/base/DB.ml +++ b/infer/src/base/DB.ml @@ -20,7 +20,7 @@ let cutoff_length = 100 let crc_token = '.' 
-let append_crc_cutoff ?(key= "") name = +let append_crc_cutoff ?(key= "") ?(crc_only= false) name = let name_up_to_cutoff = if String.length name <= cutoff_length then name else String.sub name ~pos:0 ~len:cutoff_length in @@ -28,7 +28,7 @@ let append_crc_cutoff ?(key= "") name = let name_for_crc = name ^ key in Utils.string_crc_hex32 name_for_crc in - name_up_to_cutoff ^ Char.to_string crc_token ^ crc_str + if crc_only then crc_str else name_up_to_cutoff ^ Char.to_string crc_token ^ crc_str (* Lengh of .crc part: 32 characters of digest, plus 1 character of crc_token *) let dot_crc_len = 1 + 32 diff --git a/infer/src/base/DB.mli b/infer/src/base/DB.mli index 8aef13def..c83b1ef02 100644 --- a/infer/src/base/DB.mli +++ b/infer/src/base/DB.mli @@ -76,10 +76,11 @@ module Results_dir : sig (** create a file at the given path, creating any missing directories *) end -val append_crc_cutoff : ?key:string -> string -> string +val append_crc_cutoff : ?key:string -> ?crc_only:bool -> string -> string (** Append a crc to the string, using string_crc_hex32. Cut the string if it exceeds the cutoff limit. - Use an optional key to compute the crc. *) + Use an optional key to compute the crc. + Return only the crc if [crc_only] is true. 
*) val string_crc_has_extension : ext:string -> string -> bool (** Remove the crc from the string, and check if it has the given extension *) diff --git a/infer/src/base/KeyValue.ml b/infer/src/base/KeyValue.ml index 7e1a33588..4328274ee 100644 --- a/infer/src/base/KeyValue.ml +++ b/infer/src/base/KeyValue.ml @@ -17,33 +17,53 @@ module type Table = sig val table : string end -module type Blob = sig +module type BlobInternal = sig module Table : Table - val blob_of_key : Table.key -> Sqlite3.Data.t + type key_blob = Sqlite3.Data.t - val blob_of_value : Table.value -> Sqlite3.Data.t + type value_blob = Sqlite3.Data.t - val key_of_blob : Sqlite3.Data.t -> Table.key option + val blob_of_key : Table.key -> key_blob - val value_of_blob : Sqlite3.Data.t -> Table.value option + val blob_of_value : Table.value -> value_blob + + val value_of_blob : value_blob -> Table.value option end module type S = sig - include Blob + module Table : Table + + type key_blob + + type value_blob + + val blob_of_key : Table.key -> key_blob + + val blob_of_value : Table.value -> value_blob + + external key_blob_of_data : Sqlite3.Data.t -> key_blob = "%identity" - val replace : Table.key -> Table.value -> unit + external value_blob_of_data : Sqlite3.Data.t -> value_blob = "%identity" - val find : Table.key -> Table.value option + val value_of_blob : value_blob -> Table.value option - val delete : Table.key -> unit + val replace : key_blob -> value_blob -> unit + + val find : key_blob -> Table.value option + + val delete : key_blob -> unit end (* The functor is mostly here to provide a modicum of type safety around blobing/unblobing *) module Make (Table : Table) : S with module Table = Table = struct - module Unsafe : Blob with module Table = Table = struct + module Unsafe : BlobInternal with module Table = Table = struct module Table = Table + type key_blob = Sqlite3.Data.t + + type value_blob = Sqlite3.Data.t + let blob x = Sqlite3.Data.BLOB (Marshal.to_string x []) let unblob = function @@ 
-58,14 +78,16 @@ module Make (Table : Table) : S with module Table = Table = struct let blob_of_value = blob - let key_of_blob = unblob - let value_of_blob = unblob end (* cannot mix, e.g., blob_key and blob_value now *) include Unsafe + external key_blob_of_data : Sqlite3.Data.t -> key_blob = "%identity" + + external value_blob_of_data : Sqlite3.Data.t -> value_blob = "%identity" + let register_statement stmt_fmt = let k stmt0 = let stmt_ref = ref None in @@ -91,28 +113,25 @@ module Make (Table : Table) : S with module Table = Table = struct let get_replace_statement = register_statement "REPLACE INTO %s(key, value) VALUES(:k, :v)" Table.table - let replace key value = + let replace key_blob value_blob = let replace_stmt = get_replace_statement () in - Sqlite3.bind replace_stmt 1 (blob_of_key key) - |> SqliteUtils.check_sqlite_error ~log:"replace bind key" ; - Sqlite3.bind replace_stmt 2 (blob_of_value value) + Sqlite3.bind replace_stmt 1 key_blob |> SqliteUtils.check_sqlite_error ~log:"replace bind key" ; + Sqlite3.bind replace_stmt 2 value_blob |> SqliteUtils.check_sqlite_error ~log:"replace bind value" ; SqliteUtils.sqlite_unit_step ~finalize:false ~log:"KeyValue.replace" replace_stmt let get_select_statement = register_statement "SELECT value FROM %s WHERE key = :k" Table.table - let find key = + let find key_blob = let select_stmt = get_select_statement () in - Sqlite3.bind select_stmt 1 (blob_of_key key) - |> SqliteUtils.check_sqlite_error ~log:"insert bind key" ; + Sqlite3.bind select_stmt 1 key_blob |> SqliteUtils.check_sqlite_error ~log:"insert bind key" ; SqliteUtils.sqlite_result_step ~finalize:false ~log:"KeyValue.find" select_stmt |> Option.bind ~f:value_of_blob let get_delete_statement = register_statement "DELETE FROM %s WHERE key = :k" Table.table - let delete key = + let delete key_blob = let delete_stmt = get_delete_statement () in - Sqlite3.bind delete_stmt 1 (blob_of_key key) - |> SqliteUtils.check_sqlite_error ~log:"delete bind key" ; + 
Sqlite3.bind delete_stmt 1 key_blob |> SqliteUtils.check_sqlite_error ~log:"delete bind key" ; SqliteUtils.sqlite_unit_step ~finalize:false ~log:"KeyValue.delete" delete_stmt end diff --git a/infer/src/base/KeyValue.mli b/infer/src/base/KeyValue.mli index 7017c1cb9..7b0691bff 100644 --- a/infer/src/base/KeyValue.mli +++ b/infer/src/base/KeyValue.mli @@ -22,23 +22,27 @@ end module type S = sig module Table : Table - val blob_of_key : Table.key -> Sqlite3.Data.t + type key_blob + + type value_blob + + val blob_of_key : Table.key -> key_blob (** turn a key into a [Sqlite3.Data.BLOB] *) - val blob_of_value : Table.value -> Sqlite3.Data.t + val blob_of_value : Table.value -> value_blob (** turn a value into a [Sqlite3.Data.BLOB] *) - val key_of_blob : Sqlite3.Data.t -> Table.key option - (** turn a [Sqlite3.Data.BLOB] (or [Sqlite3.Data.NULL]) back into a key *) + external key_blob_of_data : Sqlite3.Data.t -> key_blob = "%identity" + + external value_blob_of_data : Sqlite3.Data.t -> value_blob = "%identity" - val value_of_blob : Sqlite3.Data.t -> Table.value option - (** turn a [Sqlite3.Data.BLOB] (or [Sqlite3.Data.NULL]) back into a value *) + val value_of_blob : value_blob -> Table.value option - val replace : Table.key -> Table.value -> unit + val replace : key_blob -> value_blob -> unit - val find : Table.key -> Table.value option + val find : key_blob -> Table.value option - val delete : Table.key -> unit + val delete : key_blob -> unit end module Make (Table : Table) : S with module Table = Table diff --git a/infer/src/base/MergeResults.ml b/infer/src/base/MergeResults.ml index 0fe50c2f6..0236bfced 100644 --- a/infer/src/base/MergeResults.ml +++ b/infer/src/base/MergeResults.ml @@ -9,26 +9,29 @@ open! 
IStd module L = Logging -let all_attributes ~into ~db_name = - let select_stmt = +let merge_attributes_table ~into ~db_name ~db_file = + (* no need to wrap this in a single transaction (to batch writes) because we open the table with + synchronous=OFF *) + (* do not go through Attributes so as not to deserialize and reserialize objects pointlessly, and + so as not to fill the cache with all the attributes (especially since this function will be + called before forking all the analysis processes). *) + let copy_stmt = Sqlite3.prepare into - (Printf.sprintf "SELECT value FROM %s.%s" db_name ResultsDir.attributes_table) + (Printf.sprintf "REPLACE INTO %s SELECT * FROM %s.%s" ResultsDir.attributes_table db_name + ResultsDir.attributes_table) in - List.filter_map ~f:(Option.bind ~f:Attributes.Store.value_of_blob) - (SqliteUtils.sqlite_result_rev_list_step ~log:"select" select_stmt) + SqliteUtils.sqlite_unit_step ~log:(Printf.sprintf "copying contents of database '%s'" db_file) + copy_stmt -let merge_attributes_table ~into ~db_name = - let rows = all_attributes ~into ~db_name in - (* no need to wrap this in a single transaction because we open the table with synchronous=OFF *) - List.iter rows ~f:Attributes.store - -let merge ~into db = +let merge ~into db_file = let db_name = "db" in - SqliteUtils.check_sqlite_error ~fatal:true ~log:"attaching db" - (Sqlite3.exec into (Printf.sprintf "ATTACH '%s' AS %s" db db_name)) ; - let do_merge () = merge_attributes_table ~into ~db_name in + SqliteUtils.check_sqlite_error ~fatal:true + ~log:(Printf.sprintf "attaching database '%s'" db_file) + (Sqlite3.exec into (Printf.sprintf "ATTACH '%s' AS %s" db_file db_name)) ; + let do_merge () = merge_attributes_table ~into ~db_name ~db_file in Utils.without_gc ~f:do_merge ; - SqliteUtils.check_sqlite_error ~fatal:true ~log:"detaching db" + SqliteUtils.check_sqlite_error ~fatal:true + ~log:(Printf.sprintf "detaching database '%s'" db_file) (Sqlite3.exec into (Printf.sprintf "DETACH %s" 
db_name)) ; ()