From a91e7dda500288985c9765e9b83cf00eb61cb78d Mon Sep 17 00:00:00 2001 From: Jules Villard Date: Thu, 14 Dec 2017 09:04:35 -0800 Subject: [PATCH] [cfg] move cfgs to sqlite Summary: Instead of storing the cfgs of source files inside their own individual files, put them in results.db, in their own table. (that table may change in the future to map source files to more than just their cfgs, eg their tenv as well) Reviewed By: jberdine Differential Revision: D6297201 fbshipit-source-id: 7fa891d --- infer/src/IR/Attributes.ml | 16 ++----- infer/src/IR/Cfg.ml | 62 +++++++++++++++++++++------- infer/src/IR/Cfg.mli | 10 +++-- infer/src/backend/OndemandCapture.ml | 13 +++--- infer/src/backend/exe_env.ml | 8 +--- infer/src/base/DB.ml | 2 +- infer/src/base/DB.mli | 2 +- infer/src/base/MergeResults.ml | 13 +++++- infer/src/base/ResultsDatabase.ml | 18 ++++++-- infer/src/base/ResultsDatabase.mli | 4 +- infer/src/base/ResultsDir.ml | 2 +- infer/src/clang/cFrontend.ml | 5 +-- infer/src/integration/Driver.ml | 4 +- infer/src/java/jMain.ml | 3 +- 14 files changed, 99 insertions(+), 63 deletions(-) diff --git a/infer/src/IR/Attributes.ml b/infer/src/IR/Attributes.ml index 22b779fd6..1516ba998 100644 --- a/infer/src/IR/Attributes.ml +++ b/infer/src/IR/Attributes.ml @@ -143,24 +143,16 @@ let store (attr: ProcAttributes.t) = let load_defined pname = Data.of_pname pname |> find ~defined:true let find_file_capturing_procedure pname = - match load pname with - | None -> - None - | Some proc_attributes -> + Option.map (load pname) ~f:(fun proc_attributes -> let source_file = proc_attributes.ProcAttributes.source_file_captured in - let source_dir = DB.source_dir_from_source_file source_file in let origin = - (* Procedure coming from include files if it has different location - than the file where it was captured. *) + (* Procedure coming from include files if it has different location than the file where it + was captured. 
*) match SourceFile.compare source_file proc_attributes.ProcAttributes.loc.file <> 0 with | true -> `Include | false -> `Source in - let cfg_fname = DB.source_dir_get_internal_file source_dir ".cfg" in - let cfg_fname_exists = - PVariant.( = ) `Yes (Sys.file_exists (DB.filename_to_string cfg_fname)) - in - if cfg_fname_exists then Some (source_file, origin) else None + (source_file, origin) ) diff --git a/infer/src/IR/Cfg.ml b/infer/src/IR/Cfg.ml index c633a2244..d59756726 100644 --- a/infer/src/IR/Cfg.ml +++ b/infer/src/IR/Cfg.ml @@ -94,14 +94,32 @@ let check_cfg_connectedness cfg = List.iter ~f:do_pdesc pdescs -(** Serializer for control flow graphs *) -let cfg_serializer : t Serialization.serializer = - Serialization.create_serializer Serialization.Key.cfg +module type Data = sig + val of_cfg : t -> Sqlite3.Data.t + val of_source_file : SourceFile.t -> Sqlite3.Data.t -(** Load a cfg from a file *) -let load_from_file (filename: DB.filename) : t option = - Serialization.read_from_file cfg_serializer filename + val to_cfg : Sqlite3.Data.t -> t +end + +module Data : Data = struct + let of_source_file file = Sqlite3.Data.TEXT (SourceFile.to_string file) + + let of_cfg x = Sqlite3.Data.BLOB (Marshal.to_string x []) + + let to_cfg = function[@warning "-8"] Sqlite3.Data.BLOB b -> Marshal.from_string b 0 +end + +let get_load_statement = + ResultsDatabase.register_statement "SELECT cfgs FROM cfg WHERE source_file = :k" + + +let load source = + let load_stmt = get_load_statement () in + Data.of_source_file source |> Sqlite3.bind load_stmt 1 + |> SqliteUtils.check_sqlite_error ~log:"load bind source file" ; + SqliteUtils.sqlite_result_step ~finalize:false ~log:"Cfg.load" load_stmt + |> Option.map ~f:Data.to_cfg (** Save the .attr files for the procedures in the cfg. 
*) @@ -273,20 +291,26 @@ let mark_unchanged_pdescs cfg_new cfg_old = Typ.Procname.Hash.iter mark_pdesc_if_unchanged cfg_new -(** Save a cfg into a file *) -let store_to_file ~source_file (filename: DB.filename) (cfg: t) = +let get_store_statement = + ResultsDatabase.register_statement "INSERT OR REPLACE INTO cfg VALUES (:source, :cfgs)" + + +let store source_file cfg = inline_java_synthetic_methods cfg ; ( if Config.incremental_procs then - match load_from_file filename with - | Some old_cfg -> - mark_unchanged_pdescs cfg old_cfg - | None -> - () ) ; - (* NOTE: it's important to write attribute files to disk before writing .cfg file to disk. - OndemandCapture module relies on it - it uses existance of .cfg file as a barrier to make + match load source_file with Some old_cfg -> mark_unchanged_pdescs cfg old_cfg | None -> () ) ; + (* NOTE: it's important to write attribute files to disk before writing cfgs to disk. + OndemandCapture module relies on it - it uses existence of the cfg as a barrier to make sure that all attributes were written to disk (but not necessarily flushed) *) save_attributes source_file cfg ; - Serialization.write_to_file cfg_serializer filename ~data:cfg + let store_stmt = get_store_statement () in + Data.of_source_file source_file |> Sqlite3.bind store_stmt 1 + (* :source *) + |> SqliteUtils.check_sqlite_error ~log:"store bind source file" ; + Data.of_cfg cfg |> Sqlite3.bind store_stmt 2 + (* :cfgs *) + |> SqliteUtils.check_sqlite_error ~log:"store bind cfg" ; + SqliteUtils.sqlite_unit_step ~finalize:false ~log:"Cfg.store" store_stmt (** Applies convert_instr_list to all the instructions in all the nodes of the cfg *) @@ -614,3 +638,9 @@ let pp_proc_signatures fmt cfg = F.fprintf fmt "METHOD SIGNATURES@\n@." ; let sorted_procs = List.sort ~cmp:Procdesc.compare (get_all_procs cfg) in List.iter ~f:(fun pdesc -> F.fprintf fmt "%a@." 
Procdesc.pp_signature pdesc) sorted_procs + + +let exists_for_source_file source = + (* simplistic implementation that allocates the cfg as this is only used for reactive capture for now *) + load source |> Option.is_some + diff --git a/infer/src/IR/Cfg.mli b/infer/src/IR/Cfg.mli index 8c91f9a2e..0e80c209c 100644 --- a/infer/src/IR/Cfg.mli +++ b/infer/src/IR/Cfg.mli @@ -15,11 +15,11 @@ open! IStd (** A control-flow graph *) type t -val load_from_file : DB.filename -> t option -(** Load a cfg from a file *) +val load : SourceFile.t -> t option +(** Load the cfgs of the procedures of a source file *) -val store_to_file : source_file:SourceFile.t -> DB.filename -> t -> unit -(** Save a cfg into a file *) +val store : SourceFile.t -> t -> unit +(** Save a cfg into the database *) (** {2 Functions for manipulating an interprocedural CFG} *) @@ -65,3 +65,5 @@ val specialize_with_block_args : in the closures *) val pp_proc_signatures : Format.formatter -> t -> unit + +val exists_for_source_file : SourceFile.t -> bool diff --git a/infer/src/backend/OndemandCapture.ml b/infer/src/backend/OndemandCapture.ml index aa5a61ff1..516d083c9 100644 --- a/infer/src/backend/OndemandCapture.ml +++ b/infer/src/backend/OndemandCapture.ml @@ -20,14 +20,11 @@ let try_capture (attributes: ProcAttributes.t) : ProcAttributes.t option = let decl_file = attributes.loc.file in let definition_file_opt = SourceFile.of_header decl_file in let try_compile definition_file = - let source_dir = DB.source_dir_from_source_file definition_file in - (* Use cfg_filename as a proxy to find out whether definition_file was already captured. - If it was, there is no point in trying to capture it again. 
- Treat existance of cfg_filename as a barrier - if it exists it means that - all attributes files have been created - write logic is defined in - Cfg.store_cfg_to_file *) - let cfg_filename = DB.source_dir_get_internal_file source_dir ".cfg" in - if not (DB.file_exists cfg_filename) then ( + (* Use the cfg as a proxy to find out whether definition_file was already captured. If it + was, there is no point in trying to capture it again. Treat existence of the cfg as a + barrier - if it exists it means that all attributes files have been created - write logic + is defined in Cfg.store *) + if not (Cfg.exists_for_source_file decl_file) then ( L.(debug Capture Verbose) "Started capture of %a...@\n" SourceFile.pp definition_file ; Timeout.suspend_existing_timeout ~keep_symop_total:true ; protect diff --git a/infer/src/backend/exe_env.ml b/infer/src/backend/exe_env.ml index 596f9f141..a1416b209 100644 --- a/infer/src/backend/exe_env.ml +++ b/infer/src/backend/exe_env.ml @@ -22,7 +22,6 @@ type file_data = { source: SourceFile.t ; tenv_file: DB.filename ; mutable tenv: Tenv.t option - ; cfg_file: DB.filename ; mutable cfg: Cfg.t option } (** get the path to the tenv file, which either one tenv file per source file or a global tenv file *) @@ -45,12 +44,9 @@ end) let new_file_data source cg_fname = let file_base = DB.chop_extension cg_fname in let tenv_file = tenv_filename file_base in - let cfg_file = DB.filename_add_suffix file_base ".cfg" in { source ; tenv_file - ; tenv= None - ; (* Sil.load_tenv_from_file tenv_file *) - cfg_file + ; tenv= None (* Sil.load_tenv_from_file tenv_file *) ; cfg= None (* Cfg.load_cfg_from_file cfg_file *) } @@ -151,7 +147,7 @@ let file_data_to_tenv file_data = let file_data_to_cfg file_data = - if is_none file_data.cfg then file_data.cfg <- Cfg.load_from_file file_data.cfg_file ; + if is_none file_data.cfg then file_data.cfg <- Cfg.load file_data.source ; file_data.cfg diff --git a/infer/src/base/DB.ml b/infer/src/base/DB.ml index 
2281a5c11..226389f6a 100644 --- a/infer/src/base/DB.ml +++ b/infer/src/base/DB.ml @@ -69,7 +69,7 @@ type source_dir = string [@@deriving compare] (** expose the source dir as a string *) let source_dir_to_string source_dir = source_dir -(** get the path to an internal file with the given extention (.cfg, .cg, .tenv) *) +(** get the path to an internal file with the given extension (.cg, .tenv) *) let source_dir_get_internal_file source_dir extension = let source_dir_name = append_crc_cutoff (Caml.Filename.remove_extension (Filename.basename source_dir)) diff --git a/infer/src/base/DB.mli b/infer/src/base/DB.mli index c83b1ef02..8c6b88b28 100644 --- a/infer/src/base/DB.mli +++ b/infer/src/base/DB.mli @@ -100,7 +100,7 @@ val source_dir_to_string : source_dir -> string (** expose the source dir as a string *) val source_dir_get_internal_file : source_dir -> string -> filename -(** get the path to an internal file with the given extention (.cfg, .cg, .tenv) *) +(** get the path to an internal file with the given extension (.cg, .tenv) *) val source_dir_from_source_file : SourceFile.t -> source_dir (** get the source directory corresponding to a source file *) diff --git a/infer/src/base/MergeResults.ml b/infer/src/base/MergeResults.ml index 7b1f4e629..df485e84f 100644 --- a/infer/src/base/MergeResults.ml +++ b/infer/src/base/MergeResults.ml @@ -33,7 +33,17 @@ WHERE |} in SqliteUtils.sqlite_unit_step - ~log:(Printf.sprintf "copying contents of database '%s'" db_file) + ~log:(Printf.sprintf "copying attributes of database '%s'" db_file) copy_stmt + + +let merge_cfg_table ~db_file = + let copy_stmt = + Sqlite3.prepare (ResultsDatabase.get_database ()) + "INSERT OR REPLACE INTO cfg SELECT * FROM attached.cfg" + in + SqliteUtils.sqlite_unit_step + ~log:(Printf.sprintf "copying cfgs of database '%s'" db_file) copy_stmt @@ -43,6 +53,7 @@ let merge ~db_file = ~log:(Printf.sprintf "attaching database '%s'" db_file) (Sqlite3.exec main_db (Printf.sprintf "ATTACH '%s' AS 
attached" db_file)) ; merge_attributes_table ~db_file ; + merge_cfg_table ~db_file ; SqliteUtils.check_sqlite_error ~fatal:true ~log:(Printf.sprintf "detaching database '%s'" db_file) (Sqlite3.exec main_db "DETACH attached") ; diff --git a/infer/src/base/ResultsDatabase.ml b/infer/src/base/ResultsDatabase.ml index 334b8f8e2..3a2e296f0 100644 --- a/infer/src/base/ResultsDatabase.ml +++ b/infer/src/base/ResultsDatabase.ml @@ -20,7 +20,7 @@ let database_fullpath = Config.results_dir ^/ database_filename let create_attributes_table db = (* it would be nice to use "WITHOUT ROWID" here but ancient versions of sqlite do not support it *) - SqliteUtils.exec db ~log:"initializing results DB" + SqliteUtils.exec db ~log:"creating attributes table" ~stmt: {| CREATE TABLE IF NOT EXISTS attributes @@ -30,10 +30,20 @@ CREATE TABLE IF NOT EXISTS attributes , proc_attributes BLOB NOT NULL )|} +let create_cfg_table db = + SqliteUtils.exec db ~log:"creating cfg table" + ~stmt: + {| +CREATE TABLE IF NOT EXISTS cfg + ( source_file TEXT PRIMARY KEY + , cfgs BLOB NOT NULL )|} + + let create_db () = let temp_db = Filename.temp_file ~in_dir:Config.results_dir database_filename ".tmp" in let db = Sqlite3.db_open ~mutex:`FULL temp_db in create_attributes_table db ; + create_cfg_table db ; (* This should be the default but better be sure, otherwise we cannot access the database concurrently. This has to happen before setting WAL mode. 
*) SqliteUtils.exec db ~log:"locking mode=NORMAL" ~stmt:"PRAGMA locking_mode=NORMAL" ; ( match Config.sqlite_vfs with @@ -58,10 +68,12 @@ let on_close_database ~f = close_db_callbacks := f :: !close_db_callbacks let get_database () = Option.value_exn !database -let reset_attributes_table () = +let reset_capture_tables () = let db = get_database () in SqliteUtils.exec db ~log:"drop attributes table" ~stmt:"DROP TABLE attributes" ; - create_attributes_table db + create_attributes_table db ; + SqliteUtils.exec db ~log:"drop cfg table" ~stmt:"DROP TABLE cfg" ; + create_cfg_table db let db_canonicalize () = diff --git a/infer/src/base/ResultsDatabase.mli b/infer/src/base/ResultsDatabase.mli index e1fd6d9e9..fbc9ec028 100644 --- a/infer/src/base/ResultsDatabase.mli +++ b/infer/src/base/ResultsDatabase.mli @@ -16,8 +16,8 @@ val database_fullpath : string val get_database : unit -> Sqlite3.db (** The results database. You should always use this function to access the database, as the connection to it may change during the execution (see [new_database_connection]). *) -val reset_attributes_table : unit -> unit -(** zero out the attributes table *) +val reset_capture_tables : unit -> unit +(** zero out the tables associated with capture data *) val new_database_connection : unit -> unit (** Closes the previous connection to the database (if any), and opens a new one. Needed after calls to fork(2). 
*) diff --git a/infer/src/base/ResultsDir.ml b/infer/src/base/ResultsDir.ml index 453810b81..b2fd75e21 100644 --- a/infer/src/base/ResultsDir.ml +++ b/infer/src/base/ResultsDir.ml @@ -59,7 +59,7 @@ let assert_results_dir advice = let delete_capture_and_analysis_data () = - ResultsDatabase.reset_attributes_table () ; + ResultsDatabase.reset_capture_tables () ; let dirs_to_delete = List.map ~f:(Filename.concat Config.results_dir) Config.([captured_dir_name; specs_dir_name]) in diff --git a/infer/src/clang/cFrontend.ml b/infer/src/clang/cFrontend.ml index 66deacb8d..84927dddb 100644 --- a/infer/src/clang/cFrontend.ml +++ b/infer/src/clang/cFrontend.ml @@ -53,13 +53,10 @@ let do_source_file translation_unit_context ast = (* This could be moved in the cfg_infer module *) let source_dir = DB.source_dir_from_source_file source_file in let tenv_file = DB.source_dir_get_internal_file source_dir ".tenv" in - (* Naming scheme of .cfg file matters for OndemandCapture module. If it - changes here, it should be changed there as well*) - let cfg_file = DB.source_dir_get_internal_file source_dir ".cfg" in let cg_file = DB.source_dir_get_internal_file source_dir ".cg" in NullabilityPreanalysis.analysis cfg tenv ; Cg.store_to_file cg_file call_graph ; - Cfg.store_to_file ~source_file cfg_file cfg ; + Cfg.store source_file cfg ; Tenv.sort_fields_tenv tenv ; Tenv.store_to_file tenv_file tenv ; if Config.debug_mode then Cfg.check_cfg_connectedness cfg ; diff --git a/infer/src/integration/Driver.ml b/infer/src/integration/Driver.ml index c953e7616..67d6dd413 100644 --- a/infer/src/integration/Driver.ml +++ b/infer/src/integration/Driver.ml @@ -67,7 +67,7 @@ let clean_compilation_command mode = let clean_results_dir () = if not Config.flavors then (* we do not need to keep the capture data in Buck/Java mode *) - ResultsDatabase.reset_attributes_table () ; + ResultsDatabase.reset_capture_tables () ; ResultsDatabase.db_canonicalize () ; (* make sure we are done with the database *) 
ResultsDatabase.db_close () ; @@ -91,7 +91,7 @@ let clean_results_dir () = ; ResultsDatabase.database_filename ^ "-wal" ] in let suffixes_to_delete = - ".txt" :: ".csv" :: ".json" :: (if Config.flavors then [] else [".cfg"; ".cg"]) + ".txt" :: ".csv" :: ".json" :: (if Config.flavors then [] else [".cg"]) in fun name -> (* Keep the JSON report *) diff --git a/infer/src/java/jMain.ml b/infer/src/java/jMain.ml index f15d5b7f8..540e52df0 100644 --- a/infer/src/java/jMain.ml +++ b/infer/src/java/jMain.ml @@ -30,10 +30,9 @@ let init_global_state source_file = let store_icfg source_file cg cfg = let source_dir = DB.source_dir_from_source_file source_file in - let cfg_file = DB.source_dir_get_internal_file source_dir ".cfg" in let cg_file = DB.source_dir_get_internal_file source_dir ".cg" in Cg.store_to_file cg_file cg ; - Cfg.store_to_file ~source_file cfg_file cfg ; + Cfg.store source_file cfg ; if Config.debug_mode || Config.frontend_tests then ( Dotty.print_icfg_dotty source_file cfg ; Cg.save_call_graph_dotty source_file cg ) ;