From 4c002f0c98ebe76184c5b1a5cdb256e4c208d378 Mon Sep 17 00:00:00 2001 From: Jules Villard Date: Tue, 27 Feb 2018 09:28:57 -0800 Subject: [PATCH] [sqlite] migrate tenv to sqlite Summary: Instead of storing the type environment in infer-out/captured/foo.c/foo.c.tenv, store it in the `source_files` table of the SQLite db. This limits the number of files we create on disk. The "file local" type environments are specific to the clang integration. For Java, there is a "global tenv" file. Instead of matching on string names, this diff also makes the API of `Tenv` reflect this situation. The global tenv is serialized as a separate file in "infer-out/.global.tenv" instead of "infer-out/captured/global.tenv", because "infer-out/captured/" will soon be removed as it now only contains the global tenv (except in debug mode, where it will still be created). In the DB, we either store the local tenv for the file, or "global" to indicate that the global tenv should be consulted. This diff also moves `Cfg.store` to `SourceFiles.add` because that function deals with more than just `Cfg.t`. 
Reviewed By: jeremydubreil Differential Revision: D6937945 fbshipit-source-id: 001c10a --- .../lib/python/inferlib/capture/ndk-build.py | 9 ---- infer/src/IR/Cfg.ml | 34 +------------ infer/src/IR/Cfg.mli | 15 ++++-- infer/src/IR/Tenv.ml | 48 ++++++++++++++----- infer/src/IR/Tenv.mli | 7 +-- infer/src/base/Config.ml | 2 +- infer/src/base/DB.ml | 14 ++---- infer/src/base/DB.mli | 3 -- infer/src/base/MergeResults.ml | 2 +- infer/src/base/ResultsDatabase.ml | 1 + infer/src/base/ResultsDir.ml | 4 +- infer/src/base/SourceFiles.ml | 38 +++++++++++++++ infer/src/base/SourceFiles.mli | 3 ++ infer/src/clang/cFrontend.ml | 3 +- infer/src/java/jMain.ml | 2 +- 15 files changed, 104 insertions(+), 81 deletions(-) diff --git a/infer/lib/python/inferlib/capture/ndk-build.py b/infer/lib/python/inferlib/capture/ndk-build.py index 00054910d..b4ab46403 100644 --- a/infer/lib/python/inferlib/capture/ndk-build.py +++ b/infer/lib/python/inferlib/capture/ndk-build.py @@ -60,15 +60,6 @@ class NdkBuildCapture(): cmd = map(utils.encode, self.cmd) logging.info('Running command %s with env:\n%s' % (cmd, env)) subprocess.check_call(cmd, env=env) - capture_dir = os.path.join(self.args.infer_out, 'captured') - if len(os.listdir(capture_dir)) < 1: - # Don't return with a failure code unless we're - # running make. It could be normal to have captured - # nothing (eg, empty source file). Further output will - # alert the user that there was nothing to analyze. - if self.cmd[0] == 'make': - # reuse code from gradle, etc. integration - return util.run_compilation_commands([], 'make clean') return os.EX_OK except subprocess.CalledProcessError as exc: if self.args.debug: diff --git a/infer/src/IR/Cfg.ml b/infer/src/IR/Cfg.ml index c613aa0df..b11d9130f 100644 --- a/infer/src/IR/Cfg.ml +++ b/infer/src/IR/Cfg.ml @@ -99,7 +99,6 @@ let load source = |> Option.map ~f:SQLite.deserialize ) -(** Save the .attr files for the procedures in the cfg. 
*) let save_attributes source_file cfg = let save_proc _ pdesc = let attributes = Procdesc.get_attributes pdesc in @@ -202,14 +201,12 @@ let proc_inline_synthetic_methods cfg pdesc : unit = Procdesc.iter_nodes node_inline_synthetic_methods pdesc -(** Inline the java synthetic methods in the cfg *) let inline_java_synthetic_methods cfg = let f pname pdesc = if Typ.Procname.is_java pname then proc_inline_synthetic_methods cfg pdesc in Typ.Procname.Hash.iter f cfg -(** compute the list of procedures added or changed in [cfg_new] over [cfg_old] *) -let mark_unchanged_pdescs cfg_new cfg_old = +let mark_unchanged_pdescs ~cfg_old ~cfg_new = let pdescs_eq (pd1: Procdesc.t) (pd2: Procdesc.t) = (* map of exp names in pd1 -> exp names in pd2 *) let exp_map = ref Exp.Map.empty in @@ -268,35 +265,6 @@ let mark_unchanged_pdescs cfg_new cfg_old = Typ.Procname.Hash.iter mark_pdesc_if_unchanged cfg_new -let store_statement = - ResultsDatabase.register_statement - "INSERT OR REPLACE INTO source_files VALUES (:source, :cfgs, :proc_names, :timestamp)" - - -let store source_file cfg = - inline_java_synthetic_methods cfg ; - ( if Config.incremental_procs then - match load source_file with Some old_cfg -> mark_unchanged_pdescs cfg old_cfg | None -> () ) ; - (* NOTE: it's important to write attribute files to disk before writing cfgs to disk. 
- OndemandCapture module relies on it - it uses existance of the cfg as a barrier to make - sure that all attributes were written to disk (but not necessarily flushed) *) - save_attributes source_file cfg ; - ResultsDatabase.with_registered_statement store_statement ~f:(fun db store_stmt -> - SourceFile.SQLite.serialize source_file |> Sqlite3.bind store_stmt 1 - (* :source *) - |> SqliteUtils.check_sqlite_error db ~log:"store bind source file" ; - SQLite.serialize cfg |> Sqlite3.bind store_stmt 2 - (* :cfg *) - |> SqliteUtils.check_sqlite_error db ~log:"store bind cfg" ; - get_all_proc_names cfg |> Typ.Procname.SQLiteList.serialize |> Sqlite3.bind store_stmt 3 - (* :proc_names *) - |> SqliteUtils.check_sqlite_error db ~log:"store bind proc names" ; - Sqlite3.bind store_stmt 4 (Sqlite3.Data.INT Int64.one) - (* :freshly_captured *) - |> SqliteUtils.check_sqlite_error db ~log:"store freshness" ; - SqliteUtils.sqlite_unit_step ~finalize:false ~log:"Cfg.store" db store_stmt ) - - let pp_proc_signatures fmt cfg = F.fprintf fmt "METHOD SIGNATURES@\n@." ; let sorted_procs = List.sort ~cmp:Procdesc.compare (get_all_proc_descs cfg) in diff --git a/infer/src/IR/Cfg.mli b/infer/src/IR/Cfg.mli index d4a1ab19e..361442b19 100644 --- a/infer/src/IR/Cfg.mli +++ b/infer/src/IR/Cfg.mli @@ -18,9 +18,6 @@ type t = Procdesc.t Typ.Procname.Hash.t val load : SourceFile.t -> t option (** Load the cfgs of the procedures of a source file *) -val store : SourceFile.t -> t -> unit -(** Save a cfg into the database *) - val get_all_proc_names : t -> Typ.Procname.t list (** get all the keys from the hashtable *) @@ -38,4 +35,16 @@ val iter_all_nodes : ?sorted:bool -> (Procdesc.t -> Procdesc.Node.t -> unit) -> val check_cfg_connectedness : t -> unit (** checks whether a cfg is connected or not *) +val save_attributes : SourceFile.t -> t -> unit +(** Save the .attr files for the procedures in the cfg. 
*) + +val inline_java_synthetic_methods : t -> unit +(** Inline the java synthetic methods in the cfg (in-place) *) + +val mark_unchanged_pdescs : cfg_old:t -> cfg_new:t -> unit +(** compute the list of procedures added or changed in [cfg_new] over [cfg_old] and record the + [changed] attribute in-place in the new cfg. *) + val pp_proc_signatures : Format.formatter -> t -> unit + +module SQLite : SqliteUtils.Data with type t = t diff --git a/infer/src/IR/Tenv.ml b/infer/src/IR/Tenv.ml index c4a3f85f6..32d34de55 100644 --- a/infer/src/IR/Tenv.ml +++ b/infer/src/IR/Tenv.ml @@ -71,22 +71,40 @@ let add_field tenv class_tn_name field = () -(** Serializer for type environments *) -let tenv_serializer : t Serialization.serializer = - Serialization.create_serializer Serialization.Key.tenv +type per_file = Global | FileLocal of t +module SQLite : SqliteUtils.Data with type t = per_file = struct + type t = per_file -let global_tenv : t option ref = ref None + let global_string = "global" -let tenv_filename_of_source_file source_file = - DB.source_dir_get_internal_file (DB.source_dir_from_source_file source_file) ".tenv" + let serialize = function + | Global -> + Sqlite3.Data.TEXT global_string + | FileLocal tenv -> + Sqlite3.Data.BLOB (Marshal.to_string tenv []) -let load source_file : t option = - tenv_filename_of_source_file source_file |> Serialization.read_from_file tenv_serializer + let deserialize = function[@warning "-8"] + | Sqlite3.Data.TEXT g when String.equal g global_string -> + Global + | Sqlite3.Data.BLOB b -> + FileLocal (Marshal.from_string b 0) +end +let load_statement = + ResultsDatabase.register_statement + "SELECT type_environment FROM source_files WHERE source_file = :k" -let global_tenv_path = Config.(captured_dir ^/ global_tenv_filename) |> DB.filename_from_string + +(** Serializer for type environments *) +let tenv_serializer : t Serialization.serializer = + Serialization.create_serializer Serialization.Key.tenv + + +let global_tenv : t option ref = 
ref None + +let global_tenv_path = Config.(results_dir ^/ global_tenv_filename) |> DB.filename_from_string let load_global () : t option = if is_none !global_tenv then @@ -94,6 +112,16 @@ let load_global () : t option = !global_tenv +let load source = + ResultsDatabase.with_registered_statement load_statement ~f:(fun db load_stmt -> + SourceFile.SQLite.serialize source |> Sqlite3.bind load_stmt 1 + |> SqliteUtils.check_sqlite_error db ~log:"load bind source file" ; + SqliteUtils.sqlite_result_step ~finalize:false ~log:"Tenv.load" db load_stmt + |> Option.bind ~f:(fun x -> + SQLite.deserialize x + |> function Global -> load_global () | FileLocal tenv -> Some tenv ) ) + + let store_to_filename tenv tenv_filename = Serialization.write_to_file tenv_serializer tenv_filename ~data:tenv ; if Config.debug_mode then ( @@ -103,8 +131,6 @@ let store_to_filename tenv tenv_filename = Format.fprintf fmt "%a" pp tenv ; Out_channel.close out_channel ) -let store source_file tenv = tenv_filename_of_source_file source_file |> store_to_filename tenv - let store_global tenv = (* update in-memory global tenv for later uses by this process, e.g. 
in single-core mode the frontend and backend run in the same process *) diff --git a/infer/src/IR/Tenv.mli b/infer/src/IR/Tenv.mli index 5895d97a0..34037a6d9 100644 --- a/infer/src/IR/Tenv.mli +++ b/infer/src/IR/Tenv.mli @@ -23,9 +23,6 @@ val load : SourceFile.t -> t option val load_global : unit -> t option (** load the global type environment (Java) *) -val store : SourceFile.t -> t -> unit -(** Save a type environment into a file *) - val store_global : t -> unit (** save a global type environment (Java) *) @@ -46,3 +43,7 @@ val pp : Format.formatter -> t -> unit [@@warning "-32"] val language_is : t -> Language.t -> bool (** Test the language from which the types in the tenv were translated *) + +type per_file = Global | FileLocal of t + +module SQLite : SqliteUtils.Data with type t = per_file diff --git a/infer/src/base/Config.ml b/infer/src/base/Config.ml index 5f53c21da..96ae2aac0 100644 --- a/infer/src/base/Config.ml +++ b/infer/src/base/Config.ml @@ -185,7 +185,7 @@ let fail_on_issue_exit_code = 2 let frontend_stats_dir_name = "frontend_stats" -let global_tenv_filename = "global.tenv" +let global_tenv_filename = ".global.tenv" (** If true, treat calls to no-arg getters as idempotent w.r.t non-nullness *) let idempotent_getters = true diff --git a/infer/src/base/DB.ml b/infer/src/base/DB.ml index 577909fa8..cf12faa96 100644 --- a/infer/src/base/DB.ml +++ b/infer/src/base/DB.ml @@ -60,15 +60,6 @@ type source_dir = string [@@deriving compare] (** expose the source dir as a string *) let source_dir_to_string source_dir = source_dir -(** get the path to an internal file with the given extention (.tenv, ...) 
*) -let source_dir_get_internal_file source_dir extension = - let source_dir_name = - append_crc_cutoff (Caml.Filename.remove_extension (Filename.basename source_dir)) - in - let fname = source_dir_name ^ extension in - Filename.concat source_dir fname - - (** get the source directory corresponding to a source file *) let source_dir_from_source_file source_file = Filename.concat Config.captured_dir (source_file_encoding source_file) @@ -193,8 +184,9 @@ module Results_dir = struct if SourceFile.is_invalid source then L.(die InternalError) "Invalid source file passed" ; Utils.create_dir Config.results_dir ; Utils.create_dir specs_dir ; - Utils.create_dir (path_to_filename Abs_root [Config.captured_dir_name]) ; - Utils.create_dir (path_to_filename (Abs_source_dir source) []) + if Config.html || Config.debug_mode || Config.frontend_tests then ( + Utils.create_dir (path_to_filename Abs_root [Config.captured_dir_name]) ; + Utils.create_dir (path_to_filename (Abs_source_dir source) []) ) let clean_specs_dir () = diff --git a/infer/src/base/DB.mli b/infer/src/base/DB.mli index c01c6cf84..b992c7538 100644 --- a/infer/src/base/DB.mli +++ b/infer/src/base/DB.mli @@ -81,9 +81,6 @@ type source_dir [@@deriving compare] val source_dir_to_string : source_dir -> string (** expose the source dir as a string *) -val source_dir_get_internal_file : source_dir -> string -> filename -(** get the path to an internal file with the given extention (.tenv, ...) 
*) - val source_dir_from_source_file : SourceFile.t -> source_dir (** get the source directory corresponding to a source file *) diff --git a/infer/src/base/MergeResults.ml b/infer/src/base/MergeResults.ml index 5b027ee76..3395d4761 100644 --- a/infer/src/base/MergeResults.ml +++ b/infer/src/base/MergeResults.ml @@ -37,7 +37,7 @@ let merge_source_files_table ~db_file = Sqlite3.exec db {| INSERT OR REPLACE INTO source_files - SELECT source_file, cfgs, procedure_names, 1 + SELECT source_file, cfgs, type_environment, procedure_names, 1 FROM attached.source_files |} |> SqliteUtils.check_sqlite_error db diff --git a/infer/src/base/ResultsDatabase.ml b/infer/src/base/ResultsDatabase.ml index 447f28dc8..2f3318ef7 100644 --- a/infer/src/base/ResultsDatabase.ml +++ b/infer/src/base/ResultsDatabase.ml @@ -29,6 +29,7 @@ let source_files_schema = {|CREATE TABLE IF NOT EXISTS source_files ( source_file TEXT PRIMARY KEY , cfgs BLOB NOT NULL + , type_environment BLOB NOT NULL , procedure_names BLOB NOT NULL , freshly_captured INT NOT NULL )|} diff --git a/infer/src/base/ResultsDir.ml b/infer/src/base/ResultsDir.ml index af8edb639..9160816ce 100644 --- a/infer/src/base/ResultsDir.ml +++ b/infer/src/base/ResultsDir.ml @@ -11,9 +11,7 @@ open! PVariant module CLOpt = CommandLineOption module L = Logging -let results_dir_dir_markers = - List.map ~f:(Filename.concat Config.results_dir) [Config.captured_dir_name; Config.specs_dir_name] - +let results_dir_dir_markers = [Config.results_dir ^/ Config.specs_dir_name] let is_results_dir ~check_correct_version () = let not_found = ref "" in diff --git a/infer/src/base/SourceFiles.ml b/infer/src/base/SourceFiles.ml index cd07027d3..f03d370a8 100644 --- a/infer/src/base/SourceFiles.ml +++ b/infer/src/base/SourceFiles.ml @@ -9,6 +9,44 @@ open! 
IStd module L = Logging +let store_statement = + ResultsDatabase.register_statement + {| + INSERT OR REPLACE INTO source_files + VALUES (:source, :cfgs, :tenv, :proc_names, :freshly_captured) |} + + +let add source_file cfg tenv = + Cfg.inline_java_synthetic_methods cfg ; + ( if Config.incremental_procs then + match Cfg.load source_file with + | Some cfg_old -> + Cfg.mark_unchanged_pdescs ~cfg_old ~cfg_new:cfg + | None -> + () ) ; + (* NOTE: it's important to write attribute files to disk before writing cfgs to disk. + OndemandCapture module relies on it - it uses existance of the cfg as a barrier to make + sure that all attributes were written to disk (but not necessarily flushed) *) + Cfg.save_attributes source_file cfg ; + ResultsDatabase.with_registered_statement store_statement ~f:(fun db store_stmt -> + SourceFile.SQLite.serialize source_file |> Sqlite3.bind store_stmt 1 + (* :source *) + |> SqliteUtils.check_sqlite_error db ~log:"store bind source file" ; + Cfg.SQLite.serialize cfg |> Sqlite3.bind store_stmt 2 + (* :cfg *) + |> SqliteUtils.check_sqlite_error db ~log:"store bind cfg" ; + Tenv.SQLite.serialize tenv |> Sqlite3.bind store_stmt 3 + (* :tenv *) + |> SqliteUtils.check_sqlite_error db ~log:"store bind type environment" ; + Cfg.get_all_proc_names cfg |> Typ.Procname.SQLiteList.serialize |> Sqlite3.bind store_stmt 4 + (* :proc_names *) + |> SqliteUtils.check_sqlite_error db ~log:"store bind proc names" ; + Sqlite3.bind store_stmt 5 (Sqlite3.Data.INT Int64.one) + (* :freshly_captured *) + |> SqliteUtils.check_sqlite_error db ~log:"store freshness" ; + SqliteUtils.sqlite_unit_step ~finalize:false ~log:"Cfg.store" db store_stmt ) + + let get_all () = let db = ResultsDatabase.get_database () in Sqlite3.prepare db "SELECT source_file FROM source_files" diff --git a/infer/src/base/SourceFiles.mli b/infer/src/base/SourceFiles.mli index be98f3df6..6e038cad0 100644 --- a/infer/src/base/SourceFiles.mli +++ b/infer/src/base/SourceFiles.mli @@ -7,6 +7,9 @@ * of 
patent rights can be found in the PATENTS file in the same directory. *) +val add : SourceFile.t -> Cfg.t -> Tenv.per_file -> unit +(** Add or replace the row corresponding to the source file into the database. *) + val get_all : unit -> SourceFile.t list (** get all the source files in the database *) diff --git a/infer/src/clang/cFrontend.ml b/infer/src/clang/cFrontend.ml index 86efb428d..6b1e2b0a2 100644 --- a/infer/src/clang/cFrontend.ml +++ b/infer/src/clang/cFrontend.ml @@ -51,8 +51,7 @@ let do_source_file (translation_unit_context: CFrontend_config.translation_unit_ (* This part below is a boilerplate in every frontends. *) (* This could be moved in the cfg_infer module *) NullabilityPreanalysis.analysis cfg tenv ; - Cfg.store source_file cfg ; - Tenv.store source_file tenv ; + SourceFiles.add source_file cfg (FileLocal tenv) ; if Config.debug_mode then Cfg.check_cfg_connectedness cfg ; if Config.debug_mode || Config.testing_mode || Config.frontend_tests || Option.is_some Config.icfg_dotty_outfile diff --git a/infer/src/java/jMain.ml b/infer/src/java/jMain.ml index c15647c9f..5269d117c 100644 --- a/infer/src/java/jMain.ml +++ b/infer/src/java/jMain.ml @@ -29,7 +29,7 @@ let init_global_state source_file = let store_icfg source_file cfg = - Cfg.store source_file cfg ; + SourceFiles.add source_file cfg Tenv.Global ; if Config.debug_mode || Config.frontend_tests then Dotty.print_icfg_dotty source_file cfg ; ()