From 1d3e0741acc83490dcaf5aeecadfa2c9163cbfc6 Mon Sep 17 00:00:00 2001 From: Jules Villard Date: Wed, 24 Jan 2018 07:24:59 -0800 Subject: [PATCH] [cfg] get defined procs of file from cfg db Summary: In preparation for getting rid of call graphs, we need to find another way to get the list of defined procedures (which is the only place where we use the globally-computed call graph for now). The natural way to get the list of procedures defined in a file is to load the cfg for that file and look at the proc names that are the keys of the cfg. This is way too expensive, as the CFG is big. Thus, we cache this list of proc names as another column in the SQLite database of cfgs. This gives good performance in benchmarks. Reviewed By: jeremydubreil Differential Revision: D6621142 fbshipit-source-id: ed265fe --- infer/src/IR/Cfg.ml | 6 +++++- infer/src/IR/Typ.ml | 4 ++++ infer/src/IR/Typ.mli | 2 ++ infer/src/backend/InferAnalyze.ml | 7 ++----- infer/src/backend/callbacks.ml | 5 ++--- infer/src/backend/callbacks.mli | 2 +- infer/src/backend/exe_env.ml | 6 ++---- infer/src/backend/exe_env.mli | 14 +++++++++----- infer/src/backend/printer.ml | 2 -- infer/src/base/ResultsDatabase.ml | 3 ++- infer/src/base/SourceFiles.ml | 14 ++++++++++++++ infer/src/base/SourceFiles.mli | 3 +++ 12 files changed, 46 insertions(+), 22 deletions(-) diff --git a/infer/src/IR/Cfg.ml b/infer/src/IR/Cfg.ml index 9e383f8cc..2e92e57fe 100644 --- a/infer/src/IR/Cfg.ml +++ b/infer/src/IR/Cfg.ml @@ -269,7 +269,8 @@ let mark_unchanged_pdescs cfg_new cfg_old = let store_statement = - ResultsDatabase.register_statement "INSERT OR REPLACE INTO source_files VALUES (:source, :cfgs)" + ResultsDatabase.register_statement + "INSERT OR REPLACE INTO source_files VALUES (:source, :cfgs, :proc_names)" let store source_file cfg = @@ -287,6 +288,9 @@ let store source_file cfg = SQLite.serialize cfg |> Sqlite3.bind store_stmt 2 (* :cfg *) |> SqliteUtils.check_sqlite_error db ~log:"store bind cfg" ; + get_all_proc_names cfg |> Typ.Procname.SQLiteList.serialize |> Sqlite3.bind store_stmt 3 + (* :proc_names *) + |> SqliteUtils.check_sqlite_error db ~log:"store bind proc names" ; SqliteUtils.sqlite_unit_step ~finalize:false ~log:"Cfg.store" db store_stmt ) diff --git a/infer/src/IR/Typ.ml b/infer/src/IR/Typ.ml index 1c24cc93f..28f88a15a 100644 --- a/infer/src/IR/Typ.ml +++ b/infer/src/IR/Typ.ml @@ -1110,6 +1110,10 @@ module Procname = struct let default () = Sqlite3.Data.TEXT (to_filename pname) in Base.Hashtbl.find_or_add pname_to_key pname ~default end + + module SQLiteList = SqliteUtils.MarshalledData (struct + type nonrec t = t list + end) end module Fieldname = struct diff --git a/infer/src/IR/Typ.mli b/infer/src/IR/Typ.mli index 7d4b92a88..d585d3356 100644 --- a/infer/src/IR/Typ.mli +++ b/infer/src/IR/Typ.mli @@ -394,6 +394,8 @@ module Procname : sig val serialize : t -> Sqlite3.Data.t end + module SQLiteList : SqliteUtils.Data with type t = t list + val c : QualifiedCppName.t -> string -> template_spec_info -> is_generic_model:bool -> c (** Create a C procedure name from plain and mangled name. *) diff --git a/infer/src/backend/InferAnalyze.ml b/infer/src/backend/InferAnalyze.ml index 733478a22..a571f86b9 100644 --- a/infer/src/backend/InferAnalyze.ml +++ b/infer/src/backend/InferAnalyze.ml @@ -20,18 +20,15 @@ let analyze_exe_env_tasks cluster exe_env : Tasks.t = Random.self_init () ; Tasks.create [ (fun () -> - let call_graph = Exe_env.get_cg exe_env in - Callbacks.iterate_callbacks call_graph exe_env ; + Callbacks.iterate_callbacks exe_env ; if Config.write_html then Printer.write_all_html_files cluster ) ] (** Create tasks to analyze a cluster *) let analyze_cluster_tasks cluster_num (cluster: Cluster.t) : Tasks.t = let exe_env = Exe_env.mk cluster in - let defined_procs = Cg.get_defined_nodes (Exe_env.get_cg exe_env) in - let num_procs = List.length defined_procs in L.(debug Analysis Medium) - "@\nProcessing cluster #%d with %d procedures@." (cluster_num + 1) num_procs ; + "@\nProcessing cluster '%a' #%d@." SourceFile.pp cluster (cluster_num + 1) ; analyze_exe_env_tasks cluster exe_env diff --git a/infer/src/backend/callbacks.ml b/infer/src/backend/callbacks.ml index 74a149e97..7d4e11652 100644 --- a/infer/src/backend/callbacks.ml +++ b/infer/src/backend/callbacks.ml @@ -87,9 +87,8 @@ let iterate_cluster_callbacks all_procs exe_env get_proc_desc = (** Invoke all procedure and cluster callbacks on a given environment. *) -let iterate_callbacks call_graph exe_env = +let iterate_callbacks (exe_env: Exe_env.t) = let saved_language = !Config.curr_language in - (* Create and register on-demand analysis callback *) let get_proc_desc proc_name = match Exe_env.get_proc_desc exe_env proc_name with | Some pdesc -> @@ -104,7 +103,7 @@ let iterate_callbacks call_graph exe_env = (* Invoke procedure callbacks using on-demand analysis schedulling *) let procs_to_analyze = (* analyze all the currently defined procedures *) - Cg.get_defined_nodes call_graph + SourceFiles.proc_names_of_source exe_env.source_file in let analyze_proc_name pname = Option.iter diff --git a/infer/src/backend/callbacks.mli b/infer/src/backend/callbacks.mli index 08da5ee20..1bc9ede85 100644 --- a/infer/src/backend/callbacks.mli +++ b/infer/src/backend/callbacks.mli @@ -37,5 +37,5 @@ val register_procedure_callback : val register_cluster_callback : Config.language -> cluster_callback_t -> unit (** register a cluster callback *) -val iterate_callbacks : Cg.t -> Exe_env.t -> unit +val iterate_callbacks : Exe_env.t -> unit (** Invoke all the registered callbacks. *) diff --git a/infer/src/backend/exe_env.ml b/infer/src/backend/exe_env.ml index 1cdc8a8d9..23da08238 100644 --- a/infer/src/backend/exe_env.ml +++ b/infer/src/backend/exe_env.ml @@ -44,6 +44,8 @@ end) let new_file_data source cg_fname = let file_base = DB.chop_extension cg_fname in let tenv_file = tenv_filename file_base in + (* Do not fill in tenv and cfg as they can be quite large. This makes calls to fork() cheaper + until we start filling out these fields. *) { source ; tenv_file ; tenv= None (* Sil.load_tenv_from_file tenv_file *) @@ -60,7 +62,6 @@ let create_file_data table source cg_fname = file_data -(** execution environment *) type t = { cg: Cg.t (** global call graph *) ; proc_map: file_data Typ.Procname.Hash.t (** map from procedure name to file data *) @@ -97,9 +98,6 @@ let add_cg exe_env source = Cg.extend exe_env.cg cg -(** get the global call graph *) -let get_cg exe_env = exe_env.cg - let get_file_data exe_env pname = try Some (Typ.Procname.Hash.find exe_env.proc_map pname) with Not_found -> let source_file_opt = diff --git a/infer/src/backend/exe_env.mli b/infer/src/backend/exe_env.mli index 946b99df1..fa3583ea7 100644 --- a/infer/src/backend/exe_env.mli +++ b/infer/src/backend/exe_env.mli @@ -12,15 +12,19 @@ open! IStd (** Support for Execution environments *) -(** execution environment: a global call graph, and map from procedure names to cfg and tenv *) -type t +type file_data + +module FilenameHash : Caml.Hashtbl.S + +type t = private + { cg: Cg.t (** global call graph *) + ; proc_map: file_data Typ.Procname.Hash.t (** map from procedure name to file data *) + ; file_map: file_data FilenameHash.t (** map from cg fname to file data *) + ; source_file: SourceFile.t (** source file being analyzed *) } val mk : SourceFile.t -> t (** Create an exe_env from a source file *) -val get_cg : t -> Cg.t -(** get the global call graph *) - val get_tenv : t -> Typ.Procname.t -> Tenv.t (** return the type environment associated to the procedure *) diff --git a/infer/src/backend/printer.ml b/infer/src/backend/printer.ml index 88e8f6d59..68ed27256 100644 --- a/infer/src/backend/printer.ml +++ b/infer/src/backend/printer.ml @@ -529,8 +529,6 @@ let write_html_file linereader filename procs = (** Create filename.ext.html for each file in the cluster. *) let write_all_html_files cluster = let exe_env = Exe_env.mk cluster in - let load_proc_desc pname = ignore (Exe_env.get_proc_desc exe_env pname) in - let () = List.iter ~f:load_proc_desc (Cg.get_defined_nodes (Exe_env.get_cg exe_env)) in let opt_whitelist_regex = match Config.write_html_whitelist_regex with | [] -> diff --git a/infer/src/base/ResultsDatabase.ml b/infer/src/base/ResultsDatabase.ml index befb8d8c4..f816d0912 100644 --- a/infer/src/base/ResultsDatabase.ml +++ b/infer/src/base/ResultsDatabase.ml @@ -28,7 +28,8 @@ let procedures_schema = let source_files_schema = {|CREATE TABLE IF NOT EXISTS source_files ( source_file TEXT PRIMARY KEY - , cfgs BLOB NOT NULL )|} + , cfgs BLOB NOT NULL + , procedure_names BLOB NOT NULL )|} let schema_hum = Printf.sprintf "%s;\n%s" procedures_schema source_files_schema diff --git a/infer/src/base/SourceFiles.ml b/infer/src/base/SourceFiles.ml index 505d9757d..5ee239322 100644 --- a/infer/src/base/SourceFiles.ml +++ b/infer/src/base/SourceFiles.ml @@ -15,6 +15,20 @@ let get_all () = |> List.filter_map ~f:(Option.map ~f:SourceFile.SQLite.deserialize) +let load_proc_names_statement = + ResultsDatabase.register_statement + "SELECT procedure_names FROM source_files WHERE source_file = :k" + + +let proc_names_of_source source = + ResultsDatabase.with_registered_statement load_proc_names_statement ~f:(fun db load_stmt -> + SourceFile.SQLite.serialize source |> Sqlite3.bind load_stmt 1 + |> SqliteUtils.check_sqlite_error db ~log:"load bind source file" ; + SqliteUtils.sqlite_result_step ~finalize:false db ~log:"SourceFiles.proc_names_of_source" + load_stmt + |> Option.value_map ~default:[] ~f:Typ.Procname.SQLiteList.deserialize ) + + let exists_statement = ResultsDatabase.register_statement "SELECT 1 FROM source_files WHERE source_file = :k" diff --git a/infer/src/base/SourceFiles.mli b/infer/src/base/SourceFiles.mli index 5b179eb58..331ed7d59 100644 --- a/infer/src/base/SourceFiles.mli +++ b/infer/src/base/SourceFiles.mli @@ -10,5 +10,8 @@ val get_all : unit -> SourceFile.t list (** get all the source files in the database *) +val proc_names_of_source : SourceFile.t -> Typ.Procname.t list +(** list of all the proc names (declared and defined) found in a source file *) + val is_captured : SourceFile.t -> bool (** has the source file been captured? *)