From e01311c431dcd641e2a7825f78ba4f9fb2dcd707 Mon Sep 17 00:00:00 2001 From: Nikos Gorogiannis Date: Wed, 26 Feb 2020 04:10:31 -0800 Subject: [PATCH] [scheduler][callgraph] load graph directly from DB Summary: Currently the call graph of all captured procedures is loaded and then traversed to flag reachable procedures from modified files, followed by deleting the unflagged part, and unflagging the rest. This is a bit wasteful, and doesn't lend itself nicely to constructing directly the reverse call graph, which further diffs will do. This diff loads all captured procedures and callees in a hashconsed table, and performs a BFS from procedures in modified files, to build the call graph in one pass. Reviewed By: fgasperij Differential Revision: D19888965 fbshipit-source-id: eeb59356e --- infer/src/IR/Procname.ml | 5 ++ infer/src/IR/Procname.mli | 2 + infer/src/backend/CallGraph.ml | 13 +--- infer/src/backend/CallGraph.mli | 9 +-- infer/src/backend/SyntacticCallGraph.ml | 94 ++++++++++++++++++------- 5 files changed, 82 insertions(+), 41 deletions(-) diff --git a/infer/src/IR/Procname.ml b/infer/src/IR/Procname.ml index ff151b24b..69fcdcde3 100644 --- a/infer/src/IR/Procname.ml +++ b/infer/src/IR/Procname.ml @@ -734,10 +734,15 @@ module Hashable = struct let equal = equal + let compare = compare + let hash = hash + + let sexp_of_t t = Sexp.of_string (to_string t) end module Hash = Hashtbl.Make (Hashable) +module HashQueue = Hash_queue.Make (Hashable) module Map = PrettyPrintable.MakePPMap (struct type nonrec t = t diff --git a/infer/src/IR/Procname.mli b/infer/src/IR/Procname.mli index dc42980f2..7100ca9bd 100644 --- a/infer/src/IR/Procname.mli +++ b/infer/src/IR/Procname.mli @@ -223,6 +223,8 @@ val is_objc_method : t -> bool module Hash : Caml.Hashtbl.S with type key = t (** Hash tables with proc names as keys. *) +module HashQueue : Hash_queue.S with type key = t + module Map : PrettyPrintable.PPMap with type key = t (** Maps from proc names. *) diff --git a/infer/src/backend/CallGraph.ml b/infer/src/backend/CallGraph.ml index c41efe058..4c40c7a56 100644 --- a/infer/src/backend/CallGraph.ml +++ b/infer/src/backend/CallGraph.ml @@ -64,6 +64,8 @@ let node_of_id {node_map} id = NodeMap.find_opt node_map id let mem {node_map} id = NodeMap.mem node_map id +let mem_procname g pname = id_of_procname g pname |> Option.exists ~f:(mem g) + (** [id_map] may contain undefined procedures, so use [node_map] for actual size *) let n_procs {node_map} = NodeMap.length node_map @@ -124,10 +126,6 @@ let flag_reachable g start_pname = node_of_procname g start_pname |> Option.iter ~f:(fun start_node -> flag_list [start_node]) -let trim_id_map (g : t) = - IdMap.filter_map_inplace (fun _pname id -> Option.some_if (mem g id) id) g.id_map - - let pp_dot fmt {node_map} = F.fprintf fmt "@\ndigraph callgraph {@\n" ; NodeMap.iter (fun _id n -> Node.pp_dot fmt n) node_map ; @@ -140,13 +138,6 @@ let to_dotty g filename = pp_dot fmt g ; Out_channel.close outc -let remove_unflagged_and_unflag_all {id_map; node_map} = - NodeMap.filter_map_inplace - (fun _id (n : Node.t) -> - if n.flag then (Node.unset_flag n ; Some n) else (IdMap.remove id_map n.pname ; None) ) - node_map - - let iter_unflagged_leaves ~f g = NodeMap.iter (fun _id (n : Node.t) -> if not (n.flag || List.exists n.successors ~f:(mem g)) then f n) diff --git a/infer/src/backend/CallGraph.mli b/infer/src/backend/CallGraph.mli index 7e24e6803..d2a2a74b3 100644 --- a/infer/src/backend/CallGraph.mli +++ b/infer/src/backend/CallGraph.mli @@ -37,6 +37,9 @@ val n_procs : t -> int val mem : t -> int -> bool (** is an int [id] the index of a node in the graph? *) +val mem_procname : t -> Procname.t -> bool +(** is there a node for [procname] in the graph? *) + val flag : t -> Procname.t -> unit val flag_reachable : t -> Procname.t -> unit @@ -50,12 +53,6 @@ val remove : t -> Procname.t -> unit val to_dotty : t -> string -> unit (** output call graph in dotty format with the given filename in results dir *) -val trim_id_map : t -> unit -(** remove all pnames that do not correspond to a defined procedure from id_map *) - -val remove_unflagged_and_unflag_all : t -> unit -(** remove all nodes with flag set to false, and set flag to false on all remaining nodes *) - val add_edge : t -> pname:Procname.t -> successor_pname:Procname.t -> unit (** add an edge from [pname] to [successor_pname] in the graph, creating a node for [pname] if there isn't one already *) diff --git a/infer/src/backend/SyntacticCallGraph.ml b/infer/src/backend/SyntacticCallGraph.ml index 7a816fd11..a400b65f7 100644 --- a/infer/src/backend/SyntacticCallGraph.ml +++ b/infer/src/backend/SyntacticCallGraph.ml @@ -6,42 +6,88 @@ *) open! IStd module L = Logging -module IdMap = Procname.Hash - -let build_from_captured_procs g = - let hashcons_pname = - let pname_tbl : Procname.t IdMap.t = IdMap.create 1001 in - fun pname -> - match IdMap.find_opt pname_tbl pname with - | Some pname' -> - pname' - | None -> - IdMap.add pname_tbl pname pname ; pname - in + +let iter_captured_procs_and_callees f = let db = ResultsDatabase.get_database () in (* only load procedure info for those we have a CFG *) let stmt = Sqlite3.prepare db "SELECT proc_name, callees FROM procedures WHERE cfg IS NOT NULL and attr_kind != 0" in - SqliteUtils.result_fold_rows db ~log:"creating call graph" stmt ~init:() ~f:(fun () stmt -> - let proc_name = Sqlite3.column stmt 0 |> Procname.SQLite.deserialize |> hashcons_pname in - let callees = - Sqlite3.column stmt 1 |> Procname.SQLiteList.deserialize |> List.map ~f:hashcons_pname - in - CallGraph.create_node g proc_name callees ) + SqliteUtils.result_fold_rows db ~log:"loading captured procs" stmt ~init:() ~f:(fun () stmt -> + let proc_name = Sqlite3.column stmt 0 |> Procname.SQLite.deserialize in + let callees : Procname.t list = Sqlite3.column stmt 1 |> Procname.SQLiteList.deserialize in + f proc_name callees ) + + +type hashconsed_procname_info = + {name: Procname.t; mutable defined: bool; mutable callees: Procname.t list} + +let hashcons_pname pname_info pname = + match Procname.Hash.find_opt pname_info pname with + | Some {name} -> + name + | None -> + Procname.Hash.add pname_info pname {name= pname; defined= false; callees= []} ; + pname + + +let hashcons_and_update_pname pname_info pname callees = + let callees = List.map ~f:(hashcons_pname pname_info) callees in + match Procname.Hash.find_opt pname_info pname with + | Some info when info.defined -> + L.die InternalError "SyntacticCallGraph: Tried to define %a twice.@." Procname.pp pname + | Some info -> + info.callees <- callees ; + info.defined <- true + | None -> + Procname.Hash.add pname_info pname {name= pname; defined= true; callees} + + +(* load and hashcons all captured procs and their callees ; return also number of defined procs *) +let pname_info_from_captured_procs () = + let pname_info = Procname.Hash.create 1009 in + let n_captured = ref 0 in + iter_captured_procs_and_callees (fun pname callees -> + incr n_captured ; + hashcons_and_update_pname pname_info pname callees ) ; + (pname_info, !n_captured) + + +let enqueue q pname = Procname.HashQueue.enqueue_back q pname pname |> ignore + +let dequeue q = Procname.HashQueue.dequeue_front q + +let queue_from_sources pname_info sources = + let q = Procname.HashQueue.create () in + List.iter sources ~f:(fun sf -> + SourceFiles.proc_names_of_source sf + |> List.iter ~f:(fun pname -> hashcons_pname pname_info pname |> enqueue q) ) ; + q + + +let rec bfs pname_info g q = + match dequeue q with + | Some pname -> + ( match Procname.Hash.find_opt pname_info pname with + | Some {defined= true; callees} -> + CallGraph.create_node g pname callees ; + List.iter callees ~f:(fun pname -> + if not (CallGraph.mem_procname g pname) then enqueue q pname ) + | _ -> + () ) ; + bfs pname_info g q + | _ -> + () let build_from_sources sources = let g = CallGraph.create CallGraph.default_initial_capacity in let time0 = Mtime_clock.counter () in L.progress "Building call graph...@\n%!" ; - build_from_captured_procs g ; - let n_captured = CallGraph.n_procs g in - List.iter sources ~f:(fun sf -> - SourceFiles.proc_names_of_source sf |> List.iter ~f:(CallGraph.flag_reachable g) ) ; - CallGraph.remove_unflagged_and_unflag_all g ; - CallGraph.trim_id_map g ; + let pname_info, n_captured = pname_info_from_captured_procs () in + let q = queue_from_sources pname_info sources in + bfs pname_info g q ; if Config.debug_level_analysis > 0 then CallGraph.to_dotty g "syntactic_callgraph.dot" ; L.progress "Built call graph in %a, from %d total procs, %d reachable defined procs and takes %d bytes@."