[scheduler][callgraph] load graph directly from DB

Summary:
Currently the call graph of all captured procedures is loaded and then traversed to flag reachable procedures from modified files, followed by deleting the unflagged part, and unflagging the rest.  This is a bit wasteful, and doesn't lend itself nicely to constructing directly the reverse call graph, which further diffs will do.

This diff loads all captured procedures and callees in a hashconsed table, and performs a BFS from procedures in modified files, to build the call graph in one pass.

Reviewed By: fgasperij

Differential Revision: D19888965

fbshipit-source-id: eeb59356e
master
Nikos Gorogiannis 5 years ago committed by Facebook Github Bot
parent 3f4458361c
commit e01311c431

@ -734,10 +734,15 @@ module Hashable = struct
let equal = equal
let compare = compare
let hash = hash
let sexp_of_t t = Sexp.of_string (to_string t)
end
module Hash = Hashtbl.Make (Hashable)
module HashQueue = Hash_queue.Make (Hashable)
module Map = PrettyPrintable.MakePPMap (struct
type nonrec t = t

@ -223,6 +223,8 @@ val is_objc_method : t -> bool
module Hash : Caml.Hashtbl.S with type key = t
(** Hash tables with proc names as keys. *)
module HashQueue : Hash_queue.S with type key = t
module Map : PrettyPrintable.PPMap with type key = t
(** Maps from proc names. *)

@ -64,6 +64,8 @@ let node_of_id {node_map} id = NodeMap.find_opt node_map id
let mem {node_map} id = NodeMap.mem node_map id
let mem_procname g pname = id_of_procname g pname |> Option.exists ~f:(mem g)
(** [id_map] may contain undefined procedures, so use [node_map] for actual size *)
let n_procs {node_map} = NodeMap.length node_map
@ -124,10 +126,6 @@ let flag_reachable g start_pname =
node_of_procname g start_pname |> Option.iter ~f:(fun start_node -> flag_list [start_node])
let trim_id_map (g : t) =
IdMap.filter_map_inplace (fun _pname id -> Option.some_if (mem g id) id) g.id_map
let pp_dot fmt {node_map} =
F.fprintf fmt "@\ndigraph callgraph {@\n" ;
NodeMap.iter (fun _id n -> Node.pp_dot fmt n) node_map ;
@ -140,13 +138,6 @@ let to_dotty g filename =
pp_dot fmt g ; Out_channel.close outc
let remove_unflagged_and_unflag_all {id_map; node_map} =
NodeMap.filter_map_inplace
(fun _id (n : Node.t) ->
if n.flag then (Node.unset_flag n ; Some n) else (IdMap.remove id_map n.pname ; None) )
node_map
let iter_unflagged_leaves ~f g =
NodeMap.iter
(fun _id (n : Node.t) -> if not (n.flag || List.exists n.successors ~f:(mem g)) then f n)

@ -37,6 +37,9 @@ val n_procs : t -> int
val mem : t -> int -> bool
(** is an int [id] the index of a node in the graph? *)
val mem_procname : t -> Procname.t -> bool
(** is there a node for [procname] in the graph? *)
val flag : t -> Procname.t -> unit
val flag_reachable : t -> Procname.t -> unit
@ -50,12 +53,6 @@ val remove : t -> Procname.t -> unit
val to_dotty : t -> string -> unit
(** output call graph in dotty format with the given filename in results dir *)
val trim_id_map : t -> unit
(** remove all pnames that do not correspond to a defined procedure from id_map *)
val remove_unflagged_and_unflag_all : t -> unit
(** remove all nodes with flag set to false, and set flag to false on all remaining nodes *)
val add_edge : t -> pname:Procname.t -> successor_pname:Procname.t -> unit
(** add an edge from [pname] to [successor_pname] in the graph, creating a node for [pname] if there
isn't one already *)

@ -6,42 +6,88 @@
*)
open! IStd
module L = Logging
module IdMap = Procname.Hash
let build_from_captured_procs g =
let hashcons_pname =
let pname_tbl : Procname.t IdMap.t = IdMap.create 1001 in
fun pname ->
match IdMap.find_opt pname_tbl pname with
| Some pname' ->
pname'
| None ->
IdMap.add pname_tbl pname pname ; pname
in
let iter_captured_procs_and_callees f =
let db = ResultsDatabase.get_database () in
(* only load procedure info for those we have a CFG *)
let stmt =
Sqlite3.prepare db
"SELECT proc_name, callees FROM procedures WHERE cfg IS NOT NULL and attr_kind != 0"
in
SqliteUtils.result_fold_rows db ~log:"creating call graph" stmt ~init:() ~f:(fun () stmt ->
let proc_name = Sqlite3.column stmt 0 |> Procname.SQLite.deserialize |> hashcons_pname in
let callees =
Sqlite3.column stmt 1 |> Procname.SQLiteList.deserialize |> List.map ~f:hashcons_pname
in
CallGraph.create_node g proc_name callees )
SqliteUtils.result_fold_rows db ~log:"loading captured procs" stmt ~init:() ~f:(fun () stmt ->
let proc_name = Sqlite3.column stmt 0 |> Procname.SQLite.deserialize in
let callees : Procname.t list = Sqlite3.column stmt 1 |> Procname.SQLiteList.deserialize in
f proc_name callees )
type hashconsed_procname_info =
{name: Procname.t; mutable defined: bool; mutable callees: Procname.t list}
let hashcons_pname pname_info pname =
match Procname.Hash.find_opt pname_info pname with
| Some {name} ->
name
| None ->
Procname.Hash.add pname_info pname {name= pname; defined= false; callees= []} ;
pname
let hashcons_and_update_pname pname_info pname callees =
let callees = List.map ~f:(hashcons_pname pname_info) callees in
match Procname.Hash.find_opt pname_info pname with
| Some info when info.defined ->
L.die InternalError "SyntacticCallGraph: Tried to define %a twice.@." Procname.pp pname
| Some info ->
info.callees <- callees ;
info.defined <- true
| None ->
Procname.Hash.add pname_info pname {name= pname; defined= true; callees}
(* load and hashcons all captured procs and their callees ; return also number of defined procs *)
let pname_info_from_captured_procs () =
let pname_info = Procname.Hash.create 1009 in
let n_captured = ref 0 in
iter_captured_procs_and_callees (fun pname callees ->
incr n_captured ;
hashcons_and_update_pname pname_info pname callees ) ;
(pname_info, !n_captured)
let enqueue q pname = Procname.HashQueue.enqueue_back q pname pname |> ignore
let dequeue q = Procname.HashQueue.dequeue_front q
let queue_from_sources pname_info sources =
let q = Procname.HashQueue.create () in
List.iter sources ~f:(fun sf ->
SourceFiles.proc_names_of_source sf
|> List.iter ~f:(fun pname -> hashcons_pname pname_info pname |> enqueue q) ) ;
q
let rec bfs pname_info g q =
match dequeue q with
| Some pname ->
( match Procname.Hash.find_opt pname_info pname with
| Some {defined= true; callees} ->
CallGraph.create_node g pname callees ;
List.iter callees ~f:(fun pname ->
if not (CallGraph.mem_procname g pname) then enqueue q pname )
| _ ->
() ) ;
bfs pname_info g q
| _ ->
()
let build_from_sources sources =
let g = CallGraph.create CallGraph.default_initial_capacity in
let time0 = Mtime_clock.counter () in
L.progress "Building call graph...@\n%!" ;
build_from_captured_procs g ;
let n_captured = CallGraph.n_procs g in
List.iter sources ~f:(fun sf ->
SourceFiles.proc_names_of_source sf |> List.iter ~f:(CallGraph.flag_reachable g) ) ;
CallGraph.remove_unflagged_and_unflag_all g ;
CallGraph.trim_id_map g ;
let pname_info, n_captured = pname_info_from_captured_procs () in
let q = queue_from_sources pname_info sources in
bfs pname_info g q ;
if Config.debug_level_analysis > 0 then CallGraph.to_dotty g "syntactic_callgraph.dot" ;
L.progress
"Built call graph in %a, from %d total procs, %d reachable defined procs and takes %d bytes@."

Loading…
Cancel
Save