diff --git a/infer/man/man1/infer-full.txt b/infer/man/man1/infer-full.txt
index 460b9c2ea..3e61ffd07 100644
--- a/infer/man/man1/infer-full.txt
+++ b/infer/man/man1/infer-full.txt
@@ -1155,6 +1155,10 @@ INTERNAL OPTIONS
        --buck-targets-blacklist-reset
            Set --buck-targets-blacklist to the empty list.
 
+       --call-graph-schedule
+           Activates: use call graph for scheduling analysis (Conversely:
+           --no-call-graph-schedule)
+
        --no-capture
            Deactivates: capture and translate source files into infer's
            intermediate language for analysis (Conversely: --capture)
diff --git a/infer/src/backend/CallGraph.ml b/infer/src/backend/CallGraph.ml
new file mode 100644
index 000000000..76a6342e9
--- /dev/null
+++ b/infer/src/backend/CallGraph.ml
@@ -0,0 +1,173 @@
+(*
+ * Copyright (c) 2019-present, Facebook, Inc.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *)
+open! IStd
+module F = Format
+module L = Logging
+
+module type NodeSig = sig
+  type t = private {id: int; pname: Typ.Procname.t; successors: int list; mutable flag: bool}
+
+  val make : int -> Typ.Procname.t -> int list -> t
+
+  val set_flag : t -> unit
+
+  val unset_flag : t -> unit
+
+  val pp_dot : F.formatter -> t -> unit
+end
+
+module Node : NodeSig = struct
+  type t = {id: int; pname: Typ.Procname.t; successors: int list; mutable flag: bool}
+
+  let make id pname successors = {id; pname; successors; flag= false}
+
+  let set_flag n = n.flag <- true
+
+  let unset_flag n = n.flag <- false
+
+  let pp_dot fmt {id; pname; successors} =
+    let pp_id fmt id = F.fprintf fmt "N%d" id in
+    let pp_edge fmt src dst = F.fprintf fmt " %a -> %a ;@\n" pp_id src pp_id dst in
+    F.fprintf fmt " %a [ label = %S ];@\n" pp_id id (F.asprintf "%a" Typ.Procname.pp pname) ;
+    List.iter successors ~f:(pp_edge fmt id) ;
+    F.pp_print_newline fmt ()
+end
+
+module IdMap = Typ.Procname.Hash
+module NodeMap = Caml.Hashtbl.Make (Int)
+
+(** [node_map] is a map from ids (unique ints) to nodes corresponding to defined procedures.
+    [id_map] is a map from all encountered (not necessarily defined) procnames to their ids,
+    and thus its image is a superset of the domain of [node_map], and usually a strict superset.
+    [trim_id_map] makes the image equal to the domain of [node_map]. *)
+type t = {id_map: int IdMap.t; node_map: Node.t NodeMap.t}
+
+let create initial_capacity =
+  {id_map= IdMap.create initial_capacity; node_map= NodeMap.create initial_capacity}
+
+
+let id_of_procname {id_map} pname = IdMap.find_opt id_map pname
+
+let node_of_id {node_map} id = NodeMap.find_opt node_map id
+
+let mem {node_map} id = NodeMap.mem node_map id
+
+(** [id_map] may contain undefined procedures, so use [node_map] for actual size *)
+let length {node_map} = NodeMap.length node_map
+
+let node_of_procname g pname = id_of_procname g pname |> Option.bind ~f:(node_of_id g)
+
+let remove (g : t) pname id = IdMap.remove g.id_map pname ; NodeMap.remove g.node_map id
+
+let add ({id_map; node_map} as graph) pname successor_pnames =
+  let get_or_set_id procname =
+    match id_of_procname graph procname with
+    | None ->
+        let id = IdMap.length id_map in
+        IdMap.replace id_map procname id ; id
+    | Some id ->
+        id
+  in
+  let id = get_or_set_id pname in
+  let successors = List.map successor_pnames ~f:get_or_set_id in
+  let node = Node.make id pname successors in
+  NodeMap.replace node_map id node
+
+
+let remove_reachable g start_pname =
+  let add_live_successors_and_remove_self init (n : Node.t) =
+    remove g n.pname n.id ;
+    List.fold n.successors ~init ~f:(fun init succ_id ->
+        node_of_id g succ_id |> Option.fold ~init ~f:(fun acc s -> s :: acc) )
+  in
+  let rec remove_list frontier =
+    if not (List.is_empty frontier) then
+      remove_list (List.fold frontier ~init:[] ~f:add_live_successors_and_remove_self)
+  in
+  node_of_procname g start_pname |> Option.iter ~f:(fun start_node -> remove_list [start_node])
+
+
+let flag_reachable g start_pname =
+  let process_node init (n : Node.t) =
+    if n.flag then init
+    else (
+      Node.set_flag n ;
+      List.fold n.successors ~init ~f:(fun acc id ->
+          match node_of_id g id with Some n' when not n'.flag -> n' :: acc | _ -> acc ) )
+  in
+  let rec flag_list frontier =
+    if not (List.is_empty frontier) then flag_list (List.fold frontier ~init:[] ~f:process_node)
+  in
+  node_of_procname g start_pname |> Option.iter ~f:(fun start_node -> flag_list [start_node])
+
+
+let remove_unflagged_and_unflag_all {id_map; node_map} =
+  NodeMap.filter_map_inplace
+    (fun _id (n : Node.t) ->
+      if n.flag then ( Node.unset_flag n ; Some n ) else ( IdMap.remove id_map n.pname ; None ) )
+    node_map
+
+
+(** remove pnames for all undefined procedures *)
+let trim_id_map (g : t) =
+  IdMap.filter_map_inplace (fun _pname id -> Option.some_if (mem g id) id) g.id_map
+
+
+let pp_dot fmt {node_map} =
+  F.fprintf fmt "@\ndigraph callgraph {@\n" ;
+  NodeMap.iter (fun _id n -> Node.pp_dot fmt n) node_map ;
+  F.fprintf fmt "}@."
+
+
+let to_dotty g =
+  let outc = Filename.concat Config.results_dir "callgraph.dot" |> Out_channel.create in
+  let fmt = F.formatter_of_out_channel outc in
+  pp_dot fmt g ; Out_channel.close outc
+
+
+let build_from_captured_procs g =
+  let hashcons_pname =
+    let pname_tbl : Typ.Procname.t IdMap.t = IdMap.create 1001 in
+    fun pname ->
+      match IdMap.find_opt pname_tbl pname with
+      | Some pname' ->
+          pname'
+      | None ->
+          IdMap.add pname_tbl pname pname ; pname
+  in
+  let db = ResultsDatabase.get_database () in
+  let stmt = Sqlite3.prepare db "SELECT proc_name, callees FROM procedures" in
+  SqliteUtils.result_fold_rows db ~log:"creating call graph" stmt ~init:() ~f:(fun () stmt ->
+      let proc_name = Sqlite3.column stmt 0 |> Typ.Procname.SQLite.deserialize |> hashcons_pname in
+      let callees =
+        Sqlite3.column stmt 1 |> Typ.Procname.SQLiteList.deserialize |> List.map ~f:hashcons_pname
+      in
+      add g proc_name callees )
+
+
+let build_from_sources g sources =
+  let time0 = Mtime_clock.counter () in
+  L.progress "Building call graph...@\n%!" ;
+  build_from_captured_procs g ;
+  let captured_length = length g in
+  List.iter sources ~f:(fun sf ->
+      SourceFiles.proc_names_of_source sf |> List.iter ~f:(flag_reachable g) ) ;
+  remove_unflagged_and_unflag_all g ;
+  trim_id_map g ;
+  if Config.debug_level_analysis > 0 then to_dotty g ;
+  L.progress "Building call graph took %a@\n" Mtime.Span.pp (Mtime_clock.count time0) ;
+  L.progress
+    "Constructed call graph from %d total procs, %d reachable defined procs, and takes %d bytes@."
+    captured_length (length g)
+    (Obj.(reachable_words (repr g)) * (Sys.word_size / 8))
+
+
+let get_unflagged_leaves g =
+  NodeMap.fold
+    (fun _id (n : Node.t) acc ->
+      if n.flag || List.exists n.successors ~f:(mem g) then acc else n :: acc )
+    g.node_map []
diff --git a/infer/src/backend/CallGraph.mli b/infer/src/backend/CallGraph.mli
new file mode 100644
index 000000000..854de966e
--- /dev/null
+++ b/infer/src/backend/CallGraph.mli
@@ -0,0 +1,41 @@
+(*
+ * Copyright (c) 2019-present, Facebook, Inc.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *)
+open! IStd
+module F = Format
+
+module type NodeSig = sig
+  type t = private {id: int; pname: Typ.Procname.t; successors: int list; mutable flag: bool}
+
+  val make : int -> Typ.Procname.t -> int list -> t
+
+  val set_flag : t -> unit
+
+  val unset_flag : t -> unit
+
+  val pp_dot : F.formatter -> t -> unit
+end
+
+module Node : NodeSig
+
+type t
+
+val create : int -> t
+(** [create n] makes an empty graph with initial capacity [n] which grows as required *)
+
+val build_from_sources : t -> SourceFile.t list -> unit
+(** build restriction of call graph to procedures reachable from provided sources *)
+
+val mem : t -> int -> bool
+(** is an int [id] the index of a node in the graph? *)
+
+val flag_reachable : t -> Typ.Procname.t -> unit
+(** flag all nodes reachable from the node of the given procname, if it exists *)
+
+val remove_reachable : t -> Typ.Procname.t -> unit
+(** remove all nodes reachable from procname *)
+
+val get_unflagged_leaves : t -> Node.t list
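The interface above is easiest to read next to the code that drives it (the bottom_up generator in TaskScheduler.ml further down). Below is a minimal illustrative sketch of that driving pattern, not taken from the patch: it assumes only the signatures declared in CallGraph.mli, plus a hypothetical [sources] list and [analyze] callback, and it is written in Infer's IStd/Core style like the surrounding code. The real scheduler additionally flags in-flight nodes, and the chained file-level generator in TaskScheduler.schedule covers whatever the bottom-up pass cannot schedule (for example cycles).

(* Illustrative sketch only, not part of this diff; assumes [open! IStd] as in the files above.
   [sources] and [analyze] are placeholders. *)
let analyze_bottom_up (sources : SourceFile.t list) ~(analyze : Typ.Procname.t -> unit) =
  let g = CallGraph.create 1009 in
  (* restrict the graph to procedures reachable from the given source files *)
  CallGraph.build_from_sources g sources ;
  (* repeatedly take leaves (nodes with no remaining successors in the graph), analyze them,
     and delete them so that their callers become leaves in turn *)
  let rec drain () =
    match CallGraph.get_unflagged_leaves g with
    | [] ->
        (* either the graph is empty or only cycles remain; the real scheduler leaves
           the leftovers to file-level tasks *)
        ()
    | leaves ->
        List.iter leaves ~f:(fun (n : CallGraph.Node.t) ->
            analyze n.pname ;
            CallGraph.remove_reachable g n.pname ) ;
        drain ()
  in
  drain ()
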
diff --git a/infer/src/backend/InferAnalyze.ml b/infer/src/backend/InferAnalyze.ml
index 1e3ec1512..56adb087e 100644
--- a/infer/src/backend/InferAnalyze.ml
+++ b/infer/src/backend/InferAnalyze.ml
@@ -17,18 +17,28 @@ let clear_caches () =
   Typ.Procname.SQLite.clear_cache ()
 
 
-(** Create tasks to analyze an execution environment *)
-let analyze_source_file : SourceFile.t Tasks.doer =
- fun source_file ->
-  if Config.memcached then Memcached.connect () ;
-  DB.Results_dir.init source_file ;
-  let exe_env = Exe_env.mk () in
-  L.task_progress SourceFile.pp source_file ~f:(fun () ->
-      (* clear cache for each source file to avoid it growing unboundedly *)
-      clear_caches () ;
-      Callbacks.analyze_file exe_env source_file ;
-      if Config.write_html then Printer.write_all_html_files source_file ) ;
-  if Config.memcached then Memcached.disconnect ()
+let analyze_target : TaskScheduler.target Tasks.doer =
+  let analyze_source_file exe_env source_file =
+    DB.Results_dir.init source_file ;
+    L.task_progress SourceFile.pp source_file ~f:(fun () ->
+        Callbacks.analyze_file exe_env source_file ;
+        if Config.write_html then Printer.write_all_html_files source_file )
+  in
+  let analyze_proc_name exe_env proc_name =
+    L.task_progress Typ.Procname.pp proc_name ~f:(fun () ->
+        Callbacks.analyze_proc_name exe_env proc_name )
+  in
+  fun target ->
+    if Config.memcached then Memcached.connect () ;
+    let exe_env = Exe_env.mk () in
+    (* clear cache for each source file to avoid it growing unboundedly *)
+    clear_caches () ;
+    ( match target with
+    | Procname procname ->
+        analyze_proc_name exe_env procname
+    | File source_file ->
+        analyze_source_file exe_env source_file ) ;
+    if Config.memcached then Memcached.disconnect ()
 
 
 let output_json_makefile_stats clusters =
@@ -95,17 +105,14 @@ let main ~changed_files =
     Config.results_dir ;
   (* empty all caches to minimize the process heap to have less work to do when forking *)
   clear_caches () ;
-  ( if Int.equal Config.jobs 1 then (
-      Tasks.run_sequentially ~f:analyze_source_file source_files_to_analyze ;
+  if Int.equal Config.jobs 1 then (
+    let target_files = List.rev_map source_files_to_analyze ~f:(fun sf -> TaskScheduler.File sf) in
+    Tasks.run_sequentially ~f:analyze_target target_files ;
       L.progress "@\nAnalysis finished in %as@." Pp.elapsed_time () )
-    else
-      let source_files_to_analyze =
-        List.permute source_files_to_analyze
-          ~random_state:(Random.State.make (Array.create ~len:1 0))
-      in
+  else (
       L.environment_info "Parallel jobs: %d@." Config.jobs ;
+    let tasks = TaskScheduler.schedule source_files_to_analyze in
       (* Prepare tasks one cluster at a time while executing in parallel *)
-      let tasks = Tasks.gen_of_list source_files_to_analyze in
-      let runner = Tasks.Runner.create ~jobs:Config.jobs ~f:analyze_source_file ~tasks in
+    let runner = Tasks.Runner.create ~jobs:Config.jobs ~f:analyze_target ~tasks in
       Tasks.Runner.run runner ) ;
   output_json_makefile_stats source_files_to_analyze
diff --git a/infer/src/backend/TaskScheduler.ml b/infer/src/backend/TaskScheduler.ml
new file mode 100644
index 000000000..ad77f038e
--- /dev/null
+++ b/infer/src/backend/TaskScheduler.ml
@@ -0,0 +1,100 @@
+(*
+ * Copyright (c) 2019-present, Facebook, Inc.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *)
+open! IStd
+module L = Logging
+
+type target = Procname of Typ.Procname.t | File of SourceFile.t
+
+type 'a task_generator = 'a Tasks.task_generator
+
+let chain (gen1 : 'a task_generator) (gen2 : 'a task_generator) : 'a task_generator =
+  let n_tasks = gen1.n_tasks + gen2.n_tasks in
+  let gen1_returned_empty = ref false in
+  let gen1_is_empty () =
+    gen1_returned_empty := !gen1_returned_empty || gen1.is_empty () ;
+    !gen1_returned_empty
+  in
+  let is_empty () = gen1_is_empty () && gen2.is_empty () in
+  let next x = if gen1_is_empty () then gen2.next x else gen1.next x in
+  {n_tasks; is_empty; next}
+
+
+let count_procedures () =
+  let db = ResultsDatabase.get_database () in
+  let stmt = Sqlite3.prepare db "SELECT COUNT(rowid) FROM procedures" in
+  let count =
+    match SqliteUtils.result_single_column_option db ~log:"counting procedures" stmt with
+    | Some (Sqlite3.Data.INT i64) ->
+        Int64.to_int i64 |> Option.value ~default:Int.max_value
+    | _ ->
+        L.die InternalError "Got no result trying to count procedures"
+  in
+  L.debug Analysis Quiet "Found %d procedures in procedures table.@." count ;
+  count
+
+
+(** choose some reasonable minimum capacity that also is a prime number *)
+let initial_call_graph_capacity = 1009
+
+let bottom_up sources : target task_generator =
+  (* this will potentially grossly overapproximate the tasks *)
+  let n_tasks = count_procedures () in
+  let g = CallGraph.create initial_call_graph_capacity in
+  let initialized = ref false in
+  let pending : CallGraph.Node.t list ref = ref [] in
+  let scheduled = ref Typ.Procname.Set.empty in
+  let is_empty () =
+    !initialized && List.is_empty !pending && Typ.Procname.Set.is_empty !scheduled
+  in
+  let rec next_aux () =
+    match !pending with
+    | [] ->
+        pending := CallGraph.get_unflagged_leaves g ;
+        if List.is_empty !pending then None else next_aux ()
+    | n :: ns when n.flag || not (CallGraph.mem g n.id) ->
+        pending := ns ;
+        next_aux ()
+    | n :: ns ->
+        pending := ns ;
+        scheduled := Typ.Procname.Set.add n.pname !scheduled ;
+        CallGraph.flag_reachable g n.pname ;
+        Some (Procname n.pname)
+  in
+  let next target_opt =
+    (* do construction here, to avoid having the call graph into forked workers *)
+    if not !initialized then (
+      CallGraph.build_from_sources g sources ;
+      initialized := true ) ;
+    ( match target_opt with
+    | None ->
+        ()
+    | Some (File _) ->
+        assert false
+    | Some (Procname pname) ->
+        scheduled := Typ.Procname.Set.remove pname !scheduled ;
+        CallGraph.remove_reachable g pname ) ;
+    next_aux ()
+  in
+  {n_tasks; is_empty; next}
+
+
+let of_sources sources =
+  let gen =
+    List.rev_map sources ~f:(fun sf -> File sf)
+    |> List.permute ~random_state:(Random.State.make (Array.create ~len:1 0))
+    |> Tasks.gen_of_list
+  in
+  let next x =
+    let res = gen.next x in
+    match res with None -> None | Some (Procname _) -> assert false | Some (File _) as v -> v
+  in
+  {gen with next}
+
+
+let schedule sources =
+  if Config.call_graph_schedule then chain (bottom_up sources) (of_sources sources)
+  else of_sources sources
diff --git a/infer/src/backend/TaskScheduler.mli b/infer/src/backend/TaskScheduler.mli
new file mode 100644
index 000000000..cb436011c
--- /dev/null
+++ b/infer/src/backend/TaskScheduler.mli
@@ -0,0 +1,11 @@
+(*
+ * Copyright (c) 2019-present, Facebook, Inc.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *)
+open! IStd
+
+type target = Procname of Typ.Procname.t | File of SourceFile.t
+
+val schedule : SourceFile.t list -> target Tasks.task_generator
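The 'a Tasks.task_generator record that these functions build and chain is defined in Tasks (not part of this diff); from its use above it has fields n_tasks, is_empty and next, and next is handed the target a worker has just completed (None on the first request). That feedback is what lets bottom_up remove a finished procedure from the call graph before handing out the next leaf. A self-contained toy sketch of that protocol, in plain OCaml with hypothetical names, not taken from Infer:

(* Toy model of the generator protocol assumed above; the type and driver are hypothetical. *)
type 'a gen = {n_tasks: int; is_empty: unit -> bool; next: 'a option -> 'a option}

(* A single-worker driver: perform one task, then report it back when asking for the next,
   so that a stateful generator (like [bottom_up]) can update its bookkeeping. *)
let run_sequentially (gen : 'a gen) ~(do_task : 'a -> unit) =
  let rec loop last_completed =
    match gen.next last_completed with
    | None ->
        ()
    | Some task ->
        do_task task ;
        loop (Some task)
  in
  loop None
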
diff --git a/infer/src/backend/callbacks.ml b/infer/src/backend/callbacks.ml
index 48f46dbf8..5d0d9a5b6 100644
--- a/infer/src/backend/callbacks.ml
+++ b/infer/src/backend/callbacks.ml
@@ -140,23 +140,26 @@ let create_perf_stats_report source_file =
   PerfStats.get_reporter (PerfStats.Backend source_file) ()
 
 
-(** Invoke all procedure and cluster callbacks on a given environment. *)
-let analyze_file (exe_env : Exe_env.t) source_file =
+let analyze_procedures exe_env procs_to_analyze source_file_opt =
   let saved_language = !Language.curr_language in
+  Option.iter source_file_opt ~f:(fun source_file ->
+      if Config.dump_duplicate_symbols then dump_duplicate_procs source_file procs_to_analyze ) ;
   let analyze_ondemand summary proc_desc = iterate_procedure_callbacks exe_env summary proc_desc in
-  (* Invoke procedure callbacks using on-demand analysis schedulling *)
   Ondemand.set_callbacks {Ondemand.exe_env; analyze_ondemand} ;
-  let procs_to_analyze =
-    (* analyze all the currently defined procedures *)
-    SourceFiles.proc_names_of_source source_file
-  in
-  if Config.dump_duplicate_symbols then dump_duplicate_procs source_file procs_to_analyze ;
   let analyze_proc_name pname = ignore (Ondemand.analyze_proc_name pname : Summary.t option) in
   List.iter ~f:analyze_proc_name procs_to_analyze ;
-  (* Invoke cluster callbacks. *)
-  iterate_cluster_callbacks procs_to_analyze exe_env source_file ;
-  (* Perf logging needs to remain at the end - after analysis work is complete *)
-  create_perf_stats_report source_file ;
-  (* Unregister callbacks *)
+  Option.iter source_file_opt ~f:(fun source_file ->
+      iterate_cluster_callbacks procs_to_analyze exe_env source_file ;
+      create_perf_stats_report source_file ) ;
   Ondemand.unset_callbacks () ;
   Language.curr_language := saved_language
+
+
+(** Invoke all procedure and cluster callbacks on a given environment. *)
+let analyze_file (exe_env : Exe_env.t) source_file =
+  let procs_to_analyze = SourceFiles.proc_names_of_source source_file in
+  analyze_procedures exe_env procs_to_analyze (Some source_file)
+
+
+(** Invoke procedure callbacks on a given environment. *)
+let analyze_proc_name (exe_env : Exe_env.t) proc_name = analyze_procedures exe_env [proc_name] None
diff --git a/infer/src/backend/callbacks.mli b/infer/src/backend/callbacks.mli
index 0e20698ec..3e12371bb 100644
--- a/infer/src/backend/callbacks.mli
+++ b/infer/src/backend/callbacks.mli
@@ -38,3 +38,6 @@ val register_cluster_callback : name:string -> Language.t -> cluster_callback_t
 
 val analyze_file : Exe_env.t -> SourceFile.t -> unit
 (** Invoke all the registered callbacks on the given file. *)
+
+val analyze_proc_name : Exe_env.t -> Typ.Procname.t -> unit
+(** Invoke all the registered callbacks on the given procedure. *)
diff --git a/infer/src/base/Config.ml b/infer/src/base/Config.ml
index fa5e50ca3..5b8bda41c 100644
--- a/infer/src/base/Config.ml
+++ b/infer/src/base/Config.ml
@@ -866,6 +866,10 @@ and buck_targets_blacklist =
     ~meta:"regex" "Skip capture of buck targets matched by the specified regular expression."
 
 
+and call_graph_schedule =
+  CLOpt.mk_bool ~long:"call-graph-schedule" ~default:false "use call graph for scheduling analysis"
+
+
 and capture =
   CLOpt.mk_bool ~long:"capture" ~default:true
     "capture and translate source files into infer's intermediate language for analysis"
@@ -2611,6 +2615,8 @@ and buck_targets_blacklist = !buck_targets_blacklist
 
 and bufferoverrun = !bufferoverrun
 
+and call_graph_schedule = !call_graph_schedule
+
 and capture =
   (* take `--clang-frontend-action` as the source of truth as long as that option exists *)
   match !clang_frontend_action with
diff --git a/infer/src/base/Config.mli b/infer/src/base/Config.mli
index 6e516df1f..4aa1d1f9e 100644
--- a/infer/src/base/Config.mli
+++ b/infer/src/base/Config.mli
@@ -261,6 +261,8 @@ val buck_targets_blacklist : string list
 
 val bufferoverrun : bool
 
+val call_graph_schedule : bool
+
 val capture : bool
 
 val capture_blacklist : string option
diff --git a/infer/tests/build_systems/objc_missing_fld/issues.exp b/infer/tests/build_systems/objc_missing_fld/issues.exp
index 4fce68946..d4e9674f9 100644
--- a/infer/tests/build_systems/objc_missing_fld/issues.exp
+++ b/infer/tests/build_systems/objc_missing_fld/issues.exp
@@ -1,3 +1,2 @@
 build_systems/codetoanalyze/objc_missing_fld/A.m, badOnlyOneNDA, 5, NULL_DEREFERENCE, no_bucket, ERROR, [start of procedure badOnlyOneNDA(),start of procedure predA(),start of procedure implOnlyFn:,return from a call to A::implOnlyFn:,Executing synthesized getter delegate,Condition is true,return from a call to predA,Taking false branch]
-build_systems/codetoanalyze/objc_missing_fld/B.m, badOnlyOneNDB, 3, NULL_DEREFERENCE, no_bucket, ERROR, [start of procedure badOnlyOneNDB(),Taking true branch]
-build_systems/codetoanalyze/objc_missing_fld/B.m, badOnlyOneNDB, 5, NULL_DEREFERENCE, no_bucket, ERROR, [start of procedure badOnlyOneNDB(),Taking false branch]
+build_systems/codetoanalyze/objc_missing_fld/B.m, badOnlyOneNDB, 5, NULL_DEREFERENCE, no_bucket, ERROR, [start of procedure badOnlyOneNDB(),start of procedure predB(),start of procedure implOnlyFn:,return from a call to A::implOnlyFn:,Executing synthesized getter delegate,Condition is true,return from a call to predB,Taking false branch]
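For context on how the pieces above compose: TaskScheduler.schedule chains the call-graph-driven bottom_up generator with the randomized file-level of_sources generator, so under --call-graph-schedule the file tasks only start once the bottom-up generator has reported itself empty. A compressed, self-contained toy of that ordering, in plain OCaml with strings standing in for targets (not Infer code):

(* Toy re-statement of the chaining logic above; everything here is illustrative. *)
type 'a gen = {n_tasks: int; is_empty: unit -> bool; next: 'a option -> 'a option}

let gen_of_list items =
  let remaining = ref items in
  { n_tasks= List.length items
  ; is_empty= (fun () -> !remaining = [])
  ; next= (fun _ -> match !remaining with [] -> None | x :: xs -> remaining := xs ; Some x) }

let chain gen1 gen2 =
  let gen1_done = ref false in
  let gen1_is_empty () =
    gen1_done := !gen1_done || gen1.is_empty () ;
    !gen1_done
  in
  { n_tasks= gen1.n_tasks + gen2.n_tasks
  ; is_empty= (fun () -> gen1_is_empty () && gen2.is_empty ())
  ; next= (fun x -> if gen1_is_empty () then gen2.next x else gen1.next x) }

let () =
  let g = chain (gen_of_list ["proc1"; "proc2"]) (gen_of_list ["file1.c"; "file2.c"]) in
  (* prints proc1 proc2 file1.c file2.c: all procedure tasks are served before any file task *)
  let rec loop last =
    match g.next last with None -> () | Some t -> print_endline t ; loop (Some t)
  in
  loop None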