From 73e78d9e200456b12a6d155ab269068ae7d442c9 Mon Sep 17 00:00:00 2001 From: Mitya Lyubarskiy Date: Wed, 26 Feb 2020 06:35:34 -0800 Subject: [PATCH] [orchestration][refactoring] Introduce stronger contract for file-level callback Summary: # Current design Infer analysis is currently two staged: 1) proc-level callbacks calculate summary, including writing down the issues if applicable. 2) file-level callbacks (formerly cluster callbacks, see the prev diff) are executed next; they are supposed to emit additional issues that are impossible to emit based on mere proc-context. Currently RacerD and Starvation use file-level callback; in near future we plan to onboard Nullsafe checker as well. # Problem Contract of callback (1) is clear: given a proc and existing summary, the checker updates it and returns a modified summary. This summary later on gets serialized (in-memory + external) and can be consumed by other chechers. Issues written in summary will get reported when analysis is over. In constrast, contract of (2) is wild west: the function returns unit. In practice, what the checkers do is create IssueLog and serialize it to checker-specific directory. Then another part of program (InferPrint.ml) knows about this side effect, reads the error log for checkers and ultimately get it reported together with errors written at stage (1). This is problematic because it is hard to reason about the system and it makes onboarding new checkers to (2) error-prone. # This diff This diff brings (2) on par with (1): now file-level callback has a clear contract: it should be side effect free, and the only responsibility is to fill out and return IssueLog. Additionally, we make the notion of "checker-specific issue directory" an official thing, so the checker only needs to specify the name, everything else will be made automatically by orchestation layer, including cleanup. # Starvation Implementing the new contract is starvation is possible and desirable, but involved: see comment in the code, so we leave it up to the future work to fix that. Reviewed By: ngorogiannis Differential Revision: D20115024 fbshipit-source-id: fb2f9b7e6 --- infer/src/IR/IssueLog.mli | 3 +- infer/src/backend/InferPrint.ml | 4 +-- infer/src/backend/callbacks.ml | 22 ++++++++++----- infer/src/backend/callbacks.mli | 28 +++++++++++-------- infer/src/backend/ondemand.ml | 2 +- infer/src/base/FileLevelAnalysisIssueDirs.ml | 15 ++++++++++ infer/src/base/FileLevelAnalysisIssueDirs.mli | 16 +++++++++++ infer/src/base/ResultsDir.ml | 4 +-- infer/src/checkers/registerCheckers.ml | 18 +++++++----- infer/src/concurrency/RacerD.ml | 3 +- infer/src/concurrency/starvation.ml | 22 +++++++++++++-- 11 files changed, 101 insertions(+), 36 deletions(-) create mode 100644 infer/src/base/FileLevelAnalysisIssueDirs.ml create mode 100644 infer/src/base/FileLevelAnalysisIssueDirs.mli diff --git a/infer/src/IR/IssueLog.mli b/infer/src/IR/IssueLog.mli index c82455ec3..2d7a035c6 100644 --- a/infer/src/IR/IssueLog.mli +++ b/infer/src/IR/IssueLog.mli @@ -8,8 +8,7 @@ open! IStd (** Module for storing issues detected outside of per-procedure analysis (and hence not serialized - as a part of procedure summary). Normally those are issues detected at the file-level analysis - step. Individual checkers are responsible for storing their own list of such issues. *) + as a part of procedure summary). *) type t val empty : t diff --git a/infer/src/backend/InferPrint.ml b/infer/src/backend/InferPrint.ml index 42f6b99d0..186e44944 100644 --- a/infer/src/backend/InferPrint.ml +++ b/infer/src/backend/InferPrint.ml @@ -1166,8 +1166,8 @@ let pp_summary_and_issues formats_by_report_kind issue_formats = !all_issues ; if Config.precondition_stats then PreconditionStats.pp_stats () ; if Config.summary_stats then SummaryStats.pp_stats () ; - List.iter - [Config.lint_issues_dir_name; Config.starvation_issues_dir_name; Config.racerd_issues_dir_name] + (* Issues that are generated and stored outside of summaries by linter and checkers *) + List.iter (Config.lint_issues_dir_name :: FileLevelAnalysisIssueDirs.get_registered_dir_names ()) ~f:(fun dir_name -> IssueLog.load dir_name |> IssueLog.iter ~f:(pp_lint_issues filters formats_by_report_kind linereader) ) ; diff --git a/infer/src/backend/callbacks.ml b/infer/src/backend/callbacks.ml index 4ad1c1039..d4caee22b 100644 --- a/infer/src/backend/callbacks.ml +++ b/infer/src/backend/callbacks.ml @@ -15,12 +15,18 @@ type proc_callback_t = proc_callback_args -> Summary.t type file_callback_args = {procedures: Procname.t list; source_file: SourceFile.t; exe_env: Exe_env.t} -type file_callback_t = file_callback_args -> unit +type file_callback_t = file_callback_args -> IssueLog.t type procedure_callback = {checker_name: string; dynamic_dispatch: bool; language: Language.t; callback: proc_callback_t} -type file_callback = {checker_name: string; language: Language.t; callback: file_callback_t} +type file_callback = + { checker_name: string + ; language: Language.t + ; callback: file_callback_t + ; issue_dir: string + (** Place for storing issues generated at file-level analysis stage (additionally to ones + generated by procedure-level callbacks which are stored in summaries) *) } let procedure_callbacks = ref [] @@ -32,8 +38,9 @@ let register_procedure_callback ~checker_name ?(dynamic_dispatch = false) langua {checker_name; dynamic_dispatch; language; callback} :: !procedure_callbacks -let register_file_callback ~checker_name language (callback : file_callback_t) = - file_callbacks := {checker_name; language; callback} :: !file_callbacks +let register_file_callback ~checker_name language (callback : file_callback_t) ~issue_dir = + file_callbacks := {checker_name; language; callback; issue_dir} :: !file_callbacks ; + FileLevelAnalysisIssueDirs.register_dir_name issue_dir let iterate_procedure_callbacks exe_env summary = @@ -67,7 +74,7 @@ let iterate_procedure_callbacks exe_env summary = !procedure_callbacks -let iterate_file_callbacks procedures exe_env source_file = +let iterate_file_callbacks_and_store_issues procedures exe_env source_file = if not (List.is_empty !file_callbacks) then let environment = {procedures; source_file; exe_env} in let language_matches language = @@ -78,8 +85,9 @@ let iterate_file_callbacks procedures exe_env source_file = true in List.iter - ~f:(fun {language; callback} -> + ~f:(fun {language; callback; issue_dir} -> if language_matches language then ( Language.curr_language := language ; - callback environment ) ) + let issue_log = callback environment in + IssueLog.store ~file:source_file ~dir:issue_dir issue_log ) ) !file_callbacks diff --git a/infer/src/backend/callbacks.mli b/infer/src/backend/callbacks.mli index 06ee08c7a..823e165a1 100644 --- a/infer/src/backend/callbacks.mli +++ b/infer/src/backend/callbacks.mli @@ -15,37 +15,43 @@ open! IStd writing errors that were detected during per-procedure analysis. 2) File level callback. Guaranteed to be invoked after all procedure-level callbacks for this files are invoked, and generated summaries are serialized. - The checker is responsible for doing any additional work, but SHALL NOT modify summaries at this point, including updating + The checker should return an issue log containing list of additional errors to be added + to ones produced on procedure-level stage. + NOTE: The checker SHALL NOT modify summaries at this point, including updating summaries' error log. - Additional errors can be issued at this stage using capabilities of [IssueLog]. *) -type proc_callback_args = - {get_procs_in_file: Procname.t -> Procname.t list; summary: Summary.t; exe_env: Exe_env.t} - (** Type of a procedure callback: - List of all the procedures the callback will be called on. - get_proc_desc to get a proc desc from a proc name - Type environment. - Procedure for the callback to act on. *) +type proc_callback_args = + {get_procs_in_file: Procname.t -> Procname.t list; summary: Summary.t; exe_env: Exe_env.t} + +(* Result is updated summary with all information relevant for the checker (including list of errors found by the checker for this procedure *) type proc_callback_t = proc_callback_args -> Summary.t type file_callback_args = {procedures: Procname.t list; source_file: SourceFile.t; exe_env: Exe_env.t} -type file_callback_t = file_callback_args -> unit +(** Result is a list of additional issues found at this stage (complementary to issues generated on + per-procedure analysis stage) *) +type file_callback_t = file_callback_args -> IssueLog.t val register_procedure_callback : checker_name:string -> ?dynamic_dispatch:bool -> Language.t -> proc_callback_t -> unit (** Register a procedure callback (see details above) *) -val register_file_callback : checker_name:string -> Language.t -> file_callback_t -> unit -(** Register a file callback (see details above) *) +val register_file_callback : + checker_name:string -> Language.t -> file_callback_t -> issue_dir:string -> unit +(** Register a file callback (see details above). [issue_dir] must be unique for this type of + checker. *) val iterate_procedure_callbacks : Exe_env.t -> Summary.t -> Summary.t (** Invoke all registered procedure callbacks on the given procedure. *) -val iterate_file_callbacks : Procname.t list -> Exe_env.t -> SourceFile.t -> unit -(** Invoke all registered file callbacks on a file. Guaranteed to be called after all - procedure-level callbacks are invoked *) +val iterate_file_callbacks_and_store_issues : Procname.t list -> Exe_env.t -> SourceFile.t -> unit +(** Invoke all registered file callbacks on a file, and store produced errors in a corresponding + directory. Guaranteed to be called after all procedure-level callbacks are invoked *) diff --git a/infer/src/backend/ondemand.ml b/infer/src/backend/ondemand.ml index d86c31f52..4bb44fbc9 100644 --- a/infer/src/backend/ondemand.ml +++ b/infer/src/backend/ondemand.ml @@ -381,7 +381,7 @@ let analyze_procedures exe_env procs_to_analyze source_file_opt = Option.iter source_file_opt ~f:(fun source_file -> if Config.dump_duplicate_symbols then dump_duplicate_procs source_file procs_to_analyze ) ; Option.iter source_file_opt ~f:(fun source_file -> - Callbacks.iterate_file_callbacks procs_to_analyze exe_env source_file ; + Callbacks.iterate_file_callbacks_and_store_issues procs_to_analyze exe_env source_file ; create_perf_stats_report source_file ) ; unset_exe_env () ; Language.curr_language := saved_language diff --git a/infer/src/base/FileLevelAnalysisIssueDirs.ml b/infer/src/base/FileLevelAnalysisIssueDirs.ml new file mode 100644 index 000000000..e814269bf --- /dev/null +++ b/infer/src/base/FileLevelAnalysisIssueDirs.ml @@ -0,0 +1,15 @@ +(* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +open! IStd + +let dir_names = ref [] + +let get_registered_dir_names () = !dir_names + +let register_dir_name name = + if not (List.exists !dir_names ~f:(String.equal name)) then dir_names := name :: !dir_names diff --git a/infer/src/base/FileLevelAnalysisIssueDirs.mli b/infer/src/base/FileLevelAnalysisIssueDirs.mli new file mode 100644 index 000000000..f6bbc2b96 --- /dev/null +++ b/infer/src/base/FileLevelAnalysisIssueDirs.mli @@ -0,0 +1,16 @@ +(* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +open! IStd + +val get_registered_dir_names : unit -> string list +(** Directory names responsible for storing checker-specific issues generated at file-level analysis + phase. (Those are additional issues on top of ones stored in summary after procedure analysis + phase). *) + +val register_dir_name : string -> unit +(** Add directory name. No-op if was already added *) diff --git a/infer/src/base/ResultsDir.ml b/infer/src/base/ResultsDir.ml index fee7de2b3..6198a70ba 100644 --- a/infer/src/base/ResultsDir.ml +++ b/infer/src/base/ResultsDir.ml @@ -38,9 +38,9 @@ let dirs_to_clean ~cache_capture = let open Config in let common_list = [backend_stats_dir_name; classnames_dir_name; frontend_stats_dir_name; reporting_stats_dir_name] + @ FileLevelAnalysisIssueDirs.get_registered_dir_names () in - if cache_capture then common_list - else captured_dir_name :: racerd_issues_dir_name :: starvation_issues_dir_name :: common_list + if cache_capture then common_list else captured_dir_name :: common_list let delete_capture_and_results_data () = diff --git a/infer/src/checkers/registerCheckers.ml b/infer/src/checkers/registerCheckers.ml index 50d84c631..bb19c3e48 100644 --- a/infer/src/checkers/registerCheckers.ml +++ b/infer/src/checkers/registerCheckers.ml @@ -21,7 +21,7 @@ let () = type callback_fun = | Procedure of Callbacks.proc_callback_t | DynamicDispatch of Callbacks.proc_callback_t - | File of Callbacks.file_callback_t + | File of {callback: Callbacks.file_callback_t; issue_dir: string} type callback = callback_fun * Language.t @@ -101,8 +101,10 @@ let all_checkers = ; callbacks= [ (Procedure RacerD.analyze_procedure, Language.Clang) ; (Procedure RacerD.analyze_procedure, Language.Java) - ; (File RacerD.file_analysis, Language.Clang) - ; (File RacerD.file_analysis, Language.Java) ] } + ; ( File {callback= RacerD.file_analysis; issue_dir= Config.racerd_issues_dir_name} + , Language.Clang ) + ; ( File {callback= RacerD.file_analysis; issue_dir= Config.racerd_issues_dir_name} + , Language.Java ) ] } (* toy resource analysis to use in the infer lab, see the lab/ directory *) ; { name= "resource leak" ; active= Config.resource_leak @@ -131,9 +133,11 @@ let all_checkers = ; active= Config.starvation ; callbacks= [ (Procedure Starvation.analyze_procedure, Language.Java) - ; (File Starvation.reporting, Language.Java) + ; ( File {callback= Starvation.reporting; issue_dir= Config.starvation_issues_dir_name} + , Language.Java ) ; (Procedure Starvation.analyze_procedure, Language.Clang) - ; (File Starvation.reporting, Language.Clang) ] } + ; ( File {callback= Starvation.reporting; issue_dir= Config.starvation_issues_dir_name} + , Language.Clang ) ] } ; { name= "purity" ; active= Config.purity || Config.loop_hoisting ; callbacks= @@ -160,8 +164,8 @@ let register checkers = | DynamicDispatch procedure_cb -> Callbacks.register_procedure_callback ~checker_name:name ~dynamic_dispatch:true language procedure_cb - | File file_cb -> - Callbacks.register_file_callback ~checker_name:name language file_cb + | File {callback; issue_dir} -> + Callbacks.register_file_callback ~checker_name:name language callback ~issue_dir in List.iter ~f:register_callback callbacks in diff --git a/infer/src/concurrency/RacerD.ml b/infer/src/concurrency/RacerD.ml index 1880b0e35..2066fb1ba 100644 --- a/infer/src/concurrency/RacerD.ml +++ b/infer/src/concurrency/RacerD.ml @@ -1151,10 +1151,9 @@ let aggregate_by_class exe_env procedures = (* Gathers results by analyzing all the methods in a file, then post-processes the results to check an (approximation of) thread safety *) -let file_analysis ({procedures; source_file; exe_env} : Callbacks.file_callback_args) = +let file_analysis ({procedures; exe_env} : Callbacks.file_callback_args) = let class_map = aggregate_by_class exe_env procedures in Typ.Name.Map.fold (fun classname methods issue_log -> make_results_table exe_env methods |> report_unsafe_accesses ~issue_log classname ) class_map IssueLog.empty - |> IssueLog.store ~dir:Config.racerd_issues_dir_name ~file:source_file diff --git a/infer/src/concurrency/starvation.ml b/infer/src/concurrency/starvation.ml index c3abeb183..545521814 100644 --- a/infer/src/concurrency/starvation.ml +++ b/infer/src/concurrency/starvation.ml @@ -729,7 +729,7 @@ let report_on_pair tenv summary (pair : Domain.CriticalPair.t) report_map = let reporting {Callbacks.procedures; exe_env} = - if Config.starvation_whole_program then () + if Config.starvation_whole_program then IssueLog.empty else let report_on_summary tenv summary report_map (payload : Domain.summary) = Domain.CriticalPairs.fold (report_on_pair tenv summary) payload.critical_pairs report_map @@ -744,7 +744,25 @@ let reporting {Callbacks.procedures; exe_env} = |> Option.fold ~init:report_map ~f:(report_on_summary tenv summary) else report_map ) in - List.fold procedures ~init:ReportMap.empty ~f:report_procedure |> ReportMap.store + List.fold procedures ~init:ReportMap.empty ~f:report_procedure |> ReportMap.store ; + (* TODO FIXME stop serializing issues inside the checker and respect callback's contract. + Contract of a file-level callback is that it needs to return list of issues for that file. + Those will be stored in corresponding directory later on. + Instead, starvation checker returns an empty log, but takes care of logging by itself. + This is for historical reasons. + In case the issue corresponds to two files A and B, the analysis will be ran twice on A and B. + Which of it should report, and where? + + Desired behavior: + - if duplication is allowed, checker for A reports on A and checker for B reports on B. + - if duplication is disallowed, checker for A reports on A, checker B reports nothing, and + which one should report and one should not is decided based on some symmetry-breaking condition. + + In both cases, desired behavior allows to preserve contract of the callback. + Current behavior is not like that: each of analyzers for A and B can report to both A and B, + which is not optimal and is (such an irony!) prone to a race condition. + *) + IssueLog.empty (* given a scheduled-work item, read the summary of the scheduled method from the disk