[orchestration][refactoring] Introduce stronger contract for file-level callback

Summary: # Current design Infer analysis is currently two staged: 1) proc-level callbacks calculate summary, including writing down the issues if applicable. 2) file-level callbacks (formerly cluster callbacks, see the prev diff) are executed next; they are supposed to emit additional issues that are impossible to emit based on mere proc-context. Currently RacerD and Starvation use file-level callback; in near future we plan to onboard Nullsafe checker as well. # Problem Contract of callback (1) is clear: given a proc and existing summary, the checker updates it and returns a modified summary. This summary later on gets serialized (in-memory + external) and can be consumed by other chechers. Issues written in summary will get reported when analysis is over. In constrast, contract of (2) is wild west: the function returns unit. In practice, what the checkers do is create IssueLog and serialize it to checker-specific directory. Then another part of program (InferPrint.ml) knows about this side effect, reads the error log for checkers and ultimately get it reported together with errors written at stage (1). This is problematic because it is hard to reason about the system and it makes onboarding new checkers to (2) error-prone. # This diff This diff brings (2) on par with (1): now file-level callback has a clear contract: it should be side effect free, and the only responsibility is to fill out and return IssueLog. Additionally, we make the notion of "checker-specific issue directory" an official thing, so the checker only needs to specify the name, everything else will be made automatically by orchestation layer, including cleanup. # Starvation Implementing the new contract is starvation is possible and desirable, but involved: see comment in the code, so we leave it up to the future work to fix that. Reviewed By: ngorogiannis Differential Revision: D20115024 fbshipit-source-id: fb2f9b7e6
5 years ago · 73e78d9e20
parent c2a53a1334
commit 73e78d9e20
11 changed files with 101 additions and 36 deletions
--- a/infer/src/IR/IssueLog.mli
+++ b/infer/src/IR/IssueLog.mli
@ -8,8 +8,7 @@
 open! IStd

 (** Module for storing issues detected outside of per-procedure analysis (and hence not serialized
-    as a part of procedure summary). Normally those are issues detected at the file-level analysis
-    step. Individual checkers are responsible for storing their own list of such issues. *)
+    as a part of procedure summary). *)
 type t

 val empty : t
--- a/infer/src/backend/InferPrint.ml
+++ b/infer/src/backend/InferPrint.ml
@ -1166,8 +1166,8 @@ let pp_summary_and_issues formats_by_report_kind issue_formats =
    !all_issues ;
  if Config.precondition_stats then PreconditionStats.pp_stats () ;
  if Config.summary_stats then SummaryStats.pp_stats () ;
-  List.iter
-    [Config.lint_issues_dir_name; Config.starvation_issues_dir_name; Config.racerd_issues_dir_name]
+  (* Issues that are generated and stored outside of summaries by linter and checkers *)
+  List.iter (Config.lint_issues_dir_name :: FileLevelAnalysisIssueDirs.get_registered_dir_names ())
    ~f:(fun dir_name ->
      IssueLog.load dir_name
      |> IssueLog.iter ~f:(pp_lint_issues filters formats_by_report_kind linereader) ) ;
--- a/infer/src/backend/callbacks.ml
+++ b/infer/src/backend/callbacks.ml
@ -15,12 +15,18 @@ type proc_callback_t = proc_callback_args -> Summary.t
 type file_callback_args =
  {procedures: Procname.t list; source_file: SourceFile.t; exe_env: Exe_env.t}

-type file_callback_t = file_callback_args -> unit
+type file_callback_t = file_callback_args -> IssueLog.t

 type procedure_callback =
  {checker_name: string; dynamic_dispatch: bool; language: Language.t; callback: proc_callback_t}

-type file_callback = {checker_name: string; language: Language.t; callback: file_callback_t}
+type file_callback =
+  { checker_name: string
+  ; language: Language.t
+  ; callback: file_callback_t
+  ; issue_dir: string
+        (** Place for storing issues generated at file-level analysis stage (additionally to ones
+            generated by procedure-level callbacks which are stored in summaries) *) }

 let procedure_callbacks = ref []

@ -32,8 +38,9 @@ let register_procedure_callback ~checker_name ?(dynamic_dispatch = false) langua
    {checker_name; dynamic_dispatch; language; callback} :: !procedure_callbacks


-let register_file_callback ~checker_name language (callback : file_callback_t) =
-  file_callbacks := {checker_name; language; callback} :: !file_callbacks
+let register_file_callback ~checker_name language (callback : file_callback_t) ~issue_dir =
+  file_callbacks := {checker_name; language; callback; issue_dir} :: !file_callbacks ;
+  FileLevelAnalysisIssueDirs.register_dir_name issue_dir


 let iterate_procedure_callbacks exe_env summary =
@ -67,7 +74,7 @@ let iterate_procedure_callbacks exe_env summary =
    !procedure_callbacks


-let iterate_file_callbacks procedures exe_env source_file =
+let iterate_file_callbacks_and_store_issues procedures exe_env source_file =
  if not (List.is_empty !file_callbacks) then
    let environment = {procedures; source_file; exe_env} in
    let language_matches language =
@ -78,8 +85,9 @@ let iterate_file_callbacks procedures exe_env source_file =
          true
    in
    List.iter
-      ~f:(fun {language; callback} ->
+      ~f:(fun {language; callback; issue_dir} ->
        if language_matches language then (
          Language.curr_language := language ;
-          callback environment ) )
+          let issue_log = callback environment in
+          IssueLog.store ~file:source_file ~dir:issue_dir issue_log ) )
      !file_callbacks
--- a/infer/src/backend/callbacks.mli
+++ b/infer/src/backend/callbacks.mli
@ -15,37 +15,43 @@ open! IStd
      writing errors that were detected during per-procedure analysis.
   2) File level callback. Guaranteed to be invoked after all procedure-level callbacks for this files are invoked,
      and generated summaries are serialized.
-      The checker is responsible for doing any additional work, but SHALL NOT modify summaries at this point, including updating
+      The checker should return an issue log containing list of additional errors to be added
+      to ones produced on procedure-level stage.
+      NOTE: The checker SHALL NOT modify summaries at this point, including updating
      summaries' error log.
-      Additional errors can be issued at this stage using capabilities of [IssueLog].
 *)

-type proc_callback_args =
-  {get_procs_in_file: Procname.t -> Procname.t list; summary: Summary.t; exe_env: Exe_env.t}
-
 (** Type of a procedure callback:

    - List of all the procedures the callback will be called on.
    - get_proc_desc to get a proc desc from a proc name
    - Type environment.
    - Procedure for the callback to act on. *)
+type proc_callback_args =
+  {get_procs_in_file: Procname.t -> Procname.t list; summary: Summary.t; exe_env: Exe_env.t}
+
+(* Result is updated summary with all information relevant for the checker (including list of errors found by the checker for this procedure *)
 type proc_callback_t = proc_callback_args -> Summary.t

 type file_callback_args =
  {procedures: Procname.t list; source_file: SourceFile.t; exe_env: Exe_env.t}

-type file_callback_t = file_callback_args -> unit
+(** Result is a list of additional issues found at this stage (complementary to issues generated on
+    per-procedure analysis stage) *)
+type file_callback_t = file_callback_args -> IssueLog.t

 val register_procedure_callback :
  checker_name:string -> ?dynamic_dispatch:bool -> Language.t -> proc_callback_t -> unit
 (** Register a procedure callback (see details above) *)

-val register_file_callback : checker_name:string -> Language.t -> file_callback_t -> unit
-(** Register a file callback (see details above) *)
+val register_file_callback :
+  checker_name:string -> Language.t -> file_callback_t -> issue_dir:string -> unit
+(** Register a file callback (see details above). [issue_dir] must be unique for this type of
+    checker. *)

 val iterate_procedure_callbacks : Exe_env.t -> Summary.t -> Summary.t
 (** Invoke all registered procedure callbacks on the given procedure. *)

-val iterate_file_callbacks : Procname.t list -> Exe_env.t -> SourceFile.t -> unit
-(** Invoke all registered file callbacks on a file. Guaranteed to be called after all
-    procedure-level callbacks are invoked *)
+val iterate_file_callbacks_and_store_issues : Procname.t list -> Exe_env.t -> SourceFile.t -> unit
+(** Invoke all registered file callbacks on a file, and store produced errors in a corresponding
+    directory. Guaranteed to be called after all procedure-level callbacks are invoked *)
--- a/infer/src/backend/ondemand.ml
+++ b/infer/src/backend/ondemand.ml
@ -381,7 +381,7 @@ let analyze_procedures exe_env procs_to_analyze source_file_opt =
  Option.iter source_file_opt ~f:(fun source_file ->
      if Config.dump_duplicate_symbols then dump_duplicate_procs source_file procs_to_analyze ) ;
  Option.iter source_file_opt ~f:(fun source_file ->
-      Callbacks.iterate_file_callbacks procs_to_analyze exe_env source_file ;
+      Callbacks.iterate_file_callbacks_and_store_issues procs_to_analyze exe_env source_file ;
      create_perf_stats_report source_file ) ;
  unset_exe_env () ;
  Language.curr_language := saved_language
--- a/infer/src/base/FileLevelAnalysisIssueDirs.ml
+++ b/infer/src/base/FileLevelAnalysisIssueDirs.ml
@ -0,0 +1,15 @@
+(*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *)
+
+open! IStd
+
+let dir_names = ref []
+
+let get_registered_dir_names () = !dir_names
+
+let register_dir_name name =
+  if not (List.exists !dir_names ~f:(String.equal name)) then dir_names := name :: !dir_names
--- a/infer/src/base/FileLevelAnalysisIssueDirs.mli
+++ b/infer/src/base/FileLevelAnalysisIssueDirs.mli
@ -0,0 +1,16 @@
+(*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *)
+
+open! IStd
+
+val get_registered_dir_names : unit -> string list
+(** Directory names responsible for storing checker-specific issues generated at file-level analysis
+    phase. (Those are additional issues on top of ones stored in summary after procedure analysis
+    phase). *)
+
+val register_dir_name : string -> unit
+(** Add directory name. No-op if was already added *)
--- a/infer/src/base/ResultsDir.ml
+++ b/infer/src/base/ResultsDir.ml
@ -38,9 +38,9 @@ let dirs_to_clean ~cache_capture =
  let open Config in
  let common_list =
    [backend_stats_dir_name; classnames_dir_name; frontend_stats_dir_name; reporting_stats_dir_name]
+    @ FileLevelAnalysisIssueDirs.get_registered_dir_names ()
  in
-  if cache_capture then common_list
-  else captured_dir_name :: racerd_issues_dir_name :: starvation_issues_dir_name :: common_list
+  if cache_capture then common_list else captured_dir_name :: common_list


 let delete_capture_and_results_data () =
--- a/infer/src/checkers/registerCheckers.ml
+++ b/infer/src/checkers/registerCheckers.ml
@ -21,7 +21,7 @@ let () =
 type callback_fun =
  | Procedure of Callbacks.proc_callback_t
  | DynamicDispatch of Callbacks.proc_callback_t
-  | File of Callbacks.file_callback_t
+  | File of {callback: Callbacks.file_callback_t; issue_dir: string}

 type callback = callback_fun * Language.t

@ -101,8 +101,10 @@ let all_checkers =
    ; callbacks=
        [ (Procedure RacerD.analyze_procedure, Language.Clang)
        ; (Procedure RacerD.analyze_procedure, Language.Java)
-        ; (File RacerD.file_analysis, Language.Clang)
-        ; (File RacerD.file_analysis, Language.Java) ] }
+        ; ( File {callback= RacerD.file_analysis; issue_dir= Config.racerd_issues_dir_name}
+          , Language.Clang )
+        ; ( File {callback= RacerD.file_analysis; issue_dir= Config.racerd_issues_dir_name}
+          , Language.Java ) ] }
    (* toy resource analysis to use in the infer lab, see the lab/ directory *)
  ; { name= "resource leak"
    ; active= Config.resource_leak
@ -131,9 +133,11 @@ let all_checkers =
    ; active= Config.starvation
    ; callbacks=
        [ (Procedure Starvation.analyze_procedure, Language.Java)
-        ; (File Starvation.reporting, Language.Java)
+        ; ( File {callback= Starvation.reporting; issue_dir= Config.starvation_issues_dir_name}
+          , Language.Java )
        ; (Procedure Starvation.analyze_procedure, Language.Clang)
-        ; (File Starvation.reporting, Language.Clang) ] }
+        ; ( File {callback= Starvation.reporting; issue_dir= Config.starvation_issues_dir_name}
+          , Language.Clang ) ] }
  ; { name= "purity"
    ; active= Config.purity || Config.loop_hoisting
    ; callbacks=
@ -160,8 +164,8 @@ let register checkers =
      | DynamicDispatch procedure_cb ->
          Callbacks.register_procedure_callback ~checker_name:name ~dynamic_dispatch:true language
            procedure_cb
-      | File file_cb ->
-          Callbacks.register_file_callback ~checker_name:name language file_cb
+      | File {callback; issue_dir} ->
+          Callbacks.register_file_callback ~checker_name:name language callback ~issue_dir
    in
    List.iter ~f:register_callback callbacks
  in
--- a/infer/src/concurrency/RacerD.ml
+++ b/infer/src/concurrency/RacerD.ml
@ -1151,10 +1151,9 @@ let aggregate_by_class exe_env procedures =
 (* Gathers results by analyzing all the methods in a file, then
   post-processes the results to check an (approximation of) thread
   safety *)
-let file_analysis ({procedures; source_file; exe_env} : Callbacks.file_callback_args) =
+let file_analysis ({procedures; exe_env} : Callbacks.file_callback_args) =
  let class_map = aggregate_by_class exe_env procedures in
  Typ.Name.Map.fold
    (fun classname methods issue_log ->
      make_results_table exe_env methods |> report_unsafe_accesses ~issue_log classname )
    class_map IssueLog.empty
-  |> IssueLog.store ~dir:Config.racerd_issues_dir_name ~file:source_file
--- a/infer/src/concurrency/starvation.ml
+++ b/infer/src/concurrency/starvation.ml
@ -729,7 +729,7 @@ let report_on_pair tenv summary (pair : Domain.CriticalPair.t) report_map =


 let reporting {Callbacks.procedures; exe_env} =
-  if Config.starvation_whole_program then ()
+  if Config.starvation_whole_program then IssueLog.empty
  else
    let report_on_summary tenv summary report_map (payload : Domain.summary) =
      Domain.CriticalPairs.fold (report_on_pair tenv summary) payload.critical_pairs report_map
@ -744,7 +744,25 @@ let reporting {Callbacks.procedures; exe_env} =
               |> Option.fold ~init:report_map ~f:(report_on_summary tenv summary)
             else report_map )
    in
-    List.fold procedures ~init:ReportMap.empty ~f:report_procedure |> ReportMap.store
+    List.fold procedures ~init:ReportMap.empty ~f:report_procedure |> ReportMap.store ;
+    (* TODO FIXME stop serializing issues inside the checker and respect callback's contract.
+       Contract of a file-level callback is that it needs to return list of issues for that file.
+       Those will be stored in corresponding directory later on.
+       Instead, starvation checker returns an empty log, but takes care of logging by itself.
+       This is for historical reasons.
+       In case the issue corresponds to two files A and B, the analysis will be ran twice on A and B.
+       Which of it should report, and where?
+
+       Desired behavior:
+       - if duplication is allowed, checker for A reports on A and checker for B reports on B.
+       - if duplication is disallowed, checker for A reports on A, checker B reports nothing, and
+       which one should report and one should not is decided based on some symmetry-breaking condition.
+
+       In both cases, desired behavior allows to preserve contract of the callback.
+       Current behavior is not like that: each of analyzers for A and B can report to both A and B,
+       which is not optimal and is (such an irony!) prone to a race condition.
+    *)
+    IssueLog.empty


 (* given a scheduled-work item, read the summary of the scheduled method from the disk