[orchestration][refactoring] Introduce stronger contract for file-level callback

Summary:
# Current design

Infer analysis is currently two staged:
1) proc-level callbacks calculate summary, including writing down the
issues if applicable.
2) file-level callbacks (formerly cluster callbacks, see the prev diff) are executed next; they are supposed to emit
additional issues that are impossible to emit based on mere
proc-context.

Currently RacerD and Starvation use file-level callback; in near future
we plan to onboard Nullsafe checker as well.

# Problem
Contract of callback (1) is clear: given a proc and existing
summary, the checker updates it and returns a modified summary. This
summary later on gets serialized (in-memory + external) and can be consumed by
other chechers. Issues written in summary will get reported when
analysis is over.

In constrast, contract of (2) is wild west: the function returns unit.
In practice, what the checkers do is create IssueLog and serialize it to
checker-specific directory.
Then another part of program (InferPrint.ml) knows about this side
effect, reads the error log for checkers and ultimately get it reported
together with errors written at stage (1).

This is problematic because it is hard to reason about the system and it
makes onboarding new checkers to (2) error-prone.

# This diff
This diff brings (2) on par with (1): now file-level callback has a
clear contract: it should be side effect free, and the only
responsibility is to fill out and return IssueLog.

Additionally, we make the notion of "checker-specific issue directory"
an official thing, so the checker only needs to specify the name,
everything else will be made automatically by orchestation layer,
including cleanup.

# Starvation
Implementing the new contract is starvation is possible and desirable, but involved: see comment
in the code, so we leave it up to the future work to fix that.

Reviewed By: ngorogiannis

Differential Revision: D20115024

fbshipit-source-id: fb2f9b7e6
master
Mitya Lyubarskiy 5 years ago committed by Facebook Github Bot
parent c2a53a1334
commit 73e78d9e20

@ -8,8 +8,7 @@
open! IStd
(** Module for storing issues detected outside of per-procedure analysis (and hence not serialized
as a part of procedure summary). Normally those are issues detected at the file-level analysis
step. Individual checkers are responsible for storing their own list of such issues. *)
as a part of procedure summary). *)
type t
val empty : t

@ -1166,8 +1166,8 @@ let pp_summary_and_issues formats_by_report_kind issue_formats =
!all_issues ;
if Config.precondition_stats then PreconditionStats.pp_stats () ;
if Config.summary_stats then SummaryStats.pp_stats () ;
List.iter
[Config.lint_issues_dir_name; Config.starvation_issues_dir_name; Config.racerd_issues_dir_name]
(* Issues that are generated and stored outside of summaries by linter and checkers *)
List.iter (Config.lint_issues_dir_name :: FileLevelAnalysisIssueDirs.get_registered_dir_names ())
~f:(fun dir_name ->
IssueLog.load dir_name
|> IssueLog.iter ~f:(pp_lint_issues filters formats_by_report_kind linereader) ) ;

@ -15,12 +15,18 @@ type proc_callback_t = proc_callback_args -> Summary.t
type file_callback_args =
{procedures: Procname.t list; source_file: SourceFile.t; exe_env: Exe_env.t}
type file_callback_t = file_callback_args -> unit
type file_callback_t = file_callback_args -> IssueLog.t
type procedure_callback =
{checker_name: string; dynamic_dispatch: bool; language: Language.t; callback: proc_callback_t}
type file_callback = {checker_name: string; language: Language.t; callback: file_callback_t}
type file_callback =
{ checker_name: string
; language: Language.t
; callback: file_callback_t
; issue_dir: string
(** Place for storing issues generated at file-level analysis stage (additionally to ones
generated by procedure-level callbacks which are stored in summaries) *) }
let procedure_callbacks = ref []
@ -32,8 +38,9 @@ let register_procedure_callback ~checker_name ?(dynamic_dispatch = false) langua
{checker_name; dynamic_dispatch; language; callback} :: !procedure_callbacks
let register_file_callback ~checker_name language (callback : file_callback_t) =
file_callbacks := {checker_name; language; callback} :: !file_callbacks
let register_file_callback ~checker_name language (callback : file_callback_t) ~issue_dir =
file_callbacks := {checker_name; language; callback; issue_dir} :: !file_callbacks ;
FileLevelAnalysisIssueDirs.register_dir_name issue_dir
let iterate_procedure_callbacks exe_env summary =
@ -67,7 +74,7 @@ let iterate_procedure_callbacks exe_env summary =
!procedure_callbacks
let iterate_file_callbacks procedures exe_env source_file =
let iterate_file_callbacks_and_store_issues procedures exe_env source_file =
if not (List.is_empty !file_callbacks) then
let environment = {procedures; source_file; exe_env} in
let language_matches language =
@ -78,8 +85,9 @@ let iterate_file_callbacks procedures exe_env source_file =
true
in
List.iter
~f:(fun {language; callback} ->
~f:(fun {language; callback; issue_dir} ->
if language_matches language then (
Language.curr_language := language ;
callback environment ) )
let issue_log = callback environment in
IssueLog.store ~file:source_file ~dir:issue_dir issue_log ) )
!file_callbacks

@ -15,37 +15,43 @@ open! IStd
writing errors that were detected during per-procedure analysis.
2) File level callback. Guaranteed to be invoked after all procedure-level callbacks for this files are invoked,
and generated summaries are serialized.
The checker is responsible for doing any additional work, but SHALL NOT modify summaries at this point, including updating
The checker should return an issue log containing list of additional errors to be added
to ones produced on procedure-level stage.
NOTE: The checker SHALL NOT modify summaries at this point, including updating
summaries' error log.
Additional errors can be issued at this stage using capabilities of [IssueLog].
*)
type proc_callback_args =
{get_procs_in_file: Procname.t -> Procname.t list; summary: Summary.t; exe_env: Exe_env.t}
(** Type of a procedure callback:
- List of all the procedures the callback will be called on.
- get_proc_desc to get a proc desc from a proc name
- Type environment.
- Procedure for the callback to act on. *)
type proc_callback_args =
{get_procs_in_file: Procname.t -> Procname.t list; summary: Summary.t; exe_env: Exe_env.t}
(* Result is updated summary with all information relevant for the checker (including list of errors found by the checker for this procedure *)
type proc_callback_t = proc_callback_args -> Summary.t
type file_callback_args =
{procedures: Procname.t list; source_file: SourceFile.t; exe_env: Exe_env.t}
type file_callback_t = file_callback_args -> unit
(** Result is a list of additional issues found at this stage (complementary to issues generated on
per-procedure analysis stage) *)
type file_callback_t = file_callback_args -> IssueLog.t
val register_procedure_callback :
checker_name:string -> ?dynamic_dispatch:bool -> Language.t -> proc_callback_t -> unit
(** Register a procedure callback (see details above) *)
val register_file_callback : checker_name:string -> Language.t -> file_callback_t -> unit
(** Register a file callback (see details above) *)
val register_file_callback :
checker_name:string -> Language.t -> file_callback_t -> issue_dir:string -> unit
(** Register a file callback (see details above). [issue_dir] must be unique for this type of
checker. *)
val iterate_procedure_callbacks : Exe_env.t -> Summary.t -> Summary.t
(** Invoke all registered procedure callbacks on the given procedure. *)
val iterate_file_callbacks : Procname.t list -> Exe_env.t -> SourceFile.t -> unit
(** Invoke all registered file callbacks on a file. Guaranteed to be called after all
procedure-level callbacks are invoked *)
val iterate_file_callbacks_and_store_issues : Procname.t list -> Exe_env.t -> SourceFile.t -> unit
(** Invoke all registered file callbacks on a file, and store produced errors in a corresponding
directory. Guaranteed to be called after all procedure-level callbacks are invoked *)

@ -381,7 +381,7 @@ let analyze_procedures exe_env procs_to_analyze source_file_opt =
Option.iter source_file_opt ~f:(fun source_file ->
if Config.dump_duplicate_symbols then dump_duplicate_procs source_file procs_to_analyze ) ;
Option.iter source_file_opt ~f:(fun source_file ->
Callbacks.iterate_file_callbacks procs_to_analyze exe_env source_file ;
Callbacks.iterate_file_callbacks_and_store_issues procs_to_analyze exe_env source_file ;
create_perf_stats_report source_file ) ;
unset_exe_env () ;
Language.curr_language := saved_language

@ -0,0 +1,15 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
let dir_names = ref []
let get_registered_dir_names () = !dir_names
let register_dir_name name =
if not (List.exists !dir_names ~f:(String.equal name)) then dir_names := name :: !dir_names

@ -0,0 +1,16 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
val get_registered_dir_names : unit -> string list
(** Directory names responsible for storing checker-specific issues generated at file-level analysis
phase. (Those are additional issues on top of ones stored in summary after procedure analysis
phase). *)
val register_dir_name : string -> unit
(** Add directory name. No-op if was already added *)

@ -38,9 +38,9 @@ let dirs_to_clean ~cache_capture =
let open Config in
let common_list =
[backend_stats_dir_name; classnames_dir_name; frontend_stats_dir_name; reporting_stats_dir_name]
@ FileLevelAnalysisIssueDirs.get_registered_dir_names ()
in
if cache_capture then common_list
else captured_dir_name :: racerd_issues_dir_name :: starvation_issues_dir_name :: common_list
if cache_capture then common_list else captured_dir_name :: common_list
let delete_capture_and_results_data () =

@ -21,7 +21,7 @@ let () =
type callback_fun =
| Procedure of Callbacks.proc_callback_t
| DynamicDispatch of Callbacks.proc_callback_t
| File of Callbacks.file_callback_t
| File of {callback: Callbacks.file_callback_t; issue_dir: string}
type callback = callback_fun * Language.t
@ -101,8 +101,10 @@ let all_checkers =
; callbacks=
[ (Procedure RacerD.analyze_procedure, Language.Clang)
; (Procedure RacerD.analyze_procedure, Language.Java)
; (File RacerD.file_analysis, Language.Clang)
; (File RacerD.file_analysis, Language.Java) ] }
; ( File {callback= RacerD.file_analysis; issue_dir= Config.racerd_issues_dir_name}
, Language.Clang )
; ( File {callback= RacerD.file_analysis; issue_dir= Config.racerd_issues_dir_name}
, Language.Java ) ] }
(* toy resource analysis to use in the infer lab, see the lab/ directory *)
; { name= "resource leak"
; active= Config.resource_leak
@ -131,9 +133,11 @@ let all_checkers =
; active= Config.starvation
; callbacks=
[ (Procedure Starvation.analyze_procedure, Language.Java)
; (File Starvation.reporting, Language.Java)
; ( File {callback= Starvation.reporting; issue_dir= Config.starvation_issues_dir_name}
, Language.Java )
; (Procedure Starvation.analyze_procedure, Language.Clang)
; (File Starvation.reporting, Language.Clang) ] }
; ( File {callback= Starvation.reporting; issue_dir= Config.starvation_issues_dir_name}
, Language.Clang ) ] }
; { name= "purity"
; active= Config.purity || Config.loop_hoisting
; callbacks=
@ -160,8 +164,8 @@ let register checkers =
| DynamicDispatch procedure_cb ->
Callbacks.register_procedure_callback ~checker_name:name ~dynamic_dispatch:true language
procedure_cb
| File file_cb ->
Callbacks.register_file_callback ~checker_name:name language file_cb
| File {callback; issue_dir} ->
Callbacks.register_file_callback ~checker_name:name language callback ~issue_dir
in
List.iter ~f:register_callback callbacks
in

@ -1151,10 +1151,9 @@ let aggregate_by_class exe_env procedures =
(* Gathers results by analyzing all the methods in a file, then
post-processes the results to check an (approximation of) thread
safety *)
let file_analysis ({procedures; source_file; exe_env} : Callbacks.file_callback_args) =
let file_analysis ({procedures; exe_env} : Callbacks.file_callback_args) =
let class_map = aggregate_by_class exe_env procedures in
Typ.Name.Map.fold
(fun classname methods issue_log ->
make_results_table exe_env methods |> report_unsafe_accesses ~issue_log classname )
class_map IssueLog.empty
|> IssueLog.store ~dir:Config.racerd_issues_dir_name ~file:source_file

@ -729,7 +729,7 @@ let report_on_pair tenv summary (pair : Domain.CriticalPair.t) report_map =
let reporting {Callbacks.procedures; exe_env} =
if Config.starvation_whole_program then ()
if Config.starvation_whole_program then IssueLog.empty
else
let report_on_summary tenv summary report_map (payload : Domain.summary) =
Domain.CriticalPairs.fold (report_on_pair tenv summary) payload.critical_pairs report_map
@ -744,7 +744,25 @@ let reporting {Callbacks.procedures; exe_env} =
|> Option.fold ~init:report_map ~f:(report_on_summary tenv summary)
else report_map )
in
List.fold procedures ~init:ReportMap.empty ~f:report_procedure |> ReportMap.store
List.fold procedures ~init:ReportMap.empty ~f:report_procedure |> ReportMap.store ;
(* TODO FIXME stop serializing issues inside the checker and respect callback's contract.
Contract of a file-level callback is that it needs to return list of issues for that file.
Those will be stored in corresponding directory later on.
Instead, starvation checker returns an empty log, but takes care of logging by itself.
This is for historical reasons.
In case the issue corresponds to two files A and B, the analysis will be ran twice on A and B.
Which of it should report, and where?
Desired behavior:
- if duplication is allowed, checker for A reports on A and checker for B reports on B.
- if duplication is disallowed, checker for A reports on A, checker B reports nothing, and
which one should report and one should not is decided based on some symmetry-breaking condition.
In both cases, desired behavior allows to preserve contract of the callback.
Current behavior is not like that: each of analyzers for A and B can report to both A and B,
which is not optimal and is (such an irony!) prone to a race condition.
*)
IssueLog.empty
(* given a scheduled-work item, read the summary of the scheduled method from the disk

Loading…
Cancel
Save