[racerd] Delay issue deduplication to differential reporting

Summary:
Deduping issues when generating a single report and then diffing the
reports can lead to introduced issues being considered duplicates of
existing issues.

Reviewed By: sblackshear

Differential Revision: D6414673

fbshipit-source-id: bba81fd
master
Josh Berdine 7 years ago committed by Facebook Github Bot
parent 8d0f141974
commit 63183f94f1

@ -85,7 +85,8 @@ type err_data =
; err_class: Exceptions.err_class
; visibility: Exceptions.visibility
; linters_def_file: string option
; doc_url: string option }
; doc_url: string option
; access: string option }
let compare_err_data err_data1 err_data2 = Location.compare err_data1.loc err_data2.loc
@ -233,7 +234,8 @@ let update errlog_old errlog_new =
ErrLogHash.iter (fun err_key l -> ignore (add_issue errlog_old err_key l)) errlog_new
let log_issue err_kind err_log loc (node_id, node_key) session ltr ?linters_def_file ?doc_url exn =
let log_issue err_kind err_log loc (node_id, node_key) session ltr ?linters_def_file ?doc_url
?access exn =
let error = Exceptions.recognize_exception exn in
let err_kind = match error.kind with Some err_kind -> err_kind | _ -> err_kind in
let hide_java_loc_zero =
@ -265,7 +267,8 @@ let log_issue err_kind err_log loc (node_id, node_key) session ltr ?linters_def_
; err_class= error.category
; visibility= error.visibility
; linters_def_file
; doc_url }
; doc_url
; access }
in
let err_key =
{ err_kind

@ -56,8 +56,8 @@ type err_data = private
; err_class: Exceptions.err_class
; visibility: Exceptions.visibility
; linters_def_file: string option
; doc_url: string option
(* url to documentation of the issue type *) }
; doc_url: string option (** url to documentation of the issue type *)
; access: string option }
(** Type of the error log *)
type t [@@deriving compare]
@ -94,7 +94,7 @@ val update : t -> t -> unit
val log_issue :
Exceptions.err_kind -> t -> Location.t -> int * Digest.t -> int -> loc_trace
-> ?linters_def_file:string -> ?doc_url:string -> exn -> unit
-> ?linters_def_file:string -> ?doc_url:string -> ?access:string -> exn -> unit
(** {2 Functions for manipulating per-file error tables} *)

@ -43,6 +43,7 @@ type jsonbug = {
?linters_def_file: string option;
?traceview_id: int option;
censored_reason : string;
?access : string option;
}
type report = jsonbug list

@ -9,6 +9,77 @@
open! IStd
(** type for remembering what we have already reported to avoid duplicates. our policy is to report
each kind of access (read/write) to the same field reachable from the same procedure only once.
in addition, if a call to a procedure (transitively) accesses multiple fields, we will only
report one of each kind of access *)
type reported =
{ reported_sites: CallSite.Set.t
; reported_writes: Typ.Procname.Set.t
; reported_reads: Typ.Procname.Set.t }
let empty_reported =
let reported_sites = CallSite.Set.empty in
let reported_writes = Typ.Procname.Set.empty in
let reported_reads = Typ.Procname.Set.empty in
{reported_sites; reported_reads; reported_writes}
module Access = struct
type t = Typ.Procname.t * RacerDDomain.TraceElem.t [@@deriving compare]
end
(** map from accesses to reported sets for remembering what has been reported for each access *)
module AccessMap = Caml.Map.Make (Access)
let is_duplicate_report (pname, access) {reported_sites; reported_writes; reported_reads} =
let open RacerDDomain in
if Config.filtering then CallSite.Set.mem (TraceElem.call_site access) reported_sites
||
match TraceElem.kind access with
| Access.Write _ | Access.ContainerWrite _ ->
Typ.Procname.Set.mem pname reported_writes
| Access.Read _ | Access.ContainerRead _ ->
Typ.Procname.Set.mem pname reported_reads
| Access.InterfaceCall _ ->
false
else false
let update_reported (pname, access) reported =
let open RacerDDomain in
if Config.filtering then
let reported_sites = CallSite.Set.add (TraceElem.call_site access) reported.reported_sites in
match TraceElem.kind access with
| Access.Write _ | Access.ContainerWrite _ ->
let reported_writes = Typ.Procname.Set.add pname reported.reported_writes in
{reported with reported_writes; reported_sites}
| Access.Read _ | Access.ContainerRead _ ->
let reported_reads = Typ.Procname.Set.add pname reported.reported_reads in
{reported with reported_reads; reported_sites}
| Access.InterfaceCall _ ->
reported
else reported
let dedup (issues: Jsonbug_t.jsonbug list) =
List.fold issues ~init:(AccessMap.empty, []) ~f:
(fun (reported_map, nondup_issues) (issue: Jsonbug_t.jsonbug) ->
match issue.access with
| Some access_serial ->
let access : Access.t = Marshal.from_string (B64.decode access_serial) 0 in
let reported =
Option.value (AccessMap.find_opt access reported_map) ~default:empty_reported
in
if is_duplicate_report access reported then (reported_map, nondup_issues)
else
( AccessMap.add access (update_reported access reported) reported_map
, {issue with access= None} :: nondup_issues )
| None ->
(reported_map, {issue with access= None} :: nondup_issues) )
|> snd
type t = {introduced: Jsonbug_t.report; fixed: Jsonbug_t.report; preexisting: Jsonbug_t.report}
(** Set operations should keep duplicated issues with identical hashes *)
@ -30,7 +101,7 @@ let of_reports ~(current_report: Jsonbug_t.report) ~(previous_report: Jsonbug_t.
let introduced, preexisting, fixed =
Map.fold2 (to_map current_report) (to_map previous_report) ~f:fold_aux ~init:([], [], [])
in
{introduced; fixed; preexisting}
{introduced= dedup introduced; fixed= dedup fixed; preexisting= dedup preexisting}
let to_files {introduced; fixed; preexisting} destdir =

@ -309,7 +309,8 @@ module IssuesJson = struct
; linters_def_file= err_data.linters_def_file
; doc_url= err_data.doc_url
; traceview_id= None
; censored_reason= censored_reason key.err_name source_file }
; censored_reason= censored_reason key.err_name source_file
; access= err_data.access }
in
if not !is_first_item then pp "," else is_first_item := false ;
pp "%s@?" (Jsonbug_j.string_of_jsonbug bug)

@ -12,12 +12,12 @@ module L = Logging
type log_t =
?loc:Location.t -> ?node_id:int * Digest.t -> ?session:int -> ?ltr:Errlog.loc_trace
-> ?linters_def_file:string -> ?doc_url:string -> exn -> unit
-> ?linters_def_file:string -> ?doc_url:string -> ?access:string -> exn -> unit
type log_issue_from_errlog = Errlog.t -> log_t
let log_issue_from_errlog err_kind err_log ?loc ?node_id ?session ?ltr ?linters_def_file ?doc_url
exn =
?access exn =
let issue_type = (Exceptions.recognize_exception exn).name in
if not Config.filtering (* no-filtering takes priority *) || issue_type.IssueType.enabled then
let loc = match loc with None -> State.get_loc () | Some loc -> loc in
@ -32,11 +32,12 @@ let log_issue_from_errlog err_kind err_log ?loc ?node_id ?session ?ltr ?linters_
match session with None -> (State.get_session () :> int) | Some session -> session
in
let ltr = match ltr with None -> State.get_loc_trace () | Some ltr -> ltr in
Errlog.log_issue err_kind err_log loc node_id session ltr ?linters_def_file ?doc_url exn
Errlog.log_issue err_kind err_log loc node_id session ltr ?linters_def_file ?doc_url ?access
exn
let log_issue_from_summary err_kind summary ?loc ?node_id ?session ?ltr ?linters_def_file ?doc_url
exn =
?access exn =
let is_generated_method = Typ.Procname.java_is_generated (Specs.get_proc_name summary) in
let should_suppress_lint =
Config.curr_language_is Config.Java
@ -47,15 +48,15 @@ let log_issue_from_summary err_kind summary ?loc ?node_id ?session ?ltr ?linters
else
let err_log = Specs.get_err_log summary in
log_issue_from_errlog err_kind err_log ?loc ?node_id ?session ?ltr ?linters_def_file ?doc_url
exn
?access exn
let log_issue_deprecated ?(store_summary= false) err_kind proc_name ?loc ?node_id ?session ?ltr
?linters_def_file ?doc_url exn =
?linters_def_file ?doc_url ?access exn =
match Specs.get_summary proc_name with
| Some summary ->
log_issue_from_summary err_kind summary ?loc ?node_id ?session ?ltr ?linters_def_file
?doc_url exn ;
?doc_url ?access exn ;
if store_summary then
(* TODO (#16348004): This is currently needed as ThreadSafety works as a cluster checker *)
Specs.store_summary summary
@ -87,3 +88,4 @@ let log_warning_deprecated ?(store_summary= false) =
let log_info_deprecated ?(store_summary= false) =
log_issue_deprecated ~store_summary Exceptions.Kinfo

@ -13,7 +13,7 @@ open! IStd
type log_t =
?loc:Location.t -> ?node_id:int * Digest.t -> ?session:int -> ?ltr:Errlog.loc_trace
-> ?linters_def_file:string -> ?doc_url:string -> exn -> unit
-> ?linters_def_file:string -> ?doc_url:string -> ?access:string -> exn -> unit
type log_issue_from_errlog = Errlog.t -> log_t

@ -321,8 +321,8 @@ let mark_instr_fail exn =
type log_issue =
?store_summary:bool -> Typ.Procname.t -> ?loc:Location.t -> ?node_id:int * Digest.t
-> ?session:int -> ?ltr:Errlog.loc_trace -> ?linters_def_file:string -> ?doc_url:string -> exn
-> unit
-> ?session:int -> ?ltr:Errlog.loc_trace -> ?linters_def_file:string -> ?doc_url:string
-> ?access:string -> exn -> unit
let process_execution_failures (log_issue: log_issue) pname =
let do_failure _ fs =

@ -81,8 +81,8 @@ val mk_find_duplicate_nodes : Procdesc.t -> Procdesc.Node.t -> Procdesc.NodeSet.
type log_issue =
?store_summary:bool -> Typ.Procname.t -> ?loc:Location.t -> ?node_id:int * Digest.t
-> ?session:int -> ?ltr:Errlog.loc_trace -> ?linters_def_file:string -> ?doc_url:string -> exn
-> unit
-> ?session:int -> ?ltr:Errlog.loc_trace -> ?linters_def_file:string -> ?doc_url:string
-> ?access:string -> exn -> unit
val process_execution_failures : log_issue -> Typ.Procname.t -> unit
(** Process the failures during symbolic execution of a procedure *)

@ -1179,7 +1179,8 @@ let report_thread_safety_violation tenv pdesc ~make_description ~report_kind acc
let exn =
Exceptions.Checkers (issue_type.IssueType.unique_id, Localise.verbatim_desc error_message)
in
Reporting.log_error_deprecated ~store_summary:true pname ~loc ~ltr exn
let access = B64.encode (Marshal.to_string (pname, access) []) in
Reporting.log_error_deprecated ~store_summary:true pname ~loc ~ltr ~access exn
in
let trace_of_pname = trace_of_pname access pdesc in
Option.iter ~f:report_one_path (PathDomain.get_reportable_sink_path access ~trace_of_pname)
@ -1244,22 +1245,6 @@ let make_read_write_race_description ~read_is_sync conflicts pname final_sink_si
pp_access final_sink conflicts_description
(** type for remembering what we have already reported to avoid duplicates. our policy is to report
each kind of access (read/write) to the same field reachable from the same procedure only once.
in addition, if a call to a procedure (transitively) accesses multiple fields, we will only
report one of each kind of access *)
type reported =
{ reported_sites: CallSite.Set.t
; reported_writes: Typ.Procname.Set.t
; reported_reads: Typ.Procname.Set.t }
let empty_reported =
let reported_sites = CallSite.Set.empty in
let reported_writes = Typ.Procname.Set.empty in
let reported_reads = Typ.Procname.Set.empty in
{reported_sites; reported_reads; reported_writes}
(* return true if procedure is at an abstraction boundary or reporting has been explicitly
requested via @ThreadSafe *)
let should_report_on_proc proc_desc tenv =
@ -1308,154 +1293,112 @@ let report_unsafe_accesses
list
AccessListMap.t) =
let open RacerDDomain in
let is_duplicate_report access pname {reported_sites; reported_writes; reported_reads} =
if Config.filtering then CallSite.Set.mem (TraceElem.call_site access) reported_sites
||
match TraceElem.kind access with
| Access.Write _ | Access.ContainerWrite _ ->
Typ.Procname.Set.mem pname reported_writes
| Access.Read _ | Access.ContainerRead _ ->
Typ.Procname.Set.mem pname reported_reads
| Access.InterfaceCall _ ->
false
else false
in
let update_reported access pname reported =
if Config.filtering then
let reported_sites = CallSite.Set.add (TraceElem.call_site access) reported.reported_sites in
match TraceElem.kind access with
| Access.Write _ | Access.ContainerWrite _ ->
let reported_writes = Typ.Procname.Set.add pname reported.reported_writes in
{reported with reported_writes; reported_sites}
| Access.Read _ | Access.ContainerRead _ ->
let reported_reads = Typ.Procname.Set.add pname reported.reported_reads in
{reported with reported_reads; reported_sites}
| Access.InterfaceCall _ ->
reported
else reported
in
let report_unsafe_access (access, pre, thread, tenv, pdesc) accesses reported_acc =
let report_unsafe_access (access, pre, thread, tenv, pdesc) accesses =
let pname = Procdesc.get_proc_name pdesc in
if is_duplicate_report access pname reported_acc then reported_acc
else
match (TraceElem.kind access, pre) with
| ( Access.InterfaceCall unannoted_call_pname
, (AccessPrecondition.Unprotected _ | AccessPrecondition.TotallyUnprotected) ) ->
if ThreadsDomain.is_any thread && is_marked_thread_safe pdesc tenv then (
(* un-annotated interface call + no lock in method marked thread-safe. warn *)
report_unannotated_interface_violation tenv pdesc access thread unannoted_call_pname ;
update_reported access pname reported_acc )
else reported_acc
| Access.InterfaceCall _, AccessPrecondition.Protected _ ->
(* un-annotated interface call, but it's protected by a lock/thread. don't report *)
reported_acc
| ( (Access.Write _ | ContainerWrite _)
, (AccessPrecondition.Unprotected _ | AccessPrecondition.TotallyUnprotected) ) -> (
match Procdesc.get_proc_name pdesc with
| Java _ ->
let writes_on_background_thread =
if ThreadsDomain.is_any thread then
(* unprotected write in method that may run in parallel with itself. warn *)
[]
else
(* unprotected write, but not on a method that may run in parallel with itself
match (TraceElem.kind access, pre) with
| ( Access.InterfaceCall unannoted_call_pname
, (AccessPrecondition.Unprotected _ | AccessPrecondition.TotallyUnprotected) ) ->
if ThreadsDomain.is_any thread && is_marked_thread_safe pdesc tenv then
(* un-annotated interface call + no lock in method marked thread-safe. warn *)
report_unannotated_interface_violation tenv pdesc access thread unannoted_call_pname
| Access.InterfaceCall _, AccessPrecondition.Protected _ ->
(* un-annotated interface call, but it's protected by a lock/thread. don't report *)
()
| ( (Access.Write _ | ContainerWrite _)
, (AccessPrecondition.Unprotected _ | AccessPrecondition.TotallyUnprotected) ) -> (
match Procdesc.get_proc_name pdesc with
| Java _ ->
let writes_on_background_thread =
if ThreadsDomain.is_any thread then
(* unprotected write in method that may run in parallel with itself. warn *)
[]
else
(* unprotected write, but not on a method that may run in parallel with itself
(i.e., not a self race). find accesses on a background thread this access might
conflict with and report them *)
List.filter_map
~f:(fun (other_access, _, other_thread, _, _) ->
if TraceElem.is_write other_access && ThreadsDomain.is_any other_thread then
Some other_access
else None)
accesses
in
if List.is_empty writes_on_background_thread && not (ThreadsDomain.is_any thread) then
reported_acc
else (
report_thread_safety_violation tenv pdesc
~make_description:make_unprotected_write_description
~report_kind:(WriteWriteRace writes_on_background_thread) access thread ;
update_reported access pname reported_acc )
| _ ->
(* Do not report unprotected writes when an access can't run in parallel with itself, or
for ObjC_Cpp *)
reported_acc )
| (Access.Write _ | ContainerWrite _), AccessPrecondition.Protected _ ->
(* protected write, do nothing *)
reported_acc
| ( (Access.Read _ | ContainerRead _)
, (AccessPrecondition.Unprotected _ | AccessPrecondition.TotallyUnprotected) ) ->
(* unprotected read. report all writes as conflicts for java. for c++ filter out
unprotected writes *)
let is_cpp_protected_write pre =
match pre with
| AccessPrecondition.Unprotected _ | TotallyUnprotected ->
Typ.Procname.is_java pname
| AccessPrecondition.Protected _ ->
true
in
let is_conflict other_access pre other_thread =
TraceElem.is_write other_access
&&
if Typ.Procname.is_java pname then ThreadsDomain.is_any thread
|| ThreadsDomain.is_any other_thread
else is_cpp_protected_write pre
List.filter_map
~f:(fun (other_access, _, other_thread, _, _) ->
if TraceElem.is_write other_access && ThreadsDomain.is_any other_thread then
Some other_access
else None)
accesses
in
let all_writes =
List.filter
~f:(fun (other_access, pre, other_thread, _, _) ->
is_conflict other_access pre other_thread)
accesses
in
if List.is_empty all_writes then reported_acc
else (
if not (List.is_empty writes_on_background_thread && not (ThreadsDomain.is_any thread))
then
report_thread_safety_violation tenv pdesc
~make_description:(make_read_write_race_description ~read_is_sync:false all_writes)
~report_kind:
(ReadWriteRace (List.map ~f:(fun (access, _, _, _, _) -> access) all_writes))
access thread ;
update_reported access pname reported_acc )
| (Access.Read _ | ContainerRead _), AccessPrecondition.Protected excl ->
(* protected read. report unprotected writes and opposite protected writes as conflicts
~make_description:make_unprotected_write_description
~report_kind:(WriteWriteRace writes_on_background_thread) access thread
| _ ->
(* Do not report unprotected writes when an access can't run in parallel with itself, or
for ObjC_Cpp *)
() )
| (Access.Write _ | ContainerWrite _), AccessPrecondition.Protected _ ->
(* protected write, do nothing *)
()
| ( (Access.Read _ | ContainerRead _)
, (AccessPrecondition.Unprotected _ | AccessPrecondition.TotallyUnprotected) ) ->
(* unprotected read. report all writes as conflicts for java. for c++ filter out
unprotected writes *)
let is_cpp_protected_write pre =
match pre with
| AccessPrecondition.Unprotected _ | TotallyUnprotected ->
Typ.Procname.is_java pname
| AccessPrecondition.Protected _ ->
true
in
let is_conflict other_access pre other_thread =
TraceElem.is_write other_access
&&
if Typ.Procname.is_java pname then ThreadsDomain.is_any thread
|| ThreadsDomain.is_any other_thread
else is_cpp_protected_write pre
in
let all_writes =
List.filter
~f:(fun (other_access, pre, other_thread, _, _) ->
is_conflict other_access pre other_thread)
accesses
in
if not (List.is_empty all_writes) then
report_thread_safety_violation tenv pdesc
~make_description:(make_read_write_race_description ~read_is_sync:false all_writes)
~report_kind:
(ReadWriteRace (List.map ~f:(fun (access, _, _, _, _) -> access) all_writes))
access thread
| (Access.Read _ | ContainerRead _), AccessPrecondition.Protected excl ->
(* protected read. report unprotected writes and opposite protected writes as conflicts
Thread and Lock are opposites of one another, and Both has no opposite *)
let is_opposite = function
| Excluder.Lock, Excluder.Thread ->
true
| Excluder.Thread, Excluder.Lock ->
true
| _, _ ->
false
in
let conflicting_writes =
List.filter
~f:(fun (access, pre, other_thread, _, _) ->
match pre with
| AccessPrecondition.Unprotected _ ->
TraceElem.is_write access && ThreadsDomain.is_any other_thread
| AccessPrecondition.Protected other_excl when is_opposite (excl, other_excl) ->
TraceElem.is_write access
| _ ->
false)
accesses
in
if List.is_empty conflicting_writes then reported_acc
else (
(* protected read with conflicting unprotected write(s). warn. *)
report_thread_safety_violation tenv pdesc
~make_description:
(make_read_write_race_description ~read_is_sync:true conflicting_writes)
~report_kind:
(ReadWriteRace
(List.map ~f:(fun (access, _, _, _, _) -> access) conflicting_writes))
access thread ;
update_reported access pname reported_acc )
let is_opposite = function
| Excluder.Lock, Excluder.Thread ->
true
| Excluder.Thread, Excluder.Lock ->
true
| _, _ ->
false
in
let conflicting_writes =
List.filter
~f:(fun (access, pre, other_thread, _, _) ->
match pre with
| AccessPrecondition.Unprotected _ ->
TraceElem.is_write access && ThreadsDomain.is_any other_thread
| AccessPrecondition.Protected other_excl when is_opposite (excl, other_excl) ->
TraceElem.is_write access
| _ ->
false)
accesses
in
if not (List.is_empty conflicting_writes) then
(* protected read with conflicting unprotected write(s). warn. *)
report_thread_safety_violation tenv pdesc
~make_description:
(make_read_write_race_description ~read_is_sync:true conflicting_writes)
~report_kind:
(ReadWriteRace (List.map ~f:(fun (access, _, _, _, _) -> access) conflicting_writes))
access thread
in
AccessListMap.fold
(fun _ grouped_accesses reported_acc ->
(* reset the reported reads and writes for each memory location *)
let reported =
{ reported_acc with
reported_writes= Typ.Procname.Set.empty; reported_reads= Typ.Procname.Set.empty }
in
AccessListMap.iter
(fun _ grouped_accesses ->
let class_has_mutex_member objc_cpp tenv =
let class_name = Typ.Procname.objc_cpp_get_class_type_name objc_cpp in
let matcher = QualifiedCppName.Match.of_fuzzy_qual_names ["std::mutex"] in
@ -1487,10 +1430,10 @@ let report_unsafe_accesses
let reportable_accesses =
List.filter ~f:(fun (_, _, _, tenv, pdesc) -> should_report pdesc tenv) grouped_accesses
in
List.fold
~f:(fun acc access -> report_unsafe_access access reportable_accesses acc)
reportable_accesses ~init:reported)
aggregated_access_map empty_reported
List.iter
~f:(fun access -> report_unsafe_access access reportable_accesses)
reportable_accesses)
aggregated_access_map
|> ignore

@ -55,6 +55,7 @@ let common_libraries =
@ [ "ANSITerminal"
; "atdgen"
; "base"
; "base64"
; "cmdliner"
; "core"
; "extlib"

@ -184,7 +184,7 @@ let test_skip_duplicated_types_on_filenames =
(sorted_hashes_of_issues diff'.fixed) ;
assert_equal
~pp_diff:(pp_diff_of_string_list "Hashes of preexisting")
["111"; "22"; "222"; "55"]
["11"; "22"; "222"; "55"]
(sorted_hashes_of_issues diff'.preexisting)
in
"test_skip_duplicated_types_on_filenames" >:: do_assert

@ -37,7 +37,8 @@ let create_fake_jsonbug ?(bug_class= "bug_class") ?(kind= "kind") ?(bug_type= "b
; linters_def_file
; doc_url
; traceview_id= None
; censored_reason= "" }
; censored_reason= ""
; access= None }
let pp_diff_of_list ~pp group_name fmt (expected, actual) =

@ -28,6 +28,7 @@ ocaml-version: [ >= "4.04.2" ]
depends: [
"ANSITerminal" {>="0.7"}
"atdgen" {>="1.6.0"}
"base64"
"cmdliner" {>="1.0.0"}
"core"
"conf-autoconf" {build}

@ -2,6 +2,7 @@ ANSITerminal = 0.7
atd = 1.12.0
atdgen = 1.12.0
base = v0.9.3
base64 = 2.2.0
bin_prot = v0.9.1
biniou = 1.2.0
camlp4 = 4.05+1

Loading…
Cancel
Save