From 22ec29fabcda842d75c5f9e71b7b7bfff31eb17b Mon Sep 17 00:00:00 2001 From: Josh Berdine Date: Fri, 8 Dec 2017 17:33:55 -0800 Subject: [PATCH] [racerd] Report only once per unique pair of final trace locations Summary: This diff adds a layer of report deduplication logic in addition to the existing scheme. Suppose issue 1 with trace1a and trace1b, and issue 2 with trace2a and trace2b. If trace1a ends at the same location as trace2a (resp., trace2b) and trace1b ends at the same location as trace2b (resp., trace2a), then consider issues 1 and 2 to be duplicates. This chooses to report the issue with the smaller sum of trace lengths, breaking ties using the issue hashes, and eventually the entire issue. Therefore there is a potential for flakiness with respect the the choice of which report to make within a hash-equivalence class. Reviewed By: sblackshear Differential Revision: D6519607 fbshipit-source-id: 63210ab --- infer/src/.merlin | 1 + infer/src/IR/IssueAuxData.ml | 16 +++++ infer/src/IR/IssueAuxData.mli | 16 +++++ infer/src/backend/Differential.ml | 83 +++++++++++++++------- infer/src/concurrency/RacerD.ml | 14 ++-- infer/src/unit/DifferentialFiltersTests.ml | 2 +- 6 files changed, 100 insertions(+), 32 deletions(-) create mode 100644 infer/src/IR/IssueAuxData.ml create mode 100644 infer/src/IR/IssueAuxData.mli diff --git a/infer/src/.merlin b/infer/src/.merlin index 7e2c99b41..0b82821a4 100644 --- a/infer/src/.merlin +++ b/infer/src/.merlin @@ -10,6 +10,7 @@ PKG atdgen PKG base PKG base.caml PKG base.shadow_stdlib +PKG base64 PKG bigarray PKG bin_prot PKG bin_prot.shape diff --git a/infer/src/IR/IssueAuxData.ml b/infer/src/IR/IssueAuxData.ml new file mode 100644 index 000000000..046191739 --- /dev/null +++ b/infer/src/IR/IssueAuxData.ml @@ -0,0 +1,16 @@ +(* + * Copyright (c) 2017 - present Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + *) + +open! IStd + +type t = Typ.Procname.t * RacerDDomain.TraceElem.t * Location.t list + +let encode decoded = B64.encode (Marshal.to_string decoded []) + +let decode encoded = Marshal.from_string (B64.decode encoded) 0 diff --git a/infer/src/IR/IssueAuxData.mli b/infer/src/IR/IssueAuxData.mli new file mode 100644 index 000000000..4653f709f --- /dev/null +++ b/infer/src/IR/IssueAuxData.mli @@ -0,0 +1,16 @@ +(* + * Copyright (c) 2017 - present Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + *) + +open! IStd + +type t = Typ.Procname.t * RacerDDomain.TraceElem.t * Location.t list + +val encode : t -> string + +val decode : string -> t diff --git a/infer/src/backend/Differential.ml b/infer/src/backend/Differential.ml index ed8cf087d..bfed3190b 100644 --- a/infer/src/backend/Differential.ml +++ b/infer/src/backend/Differential.ml @@ -25,28 +25,34 @@ let empty_reported = {reported_sites; reported_reads; reported_writes} -module Access = struct - type t = Typ.Procname.t * RacerDDomain.TraceElem.t [@@deriving compare] -end - -(** map from accesses to reported sets for remembering what has been reported for each access *) -module AccessMap = Caml.Map.Make (Access) +(** set of lists of locations for remembering what trace ends have been reported *) +module LocListSet = struct + include Caml.Set.Make (struct + type t = Location.t list [@@deriving compare] + end) -let is_duplicate_report (pname, access) {reported_sites; reported_writes; reported_reads} = - let open RacerDDomain in - if Config.filtering then CallSite.Set.mem (TraceElem.call_site access) reported_sites - || - match TraceElem.kind access with - | Access.Write _ | Access.ContainerWrite _ -> - Typ.Procname.Set.mem pname reported_writes - | Access.Read _ | Access.ContainerRead _ -> - Typ.Procname.Set.mem pname reported_reads - | Access.InterfaceCall _ -> - false - else false + let mem s xs = not (List.is_empty xs) && mem (List.sort ~cmp:Location.compare xs) s + let add s xs = if List.is_empty xs then s else add (List.sort ~cmp:Location.compare xs) s +end -let update_reported (pname, access) reported = +let is_duplicate_report pname access end_locs {reported_sites; reported_writes; reported_reads} + reported_ends = + let open RacerDDomain in + Config.filtering + && ( LocListSet.mem reported_ends end_locs + || CallSite.Set.mem (TraceElem.call_site access) reported_sites + || + match TraceElem.kind access with + | Access.Write _ | Access.ContainerWrite _ -> + Typ.Procname.Set.mem pname reported_writes + | Access.Read _ | Access.ContainerRead _ -> + Typ.Procname.Set.mem pname reported_reads + | Access.InterfaceCall _ -> + false ) + + +let update_reported pname access reported = let open RacerDDomain in if Config.filtering then let reported_sites = CallSite.Set.add (TraceElem.call_site access) reported.reported_sites in @@ -62,17 +68,40 @@ let update_reported (pname, access) reported = else reported +let sort_by_decreasing_preference_to_report issues = + let cmp (x: Jsonbug_t.jsonbug) (y: Jsonbug_t.jsonbug) = + let n = Int.compare (List.length x.bug_trace) (List.length y.bug_trace) in + if n <> 0 then n + else + let n = String.compare x.hash y.hash in + if n <> 0 then n else Pervasives.compare x y + in + List.sort ~cmp issues + + +let sort_by_location issues = + let cmp (x: Jsonbug_t.jsonbug) (y: Jsonbug_t.jsonbug) = + [%compare : string * int * int] (x.file, x.line, x.column) (y.file, y.line, y.column) + in + List.sort ~cmp issues + + let dedup (issues: Jsonbug_t.jsonbug list) = - List.fold issues ~init:(empty_reported, []) ~f: - (fun (reported, nondup_issues) (issue: Jsonbug_t.jsonbug) -> + List.fold (sort_by_decreasing_preference_to_report issues) + ~init:(empty_reported, LocListSet.empty, []) ~f: + (fun (reported, reported_ends, nondup_issues) (issue: Jsonbug_t.jsonbug) -> match issue.access with - | Some access_serial -> - let access : Access.t = Marshal.from_string (B64.decode access_serial) 0 in - if is_duplicate_report access reported then (reported, nondup_issues) - else (update_reported access reported, {issue with access= None} :: nondup_issues) + | Some encoded -> + let pname, access, end_locs = IssueAuxData.decode encoded in + if is_duplicate_report pname access end_locs reported reported_ends then + (reported, reported_ends, nondup_issues) + else + ( update_reported pname access reported + , LocListSet.add reported_ends end_locs + , {issue with access= None} :: nondup_issues ) | None -> - (reported, {issue with access= None} :: nondup_issues) ) - |> snd + (reported, reported_ends, {issue with access= None} :: nondup_issues) ) + |> trd3 |> sort_by_location type t = {introduced: Jsonbug_t.report; fixed: Jsonbug_t.report; preexisting: Jsonbug_t.report} diff --git a/infer/src/concurrency/RacerD.ml b/infer/src/concurrency/RacerD.ml index 92a4786bb..672988f7a 100644 --- a/infer/src/concurrency/RacerD.ml +++ b/infer/src/concurrency/RacerD.ml @@ -1131,9 +1131,12 @@ let make_trace ~report_kind original_path pdesc = [] in let original_trace = loc_trace_of_path original_path in + let get_end_loc trace = Option.map (List.last trace) ~f:(function {Errlog.lt_loc} -> lt_loc) in + let original_end = get_end_loc original_trace in let make_with_conflicts conflict_sink original_trace ~label1 ~label2 = (* create a trace for one of the conflicts and append it to the trace for the original sink *) let conflict_trace = make_trace_for_sink conflict_sink in + let conflict_end = get_end_loc conflict_trace in let get_start_loc = function head :: _ -> head.Errlog.lt_loc | [] -> Location.dummy in let first_trace_spacer = Errlog.make_trace_element 0 (get_start_loc original_trace) label1 [] @@ -1141,7 +1144,9 @@ let make_trace ~report_kind original_path pdesc = let second_trace_spacer = Errlog.make_trace_element 0 (get_start_loc conflict_trace) label2 [] in - first_trace_spacer :: original_trace @ second_trace_spacer :: conflict_trace + ( first_trace_spacer :: original_trace @ second_trace_spacer :: conflict_trace + , original_end + , conflict_end ) in match report_kind with | ReadWriteRace conflict_sink -> @@ -1151,7 +1156,7 @@ let make_trace ~report_kind original_path pdesc = make_with_conflicts conflict_sink original_trace ~label1:"" ~label2:"" | WriteWriteRace None | UnannotatedInterface -> - original_trace + (original_trace, original_end, None) let report_thread_safety_violation tenv pdesc ~make_description ~report_kind access thread = @@ -1181,7 +1186,7 @@ let report_thread_safety_violation tenv pdesc ~make_description ~report_kind acc let final_sink_site = PathDomain.Sink.call_site final_sink in let initial_sink_site = PathDomain.Sink.call_site initial_sink in let loc = CallSite.loc initial_sink_site in - let ltr = make_trace ~report_kind path pdesc in + let ltr, original_end, conflict_end = make_trace ~report_kind path pdesc in (* what the potential bug is *) let description = make_description pname final_sink_site initial_sink_site initial_sink in (* why we are reporting it *) @@ -1190,7 +1195,8 @@ let report_thread_safety_violation tenv pdesc ~make_description ~report_kind acc let exn = Exceptions.Checkers (issue_type.IssueType.unique_id, Localise.verbatim_desc error_message) in - let access = B64.encode (Marshal.to_string (pname, access) []) in + let end_locs = Option.to_list original_end @ Option.to_list conflict_end in + let access = IssueAuxData.encode (pname, access, end_locs) in Reporting.log_error_deprecated ~store_summary:true pname ~loc ~ltr ~access exn in let trace_of_pname = trace_of_pname access pdesc in diff --git a/infer/src/unit/DifferentialFiltersTests.ml b/infer/src/unit/DifferentialFiltersTests.ml index acc4cd817..70a079acd 100644 --- a/infer/src/unit/DifferentialFiltersTests.ml +++ b/infer/src/unit/DifferentialFiltersTests.ml @@ -184,7 +184,7 @@ let test_skip_duplicated_types_on_filenames = (sorted_hashes_of_issues diff'.fixed) ; assert_equal ~pp_diff:(pp_diff_of_string_list "Hashes of preexisting") - ["11"; "22"; "222"; "55"] + ["111"; "22"; "222"; "55"] (sorted_hashes_of_issues diff'.preexisting) in "test_skip_duplicated_types_on_filenames" >:: do_assert