dedup merged json reports

Summary: public
When merging JSON reports from different Buck targets, the same bug may be
reported several times. Deduplicate the merged report so each issue appears
only once.

Clean up some bug sorting functions while I'm at it.

Reviewed By: martinoluca

Differential Revision: D2690665

fb-gh-sync-id: 4a12072
branch: master
author: Jules Villard, committed by facebook-github-bot-7
parent 8cd68cd890
commit 37d2e84192
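For context, a minimal sketch of the merge-then-dedup idea the diff below implements (not part of the commit; the field names are illustrative stand-ins for the JSON_INDEX_* constants):

import itertools
import operator

# Two per-target reports that both contain the same issue.
report_a = [{'file': 'a.c', 'line': 3, 'hash': 'h1', 'qualifier': 'NULL deref'}]
report_b = [{'file': 'a.c', 'line': 3, 'hash': 'h1', 'qualifier': 'NULL deref'},
            {'file': 'b.c', 'line': 7, 'hash': 'h2', 'qualifier': 'memory leak'}]

merged = report_a + report_b
key = operator.itemgetter('file', 'line', 'hash', 'qualifier')
merged.sort(key=key)  # groupby only collapses adjacent equal keys
deduped = [next(dups) for _, dups in itertools.groupby(merged, key)]
assert len(deduped) == 2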

@@ -13,7 +13,7 @@ import subprocess
 import traceback
 import util
-from inferlib import config, utils
+from inferlib import config, issues, utils

 MODULE_NAME = __name__
 MODULE_DESCRIPTION = '''Run analysis of code built with a command like:
@@ -136,7 +136,7 @@ class BuckAnalyzer:
         if not ret == os.EX_OK:
             return ret
         result_files = self._get_analysis_result_files()
-        all_results = utils.merge_json_arrays_from_files(result_files)
+        all_results = issues.merge_reports_from_paths(result_files)
         merged_results_path = os.path.join(self.args.infer_out,
                                            config.JSON_REPORT_FILENAME)
         utils.dump_json_to_path(all_results, merged_results_path)

@@ -12,7 +12,9 @@ from __future__ import unicode_literals
 import codecs
 import csv
+import itertools
 import json
+import operator
 import os
 import shutil
 import sys
@@ -108,9 +110,8 @@ def clean_csv(args, csv_report):
                 if args.no_filtering \
                         or _should_report_csv(args.analyzer, row):
                     collected_rows.append(row)
-        collected_rows = sorted(
-            collected_rows,
-            cmp=_compare_csv_rows)
+        collected_rows.sort(key=operator.itemgetter(CSV_INDEX_FILENAME,
+                                                    CSV_INDEX_LINE))
         collected_rows = [rows[0]] + collected_rows
     temporary_file = tempfile.mktemp()
     with open(temporary_file, 'w') as file_out:
@@ -130,7 +131,8 @@ def clean_json(args, json_report):
                 _should_report_json(args.analyzer, row)))
     rows = filter(is_clean, rows)
-    rows.sort(cmp=_compare_json_rows)
+    rows.sort(key=operator.itemgetter(JSON_INDEX_FILENAME,
+                                      JSON_INDEX_LINE))
     temporary_file = tempfile.mktemp()
     utils.dump_json_to_path(rows, temporary_file)
     shutil.move(temporary_file, json_report)
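As a hedged aside on the cmp-to-key migration above (toy data; the CSV_INDEX_* values are assumed column positions):

import operator

CSV_INDEX_FILENAME, CSV_INDEX_LINE = 0, 1  # assumed column positions
rows = [['b.c', 7], ['a.c', 12], ['a.c', 3]]
rows.sort(key=operator.itemgetter(CSV_INDEX_FILENAME, CSV_INDEX_LINE))
# rows == [['a.c', 3], ['a.c', 12], ['b.c', 7]]

Note that the old _compare_csv_rows cast lines with int(); if line numbers arrive as strings, the key should do the same, e.g. key=lambda r: (r[CSV_INDEX_FILENAME], int(r[CSV_INDEX_LINE])), to avoid lexicographic ordering ('10' < '2').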
@@ -193,29 +195,22 @@ def print_and_save_errors(json_report, bugs_out):
             file_out.write(text)

-def _compare_issues(filename_1, line_1, filename_2, line_2):
-    if filename_1 < filename_2:
-        return -1
-    elif filename_1 > filename_2:
-        return 1
-    else:
-        return line_1 - line_2
-
-def _compare_csv_rows(row_1, row_2):
-    filename_1 = row_1[CSV_INDEX_FILENAME]
-    filename_2 = row_2[CSV_INDEX_FILENAME]
-    line_1 = int(row_1[CSV_INDEX_LINE])
-    line_2 = int(row_2[CSV_INDEX_LINE])
-    return _compare_issues(filename_1, line_1, filename_2, line_2)
-
-def _compare_json_rows(row_1, row_2):
-    filename_1 = row_1[JSON_INDEX_FILENAME]
-    filename_2 = row_2[JSON_INDEX_FILENAME]
-    line_1 = row_1[JSON_INDEX_LINE]
-    line_2 = row_2[JSON_INDEX_LINE]
-    return _compare_issues(filename_1, line_1, filename_2, line_2)
+def merge_reports_from_paths(report_paths):
+    json_data = []
+    for json_path in report_paths:
+        json_data.extend(utils.load_json_from_path(json_path))
+    return _sort_and_uniq_rows(json_data)
+
+def _sort_and_uniq_rows(l):
+    key = operator.itemgetter(JSON_INDEX_FILENAME,
+                              JSON_INDEX_LINE,
+                              JSON_INDEX_HASH,
+                              JSON_INDEX_QUALIFIER)
+    l.sort(key=key)
+    groups = itertools.groupby(l, key)
+    # guaranteed to be at least one element in each group
+    return map(lambda (keys, dups): dups.next(), groups)

 def _should_report(analyzer, error_kind, error_type, error_bucket):
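The new _sort_and_uniq_rows uses Python 2 idioms (a tuple-unpacking lambda and generator .next()); here is an equivalent sketch in Python 3 terms, with the JSON_INDEX_* constants assumed to be dict keys:

import itertools
import operator

# Assumed key names; the real constants live in inferlib.issues.
JSON_INDEX_FILENAME = 'file'
JSON_INDEX_LINE = 'line'
JSON_INDEX_HASH = 'hash'
JSON_INDEX_QUALIFIER = 'qualifier'

def sort_and_uniq_rows(rows):
    # Identical issues compare equal on (filename, line, hash, qualifier).
    key = operator.itemgetter(JSON_INDEX_FILENAME, JSON_INDEX_LINE,
                              JSON_INDEX_HASH, JSON_INDEX_QUALIFIER)
    rows.sort(key=key)  # duplicates must be adjacent for groupby
    # Keep the first row of each group of duplicates.
    return [next(dups) for _keys, dups in itertools.groupby(rows, key)]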

@@ -147,13 +147,6 @@ def dump_json_to_path(
         indent, separators, encoding, default, sort_keys, **kw)

-def merge_json_arrays_from_files(report_paths):
-    json_data = []
-    for json_path in report_paths:
-        json_data.extend(load_json_from_path(json_path))
-    return json_data
-
 def infer_version():
     version = json.loads(subprocess.check_output([
         get_cmd_in_bin_dir('InferAnalyze'),
