dedup merged json reports

Summary: public
When merging JSON reports from different Buck targets, the same bug may be
reported several times. Deduplicate the merged report so each issue appears
only once.

Clean up some bug sorting functions while I'm at it.

Reviewed By: martinoluca

Differential Revision: D2690665

fb-gh-sync-id: 4a12072
branch: master
author: Jules Villard, committed by facebook-github-bot-7
parent 8cd68cd890
commit 37d2e84192
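For context, a minimal sketch of the merge-then-dedup idea the diff below implements (not part of the commit; the field names are illustrative stand-ins for the JSON_INDEX_* constants):

import itertools
import operator

# Two per-target reports that both contain the same issue.
report_a = [{'file': 'a.c', 'line': 3, 'hash': 'h1', 'qualifier': 'NULL deref'}]
report_b = [{'file': 'a.c', 'line': 3, 'hash': 'h1', 'qualifier': 'NULL deref'},
            {'file': 'b.c', 'line': 7, 'hash': 'h2', 'qualifier': 'memory leak'}]

merged = report_a + report_b
key = operator.itemgetter('file', 'line', 'hash', 'qualifier')
merged.sort(key=key)  # groupby only collapses adjacent equal keys
deduped = [next(dups) for _, dups in itertools.groupby(merged, key)]
assert len(deduped) == 2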

@@ -13,7 +13,7 @@ import subprocess
 import traceback
 import util
-from inferlib import config, utils
+from inferlib import config, issues, utils

 MODULE_NAME = __name__
 MODULE_DESCRIPTION = '''Run analysis of code built with a command like:
@@ -136,7 +136,7 @@ class BuckAnalyzer:
         if not ret == os.EX_OK:
             return ret
         result_files = self._get_analysis_result_files()
-        all_results = utils.merge_json_arrays_from_files(result_files)
+        all_results = issues.merge_reports_from_paths(result_files)
         merged_results_path = os.path.join(self.args.infer_out,
                                            config.JSON_REPORT_FILENAME)
         utils.dump_json_to_path(all_results, merged_results_path)

@@ -12,7 +12,9 @@ from __future__ import unicode_literals
 import codecs
 import csv
+import itertools
 import json
+import operator
 import os
 import shutil
 import sys
@@ -108,9 +110,8 @@ def clean_csv(args, csv_report):
                 if args.no_filtering \
                         or _should_report_csv(args.analyzer, row):
                     collected_rows.append(row)
-        collected_rows = sorted(
-            collected_rows,
-            cmp=_compare_csv_rows)
+        collected_rows.sort(key=operator.itemgetter(CSV_INDEX_FILENAME,
+                                                    CSV_INDEX_LINE))
         collected_rows = [rows[0]] + collected_rows
     temporary_file = tempfile.mktemp()
     with open(temporary_file, 'w') as file_out:
@@ -130,7 +131,8 @@ def clean_json(args, json_report):
                 _should_report_json(args.analyzer, row)))
     rows = filter(is_clean, rows)
-    rows.sort(cmp=_compare_json_rows)
+    rows.sort(key=operator.itemgetter(JSON_INDEX_FILENAME,
+                                      JSON_INDEX_LINE))
     temporary_file = tempfile.mktemp()
     utils.dump_json_to_path(rows, temporary_file)
     shutil.move(temporary_file, json_report)
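As a hedged aside on the cmp-to-key migration above (toy data; the CSV_INDEX_* values are assumed column positions):

import operator

CSV_INDEX_FILENAME, CSV_INDEX_LINE = 0, 1  # assumed column positions
rows = [['b.c', 7], ['a.c', 12], ['a.c', 3]]
rows.sort(key=operator.itemgetter(CSV_INDEX_FILENAME, CSV_INDEX_LINE))
# rows == [['a.c', 3], ['a.c', 12], ['b.c', 7]]

Note that the old _compare_csv_rows cast lines with int(); if line numbers arrive as strings, the key should do the same, e.g. key=lambda r: (r[CSV_INDEX_FILENAME], int(r[CSV_INDEX_LINE])), to avoid lexicographic ordering ('10' < '2').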
@@ -193,29 +195,22 @@ def print_and_save_errors(json_report, bugs_out):
             file_out.write(text)

-def _compare_issues(filename_1, line_1, filename_2, line_2):
-    if filename_1 < filename_2:
-        return -1
-    elif filename_1 > filename_2:
-        return 1
-    else:
-        return line_1 - line_2
-
-def _compare_csv_rows(row_1, row_2):
-    filename_1 = row_1[CSV_INDEX_FILENAME]
-    filename_2 = row_2[CSV_INDEX_FILENAME]
-    line_1 = int(row_1[CSV_INDEX_LINE])
-    line_2 = int(row_2[CSV_INDEX_LINE])
-    return _compare_issues(filename_1, line_1, filename_2, line_2)
-
-def _compare_json_rows(row_1, row_2):
-    filename_1 = row_1[JSON_INDEX_FILENAME]
-    filename_2 = row_2[JSON_INDEX_FILENAME]
-    line_1 = row_1[JSON_INDEX_LINE]
-    line_2 = row_2[JSON_INDEX_LINE]
-    return _compare_issues(filename_1, line_1, filename_2, line_2)
+def merge_reports_from_paths(report_paths):
+    json_data = []
+    for json_path in report_paths:
+        json_data.extend(utils.load_json_from_path(json_path))
+    return _sort_and_uniq_rows(json_data)
+
+def _sort_and_uniq_rows(l):
+    key = operator.itemgetter(JSON_INDEX_FILENAME,
+                              JSON_INDEX_LINE,
+                              JSON_INDEX_HASH,
+                              JSON_INDEX_QUALIFIER)
+    l.sort(key=key)
+    groups = itertools.groupby(l, key)
+    # guaranteed to be at least one element in each group
+    return map(lambda (keys, dups): dups.next(), groups)

 def _should_report(analyzer, error_kind, error_type, error_bucket):
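The new _sort_and_uniq_rows uses Python 2 idioms (a tuple-unpacking lambda and generator .next()); here is an equivalent sketch in Python 3 terms, with the JSON_INDEX_* constants assumed to be dict keys:

import itertools
import operator

# Assumed key names; the real constants live in inferlib.issues.
JSON_INDEX_FILENAME = 'file'
JSON_INDEX_LINE = 'line'
JSON_INDEX_HASH = 'hash'
JSON_INDEX_QUALIFIER = 'qualifier'

def sort_and_uniq_rows(rows):
    # Identical issues compare equal on (filename, line, hash, qualifier).
    key = operator.itemgetter(JSON_INDEX_FILENAME, JSON_INDEX_LINE,
                              JSON_INDEX_HASH, JSON_INDEX_QUALIFIER)
    rows.sort(key=key)  # duplicates must be adjacent for groupby
    # Keep the first row of each group of duplicates.
    return [next(dups) for _keys, dups in itertools.groupby(rows, key)]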

@@ -147,13 +147,6 @@ def dump_json_to_path(
         indent, separators, encoding, default, sort_keys, **kw)

-def merge_json_arrays_from_files(report_paths):
-    json_data = []
-    for json_path in report_paths:
-        json_data.extend(load_json_from_path(json_path))
-    return json_data
-
 def infer_version():
     version = json.loads(subprocess.check_output([
         get_cmd_in_bin_dir('InferAnalyze'),
