[RFC] Remove CSV and JSON munging python code

Reviewed By: jvillard Differential Revision: D4063972 fbshipit-source-id: 66ccbc6
9 years ago · 4422893bbd
parent 849799246b
commit 4422893bbd
13 changed files with 164 additions and 215 deletions
--- a/2
+++ b/2
@ -196,6 +196,8 @@ quick-test: test_this_build ocaml_unit_test
 test-replace:
 	@for file in $$(find infer/tests -name "*.exp.test"); do \
 	    mv -f $$file $$(dirname $$file)/$$(basename -s .exp.test $$file).exp; done
+	@for file in $$(find infer/tests -name "*.test.dot"); do \
+	    mv -f $$file $$(dirname $$file)/$$(basename -s .test.dot $$file).dot; done

 .PHONY: uninstall
 uninstall:
--- a/infer/lib/python/inferlib/analyze.py
+++ b/infer/lib/python/inferlib/analyze.py
@ -385,8 +385,6 @@ class AnalyzerWrapper(object):
                'Error with InferPrint with the command: {}'.format(
                    infer_print_cmd))
        else:
-            issues.clean_csv(self.args, csv_report)
-            issues.clean_json(self.args, json_report)
            self.update_stats_with_warnings(csv_report)

        return exit_status
--- a/infer/lib/python/inferlib/issues.py
+++ b/infer/lib/python/inferlib/issues.py
@ -11,7 +11,6 @@ from __future__ import print_function
 from __future__ import unicode_literals

 import codecs
-import csv
 import datetime
 import itertools
 import json
@ -31,69 +30,13 @@ except ImportError:
 from . import colorize, config, source, utils


-# Increase the limit of the CSV parser to sys.maxlimit
-csv.field_size_limit(sys.maxsize)
-
 ISSUE_KIND_ERROR = 'ERROR'
 ISSUE_KIND_WARNING = 'WARNING'
 ISSUE_KIND_INFO = 'INFO'
 ISSUE_KIND_ADVICE = 'ADVICE'

-ISSUE_TYPES = [
-    'ASSERTION_FAILURE',
-    'BAD_POINTER_COMPARISON',
-    # 'CHECKERS_PRINTF_ARGS'
-    # TODO (#8030397): revert this once all the checkers are moved to Infer
-    'CONTEXT_LEAK',
-    'MEMORY_LEAK',
-    'RESOURCE_LEAK',
-    'RETAIN_CYCLE',
-    'STRONG_DELEGATE_WARNING',
-    'TAINTED_VALUE_REACHING_SENSITIVE_FUNCTION',
-    'IVAR_NOT_NULL_CHECKED',
-    'NULL_DEREFERENCE',
-    'EMPTY_VECTOR_ACCESS',
-    'PARAMETER_NOT_NULL_CHECKED',
-    'PREMATURE_NIL_TERMINATION_ARGUMENT',
-    'DIRECT_ATOMIC_PROPERTY_ACCESS',
-    'CXX_REFERENCE_CAPTURED_IN_OBJC_BLOCK',
-    'REGISTERED_OBSERVER_BEING_DEALLOCATED',
-    'ASSIGN_POINTER_WARNING',
-    'GLOBAL_VARIABLE_INITIALIZED_WITH_FUNCTION_OR_METHOD_CALL',
-    'QUANDARY_TAINT_ERROR',
-    # TODO (t11307776): Turn this back on once some of the FP issues are fixed
-    'UNSAFE_GUARDED_BY_ACCESS',
-    'MUTABLE_LOCAL_VARIABLE_IN_COMPONENT_FILE',
-    'COMPONENT_FACTORY_FUNCTION',
-    'COMPONENT_INITIALIZER_WITH_SIDE_EFFECTS',
-    'COMPONENT_WITH_MULTIPLE_FACTORY_METHODS',
-    'COMPONENT_WITH_UNCONVENTIONAL_SUPERCLASS',
-]
-
-NULL_STYLE_ISSUE_TYPES = [
-    'IVAR_NOT_NULL_CHECKED',
-    'NULL_DEREFERENCE',
-    'PARAMETER_NOT_NULL_CHECKED',
-    'PREMATURE_NIL_TERMINATION_ARGUMENT',
-]
-
 # indices in rows of csv reports
-CSV_INDEX_CLASS = 0
-CSV_INDEX_KIND = 1
 CSV_INDEX_TYPE = 2
-CSV_INDEX_QUALIFIER = 3
-CSV_INDEX_SEVERITY = 4
-CSV_INDEX_LINE = 5
-CSV_INDEX_PROCEDURE = 6
-CSV_INDEX_PROCEDURE_ID = 7
-CSV_INDEX_FILENAME = 8
-CSV_INDEX_TRACE = 9
-CSV_INDEX_KEY = 10
-CSV_INDEX_QUALIFIER_TAGS = 11
-CSV_INDEX_HASH = 12
-CSV_INDEX_BUG_ID = 13
-CSV_INDEX_ALWAYS_REPORT = 14
-CSV_INDEX_ADVICE = 15

 # field names in rows of json reports
 JSON_INDEX_DOTTY = 'dotty'
@ -110,66 +53,18 @@ JSON_INDEX_PROCEDURE = 'procedure'
 JSON_INDEX_PROCEDURE_ID = 'procedure_id'
 JSON_INDEX_QUALIFIER = 'qualifier'
 JSON_INDEX_QUALIFIER_TAGS = 'qualifier_tags'
-JSON_INDEX_SEVERITY = 'file'
 JSON_INDEX_TYPE = 'bug_type'
 JSON_INDEX_TRACE = 'bug_trace'
 JSON_INDEX_TRACE_LEVEL = 'level'
 JSON_INDEX_TRACE_FILENAME = 'filename'
 JSON_INDEX_TRACE_LINE = 'line_number'
 JSON_INDEX_TRACE_DESCRIPTION = 'description'
-JSON_INDEX_TRACE_NODE_TAGS = 'node_tags'
-JSON_INDEX_TRACE_NODE_TAGS_TAG = 'tags'
-JSON_INDEX_TRACE_NODE_TAGS_VALUE = 'value'
 JSON_INDEX_VISIBILITY = 'visibility'


-QUALIFIER_TAGS = 'qualifier_tags'
-BUCKET_TAGS = 'bucket'
 ISSUE_TYPES_URL = 'http://fbinfer.com/docs/infer-issue-types.html#'


-def clean_csv(args, csv_report):
-    collected_rows = []
-    with open(csv_report, 'r') as file_in:
-        reader = csv.reader(file_in)
-        rows = [row for row in reader]
-        if len(rows) <= 1:
-            return rows
-        else:
-            for row in rows[1:]:
-                filename = row[CSV_INDEX_FILENAME]
-                if os.path.isfile(filename):
-                    if args.no_filtering \
-                       or _should_report_csv(args.analyzer, row):
-                        collected_rows.append(row)
-            collected_rows.sort(key=operator.itemgetter(CSV_INDEX_FILENAME,
-                                                        CSV_INDEX_LINE))
-            collected_rows = [rows[0]] + collected_rows
-    temporary_file = tempfile.mktemp()
-    with open(temporary_file, 'w') as file_out:
-        writer = csv.writer(file_out)
-        writer.writerows(collected_rows)
-        file_out.flush()
-        shutil.move(temporary_file, csv_report)
-
-
-def clean_json(args, json_report):
-    rows = utils.load_json_from_path(json_report)
-
-    def is_clean(row):
-        filename = row[JSON_INDEX_FILENAME]
-        return (os.path.isfile(filename) and
-                (args.no_filtering or
-                 _should_report_json(args.analyzer, row)))
-
-    rows = filter(is_clean, rows)
-    rows.sort(key=operator.itemgetter(JSON_INDEX_FILENAME,
-                                      JSON_INDEX_LINE))
-    temporary_file = tempfile.mktemp()
-    utils.dump_json_to_path(rows, temporary_file)
-    shutil.move(temporary_file, json_report)
-
-
 def _text_of_infer_loc(loc):
    return ' ({}:{}:{}-{}:)'.format(
        loc[JSON_INDEX_ISL_FILE],
@ -352,62 +247,3 @@ def _sort_and_uniq_rows(l):
    groups = itertools.groupby(l, key)
    # guaranteed to be at least one element in each group
    return map(lambda (keys, dups): dups.next(), groups)
-
-
-def _should_report(analyzer, error_kind, error_type, error_bucket):
-    analyzers_whitelist = [
-        config.ANALYZER_ERADICATE,
-        config.ANALYZER_CHECKERS,
-        config.ANALYZER_TRACING,
-    ]
-    error_kinds = [ISSUE_KIND_ERROR, ISSUE_KIND_WARNING, ISSUE_KIND_ADVICE]
-    null_style_buckets = ['B1', 'B2']
-
-    if analyzer in analyzers_whitelist:
-        return True
-
-    if error_kind not in error_kinds:
-        return False
-
-    if not error_type:
-        return False
-
-    if error_type in NULL_STYLE_ISSUE_TYPES:
-        return error_bucket in null_style_buckets
-
-    return error_type in ISSUE_TYPES
-
-
-def _should_report_csv(analyzer, row):
-    error_kind = row[CSV_INDEX_KIND]
-    error_type = row[CSV_INDEX_TYPE]
-    error_bucket = ''  # can be updated later once we extract it from qualifier
-
-    try:
-        qualifier_xml = ET.fromstring(row[CSV_INDEX_QUALIFIER_TAGS])
-        if qualifier_xml.tag == QUALIFIER_TAGS:
-            bucket = qualifier_xml.find(BUCKET_TAGS)
-            if bucket is not None:
-                error_bucket = bucket.text
-    except ET.ParseError:
-        pass  # this will skip any invalid xmls
-
-    return _should_report(analyzer, error_kind, error_type, error_bucket)
-
-
-def _should_report_json(analyzer, row):
-    error_kind = row[JSON_INDEX_KIND]
-    error_type = row[JSON_INDEX_TYPE]
-    error_bucket = ''  # can be updated later once we extract it from qualifier
-
-    for qual_tag in row[QUALIFIER_TAGS]:
-        if qual_tag['tag'] == BUCKET_TAGS:
-            error_bucket = qual_tag['value']
-            break
-
-    return _should_report(analyzer, error_kind, error_type, error_bucket)
-
-
-def _print_and_write(file_out, message):
-    utils.stdout(message)
-    file_out.write(utils.encode(message + '\n'))
--- a/infer/src/IR/Localise.ml
+++ b/infer/src/IR/Localise.ml
@ -69,6 +69,7 @@ let pointer_size_mismatch = "POINTER_SIZE_MISMATCH"
 let precondition_not_found = "PRECONDITION_NOT_FOUND"
 let precondition_not_met = "PRECONDITION_NOT_MET"
 let premature_nil_termination = "PREMATURE_NIL_TERMINATION_ARGUMENT"
+let quandary_taint_error = "QUANDARY_TAINT_ERROR"
 let registered_observer_being_deallocated = "REGISTERED_OBSERVER_BEING_DEALLOCATED"
 let resource_leak = "RESOURCE_LEAK"
 let retain_cycle = "RETAIN_CYCLE"
--- a/infer/src/IR/Localise.mli
+++ b/infer/src/IR/Localise.mli
@ -65,6 +65,7 @@ val pointer_size_mismatch : t
 val precondition_not_found : t
 val precondition_not_met : t
 val premature_nil_termination : t
+val quandary_taint_error : t
 val registered_observer_being_deallocated : t
 val retain_cycle : t
 val resource_leak : t
--- a/infer/src/Makefile
+++ b/infer/src/Makefile
@ -161,8 +161,7 @@ INFER_BASE_TARGETS = \
  $(INFERPRINT_MAIN).native \
  $(INFERUNIT_MAIN).native \
  $(CHECKCOPYRIGHT_MAIN).native \
-  $(STATSAGGREGATOR_MAIN).native \
-  $(INFERUNIT_MAIN).native
+  $(STATSAGGREGATOR_MAIN).native

 INFER_ALL_TARGETS = $(INFER_BASE_TARGETS) \
  $(INFERJAVA_MAIN).native \
--- a/infer/src/backend/InferPrint.re
+++ b/infer/src/backend/InferPrint.re
@ -313,6 +313,92 @@ let module ProcsXml = {
  let pp_procs_close fmt () => Io_infer.Xml.pp_close fmt "procedures";
 };

+let should_report (issue_kind: Exceptions.err_kind) issue_type error_desc => /* true iff an issue of this kind/type/description passes report filtering */
+  if (not Config.filtering) { /* filtering disabled: every issue is reported */
+    true
+  } else {
+    let analyzer_is_whitelisted =
+      switch Config.analyzer {
+      | Some (Checkers | Eradicate | Tracing) => true
+      | None
+      | Some (Capture | Compile | Crashcontext | Infer | Linters | Quandary) => false
+      };
+    if analyzer_is_whitelisted { /* whitelisted analyzers bypass all of the checks below */
+      true
+    } else {
+      let issue_kind_is_blacklisted =
+        switch issue_kind {
+        | Kinfo => true
+        | Kerror
+        | Kwarning
+        | Kadvice => false
+        };
+      if issue_kind_is_blacklisted { /* Kinfo issues are never reported while filtering */
+        false
+      } else {
+        let issue_type_is_null_deref = {
+          let null_deref_issue_types =
+            Localise.[
+              field_not_null_checked,
+              null_dereference,
+              parameter_not_null_checked,
+              premature_nil_termination
+            ];
+          IList.mem Localise.equal issue_type null_deref_issue_types
+        };
+        if issue_type_is_null_deref { /* these types are reported only when the bucket is b1 or b2 */
+          let issue_bucket_is_high = {
+            let issue_bucket = Localise.error_desc_get_bucket error_desc;
+            let high_buckets = Localise.BucketLevel.[b1, b2];
+            let eq o y =>
+              switch (o, y) {
+              | (None, _) => false
+              | (Some x, y) => string_equal x y
+              };
+            IList.mem eq issue_bucket high_buckets
+          };
+          issue_bucket_is_high
+        } else { /* any other type is reported only if it appears in the list below */
+          let issue_type_is_reportable = {
+            let reportable_issue_types =
+              Localise.[
+                Localise.from_string Config.default_failure_name,
+                assign_pointer_warning,
+                bad_pointer_comparison,
+                component_factory_function,
+                component_initializer_with_side_effects,
+                component_with_multiple_factory_methods,
+                component_with_unconventional_superclass,
+                context_leak,
+                cxx_reference_captured_in_objc_block,
+                direct_atomic_property_access,
+                empty_vector_access,
+                global_variable_initialized_with_function_or_method_call,
+                memory_leak,
+                mutable_local_variable_in_component_file,
+                quandary_taint_error,
+                registered_observer_being_deallocated,
+                resource_leak,
+                retain_cycle,
+                strong_delegate_warning,
+                tainted_value_reaching_sensitive_function,
+                unsafe_guarded_by_access
+              ];
+            IList.mem Localise.equal issue_type reportable_issue_types
+          };
+          issue_type_is_reportable
+        }
+      }
+    }
+  };
+
+let is_file source_file => /* true iff stat-ing the path of source_file yields a regular file (or symlink) kind; false otherwise */
+  switch (Unix.stat (DB.source_file_to_string source_file)) {
+  | {st_kind: S_REG | S_LNK} => true /* NOTE(review): Unix.stat follows symlinks, so S_LNK presumably never matches here (lstat would be needed) — confirm */
+  | _ => false
+  | exception Unix.Unix_error _ => false /* any Unix_error from stat (e.g. a missing path) is treated as "not a file" */
+  };
+
 let module IssuesCsv = {
  let csv_issues_id = ref 0;
  let pp_header fmt () =>
@ -345,7 +431,11 @@ let module IssuesCsv = {
        | Some proc_loc => proc_loc.Location.file
        | None => loc.Location.file
        };
-      if (in_footprint && error_filter source_file error_desc error_name) {
+      if (
+        in_footprint &&
+        error_filter source_file error_desc error_name &&
+        should_report ekind error_name error_desc && is_file source_file
+      ) {
        let err_desc_string = error_desc_to_csv_string error_desc;
        let err_advice_string = error_advice_to_csv_string error_desc;
        let qualifier_tag_xml = {
@ -441,7 +531,9 @@ let module IssuesJson = {
      let file = DB.source_file_to_string source_file;
      let file_opt = make_cpp_models_path_relative file;
      if (
-        in_footprint && error_filter source_file error_desc error_name && Option.is_some file_opt
+        in_footprint &&
+        error_filter source_file error_desc error_name &&
+        Option.is_some file_opt && should_report ekind error_name error_desc && is_file source_file
      ) {
        let kind = Exceptions.err_kind_string ekind;
        let bug_type = Localise.to_string error_name;
--- a/infer/src/base/CommandLineOption.ml
+++ b/infer/src/base/CommandLineOption.ml
@ -295,9 +295,10 @@ let mk_bool ?(deprecated_no=[]) ?(default=false) ?(f=fun b -> b)
  var

 let mk_bool_group ?(deprecated_no=[]) ?(default=false)
-    ?(deprecated=[]) ~long ?short ?exes ?(meta="") doc children =
+    ?(deprecated=[]) ~long ?short ?exes ?(meta="") doc children no_children =
  let f b =
    IList.iter (fun child -> child := b) children ;
+    IList.iter (fun child -> child := not b) no_children ;
    b
  in
  mk_bool ~deprecated ~deprecated_no ~default ~long ?short ~f ?exes ~meta doc
--- a/infer/src/base/CommandLineOption.mli
+++ b/infer/src/base/CommandLineOption.mli
@ -55,9 +55,11 @@ val mk_option :
    either "Activates:" or "Deactivates:", so should be phrased accordingly. *)
 val mk_bool : ?deprecated_no:string list ->  ?default:bool -> ?f:(bool -> bool) -> bool ref t

-(** [mk_bool_group children] behaves as [mk_bool] with the addition that all the [children] are also
-    set. A child can be unset by including "--no-child" later in the arguments. *)
-val mk_bool_group : ?deprecated_no:string list -> ?default:bool -> (bool ref list -> bool ref) t
+(** [mk_bool_group children no_children] behaves as [mk_bool] with the addition that all the
+    [children] are also set and the [no_children] are unset. A child can be unset by including
+    "--no-child" later in the arguments. *)
+val mk_bool_group :
+  ?deprecated_no:string list -> ?default:bool -> (bool ref list -> bool ref list -> bool ref) t

 val mk_int : default:int -> int ref t

--- a/infer/src/base/Config.ml
+++ b/infer/src/base/Config.ml
@ -658,19 +658,19 @@ and checkers, crashcontext, eradicate, quandary =
  let crashcontext =
    CLOpt.mk_bool_group ~deprecated:["crashcontext"] ~long:"crashcontext"
      ""
-      [checkers]
+      [checkers] []
  in
  (* Activate the eradicate checker for java annotations (also sets --checkers) *)
  let eradicate =
    CLOpt.mk_bool_group ~deprecated:["eradicate"] ~long:"eradicate"
      ""
-      [checkers]
+      [checkers] []
  in
  (* Activate the quandary taint analysis *)
  let quandary =
    CLOpt.mk_bool_group ~deprecated:["quandary"] ~long:"quandary"
      ""
-      [checkers]
+      [checkers] []
  in
  (checkers, crashcontext, eradicate, quandary)

@ -730,11 +730,29 @@ and cxx_experimental =
    ~exes:CLOpt.[Clang]
    "Analyze C++ methods, still experimental"

-and debug, print_types, write_dotty =
-  let print_types =
+and (
+  debug,
+  debug_exceptions,
+  filtering,
+  print_types,
+  reports_include_ml_loc,
+  write_dotty
+) =
+  let filtering =
+    CLOpt.mk_bool ~long:"filtering" ~short:"f" ~default:true
+      ~exes:CLOpt.[Toplevel]
+      "Do not show the results from experimental checks (note: some of them may contain many false \
+       alarms)"
+
+  and print_types =
    CLOpt.mk_bool ~deprecated:["print_types"] ~long:"print-types"
      ~default:(current_exe = CLOpt.Clang)
      "Print types in symbolic heaps"
+
+  and reports_include_ml_loc =
+    CLOpt.mk_bool ~deprecated:["with_infer_src_loc"] ~long:"reports-include-ml-loc"
+      "Include the location in the Infer source code from where reports are generated"
+
  and write_dotty =
    CLOpt.mk_bool ~deprecated:["dotty"] ~long:"write-dotty"
      "Produce dotty files for specs in the results directory"
@ -742,16 +760,25 @@ and debug, print_types, write_dotty =
  let debug =
    CLOpt.mk_bool_group ~deprecated:["debug"] ~long:"debug" ~short:"g"
      ~exes:CLOpt.[Analyze]
-      "Debug mode (also sets --print-types and --write-dotty)"
-      [print_types; write_dotty]
-  in
-  (debug, print_types, write_dotty)
+      "Debug mode (also sets --no-filtering, --print-types, --reports-include-ml-loc, \
+       --write-dotty)"
+      [print_types; reports_include_ml_loc; write_dotty]
+      [filtering]

  and debug_exceptions =
-  CLOpt.mk_bool ~long:"debug-exceptions"
-    ~exes:CLOpt.[Analyze]
-    "Generate lightweight debugging information: just print the internal exceptions during analysis"
-
+    CLOpt.mk_bool_group ~long:"debug-exceptions"
+      "Generate lightweight debugging information: just print the internal exceptions during \
+       analysis (also sets --no-filtering, --reports-include-ml-loc)"
+      [reports_include_ml_loc]
+      [filtering]
+  in (
+    debug,
+    debug_exceptions,
+    filtering,
+    print_types,
+    reports_include_ml_loc,
+    write_dotty
+  )
 and dependencies =
  CLOpt.mk_bool ~deprecated:["dependencies"] ~long:"dependencies"
    ~exes:CLOpt.[Java]
@ -847,12 +874,6 @@ and filter_paths =
  CLOpt.mk_bool ~long:"filter-paths" ~default:true
    "Filters specified in .inferconfig"

-and filtering =
-  CLOpt.mk_bool ~long:"filtering" ~short:"f" ~default:true
-    ~exes:CLOpt.[Toplevel]
-    "Do not show the results from experimental checks (note: some of them may contain many false \
-     alarms)"
-
 and flavors =
  CLOpt.mk_bool ~deprecated:["-use-flavors"] ~long:"flavors"
    ~exes:CLOpt.[Toplevel]
@ -1058,10 +1079,6 @@ and report_custom_error =
  CLOpt.mk_bool ~long:"report-custom-error"
    ""

-and reports_include_ml_loc =
-  CLOpt.mk_bool ~deprecated:["with_infer_src_loc"] ~long:"reports-include-ml-loc"
-    "Include the location in the Infer source code from where reports are generated"
-
 and results_dir =
  CLOpt.mk_path ~deprecated:["results_dir"; "-out"] ~long:"results-dir" ~short:"o"
    ~default:(init_work_dir // "infer-out")
--- a/infer/src/quandary/CppTrace.ml
+++ b/infer/src/quandary/CppTrace.ml
@ -196,7 +196,7 @@ include
          fmt
          "Error: %a -> %a via %a"
          Source.pp source Sink.pp sink Passthrough.Set.pp passthroughs in
-      let msg = "QUANDARY_TAINT_ERROR" in
+      let msg = Localise.to_string Localise.quandary_taint_error in
      let description = pp_to_string pp_error () in
      Exceptions.Checkers (msg, Localise.verbatim_desc description)

--- a/infer/src/quandary/JavaTrace.ml
+++ b/infer/src/quandary/JavaTrace.ml
@ -228,7 +228,7 @@ include
          fmt
          "Error: %a -> %a via %a"
          Source.pp source Sink.pp sink Passthrough.Set.pp passthroughs in
-      let msg = "QUANDARY_TAINT_ERROR" in
+      let msg = Localise.to_string Localise.quandary_taint_error in
      let description = pp_to_string pp_error () in
      Exceptions.Checkers (msg, Localise.verbatim_desc description)

--- a/infer/tests/build_systems/expected_outputs/locale_report.json
+++ b/infer/tests/build_systems/expected_outputs/locale_report.json
@ -2,6 +2,6 @@
  {
    "bug_type": "NULL_DEREFERENCE",
    "file": "utf8_in_function_names.c",
-    "procedure": "test_\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd"
+    "procedure": "test_\uc131\uacf5"
  }
 ]