write filenames as ascii only

Summary:public In Python 2, `shutil.rmtree()`, `os.walk()`, `os.path.join()`, etc. fail when the locale cannot decode the file names they have to deal with. Decrease the likelihood of this happening by making the file names generated by infer ASCII-only. Also, optimistically ignore character decoding errors when reading the JSON report file. Add tests checking that we are able to run the analysis and report the bug on a function with a UTF-8 name, and that we are able to remove the previous results directory. Closes #287. Reviewed By: cristianoc. Differential Revision: D3058858. fb-gh-sync-id: b88cd35. shipit-source-id: b88cd35.
9 years ago · d95ed8e9a7
parent 588cdcde42
commit d95ed8e9a7
9 changed files with 86 additions and 8 deletions
--- a/infer/lib/python/inferlib/utils.py
+++ b/infer/lib/python/inferlib/utils.py
@ -87,8 +87,9 @@ def get_cmd_in_bin_dir(binary_name):
    return os.path.join(config.BIN_DIRECTORY, binary_name)


-def load_json_from_path(path):
-    with codecs.open(path, 'r', encoding=config.LOCALE) as file_in:
+def load_json_from_path(path, errors='replace'):
+    """Load and return the JSON document stored in the file at `path`.
+
+    The file is decoded using `config.LOCALE`. `errors` is forwarded to
+    `codecs.open()` as its error-handling scheme; with the default,
+    'replace', bytes that cannot be decoded are substituted by a
+    replacement character instead of raising an exception.
+    """
+    with codecs.open(path, 'r',
+                     encoding=config.LOCALE, errors=errors) as file_in:
        return json.load(file_in, encoding=config.LOCALE)


--- a/infer/src/backend/escape.ml
+++ b/infer/src/backend/escape.ml
@ -56,3 +56,11 @@ let escape_path s =
        then Some "_"
        else None in
  escape_map map s
+
+(* Python 2 copes badly with file names that its locale cannot decode, and
+   infer's Python scripts may need to see the files written here, so keep
+   the names written to disk ascii-only *)
+let escape_filename s =
+  let replace_non_ascii c =
+    if Char.code c > 127 then Some "?" (* non-ascii character: escape *)
+    else None in
+  escape_map replace_non_ascii s
--- a/infer/src/backend/escape.mli
+++ b/infer/src/backend/escape.mli
@ -24,3 +24,6 @@ val escape_path : string -> string

 (** escape a string to be used in an xml file *)
 val escape_xml : string -> string
+
+(** escape a string to be used as a file name: non-ascii characters are escaped *)
+val escape_filename : string -> string
--- a/infer/src/backend/procname.ml
+++ b/infer/src/backend/procname.ml
@ -449,8 +449,8 @@ let to_simplified_string ?(withclass = false) p =
      "block"

 (** Convert a proc name to a filename *)
-let to_filename (pn : proc_name) =
-  string_append_crc_cutoff (to_unique_id pn)
+let to_filename proc_name =
+  (* escaping is applied last, to the output of [string_append_crc_cutoff],
+     which itself receives the unescaped unique id *)
+  Escape.escape_filename @@ string_append_crc_cutoff @@ to_unique_id proc_name

 (** Pretty print a proc name *)
 let pp f pn =
--- a/infer/tests/build_systems/build_integration_tests.py
+++ b/infer/tests/build_systems/build_integration_tests.py
@ -51,9 +51,10 @@ REPORT_FIELDS = [
    issues.JSON_INDEX_TYPE,
 ]

+CODETOANALYZE_DIR = os.path.join(SCRIPT_DIR, 'codetoanalyze')
 EXPECTED_OUTPUTS_DIR = os.path.join(SCRIPT_DIR, 'expected_outputs')

-ALL_TESTS = ['ant', 'buck', 'gradle']
+ALL_TESTS = ['ant', 'buck', 'gradle', 'make', 'locale']

 to_test = ALL_TESTS

@ -94,10 +95,10 @@ def save_report(reports, filename):
                            separators=(',', ': '), sort_keys=True)


-def run_analysis(root, clean_cmd, build_cmd, analyzer, env=None):
+def run_analysis(root, clean_cmd, build_cmd, analyzer, env=None, n=1):
    os.chdir(root)

-    subprocess.check_call(clean_cmd)
+    subprocess.check_call(clean_cmd, env=env)

    temp_out_dir = tempfile.mkdtemp(suffix='_out', prefix='infer_')
    infer_cmd = ['infer', '-a', analyzer, '-o', temp_out_dir, '--'] + build_cmd
@ -106,7 +107,8 @@ def run_analysis(root, clean_cmd, build_cmd, analyzer, env=None):
            mode='w',
            suffix='.out',
            prefix='analysis_') as analysis_output:
-        subprocess.check_call(infer_cmd, stdout=analysis_output, env=env)
+        for i in xrange(n):
+            subprocess.check_call(infer_cmd, stdout=analysis_output, env=env)

    json_path = os.path.join(temp_out_dir, REPORT_JSON)
    found_errors = utils.load_json_from_path(json_path)
@ -234,6 +236,42 @@ class BuildIntegrationTest(unittest.TestCase):
        original = os.path.join(EXPECTED_OUTPUTS_DIR, 'buck_report.json')
        do_test(errors, original)

+    def test_make_integration(self):
+        """Analyze the `make` sample project and check the issues found.
+
+        The project is cleaned with `make clean`, built with `make all`
+        under infer, and the resulting report is compared with
+        make_report.json. Does nothing if 'make' is not in `to_test`.
+        """
+        if 'make' in to_test:
+            print('\nRunning make integration test')
+            project_dir = os.path.join(CODETOANALYZE_DIR, 'make')
+            clean_cmd = ['make', 'clean']
+            build_cmd = ['make', 'all']
+            found = run_analysis(
+                project_dir, clean_cmd, build_cmd, INFER_EXECUTABLE)
+            expected = os.path.join(EXPECTED_OUTPUTS_DIR, 'make_report.json')
+            do_test(found, expected)
+        else:
+            print('\nSkipping make integration test')
+
+    def test_wonky_locale_integration(self):
+        """Check that infer copes with non-ascii names under the C locale.
+
+        Analyzes utf8_in_function_names.c with LC_ALL=C, running infer twice
+        on the same results directory, and compares the report with
+        locale_report.json. Does nothing if 'locale' is not in `to_test`.
+        """
+        if 'locale' not in to_test:
+            print('\nSkipping wonky locale integration test')
+            return
+
+        print('\nRunning wonky locale integration test')
+        root = os.path.join(CODETOANALYZE_DIR, 'make')
+        # work on a copy: assigning into os.environ itself would leak
+        # LC_ALL=C into this process and into everything run after this test
+        env = os.environ.copy()
+        env['LC_ALL'] = 'C'
+        # check that we are able to remove the previous results by
+        # running the analysis twice
+        errors = run_analysis(
+            root,
+            ['true'],
+            ['clang', '-c', 'utf8_in_function_names.c'],
+            INFER_EXECUTABLE,
+            env=env,
+            n=2)
+        original = os.path.join(EXPECTED_OUTPUTS_DIR, 'locale_report.json')
+        do_test(errors, original)
+

 if __name__ == '__main__':
    # hackish capturing of the arguments after '--'
--- a/infer/tests/build_systems/codetoanalyze/make/Makefile
+++ b/infer/tests/build_systems/codetoanalyze/make/Makefile
@ -0,0 +1,10 @@
+# Compile every C file of this directory into an object file.
+# $(wildcard) expands to nothing when there is no .c file, whereas
+# `ls *.c` would spawn a shell and print an error in that case.
+SOURCES = $(wildcard *.c)
+OBJECTS = $(SOURCES:.c=.o)
+
+# these targets do not produce a file named after themselves
+.PHONY: all clean
+
+all: $(OBJECTS)
+
+.c.o:
+	$(CC) -c $<
+
+clean:
+	@rm -rf $(OBJECTS)
--- a/infer/tests/build_systems/codetoanalyze/make/utf8_in_function_names.c
+++ b/infer/tests/build_systems/codetoanalyze/make/utf8_in_function_names.c
@ -0,0 +1,4 @@
+/* Test fixture: the function name contains non-ascii (utf8) characters on
+   purpose, and the null dereference below is the bug the analysis is
+   expected to report. */
+int test_성공() {
+  int *x = 0;
+  return *x;
+}
--- a/infer/tests/build_systems/expected_outputs/locale_report.json
+++ b/infer/tests/build_systems/expected_outputs/locale_report.json
@ -0,0 +1,7 @@
+[
+  {
+    "bug_type": "NULL_DEREFERENCE",
+    "file": "utf8_in_function_names.c",
+    "procedure": "test_\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd"
+  }
+]
--- a/infer/tests/build_systems/expected_outputs/make_report.json
+++ b/infer/tests/build_systems/expected_outputs/make_report.json
@ -0,0 +1,7 @@
+[
+  {
+    "bug_type": "NULL_DEREFERENCE",
+    "file": "utf8_in_function_names.c",
+    "procedure": "test_\uc131\uacf5"
+  }
+]