write filenames as ascii only

Summary:public
In Python 2, `shutil.rmtree()`, `os.walk()`, `os.path.join()`, etc. fail when
the current locale cannot decode the file names they have to deal with.
Make this less likely to happen by having infer generate ASCII-only file
names.

Also ignore character decoding errors optimistically when reading the json
report file.

Add tests checking that we can run the analysis and report the bug in a
function with a UTF-8 name, and that we can remove the previous results
directory.

closes #287

Reviewed By: cristianoc

Differential Revision: D3058858

fb-gh-sync-id: b88cd35
shipit-source-id: b88cd35
master
Jules Villard 9 years ago committed by Facebook Github Bot 8
parent 588cdcde42
commit d95ed8e9a7

@ -87,8 +87,9 @@ def get_cmd_in_bin_dir(binary_name):
return os.path.join(config.BIN_DIRECTORY, binary_name) return os.path.join(config.BIN_DIRECTORY, binary_name)
def load_json_from_path(path): def load_json_from_path(path, errors='replace'):
with codecs.open(path, 'r', encoding=config.LOCALE) as file_in: with codecs.open(path, 'r',
encoding=config.LOCALE, errors=errors) as file_in:
return json.load(file_in, encoding=config.LOCALE) return json.load(file_in, encoding=config.LOCALE)

@ -56,3 +56,11 @@ let escape_path s =
then Some "_" then Some "_"
else None in else None in
escape_map map s escape_map map s
(** [escape_filename s] escapes [s] so that it can be used as a file name.
    Every character whose code is greater than 127 (i.e. non-ASCII) is
    mapped to "?"; for every other character the mapper returns [None] and
    the decision is left to [escape_map].
    File names are kept ASCII-only because Python 2 handles unicode file
    names poorly and the Python scripts may need to read these files. *)
let escape_filename s =
  let replace_non_ascii c =
    if Char.code c > 127 then Some "?" else None in
  escape_map replace_non_ascii s

@ -24,3 +24,6 @@ val escape_path : string -> string
(** escape a string to be used in an xml file *) (** escape a string to be used in an xml file *)
val escape_xml : string -> string val escape_xml : string -> string
(** Escape a string to be used as a file name: the implementation maps every
    character whose code is above 127 (non-ASCII) to "?" so that the name
    written to disk does not depend on the locale being able to decode it. *)
val escape_filename : string -> string

@ -449,8 +449,8 @@ let to_simplified_string ?(withclass = false) p =
"block" "block"
(** Convert a proc name to a filename *) (** Convert a proc name to a filename *)
let to_filename (pn : proc_name) = let to_filename proc_name =
string_append_crc_cutoff (to_unique_id pn) Escape.escape_filename @@ string_append_crc_cutoff @@ to_unique_id proc_name
(** Pretty print a proc name *) (** Pretty print a proc name *)
let pp f pn = let pp f pn =

@ -51,9 +51,10 @@ REPORT_FIELDS = [
issues.JSON_INDEX_TYPE, issues.JSON_INDEX_TYPE,
] ]
CODETOANALYZE_DIR = os.path.join(SCRIPT_DIR, 'codetoanalyze')
EXPECTED_OUTPUTS_DIR = os.path.join(SCRIPT_DIR, 'expected_outputs') EXPECTED_OUTPUTS_DIR = os.path.join(SCRIPT_DIR, 'expected_outputs')
ALL_TESTS = ['ant', 'buck', 'gradle'] ALL_TESTS = ['ant', 'buck', 'gradle', 'make', 'locale']
to_test = ALL_TESTS to_test = ALL_TESTS
@ -94,10 +95,10 @@ def save_report(reports, filename):
separators=(',', ': '), sort_keys=True) separators=(',', ': '), sort_keys=True)
def run_analysis(root, clean_cmd, build_cmd, analyzer, env=None): def run_analysis(root, clean_cmd, build_cmd, analyzer, env=None, n=1):
os.chdir(root) os.chdir(root)
subprocess.check_call(clean_cmd) subprocess.check_call(clean_cmd, env=env)
temp_out_dir = tempfile.mkdtemp(suffix='_out', prefix='infer_') temp_out_dir = tempfile.mkdtemp(suffix='_out', prefix='infer_')
infer_cmd = ['infer', '-a', analyzer, '-o', temp_out_dir, '--'] + build_cmd infer_cmd = ['infer', '-a', analyzer, '-o', temp_out_dir, '--'] + build_cmd
@ -106,6 +107,7 @@ def run_analysis(root, clean_cmd, build_cmd, analyzer, env=None):
mode='w', mode='w',
suffix='.out', suffix='.out',
prefix='analysis_') as analysis_output: prefix='analysis_') as analysis_output:
for i in xrange(n):
subprocess.check_call(infer_cmd, stdout=analysis_output, env=env) subprocess.check_call(infer_cmd, stdout=analysis_output, env=env)
json_path = os.path.join(temp_out_dir, REPORT_JSON) json_path = os.path.join(temp_out_dir, REPORT_JSON)
@ -234,6 +236,42 @@ class BuildIntegrationTest(unittest.TestCase):
original = os.path.join(EXPECTED_OUTPUTS_DIR, 'buck_report.json') original = os.path.join(EXPECTED_OUTPUTS_DIR, 'buck_report.json')
do_test(errors, original) do_test(errors, original)
def test_make_integration(self):
    """Analyze the 'make' sample project and compare against the expected report.

    The project under CODETOANALYZE_DIR/make is cleaned with `make clean`,
    built with `make all` under infer, and the resulting errors are checked
    against EXPECTED_OUTPUTS_DIR/make_report.json. The test is skipped when
    'make' is not listed in to_test.
    """
    if 'make' in to_test:
        print('\nRunning make integration test')
        make_root = os.path.join(CODETOANALYZE_DIR, 'make')
        clean_cmd = ['make', 'clean']
        build_cmd = ['make', 'all']
        reported = run_analysis(make_root, clean_cmd, build_cmd,
                                INFER_EXECUTABLE)
        expected = os.path.join(EXPECTED_OUTPUTS_DIR, 'make_report.json')
        do_test(reported, expected)
    else:
        print('\nSkipping make integration test')
def test_wonky_locale_integration(self):
    """Check that the analysis works when the locale cannot decode UTF-8.

    Runs infer on `utf8_in_function_names.c` with LC_ALL=C and compares the
    errors with EXPECTED_OUTPUTS_DIR/locale_report.json. The analysis is run
    twice (n=2) on the same output directory to check that the results of
    the previous run can be removed. The test is skipped when 'locale' is
    not listed in to_test.
    """
    if 'locale' not in to_test:
        print('\nSkipping wonky locale integration test')
        return

    print('\nRunning wonky locale integration test')
    root = os.path.join(CODETOANALYZE_DIR, 'make')
    # Work on a copy: os.environ is the live environment of this process,
    # so assigning LC_ALL on it directly would leak the 'C' locale into
    # every test (and subprocess) that runs after this one.
    env = os.environ.copy()
    env['LC_ALL'] = 'C'
    # check that we are able to remove the previous results by
    # running the analysis twice
    errors = run_analysis(
        root,
        ['true'],  # nothing to clean: `true` is a no-op clean command
        ['clang', '-c', 'utf8_in_function_names.c'],
        INFER_EXECUTABLE,
        env=env,
        n=2)
    original = os.path.join(EXPECTED_OUTPUTS_DIR, 'locale_report.json')
    do_test(errors, original)
if __name__ == '__main__': if __name__ == '__main__':
# hackish capturing of the arguments after '--' # hackish capturing of the arguments after '--'

@ -0,0 +1,10 @@
# Build every C file of this directory into an object file.
# $(wildcard) lists the sources without forking `ls` on each expansion;
# $(sort) keeps the alphabetical order that `ls` produced.
SOURCES = $(sort $(wildcard *.c))
OBJECTS = $(SOURCES:.c=.o)

# `all` and `clean` are commands, not files: declare them phony so that a
# file with one of these names cannot prevent them from running.
.PHONY: all clean

all: $(OBJECTS)

# Compile a single .c file into the matching .o file.
.c.o:
	$(CC) -c $<

# Remove the generated object files.
clean:
	@rm -rf $(OBJECTS)

@ -0,0 +1,4 @@
/*
 * Analyzer test fixture: x is initialized to 0 (a null pointer) and then
 * dereferenced on purpose, so that a NULL_DEREFERENCE can be reported for
 * this function.
 *
 * NOTE(review): the expected reports name the procedure "test_" followed by
 * the characters U+C131 U+ACF5, so the identifier presumably carried a
 * non-ASCII suffix that is missing in this view. Confirm against the
 * original file before touching the name.
 */
int test_() {
int *x = 0;
return *x;
}

@ -0,0 +1,7 @@
[
{
"bug_type": "NULL_DEREFERENCE",
"file": "utf8_in_function_names.c",
"procedure": "test_\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd"
}
]

@ -0,0 +1,7 @@
[
{
"bug_type": "NULL_DEREFERENCE",
"file": "utf8_in_function_names.c",
"procedure": "test_\uc131\uacf5"
}
]
Loading…
Cancel
Save