From dc52ce8158c51e22e16c36997324f6b089d6b560 Mon Sep 17 00:00:00 2001 From: Jules Villard Date: Tue, 23 Feb 2016 11:48:47 -0800 Subject: [PATCH] ignore errors as appropriate on input decode/output encode Summary:public This attempts to properly sanitise text input/output in the Python parts of infer. Do three things: - decode user input (coming from the command-line or reading files) - encode infer output - in both cases, we may be using the wrong encoding, e.g. locale says we're in ascii, but the source code contains utf-8. In many cases, like error messages, it's safe to ignore these encoding mismatches. Also, since we use `from __future__ import unicode_literals`, it's safe to remove `u'` prefixes on many unicode literals. Reviewed By: martinoluca Differential Revision: D2960493 fb-gh-sync-id: 9812d7d shipit-source-id: 9812d7d --- infer/lib/python/infer | 14 +++- infer/lib/python/inferTraceBugs | 18 +++-- infer/lib/python/inferlib/analyze.py | 12 +-- infer/lib/python/inferlib/capture/buck.py | 11 ++- infer/lib/python/inferlib/capture/util.py | 10 ++- .../lib/python/inferlib/capture/xcodebuild.py | 2 +- infer/lib/python/inferlib/issues.py | 13 ++-- infer/lib/python/inferlib/source.py | 5 +- infer/lib/python/inferlib/utils.py | 78 +++++-------------- scripts/build_integration_tests.py | 13 +++- 10 files changed, 81 insertions(+), 95 deletions(-) diff --git a/infer/lib/python/infer b/infer/lib/python/infer index 817dbd245..216e7a832 100755 --- a/infer/lib/python/infer +++ b/infer/lib/python/infer @@ -1,5 +1,10 @@ #!/usr/bin/env python2.7 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + import argparse import imp import json @@ -66,8 +71,8 @@ def split_args_to_parse(): dd_index = \ sys.argv.index(CMD_MARKER) if CMD_MARKER in sys.argv else len(sys.argv) cmd_raw = sys.argv[dd_index + 1:] - return (sys.argv[1:dd_index], - [arg.decode(config.LOCALE) for arg in cmd_raw]) + 
return (map(utils.decode, sys.argv[1:dd_index]), + map(utils.decode, cmd_raw)) def create_argparser(parents=[]): @@ -141,8 +146,9 @@ def main(): logging.info('Capture phase was successful') elif capture_module_name is not None: # There was a command, but it's not supported - print('Command "{cmd}" not recognised'.format( - cmd='' if capture_module_name is None else capture_module_name)) + utils.stdout('Command "{cmd}" not recognised' + .format(cmd='' if capture_module_name is None + else capture_module_name)) global_argparser.print_help() sys.exit(1) else: diff --git a/infer/lib/python/inferTraceBugs b/infer/lib/python/inferTraceBugs index 7ad0c2af7..fa93d782a 100755 --- a/infer/lib/python/inferTraceBugs +++ b/infer/lib/python/inferTraceBugs @@ -60,7 +60,7 @@ base_parser.add_argument('--html', def show_error_and_exit(err, show_help): - print(err) + print(utils.encode(err)) if show_help: print('') base_parser.print_help() @@ -144,7 +144,7 @@ class Selector(object): # msg = issues.text_of_report(report) \ .replace('\n', '\n%s' % ((n_length + 2) * ' ')) - print('%s. %s\n' % (str(n).rjust(n_length), msg)) + utils.stdout('%s. 
%s\n' % (str(n).rjust(n_length), msg)) n += 1 def prompt_report(self): @@ -246,7 +246,7 @@ def get_remote_source_template(): # these if project.endswith('.git'): project = project[:-len('.git')] - print('Detected GitHub project %s' % project) + utils.stdout('Detected GitHub project %s' % project) hash = subprocess.check_output( ['git', 'rev-parse', @@ -345,19 +345,21 @@ def generate_html_report(args, reports): for bug in sel: bug_trace_path = path_of_bug_number(traces_dir, i) with codecs.open(bug_trace_path, 'w', - encoding=config.LOCALE) as bug_trace_file: + encoding=config.LOCALE, + errors='xmlcharrefreplace') as bug_trace_file: bug_trace_file.write(html_bug_trace(args, bug, i)) i += 1 remote_source_template = get_remote_source_template() bug_list_path = os.path.join(html_dir, 'index.html') with codecs.open(bug_list_path, 'w', - encoding=config.LOCALE) as bug_list_file: + encoding=config.LOCALE, + errors='xmlcharrefreplace') as bug_list_file: bug_list_file.write(html_list_of_bugs(args, remote_source_template, sel)) - print('Saved html report in:\n%s' % bug_list_path) + utils.stdout('Saved html report in:\n%s' % bug_list_path) def main(): @@ -383,11 +385,11 @@ def main(): report = sel.prompt_report() max_level = sel.prompt_level() - print(issues.text_of_report(report)) + utils.stdout(issues.text_of_report(report)) tracer = Tracer(args, max_level) tracer.build_report(report) - print(tracer) + utils.stdout(tracer) if __name__ == '__main__': diff --git a/infer/lib/python/inferlib/analyze.py b/infer/lib/python/inferlib/analyze.py index 4b1eb701e..a098d2e86 100644 --- a/infer/lib/python/inferlib/analyze.py +++ b/infer/lib/python/inferlib/analyze.py @@ -36,8 +36,8 @@ def get_infer_version(): return subprocess.check_output([ utils.get_cmd_in_bin_dir(INFER_ANALYZE_BINARY), '-version']) except: - print("Failed to run {0} binary, exiting". 
- format(INFER_ANALYZE_BINARY)) + utils.stdout('Failed to run {0} binary, exiting' + .format(INFER_ANALYZE_BINARY)) sys.exit(os.EX_UNAVAILABLE) @@ -223,14 +223,14 @@ def clean(infer_out): def help_exit(message): - print(message) + utils.stdout(message) infer_parser.print_usage() exit(1) def run_command(cmd, debug_mode, javac_arguments, step, analyzer): if debug_mode: - print('\n{0}\n'.format(' '.join(cmd))) + utils.stdout('\n{0}\n'.format(' '.join(cmd))) try: return subprocess.check_call(cmd) except subprocess.CalledProcessError as e: @@ -297,7 +297,7 @@ class Infer: def clean_exit(self): if os.path.isdir(self.args.infer_out): - print('removing', self.args.infer_out) + utils.stdout('removing {}'.format(self.args.infer_out)) shutil.rmtree(self.args.infer_out) exit(os.EX_OK) @@ -603,7 +603,7 @@ class Infer: if self.javac.args.version: if self.args.buck: key = self.args.analyzer - print(utils.infer_key(key), file=sys.stderr) + utils.stderr(utils.infer_key(key), errors="strict") else: return self.javac.run() else: diff --git a/infer/lib/python/inferlib/capture/buck.py b/infer/lib/python/inferlib/capture/buck.py index 57a8eb9f6..ae72d7218 100644 --- a/infer/lib/python/inferlib/capture/buck.py +++ b/infer/lib/python/inferlib/capture/buck.py @@ -5,15 +5,20 @@ # LICENSE file in the root directory of this source tree. An additional grant # of patent rights can be found in the PATENTS file in the same directory. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + import argparse import json import logging import os import subprocess import traceback -import util from inferlib import config, issues, utils, bucklib +from . 
import util MODULE_NAME = __name__ MODULE_DESCRIPTION = '''Run analysis of code built with a command like: @@ -143,8 +148,8 @@ class BuckAnalyzer: merged_results_path = os.path.join(self.args.infer_out, config.JSON_REPORT_FILENAME) utils.dump_json_to_path(all_results, merged_results_path) - print('Results saved in {results_path}'.format( - results_path=merged_results_path)) + utils.stdout('Results saved in {results_path}' + .format(results_path=merged_results_path)) return os.EX_OK def capture_without_flavors(self): diff --git a/infer/lib/python/inferlib/capture/util.py b/infer/lib/python/inferlib/capture/util.py index a0d645f62..04a05ac86 100644 --- a/infer/lib/python/inferlib/capture/util.py +++ b/infer/lib/python/inferlib/capture/util.py @@ -7,13 +7,18 @@ # LICENSE file in the root directory of this source tree. An additional grant # of patent rights can be found in the PATENTS file in the same directory. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + import argparse import os import logging import subprocess import traceback -from inferlib import analyze +from inferlib import analyze, utils def create_infer_command(args, javac_arguments): @@ -40,7 +45,8 @@ def run_compilation_commands(cmds, clean_cmd): """ # TODO call it in parallel if len(cmds) == 0: - print('Nothing to compile. Try running `%s` first.' % clean_cmd) + utils.stdout('Nothing to compile. Try running `{}` first.' 
+ .format(clean_cmd)) return os.EX_NOINPUT for cmd in cmds: if cmd.start() != os.EX_OK: diff --git a/infer/lib/python/inferlib/capture/xcodebuild.py b/infer/lib/python/inferlib/capture/xcodebuild.py index 70191c46f..673a740e0 100644 --- a/infer/lib/python/inferlib/capture/xcodebuild.py +++ b/infer/lib/python/inferlib/capture/xcodebuild.py @@ -75,5 +75,5 @@ class XcodebuildCapture: except subprocess.CalledProcessError as exc: if self.args.debug: traceback.print_exc() - print(exc.output) + utils.stdout(exc.output) return exc.returncode diff --git a/infer/lib/python/inferlib/issues.py b/infer/lib/python/inferlib/issues.py index 12097ed21..aa8926437 100644 --- a/infer/lib/python/inferlib/issues.py +++ b/infer/lib/python/inferlib/issues.py @@ -146,7 +146,7 @@ def clean_json(args, json_report): shutil.move(temporary_file, json_report) def _text_of_infer_loc(loc): - return u' (%s:%d:%d-%d:)' % ( + return ' ({}:{}:{}-{}:)'.format( loc[JSON_INDEX_ISL_FILE], loc[JSON_INDEX_ISL_LNUM], loc[JSON_INDEX_ISL_CNUM], @@ -163,7 +163,7 @@ def text_of_report(report): infer_loc = '' if JSON_INDEX_INFER_SOURCE_LOC in report: infer_loc = _text_of_infer_loc(report[JSON_INDEX_INFER_SOURCE_LOC]) - return u'%s:%d: %s: %s%s\n %s' % ( + return '%s:%d: %s: %s%s\n %s' % ( filename, line, kind.lower(), @@ -234,8 +234,9 @@ def print_and_save_errors(json_report, bugs_out): errors = utils.load_json_from_path(json_report) errors = filter(_is_user_visible, errors) text = _text_of_report_list(errors) - print(text.encode(config.LOCALE)) - with codecs.open(bugs_out, 'w', encoding=config.LOCALE) as file_out: + utils.stdout(text) + with codecs.open(bugs_out, 'w', + encoding=config.LOCALE, errors='replace') as file_out: file_out.write(text) @@ -312,5 +313,5 @@ def _should_report_json(analyzer, row): def _print_and_write(file_out, message): - print(message) - file_out.write(message + '\n') + utils.stdout(message) + file_out.write(utils.encode(message + '\n')) diff --git a/infer/lib/python/inferlib/source.py 
b/infer/lib/python/inferlib/source.py index f748b3fff..8f2ea27aa 100644 --- a/infer/lib/python/inferlib/source.py +++ b/infer/lib/python/inferlib/source.py @@ -78,14 +78,15 @@ def build_source_context(source_name, mode, report_line): n_length = len(str(end_line)) line_number = 1 s = '' - with codecs.open(source_name, 'r', encoding=config.LOCALE) as source_file: + with codecs.open(source_name, 'r', + encoding=config.LOCALE, errors="replace") as source_file: for line in source_file: if start_line <= line_number <= end_line: num = str(line_number).zfill(n_length) caret = ' ' if line_number == report_line: caret = '> ' - s += u'%s. %s%s' % (num, caret, line) + s += '%s. %s%s' % (num, caret, line) line_number += 1 return _syntax_highlighting(source_name, mode, s) diff --git a/infer/lib/python/inferlib/utils.py b/infer/lib/python/inferlib/utils.py index 5a3f72aa5..051015959 100644 --- a/infer/lib/python/inferlib/utils.py +++ b/infer/lib/python/inferlib/utils.py @@ -84,52 +84,13 @@ def elapsed_time(start_time): def error(msg): - print(msg, file=sys.stderr) + print(encode(msg), file=sys.stderr) def get_cmd_in_bin_dir(binary_name): return os.path.join(config.BIN_DIRECTORY, binary_name) -def write_cmd_streams_to_file(logfile, cmd=None, out=None, err=None): - with codecs.open(logfile, 'w', encoding=config.LOCALE) as log_filedesc: - if cmd: - log_filedesc.write(' '.join(cmd) + '\n') - if err is not None: - errors = str(err) - log_filedesc.write('\nSTDERR:\n') - log_filedesc.write(errors) - if out is not None: - output = str(out) - log_filedesc.write('\n\nSTDOUT:\n') - log_filedesc.write(output) - - -def save_failed_command( - infer_out, - cmd, - message, - prefix='failed_', - out=None, - err=None): - cmd_filename = tempfile.mktemp( - '_' + message + ".txt", - prefix, infer_out - ) - write_cmd_streams_to_file(cmd_filename, cmd=cmd, out=out, err=err) - logging.error('\n' + message + ' error saved in ' + cmd_filename) - - -def run_command(cmd, debug_mode, infer_out, message, 
env=os.environ): - if debug_mode: - print('\n{0}\n'.format(' '.join(cmd))) - try: - return subprocess.check_call(cmd, env=env) - except subprocess.CalledProcessError as e: - save_failed_command(infer_out, cmd, message) - raise e - - def load_json_from_path(path): with codecs.open(path, 'r', encoding=config.LOCALE) as file_in: return json.load(file_in, encoding=config.LOCALE) @@ -143,7 +104,8 @@ def dump_json_to_path( separators=None, encoding=config.LOCALE, # customized default=None, sort_keys=False, **kw): - with codecs.open(path, 'w', encoding=config.LOCALE) as file_out: + with codecs.open(path, 'w', + encoding=config.LOCALE, errors='replace') as file_out: json.dump(data, file_out, skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, cls=cls, indent=indent, separators=separators, encoding=encoding, @@ -307,24 +269,6 @@ def uncompress_gzip_file(gzip_file, out_dir): uncompressed_fd.close() -def run_process(cmd, cwd=None, logfile=None): - # Input: - # - command to execute - # - current working directory to cd before running the cmd - # - logfile where to dump stdout/stderr - # Output: - # - exitcode of the executed process - p = subprocess.Popen( - cmd, - cwd=cwd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - (out, err) = p.communicate() - if logfile: - write_cmd_streams_to_file(logfile, cmd=cmd, out=out, err=err) - return p.returncode - - def invoke_function_with_callbacks( func, args, @@ -346,6 +290,22 @@ def get_plural(_str, count): return '%d %s' % (count, plural_str) +def decode(s, errors="replace"): + return s.decode(encoding=config.LOCALE, errors=errors) + + +def encode(u, errors="replace"): + return u.encode(encoding=config.LOCALE, errors=errors) + + +def stdout(s, errors="replace"): + print(encode(s, errors=errors)) + + +def stderr(s, errors="replace"): + print(encode(s, errors=errors), file=sys.stderr) + + class AbsolutePathAction(argparse.Action): """Convert a path from relative to absolute in the arg 
parser""" def __call__(self, parser, namespace, values, option_string=None): diff --git a/scripts/build_integration_tests.py b/scripts/build_integration_tests.py index 22ab3db96..b5cc121ef 100755 --- a/scripts/build_integration_tests.py +++ b/scripts/build_integration_tests.py @@ -6,6 +6,11 @@ # LICENSE file in the root directory of this source tree. An additional grant # of patent rights can be found in the PATENTS file in the same directory. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + import json import os import shutil @@ -126,14 +131,14 @@ def missing_errors(errors, patterns): def check_results(errors, patterns): unexpected = unexpected_errors(errors, patterns) if unexpected != []: - print('\nInfer found the following unexpected errors:') + utils.stderr('\nInfer found the following unexpected errors:') for e in unexpected: - print('\t{}\n'.format(string_of_error(e))) + utils.stderr('\t{}\n'.format(string_of_error(e))) missing = missing_errors(errors, patterns) if missing != []: - print('\nInfer did not find the following errors:') + utils.stderr('\nInfer did not find the following errors:') for p in missing: - print('\t{}\n'.format(string_of_error(p))) + utils.stderr('\t{}\n'.format(string_of_error(p))) assert unexpected == [] assert missing == []