Ignore encoding errors as appropriate when decoding input and encoding output

Summary:public This attempts to properly sanitise text input/output in the Python parts of infer. It does three things: - decode user input (coming from the command line or read from files) - encode infer output - in both cases, tolerate the fact that we may be using the wrong encoding, e.g. the locale says we're in ASCII, but the source code contains UTF-8. In many cases, such as error messages, it's safe to ignore these encoding mismatches. Also, since we use `from __future__ import unicode_literals`, it's safe to remove the `u'` prefix from many unicode literals. Reviewed By: martinoluca Differential Revision: D2960493 fb-gh-sync-id: 9812d7d shipit-source-id: 9812d7d
9 years ago · dc52ce8158
parent cd002e5c46
commit dc52ce8158
10 changed files with 81 additions and 95 deletions
--- a/infer/lib/python/infer
+++ b/infer/lib/python/infer
@ -1,5 +1,10 @@
 #!/usr/bin/env python2.7

+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
 import argparse
 import imp
 import json
@ -66,8 +71,8 @@ def split_args_to_parse():
    dd_index = \
        sys.argv.index(CMD_MARKER) if CMD_MARKER in sys.argv else len(sys.argv)
    cmd_raw = sys.argv[dd_index + 1:]
-    return (sys.argv[1:dd_index],
-            [arg.decode(config.LOCALE) for arg in cmd_raw])
+    return (map(utils.decode, sys.argv[1:dd_index]),
+            map(utils.decode, cmd_raw))


 def create_argparser(parents=[]):
@ -141,8 +146,9 @@ def main():
        logging.info('Capture phase was successful')
    elif capture_module_name is not None:
        # There was a command, but it's not supported
-        print('Command "{cmd}" not recognised'.format(
-            cmd='' if capture_module_name is None else capture_module_name))
+        utils.stdout('Command "{cmd}" not recognised'
+                     .format(cmd='' if capture_module_name is None
+                             else capture_module_name))
        global_argparser.print_help()
        sys.exit(1)
    else:
--- a/infer/lib/python/inferTraceBugs
+++ b/infer/lib/python/inferTraceBugs
@ -60,7 +60,7 @@ base_parser.add_argument('--html',


 def show_error_and_exit(err, show_help):
-    print(err)
+    print(utils.encode(err))
    if show_help:
        print('')
        base_parser.print_help()
@ -144,7 +144,7 @@ class Selector(object):
            #       <second line of report goes here>
            msg = issues.text_of_report(report) \
                        .replace('\n', '\n%s' % ((n_length + 2) * ' '))
-            print('%s. %s\n' % (str(n).rjust(n_length), msg))
+            utils.stdout('%s. %s\n' % (str(n).rjust(n_length), msg))
            n += 1

    def prompt_report(self):
@ -246,7 +246,7 @@ def get_remote_source_template():
            # these
            if project.endswith('.git'):
                project = project[:-len('.git')]
-            print('Detected GitHub project %s' % project)
+            utils.stdout('Detected GitHub project %s' % project)
            hash = subprocess.check_output(
                ['git',
                 'rev-parse',
@ -345,19 +345,21 @@ def generate_html_report(args, reports):
    for bug in sel:
        bug_trace_path = path_of_bug_number(traces_dir, i)
        with codecs.open(bug_trace_path, 'w',
-                         encoding=config.LOCALE) as bug_trace_file:
+                         encoding=config.LOCALE,
+                         errors='xmlcharrefreplace') as bug_trace_file:
            bug_trace_file.write(html_bug_trace(args, bug, i))
        i += 1

    remote_source_template = get_remote_source_template()
    bug_list_path = os.path.join(html_dir, 'index.html')
    with codecs.open(bug_list_path, 'w',
-                     encoding=config.LOCALE) as bug_list_file:
+                     encoding=config.LOCALE,
+                     errors='xmlcharrefreplace') as bug_list_file:
        bug_list_file.write(html_list_of_bugs(args,
                                              remote_source_template,
                                              sel))

-    print('Saved html report in:\n%s' % bug_list_path)
+    utils.stdout('Saved html report in:\n%s' % bug_list_path)


 def main():
@ -383,11 +385,11 @@ def main():
    report = sel.prompt_report()
    max_level = sel.prompt_level()

-    print(issues.text_of_report(report))
+    utils.stdout(issues.text_of_report(report))

    tracer = Tracer(args, max_level)
    tracer.build_report(report)
-    print(tracer)
+    utils.stdout(tracer)


 if __name__ == '__main__':
--- a/infer/lib/python/inferlib/analyze.py
+++ b/infer/lib/python/inferlib/analyze.py
@ -36,8 +36,8 @@ def get_infer_version():
        return subprocess.check_output([
            utils.get_cmd_in_bin_dir(INFER_ANALYZE_BINARY), '-version'])
    except:
-        print("Failed to run {0} binary, exiting".
-              format(INFER_ANALYZE_BINARY))
+        utils.stdout('Failed to run {0} binary, exiting'
+                     .format(INFER_ANALYZE_BINARY))
        sys.exit(os.EX_UNAVAILABLE)


@ -223,14 +223,14 @@ def clean(infer_out):


 def help_exit(message):
-    print(message)
+    utils.stdout(message)
    infer_parser.print_usage()
    exit(1)


 def run_command(cmd, debug_mode, javac_arguments, step, analyzer):
    if debug_mode:
-        print('\n{0}\n'.format(' '.join(cmd)))
+        utils.stdout('\n{0}\n'.format(' '.join(cmd)))
    try:
        return subprocess.check_call(cmd)
    except subprocess.CalledProcessError as e:
@ -297,7 +297,7 @@ class Infer:

    def clean_exit(self):
        if os.path.isdir(self.args.infer_out):
-            print('removing', self.args.infer_out)
+            utils.stdout('removing {}'.format(self.args.infer_out))
            shutil.rmtree(self.args.infer_out)
        exit(os.EX_OK)

@ -603,7 +603,7 @@ class Infer:
        if self.javac.args.version:
            if self.args.buck:
                key = self.args.analyzer
-                print(utils.infer_key(key), file=sys.stderr)
+                utils.stderr(utils.infer_key(key), errors="strict")
            else:
                return self.javac.run()
        else:
--- a/infer/lib/python/inferlib/capture/buck.py
+++ b/infer/lib/python/inferlib/capture/buck.py
@ -5,15 +5,20 @@
 # LICENSE file in the root directory of this source tree. An additional grant
 # of patent rights can be found in the PATENTS file in the same directory.

+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
 import argparse
 import json
 import logging
 import os
 import subprocess
 import traceback
-import util

 from inferlib import config, issues, utils, bucklib
+from . import util

 MODULE_NAME = __name__
 MODULE_DESCRIPTION = '''Run analysis of code built with a command like:
@ -143,8 +148,8 @@ class BuckAnalyzer:
        merged_results_path = os.path.join(self.args.infer_out,
                                           config.JSON_REPORT_FILENAME)
        utils.dump_json_to_path(all_results, merged_results_path)
-        print('Results saved in {results_path}'.format(
-            results_path=merged_results_path))
+        utils.stdout('Results saved in {results_path}'
+                     .format(results_path=merged_results_path))
        return os.EX_OK

    def capture_without_flavors(self):
--- a/infer/lib/python/inferlib/capture/util.py
+++ b/infer/lib/python/inferlib/capture/util.py
@ -7,13 +7,18 @@
 # LICENSE file in the root directory of this source tree. An additional grant
 # of patent rights can be found in the PATENTS file in the same directory.

+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
 import argparse
 import os
 import logging
 import subprocess
 import traceback

-from inferlib import analyze
+from inferlib import analyze, utils


 def create_infer_command(args, javac_arguments):
@ -40,7 +45,8 @@ def run_compilation_commands(cmds, clean_cmd):
    """
    #  TODO call it in parallel
    if len(cmds) == 0:
-        print('Nothing to compile. Try running `%s` first.' % clean_cmd)
+        utils.stdout('Nothing to compile. Try running `{}` first.'
+                     .format(clean_cmd))
        return os.EX_NOINPUT
    for cmd in cmds:
        if cmd.start() != os.EX_OK:
--- a/infer/lib/python/inferlib/capture/xcodebuild.py
+++ b/infer/lib/python/inferlib/capture/xcodebuild.py
@ -75,5 +75,5 @@ class XcodebuildCapture:
        except subprocess.CalledProcessError as exc:
            if self.args.debug:
                traceback.print_exc()
-            print(exc.output)
+            utils.stdout(exc.output)
            return exc.returncode
--- a/infer/lib/python/inferlib/issues.py
+++ b/infer/lib/python/inferlib/issues.py
@ -146,7 +146,7 @@ def clean_json(args, json_report):
    shutil.move(temporary_file, json_report)

 def _text_of_infer_loc(loc):
-    return u' (%s:%d:%d-%d:)' % (
+    return ' ({}:{}:{}-{}:)'.format(
        loc[JSON_INDEX_ISL_FILE],
        loc[JSON_INDEX_ISL_LNUM],
        loc[JSON_INDEX_ISL_CNUM],
@ -163,7 +163,7 @@ def text_of_report(report):
    infer_loc = ''
    if JSON_INDEX_INFER_SOURCE_LOC in report:
        infer_loc = _text_of_infer_loc(report[JSON_INDEX_INFER_SOURCE_LOC])
-    return u'%s:%d: %s: %s%s\n  %s' % (
+    return '%s:%d: %s: %s%s\n  %s' % (
        filename,
        line,
        kind.lower(),
@ -234,8 +234,9 @@ def print_and_save_errors(json_report, bugs_out):
    errors = utils.load_json_from_path(json_report)
    errors = filter(_is_user_visible, errors)
    text = _text_of_report_list(errors)
-    print(text.encode(config.LOCALE))
-    with codecs.open(bugs_out, 'w', encoding=config.LOCALE) as file_out:
+    utils.stdout(text)
+    with codecs.open(bugs_out, 'w',
+                     encoding=config.LOCALE, errors='replace') as file_out:
        file_out.write(text)


@ -312,5 +313,5 @@ def _should_report_json(analyzer, row):


 def _print_and_write(file_out, message):
-    print(message)
-    file_out.write(message + '\n')
+    utils.stdout(message)
+    file_out.write(utils.encode(message + '\n'))
--- a/infer/lib/python/inferlib/source.py
+++ b/infer/lib/python/inferlib/source.py
@ -78,14 +78,15 @@ def build_source_context(source_name, mode, report_line):
    n_length = len(str(end_line))
    line_number = 1
    s = ''
-    with codecs.open(source_name, 'r', encoding=config.LOCALE) as source_file:
+    with codecs.open(source_name, 'r',
+                     encoding=config.LOCALE, errors="replace") as source_file:
        for line in source_file:
            if start_line <= line_number <= end_line:
                num = str(line_number).zfill(n_length)
                caret = '  '
                if line_number == report_line:
                    caret = '> '
-                s += u'%s. %s%s' % (num, caret, line)
+                s += '%s. %s%s' % (num, caret, line)
            line_number += 1
    return _syntax_highlighting(source_name, mode, s)

--- a/infer/lib/python/inferlib/utils.py
+++ b/infer/lib/python/inferlib/utils.py
@ -84,52 +84,13 @@ def elapsed_time(start_time):


 def error(msg):
-    print(msg, file=sys.stderr)
+    print(encode(msg), file=sys.stderr)


 def get_cmd_in_bin_dir(binary_name):
    return os.path.join(config.BIN_DIRECTORY, binary_name)


-def write_cmd_streams_to_file(logfile, cmd=None, out=None, err=None):
-    with codecs.open(logfile, 'w', encoding=config.LOCALE) as log_filedesc:
-        if cmd:
-            log_filedesc.write(' '.join(cmd) + '\n')
-        if err is not None:
-            errors = str(err)
-            log_filedesc.write('\nSTDERR:\n')
-            log_filedesc.write(errors)
-        if out is not None:
-            output = str(out)
-            log_filedesc.write('\n\nSTDOUT:\n')
-            log_filedesc.write(output)
-
-
-def save_failed_command(
-        infer_out,
-        cmd,
-        message,
-        prefix='failed_',
-        out=None,
-        err=None):
-    cmd_filename = tempfile.mktemp(
-        '_' + message + ".txt",
-        prefix, infer_out
-    )
-    write_cmd_streams_to_file(cmd_filename, cmd=cmd, out=out, err=err)
-    logging.error('\n' + message + ' error saved in ' + cmd_filename)
-
-
-def run_command(cmd, debug_mode, infer_out, message, env=os.environ):
-    if debug_mode:
-        print('\n{0}\n'.format(' '.join(cmd)))
-    try:
-        return subprocess.check_call(cmd, env=env)
-    except subprocess.CalledProcessError as e:
-        save_failed_command(infer_out, cmd, message)
-        raise e
-
-
 def load_json_from_path(path):
    with codecs.open(path, 'r', encoding=config.LOCALE) as file_in:
        return json.load(file_in, encoding=config.LOCALE)
@ -143,7 +104,8 @@ def dump_json_to_path(
        separators=None,
        encoding=config.LOCALE,  # customized
        default=None, sort_keys=False, **kw):
-    with codecs.open(path, 'w', encoding=config.LOCALE) as file_out:
+    with codecs.open(path, 'w',
+                     encoding=config.LOCALE, errors='replace') as file_out:
        json.dump(data, file_out, skipkeys=skipkeys, ensure_ascii=ensure_ascii,
                  check_circular=check_circular, allow_nan=allow_nan, cls=cls,
                  indent=indent, separators=separators, encoding=encoding,
@ -307,24 +269,6 @@ def uncompress_gzip_file(gzip_file, out_dir):
            uncompressed_fd.close()


-def run_process(cmd, cwd=None, logfile=None):
-    # Input:
-    #    - command to execute
-    #    - current working directory to cd before running the cmd
-    #    - logfile where to dump stdout/stderr
-    # Output:
-    #    - exitcode of the executed process
-    p = subprocess.Popen(
-        cmd,
-        cwd=cwd,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE)
-    (out, err) = p.communicate()
-    if logfile:
-        write_cmd_streams_to_file(logfile, cmd=cmd, out=out, err=err)
-    return p.returncode
-
-
 def invoke_function_with_callbacks(
        func,
        args,
@ -346,6 +290,22 @@ def get_plural(_str, count):
    return '%d %s' % (count, plural_str)


+def decode(s, errors="replace"):
+    return s.decode(encoding=config.LOCALE, errors=errors)
+
+
+def encode(u, errors="replace"):
+    return u.encode(encoding=config.LOCALE, errors=errors)
+
+
+def stdout(s, errors="replace"):
+    print(encode(s, errors=errors))
+
+
+def stderr(s, errors="replace"):
+    print(encode(s, errors=errors), file=sys.stderr)
+
+
 class AbsolutePathAction(argparse.Action):
    """Convert a path from relative to absolute in the arg parser"""
    def __call__(self, parser, namespace, values, option_string=None):
--- a/scripts/build_integration_tests.py
+++ b/scripts/build_integration_tests.py
@ -6,6 +6,11 @@
 # LICENSE file in the root directory of this source tree. An additional grant
 # of patent rights can be found in the PATENTS file in the same directory.

+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
 import json
 import os
 import shutil
@ -126,14 +131,14 @@ def missing_errors(errors, patterns):
 def check_results(errors, patterns):
    unexpected = unexpected_errors(errors, patterns)
    if unexpected != []:
-        print('\nInfer found the following unexpected errors:')
+        utils.stderr('\nInfer found the following unexpected errors:')
        for e in unexpected:
-            print('\t{}\n'.format(string_of_error(e)))
+            utils.stderr('\t{}\n'.format(string_of_error(e)))
    missing = missing_errors(errors, patterns)
    if missing != []:
-        print('\nInfer did not find the following errors:')
+        utils.stderr('\nInfer did not find the following errors:')
        for p in missing:
-            print('\t{}\n'.format(string_of_error(p)))
+            utils.stderr('\t{}\n'.format(string_of_error(p)))
    assert unexpected == []
    assert missing == []