ignore errors as appropriate on input decode/output encode

Summary:public
This attempts to properly sanitise text input/output in the Python parts of
infer. It does three things:
- decode user input (coming from the command line or read from files)
- encode infer output before printing or writing it
- in both cases, tolerate using the wrong encoding, e.g. the locale says we're
  in ASCII but the source code contains UTF-8. In many cases, such as error
  messages, it's safe to ignore these encoding mismatches.

Also, since we use `from __future__ import unicode_literals`, it's safe to
remove the `u'` prefix on many unicode literals.

Reviewed By: martinoluca

Differential Revision: D2960493

fb-gh-sync-id: 9812d7d
shipit-source-id: 9812d7d
master
Jules Villard 9 years ago committed by facebook-github-bot-5
parent cd002e5c46
commit dc52ce8158

@ -1,5 +1,10 @@
#!/usr/bin/env python2.7 #!/usr/bin/env python2.7
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse import argparse
import imp import imp
import json import json
@ -66,8 +71,8 @@ def split_args_to_parse():
dd_index = \ dd_index = \
sys.argv.index(CMD_MARKER) if CMD_MARKER in sys.argv else len(sys.argv) sys.argv.index(CMD_MARKER) if CMD_MARKER in sys.argv else len(sys.argv)
cmd_raw = sys.argv[dd_index + 1:] cmd_raw = sys.argv[dd_index + 1:]
return (sys.argv[1:dd_index], return (map(utils.decode, sys.argv[1:dd_index]),
[arg.decode(config.LOCALE) for arg in cmd_raw]) map(utils.decode, cmd_raw))
def create_argparser(parents=[]): def create_argparser(parents=[]):
@ -141,8 +146,9 @@ def main():
logging.info('Capture phase was successful') logging.info('Capture phase was successful')
elif capture_module_name is not None: elif capture_module_name is not None:
# There was a command, but it's not supported # There was a command, but it's not supported
print('Command "{cmd}" not recognised'.format( utils.stdout('Command "{cmd}" not recognised'
cmd='' if capture_module_name is None else capture_module_name)) .format(cmd='' if capture_module_name is None
else capture_module_name))
global_argparser.print_help() global_argparser.print_help()
sys.exit(1) sys.exit(1)
else: else:

@ -60,7 +60,7 @@ base_parser.add_argument('--html',
def show_error_and_exit(err, show_help): def show_error_and_exit(err, show_help):
print(err) print(utils.encode(err))
if show_help: if show_help:
print('') print('')
base_parser.print_help() base_parser.print_help()
@ -144,7 +144,7 @@ class Selector(object):
# <second line of report goes here> # <second line of report goes here>
msg = issues.text_of_report(report) \ msg = issues.text_of_report(report) \
.replace('\n', '\n%s' % ((n_length + 2) * ' ')) .replace('\n', '\n%s' % ((n_length + 2) * ' '))
print('%s. %s\n' % (str(n).rjust(n_length), msg)) utils.stdout('%s. %s\n' % (str(n).rjust(n_length), msg))
n += 1 n += 1
def prompt_report(self): def prompt_report(self):
@ -246,7 +246,7 @@ def get_remote_source_template():
# these # these
if project.endswith('.git'): if project.endswith('.git'):
project = project[:-len('.git')] project = project[:-len('.git')]
print('Detected GitHub project %s' % project) utils.stdout('Detected GitHub project %s' % project)
hash = subprocess.check_output( hash = subprocess.check_output(
['git', ['git',
'rev-parse', 'rev-parse',
@ -345,19 +345,21 @@ def generate_html_report(args, reports):
for bug in sel: for bug in sel:
bug_trace_path = path_of_bug_number(traces_dir, i) bug_trace_path = path_of_bug_number(traces_dir, i)
with codecs.open(bug_trace_path, 'w', with codecs.open(bug_trace_path, 'w',
encoding=config.LOCALE) as bug_trace_file: encoding=config.LOCALE,
errors='xmlcharrefreplace') as bug_trace_file:
bug_trace_file.write(html_bug_trace(args, bug, i)) bug_trace_file.write(html_bug_trace(args, bug, i))
i += 1 i += 1
remote_source_template = get_remote_source_template() remote_source_template = get_remote_source_template()
bug_list_path = os.path.join(html_dir, 'index.html') bug_list_path = os.path.join(html_dir, 'index.html')
with codecs.open(bug_list_path, 'w', with codecs.open(bug_list_path, 'w',
encoding=config.LOCALE) as bug_list_file: encoding=config.LOCALE,
errors='xmlcharrefreplace') as bug_list_file:
bug_list_file.write(html_list_of_bugs(args, bug_list_file.write(html_list_of_bugs(args,
remote_source_template, remote_source_template,
sel)) sel))
print('Saved html report in:\n%s' % bug_list_path) utils.stdout('Saved html report in:\n%s' % bug_list_path)
def main(): def main():
@ -383,11 +385,11 @@ def main():
report = sel.prompt_report() report = sel.prompt_report()
max_level = sel.prompt_level() max_level = sel.prompt_level()
print(issues.text_of_report(report)) utils.stdout(issues.text_of_report(report))
tracer = Tracer(args, max_level) tracer = Tracer(args, max_level)
tracer.build_report(report) tracer.build_report(report)
print(tracer) utils.stdout(tracer)
if __name__ == '__main__': if __name__ == '__main__':

@ -36,8 +36,8 @@ def get_infer_version():
return subprocess.check_output([ return subprocess.check_output([
utils.get_cmd_in_bin_dir(INFER_ANALYZE_BINARY), '-version']) utils.get_cmd_in_bin_dir(INFER_ANALYZE_BINARY), '-version'])
except: except:
print("Failed to run {0} binary, exiting". utils.stdout('Failed to run {0} binary, exiting'
format(INFER_ANALYZE_BINARY)) .format(INFER_ANALYZE_BINARY))
sys.exit(os.EX_UNAVAILABLE) sys.exit(os.EX_UNAVAILABLE)
@ -223,14 +223,14 @@ def clean(infer_out):
def help_exit(message): def help_exit(message):
print(message) utils.stdout(message)
infer_parser.print_usage() infer_parser.print_usage()
exit(1) exit(1)
def run_command(cmd, debug_mode, javac_arguments, step, analyzer): def run_command(cmd, debug_mode, javac_arguments, step, analyzer):
if debug_mode: if debug_mode:
print('\n{0}\n'.format(' '.join(cmd))) utils.stdout('\n{0}\n'.format(' '.join(cmd)))
try: try:
return subprocess.check_call(cmd) return subprocess.check_call(cmd)
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
@ -297,7 +297,7 @@ class Infer:
def clean_exit(self): def clean_exit(self):
if os.path.isdir(self.args.infer_out): if os.path.isdir(self.args.infer_out):
print('removing', self.args.infer_out) utils.stdout('removing {}'.format(self.args.infer_out))
shutil.rmtree(self.args.infer_out) shutil.rmtree(self.args.infer_out)
exit(os.EX_OK) exit(os.EX_OK)
@ -603,7 +603,7 @@ class Infer:
if self.javac.args.version: if self.javac.args.version:
if self.args.buck: if self.args.buck:
key = self.args.analyzer key = self.args.analyzer
print(utils.infer_key(key), file=sys.stderr) utils.stderr(utils.infer_key(key), errors="strict")
else: else:
return self.javac.run() return self.javac.run()
else: else:

@ -5,15 +5,20 @@
# LICENSE file in the root directory of this source tree. An additional grant # LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory. # of patent rights can be found in the PATENTS file in the same directory.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse import argparse
import json import json
import logging import logging
import os import os
import subprocess import subprocess
import traceback import traceback
import util
from inferlib import config, issues, utils, bucklib from inferlib import config, issues, utils, bucklib
from . import util
MODULE_NAME = __name__ MODULE_NAME = __name__
MODULE_DESCRIPTION = '''Run analysis of code built with a command like: MODULE_DESCRIPTION = '''Run analysis of code built with a command like:
@ -143,8 +148,8 @@ class BuckAnalyzer:
merged_results_path = os.path.join(self.args.infer_out, merged_results_path = os.path.join(self.args.infer_out,
config.JSON_REPORT_FILENAME) config.JSON_REPORT_FILENAME)
utils.dump_json_to_path(all_results, merged_results_path) utils.dump_json_to_path(all_results, merged_results_path)
print('Results saved in {results_path}'.format( utils.stdout('Results saved in {results_path}'
results_path=merged_results_path)) .format(results_path=merged_results_path))
return os.EX_OK return os.EX_OK
def capture_without_flavors(self): def capture_without_flavors(self):

@ -7,13 +7,18 @@
# LICENSE file in the root directory of this source tree. An additional grant # LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory. # of patent rights can be found in the PATENTS file in the same directory.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse import argparse
import os import os
import logging import logging
import subprocess import subprocess
import traceback import traceback
from inferlib import analyze from inferlib import analyze, utils
def create_infer_command(args, javac_arguments): def create_infer_command(args, javac_arguments):
@ -40,7 +45,8 @@ def run_compilation_commands(cmds, clean_cmd):
""" """
# TODO call it in parallel # TODO call it in parallel
if len(cmds) == 0: if len(cmds) == 0:
print('Nothing to compile. Try running `%s` first.' % clean_cmd) utils.stdout('Nothing to compile. Try running `{}` first.'
.format(clean_cmd))
return os.EX_NOINPUT return os.EX_NOINPUT
for cmd in cmds: for cmd in cmds:
if cmd.start() != os.EX_OK: if cmd.start() != os.EX_OK:

@ -75,5 +75,5 @@ class XcodebuildCapture:
except subprocess.CalledProcessError as exc: except subprocess.CalledProcessError as exc:
if self.args.debug: if self.args.debug:
traceback.print_exc() traceback.print_exc()
print(exc.output) utils.stdout(exc.output)
return exc.returncode return exc.returncode

@ -146,7 +146,7 @@ def clean_json(args, json_report):
shutil.move(temporary_file, json_report) shutil.move(temporary_file, json_report)
def _text_of_infer_loc(loc): def _text_of_infer_loc(loc):
return u' (%s:%d:%d-%d:)' % ( return ' ({}:{}:{}-{}:)'.format(
loc[JSON_INDEX_ISL_FILE], loc[JSON_INDEX_ISL_FILE],
loc[JSON_INDEX_ISL_LNUM], loc[JSON_INDEX_ISL_LNUM],
loc[JSON_INDEX_ISL_CNUM], loc[JSON_INDEX_ISL_CNUM],
@ -163,7 +163,7 @@ def text_of_report(report):
infer_loc = '' infer_loc = ''
if JSON_INDEX_INFER_SOURCE_LOC in report: if JSON_INDEX_INFER_SOURCE_LOC in report:
infer_loc = _text_of_infer_loc(report[JSON_INDEX_INFER_SOURCE_LOC]) infer_loc = _text_of_infer_loc(report[JSON_INDEX_INFER_SOURCE_LOC])
return u'%s:%d: %s: %s%s\n %s' % ( return '%s:%d: %s: %s%s\n %s' % (
filename, filename,
line, line,
kind.lower(), kind.lower(),
@ -234,8 +234,9 @@ def print_and_save_errors(json_report, bugs_out):
errors = utils.load_json_from_path(json_report) errors = utils.load_json_from_path(json_report)
errors = filter(_is_user_visible, errors) errors = filter(_is_user_visible, errors)
text = _text_of_report_list(errors) text = _text_of_report_list(errors)
print(text.encode(config.LOCALE)) utils.stdout(text)
with codecs.open(bugs_out, 'w', encoding=config.LOCALE) as file_out: with codecs.open(bugs_out, 'w',
encoding=config.LOCALE, errors='replace') as file_out:
file_out.write(text) file_out.write(text)
@ -312,5 +313,5 @@ def _should_report_json(analyzer, row):
def _print_and_write(file_out, message): def _print_and_write(file_out, message):
print(message) utils.stdout(message)
file_out.write(message + '\n') file_out.write(utils.encode(message + '\n'))

@ -78,14 +78,15 @@ def build_source_context(source_name, mode, report_line):
n_length = len(str(end_line)) n_length = len(str(end_line))
line_number = 1 line_number = 1
s = '' s = ''
with codecs.open(source_name, 'r', encoding=config.LOCALE) as source_file: with codecs.open(source_name, 'r',
encoding=config.LOCALE, errors="replace") as source_file:
for line in source_file: for line in source_file:
if start_line <= line_number <= end_line: if start_line <= line_number <= end_line:
num = str(line_number).zfill(n_length) num = str(line_number).zfill(n_length)
caret = ' ' caret = ' '
if line_number == report_line: if line_number == report_line:
caret = '> ' caret = '> '
s += u'%s. %s%s' % (num, caret, line) s += '%s. %s%s' % (num, caret, line)
line_number += 1 line_number += 1
return _syntax_highlighting(source_name, mode, s) return _syntax_highlighting(source_name, mode, s)

@ -84,52 +84,13 @@ def elapsed_time(start_time):
def error(msg): def error(msg):
print(msg, file=sys.stderr) print(encode(msg), file=sys.stderr)
def get_cmd_in_bin_dir(binary_name): def get_cmd_in_bin_dir(binary_name):
return os.path.join(config.BIN_DIRECTORY, binary_name) return os.path.join(config.BIN_DIRECTORY, binary_name)
def _to_text(value):
    """Return `value` as text, decoding byte strings leniently."""
    if isinstance(value, bytes):
        # Captured process output is raw bytes that may not match the locale
        # encoding; substitute undecodable bytes instead of raising.
        return value.decode(config.LOCALE, 'replace')
    text_type = type(u'')
    if isinstance(value, text_type):
        return value
    return text_type(value)


def write_cmd_streams_to_file(logfile, cmd=None, out=None, err=None):
    """Write a command line and its captured streams to `logfile`.

    Arguments:
    - logfile: path of the file to (over)write, encoded with config.LOCALE
    - cmd: optional list of command words, written space-separated on the
      first line
    - out: optional captured standard output, written under a STDOUT header
    - err: optional captured standard error, written under a STDERR header

    Stream contents that cannot be decoded from, or encoded to, the locale
    encoding are replaced rather than aborting the log write.
    """
    with codecs.open(logfile, 'w',
                     encoding=config.LOCALE,
                     errors='replace') as log_filedesc:
        if cmd:
            log_filedesc.write(' '.join(cmd) + '\n')
        if err is not None:
            log_filedesc.write('\nSTDERR:\n')
            log_filedesc.write(_to_text(err))
        if out is not None:
            log_filedesc.write('\n\nSTDOUT:\n')
            log_filedesc.write(_to_text(out))
def save_failed_command(
        infer_out,
        cmd,
        message,
        prefix='failed_',
        out=None,
        err=None):
    """Save a failed command and its streams to a fresh file in `infer_out`.

    Arguments:
    - infer_out: directory in which the log file is created
    - cmd: list of command words that failed
    - message: short label, used in the file name suffix and in the log line
    - prefix: file name prefix
    - out, err: optional captured stdout/stderr of the command

    The location of the saved file is reported through logging.error.
    """
    # mkstemp creates the file atomically, whereas the deprecated mktemp only
    # returns a name that another process could claim before we open it.
    # NOTE: the file is created with owner-only permissions.
    fd, cmd_filename = tempfile.mkstemp(
        '_' + message + '.txt',
        prefix, infer_out
    )
    # Only the reserved path is needed; the file is reopened by name below.
    os.close(fd)
    write_cmd_streams_to_file(cmd_filename, cmd=cmd, out=out, err=err)
    logging.error('\n' + message + ' error saved in ' + cmd_filename)
def run_command(cmd, debug_mode, infer_out, message, env=os.environ):
    """Run `cmd` and return its exit status, recording it on failure.

    Arguments:
    - cmd: list of command words to execute
    - debug_mode: when true, print the command line before running it
    - infer_out: directory where a failing command is saved
    - message: label passed to save_failed_command for the saved file
    - env: environment for the child process

    Returns the value of subprocess.check_call (0 on success).
    Raises subprocess.CalledProcessError if the command exits with a
    non-zero status, after the command has been saved.
    """
    if debug_mode:
        print('\n{0}\n'.format(' '.join(cmd)))
    try:
        return subprocess.check_call(cmd, env=env)
    except subprocess.CalledProcessError:
        save_failed_command(infer_out, cmd, message)
        # A bare raise keeps the original traceback; `raise e` would reset it
        # to this line under Python 2.
        raise
def load_json_from_path(path): def load_json_from_path(path):
with codecs.open(path, 'r', encoding=config.LOCALE) as file_in: with codecs.open(path, 'r', encoding=config.LOCALE) as file_in:
return json.load(file_in, encoding=config.LOCALE) return json.load(file_in, encoding=config.LOCALE)
@ -143,7 +104,8 @@ def dump_json_to_path(
separators=None, separators=None,
encoding=config.LOCALE, # customized encoding=config.LOCALE, # customized
default=None, sort_keys=False, **kw): default=None, sort_keys=False, **kw):
with codecs.open(path, 'w', encoding=config.LOCALE) as file_out: with codecs.open(path, 'w',
encoding=config.LOCALE, errors='replace') as file_out:
json.dump(data, file_out, skipkeys=skipkeys, ensure_ascii=ensure_ascii, json.dump(data, file_out, skipkeys=skipkeys, ensure_ascii=ensure_ascii,
check_circular=check_circular, allow_nan=allow_nan, cls=cls, check_circular=check_circular, allow_nan=allow_nan, cls=cls,
indent=indent, separators=separators, encoding=encoding, indent=indent, separators=separators, encoding=encoding,
@ -307,24 +269,6 @@ def uncompress_gzip_file(gzip_file, out_dir):
uncompressed_fd.close() uncompressed_fd.close()
def run_process(cmd, cwd=None, logfile=None):
    """Run `cmd` to completion and return its exit code.

    Arguments:
    - cmd: command to execute
    - cwd: working directory to switch to before running the command
    - logfile: optional file where the command and its stdout/stderr are
      dumped

    Both output streams are captured through pipes rather than inherited.
    """
    process = subprocess.Popen(
        cmd, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    captured_out, captured_err = process.communicate()
    if not logfile:
        return process.returncode
    write_cmd_streams_to_file(
        logfile, cmd=cmd, out=captured_out, err=captured_err)
    return process.returncode
def invoke_function_with_callbacks( def invoke_function_with_callbacks(
func, func,
args, args,
@ -346,6 +290,22 @@ def get_plural(_str, count):
return '%d %s' % (count, plural_str) return '%d %s' % (count, plural_str)
def decode(s, errors="replace"):
    """Decode `s` using the config.LOCALE encoding.

    `errors` is the codec error handler passed to `s.decode`; by default
    undecodable input is replaced instead of raising.
    """
    locale_encoding = config.LOCALE
    return s.decode(locale_encoding, errors)
def encode(u, errors="replace"):
    """Encode `u` using the config.LOCALE encoding.

    `errors` is the codec error handler passed to `u.encode`; by default
    unencodable characters are replaced instead of raising.
    """
    locale_encoding = config.LOCALE
    return u.encode(locale_encoding, errors)
def stdout(s, errors="replace"):
    """Print `s` on standard output after passing it through encode().

    `errors` is forwarded to encode() as the codec error handler.
    """
    encoded = encode(s, errors=errors)
    print(encoded)
def stderr(s, errors="replace"):
    """Print `s` on standard error after passing it through encode().

    `errors` is forwarded to encode() as the codec error handler.
    """
    encoded = encode(s, errors=errors)
    print(encoded, file=sys.stderr)
class AbsolutePathAction(argparse.Action): class AbsolutePathAction(argparse.Action):
"""Convert a path from relative to absolute in the arg parser""" """Convert a path from relative to absolute in the arg parser"""
def __call__(self, parser, namespace, values, option_string=None): def __call__(self, parser, namespace, values, option_string=None):

@ -6,6 +6,11 @@
# LICENSE file in the root directory of this source tree. An additional grant # LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory. # of patent rights can be found in the PATENTS file in the same directory.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import json import json
import os import os
import shutil import shutil
@ -126,14 +131,14 @@ def missing_errors(errors, patterns):
def check_results(errors, patterns): def check_results(errors, patterns):
unexpected = unexpected_errors(errors, patterns) unexpected = unexpected_errors(errors, patterns)
if unexpected != []: if unexpected != []:
print('\nInfer found the following unexpected errors:') utils.stderr('\nInfer found the following unexpected errors:')
for e in unexpected: for e in unexpected:
print('\t{}\n'.format(string_of_error(e))) utils.stderr('\t{}\n'.format(string_of_error(e)))
missing = missing_errors(errors, patterns) missing = missing_errors(errors, patterns)
if missing != []: if missing != []:
print('\nInfer did not find the following errors:') utils.stderr('\nInfer did not find the following errors:')
for p in missing: for p in missing:
print('\t{}\n'.format(string_of_error(p))) utils.stderr('\t{}\n'.format(string_of_error(p)))
assert unexpected == [] assert unexpected == []
assert missing == [] assert missing == []

Loading…
Cancel
Save