handle unicode in source code

Summary: public
Should address #175.

Reviewed By: akotulski

Differential Revision: D2641689

fb-gh-sync-id: e98da88
Branch: master
Author: Jules Villard (committed by facebook-github-bot-1)
Parent: 0cd533f892
Commit: ae81d8d215
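
Background for the diff below: in Python 2, plain open() and csv.reader()
traffic in byte strings, so any non-ASCII character in a source file could
crash these scripts. A minimal sketch of the failure mode this commit guards
against (file path and contents are hypothetical; assumes a UTF-8 locale):

    import codecs

    with open('/tmp/demo.py', 'w') as f:
        f.write('x = "caf\xc3\xa9"\n')       # UTF-8 bytes on disk

    with open('/tmp/demo.py') as f:
        data = f.read()                      # byte string
    u'context: ' + data                      # UnicodeDecodeError

    # the fix used throughout this commit: decode explicitly
    with codecs.open('/tmp/demo.py', 'r', encoding='UTF-8') as f:
        data = f.read()                      # unicode
    u'context: ' + data                      # fine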

@@ -338,7 +338,7 @@ def load_stats(opened_jar):
 def load_csv_report(opened_jar):
     try:
         sio = io.StringIO(opened_jar.read(INFER_CSV_REPORT).decode())
-        return list(csv.reader(sio))
+        return list(utils.locale_csv_reader(sio))
     except KeyError as e:
         raise NotFoundInJar

@@ -68,7 +68,9 @@ def load_module(mod_name):
 def split_args_to_parse():
     dd_index = \
         sys.argv.index(CMD_MARKER) if CMD_MARKER in sys.argv else len(sys.argv)
-    return sys.argv[1:dd_index], sys.argv[dd_index + 1:]
+    cmd_raw = sys.argv[dd_index + 1:]
+    return (sys.argv[1:dd_index],
+            [arg.decode(utils.LOCALE) for arg in cmd_raw])


 def create_argparser(parents=[]):

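Why the command half of argv gets decoded: Python 2's sys.argv holds byte
strings exactly as handed over by the OS, and decoding them once up front
keeps the rest of the pipeline in unicode. A small illustration (the argv
contents are made up; assumes a UTF-8 locale):

    import locale

    LOCALE = locale.getpreferredencoding()
    # pretend sys.argv[dd_index + 1:] was ['javac', 'Caf\xc3\xa9.java']
    cmd_raw = ['javac', 'Caf\xc3\xa9.java']
    cmd = [arg.decode(LOCALE) for arg in cmd_raw]
    print(cmd)   # [u'javac', u'Caf\xe9.java']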
@@ -168,7 +170,8 @@ def main():
     bugs_filename = os.path.join(args.infer_out,
                                  utils.JSON_REPORT_FILENAME)
     try:
-        with open(bugs_filename) as bugs_file:
+        with codecs.open(bugs_filename, 'r',
+                         encoding=utils.LOCALE) as bugs_file:
            bugs = json.load(bugs_file)
        if len(bugs) > 0:
            sys.exit(analyze.BUG_FOUND_ERROR_CODE)
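The open -> codecs.open substitution above recurs in most hunks below. The
difference, in one runnable Python 2 sketch (the temp file is throwaway;
assumes a UTF-8 locale):

    import codecs
    import locale
    import tempfile

    LOCALE = locale.getpreferredencoding()   # e.g. 'UTF-8'
    path = tempfile.mktemp()

    with codecs.open(path, 'w', encoding=LOCALE) as f:
        f.write(u'caf\xe9')                  # unicode in, encoded bytes on disk

    with open(path) as f:
        print(repr(f.read()))                # 'caf\xc3\xa9' -- raw bytes

    with codecs.open(path, 'r', encoding=LOCALE) as f:
        print(repr(f.read()))                # u'caf\xe9' -- decoded unicode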

@@ -355,13 +355,15 @@ def generate_html_report(args, reports):
     i = 0
     for bug in sel:
         bug_trace_path = path_of_bug_number(traces_dir, i)
-        with open(bug_trace_path, 'w') as bug_trace_file:
+        with codecs.open(bug_trace_path, 'w',
+                         encoding=utils.LOCALE) as bug_trace_file:
             bug_trace_file.write(html_bug_trace(args, bug, i))
         i += 1
     remote_source_template = get_remote_source_template()
     bug_list_path = os.path.join(html_dir, 'index.html')
-    with open(bug_list_path, 'w') as bug_list_file:
+    with codecs.open(bug_list_path, 'w',
+                     encoding=utils.LOCALE) as bug_list_file:
         bug_list_file.write(html_list_of_bugs(args,
                                               remote_source_template,
                                               sel))

@@ -373,7 +375,8 @@ def main():
     args = base_parser.parse_args()
     report_filename = os.path.join(args.infer_out, utils.JSON_REPORT_FILENAME)
-    with open(report_filename) as report_file:
+    with codecs.open(report_filename, 'r',
+                     encoding=utils.LOCALE) as report_file:
         reports = json.load(report_file)
     if args.html:

@@ -11,6 +11,7 @@ from __future__ import print_function
 from __future__ import unicode_literals

 import argparse
+import codecs
 import csv
 import glob
 import json

@@ -220,7 +221,6 @@ def create_results_dir(results_dir):
 def clean(infer_out):
     directories = ['multicore', 'classnames', 'sources', jwlib.FILELISTS]
     extensions = ['.cfg', '.cg']

@@ -346,7 +346,7 @@ def should_report_json(analyzer, row):
 def clean_json(args, json_report):
     collected_rows = []
-    with open(json_report, 'r') as file_in:
+    with codecs.open(json_report, 'r', encoding=utils.LOCALE) as file_in:
         rows = json.load(file_in)
     for row in rows:
         filename = row[utils.JSON_INDEX_FILENAME]

@@ -357,7 +357,7 @@ def clean_json(args, json_report):
         collected_rows,
         cmp=compare_json_rows)
     temporary_file = tempfile.mktemp()
-    with open(temporary_file, 'w') as file_out:
+    with codecs.open(temporary_file, 'w', encoding=utils.LOCALE) as file_out:
         json.dump(collected_rows, file_out)
         file_out.flush()
     shutil.move(temporary_file, json_report)

@@ -366,7 +366,7 @@ def clean_json(args, json_report):
 def clean_csv(args, csv_report):
     collected_rows = []
     with open(csv_report, 'r') as file_in:
-        reader = csv.reader(file_in)
+        reader = utils.locale_csv_reader(file_in)
         rows = [row for row in reader]
     if len(rows) <= 1:
         return rows

@@ -395,8 +395,8 @@ def print_and_write(file_out, message):
 def print_errors(csv_report, bugs_out):
-    with open(csv_report, 'r') as file_in:
-        reader = csv.reader(file_in)
+    with codecs.open(csv_report, 'r', encoding=utils.LOCALE) as file_in:
+        reader = utils.locale_csv_reader(file_in)
         reader.next()  # first line is header, skip it
         errors = filter(

@@ -404,7 +404,7 @@ def print_errors(csv_report, bugs_out):
             reader
         )
-        with open(bugs_out, 'w') as file_out:
+        with codecs.open(bugs_out, 'w', encoding=utils.LOCALE) as file_out:
             text_errors_list = []
             for row in errors:
                 filename = row[utils.CSV_INDEX_FILENAME]

@@ -419,9 +419,9 @@ def print_errors(csv_report, bugs_out):
                     utils.build_source_context(filename,
                                                utils.TERMINAL_FORMATTER,
                                                int(line)))
-                source_context = str(indenter)
+                source_context = unicode(indenter)
                 text_errors_list.append(
-                    '{0}:{1}: {2}: {3}\n {4}\n{5}'.format(
+                    u'{0}:{1}: {2}: {3}\n {4}\n{5}'.format(
                         filename,
                         line,
                         kind.lower(),

@@ -674,7 +674,7 @@ class Infer:
     def update_stats_with_warnings(self, csv_report):
         with open(csv_report, 'r') as file_in:
-            reader = csv.reader(file_in)
+            reader = utils.locale_csv_reader(file_in)
             rows = [row for row in reader][1:]
             for row in rows:
                 key = row[utils.CSV_INDEX_TYPE]

@@ -722,7 +722,8 @@ class Infer:
         # capture and compile mode do not create proc_stats.json
         if os.path.isfile(proc_stats_path):
-            with open(proc_stats_path, 'r') as proc_stats_file:
+            with codecs.open(proc_stats_path, 'r',
+                             encoding=utils.LOCALE) as proc_stats_file:
                 proc_stats = json.load(proc_stats_file)
                 self.stats['int'].update(proc_stats)

@@ -741,7 +742,7 @@ class Infer:
         }
         stats_path = os.path.join(self.args.infer_out, utils.STATS_FILENAME)
-        with open(stats_path, 'w') as stats_file:
+        with codecs.open(stats_path, 'w', encoding=utils.LOCALE) as stats_file:
             json.dump(self.stats, stats_file, indent=2)

@@ -11,10 +11,12 @@ from __future__ import print_function
 from __future__ import unicode_literals

 import argparse
+import codecs
 import csv
 import fnmatch
 import gzip
 import json
+import locale
 import logging
 import os
 import re

@@ -30,9 +32,12 @@ import tempfile
 import time

+LOCALE = locale.getpreferredencoding()
+
 # this assumes that this file lives in infer/lib/python/infer/ and the binaries
 # are in infer/bin/
-INFER_PYTHON_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
+INFER_PYTHON_DIRECTORY = os.path.dirname(os.path.realpath(__file__)
+                                         .decode(LOCALE))
 INFER_INFER_DIRECTORY = os.path.join(INFER_PYTHON_DIRECTORY,
                                      os.pardir, os.pardir, os.pardir)
 INFER_ROOT_DIRECTORY = os.path.join(INFER_INFER_DIRECTORY, os.pardir)

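What the new LOCALE constant evaluates to depends on the environment; a
quick check (example values, not guaranteed):

    import locale
    print(locale.getpreferredencoding())
    # 'UTF-8' on a typical Linux/macOS setup,
    # 'ANSI_X3.4-1968' (i.e. ASCII) under a bare POSIX locale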
@@ -46,7 +51,7 @@ ANNOT_PROCESSOR_JAR = os.path.join(JAVA_LIB_DIRECTORY, 'processor.jar')
 WRAPPERS_DIRECTORY = os.path.join(LIB_DIRECTORY, 'wrappers')
 XCODE_WRAPPERS_DIRECTORY = os.path.join(LIB_DIRECTORY, 'xcode_wrappers')

-DEFAULT_INFER_OUT = os.path.join(os.getcwd(), 'infer-out')
+DEFAULT_INFER_OUT = os.path.join(os.getcwd().decode(LOCALE), 'infer-out')
 CSV_PERF_FILENAME = 'performances.csv'
 STATS_FILENAME = 'stats.json'
 PROC_STATS_FILENAME = 'proc_stats.json'

@@ -132,6 +137,14 @@ if "check_output" not in dir(subprocess):
     subprocess.check_output = f


+# csv.reader() doesn't support utf-8. Do not use csv.reader(). Use
+# this instead.
+def locale_csv_reader(iterable, dialect='excel', **kwargs):
+    rows = csv.reader(iterable, dialect=dialect, **kwargs)
+    for row in rows:
+        yield [unicode(cell, LOCALE) for cell in row]
+
+
 def configure_logging(debug, quiet=False):
     """Configures the default logger. This can be called only once and has to
     be called before any logging is done.

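A usage sketch for the new wrapper, self-contained so it can be run directly
(the CSV content is made up; assumes a UTF-8 locale):

    import csv
    import io
    import locale

    LOCALE = locale.getpreferredencoding()

    def locale_csv_reader(iterable, dialect='excel', **kwargs):
        rows = csv.reader(iterable, dialect=dialect, **kwargs)
        for row in rows:
            yield [unicode(cell, LOCALE) for cell in row]

    data = b'ERROR,Caf\xc3\xa9.java,NULL_DEREFERENCE\r\n'
    for row in locale_csv_reader(io.BytesIO(data)):
        print(row)   # [u'ERROR', u'Caf\xe9.java', u'NULL_DEREFERENCE']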
@@ -164,7 +177,7 @@ def get_cmd_in_bin_dir(binary_name):
 def write_cmd_streams_to_file(logfile, cmd=None, out=None, err=None):
-    with open(logfile, 'w') as log_filedesc:
+    with codecs.open(logfile, 'w', encoding=LOCALE) as log_filedesc:
         if cmd:
             log_filedesc.write(' '.join(cmd) + '\n')
         if err is not None:

@@ -424,14 +437,19 @@ class Indenter(str):
         self.text += '\n'

     def add(self, x):
+        if type(x) != unicode:
+            x = x.decode(LOCALE)
         lines = x.splitlines()
         indent = self.indent_get()
         lines = [indent + l for l in lines]
         self.text += '\n'.join(lines)

-    def __str__(self):
+    def __unicode__(self):
         return self.text

+    def __str__(self):
+        return unicode(self).encode(LOCALE)
+

 def syntax_highlighting(source_name, mode, s):
     if pygments is None or mode == PLAIN_FORMATTER:

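The __unicode__/__str__ pair follows the standard Python 2 protocol: keep
the canonical text as unicode and encode only at the byte boundary. A
standalone sketch (Box is a hypothetical stand-in for Indenter):

    class Box(object):
        def __init__(self, text):
            self.text = text                  # unicode

        def __unicode__(self):                # unicode(obj) lands here
            return self.text

        def __str__(self):                    # str(obj) lands here
            return unicode(self).encode('UTF-8')

    b = Box(u'caf\xe9')
    unicode(b)   # u'caf\xe9'
    str(b)       # 'caf\xc3\xa9' -- UTF-8 bytes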
@@ -454,14 +472,14 @@ def build_source_context(source_name, mode, report_line):
     n_length = len(str(end_line))
     line_number = 1
     s = ''
-    with open(source_name) as source_file:
+    with codecs.open(source_name, 'r', encoding=LOCALE) as source_file:
         for line in source_file:
             if start_line <= line_number <= end_line:
                 num = str(line_number).zfill(n_length)
                 caret = '  '
                 if line_number == report_line:
                     caret = '> '
-                s += num + '. ' + caret + line
+                s += u'%s. %s%s' % (num, caret, line)
                 line_number += 1
     return syntax_highlighting(source_name, mode, s)
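For reference, the shape of context this formatting produces (made-up
source, report_line = 3; exact caret padding was lost in this view):

    1.   def foo():
    2.       x = None
    3. >     return x.bar()
    4.   ...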
