handle unicode in source code

Summary: public
Should address #175.

Reviewed By: akotulski

Differential Revision: D2641689

fb-gh-sync-id: e98da88
master
Jules Villard 9 years ago committed by facebook-github-bot-1
parent 0cd533f892
commit ae81d8d215

@ -338,7 +338,7 @@ def load_stats(opened_jar):
def load_csv_report(opened_jar):
    """Read the Infer CSV report out of *opened_jar* and parse it.

    The raw bytes are decoded first, then parsed with the locale-aware
    reader so non-ASCII characters in source paths/messages survive
    (plain csv.reader() does not handle utf-8 on Python 2).

    Raises NotFoundInJar if the jar has no CSV report entry.
    """
    try:
        sio = io.StringIO(opened_jar.read(INFER_CSV_REPORT).decode())
        # NOTE: use the locale-aware reader, not csv.reader() directly;
        # the stale csv.reader() call (a leftover of the old code) has
        # been dropped — it shadowed this line and reverted the fix.
        return list(utils.locale_csv_reader(sio))
    except KeyError as e:
        # zipfile raises KeyError for a missing archive member.
        raise NotFoundInJar

@ -68,7 +68,9 @@ def load_module(mod_name):
def split_args_to_parse():
    """Split sys.argv around CMD_MARKER.

    Returns a pair: (arguments for infer itself, the wrapped command
    after the marker with each argument decoded to unicode using the
    preferred locale).  If the marker is absent the second list is empty.
    """
    dd_index = \
        sys.argv.index(CMD_MARKER) if CMD_MARKER in sys.argv else len(sys.argv)
    # The stale pre-fix `return sys.argv[1:dd_index], sys.argv[dd_index + 1:]`
    # made the decoding below unreachable; it has been removed.
    cmd_raw = sys.argv[dd_index + 1:]
    return (sys.argv[1:dd_index],
            [arg.decode(utils.LOCALE) for arg in cmd_raw])
def create_argparser(parents=[]):
@ -168,7 +170,8 @@ def main():
bugs_filename = os.path.join(args.infer_out,
utils.JSON_REPORT_FILENAME)
try:
with open(bugs_filename) as bugs_file:
with codecs.open(bugs_filename, 'r',
encoding=utils.LOCALE) as bugs_file:
bugs = json.load(bugs_file)
if len(bugs) > 0:
sys.exit(analyze.BUG_FOUND_ERROR_CODE)

@ -355,13 +355,15 @@ def generate_html_report(args, reports):
i = 0
for bug in sel:
bug_trace_path = path_of_bug_number(traces_dir, i)
with open(bug_trace_path, 'w') as bug_trace_file:
with codecs.open(bug_trace_path, 'w',
encoding=utils.LOCALE) as bug_trace_file:
bug_trace_file.write(html_bug_trace(args, bug, i))
i += 1
remote_source_template = get_remote_source_template()
bug_list_path = os.path.join(html_dir, 'index.html')
with open(bug_list_path, 'w') as bug_list_file:
with codecs.open(bug_list_path, 'w',
encoding=utils.LOCALE) as bug_list_file:
bug_list_file.write(html_list_of_bugs(args,
remote_source_template,
sel))
@ -373,7 +375,8 @@ def main():
args = base_parser.parse_args()
report_filename = os.path.join(args.infer_out, utils.JSON_REPORT_FILENAME)
with open(report_filename) as report_file:
with codecs.open(report_filename, 'r',
encoding=utils.LOCALE) as report_file:
reports = json.load(report_file)
if args.html:

@ -11,6 +11,7 @@ from __future__ import print_function
from __future__ import unicode_literals
import argparse
import codecs
import csv
import glob
import json
@ -220,7 +221,6 @@ def create_results_dir(results_dir):
def clean(infer_out):
directories = ['multicore', 'classnames', 'sources', jwlib.FILELISTS]
extensions = ['.cfg', '.cg']
@ -346,7 +346,7 @@ def should_report_json(analyzer, row):
def clean_json(args, json_report):
collected_rows = []
with open(json_report, 'r') as file_in:
with codecs.open(json_report, 'r', encoding=utils.LOCALE) as file_in:
rows = json.load(file_in)
for row in rows:
filename = row[utils.JSON_INDEX_FILENAME]
@ -357,7 +357,7 @@ def clean_json(args, json_report):
collected_rows,
cmp=compare_json_rows)
temporary_file = tempfile.mktemp()
with open(temporary_file, 'w') as file_out:
with codecs.open(temporary_file, 'w', encoding=utils.LOCALE) as file_out:
json.dump(collected_rows, file_out)
file_out.flush()
shutil.move(temporary_file, json_report)
@ -366,7 +366,7 @@ def clean_json(args, json_report):
def clean_csv(args, csv_report):
collected_rows = []
with open(csv_report, 'r') as file_in:
reader = csv.reader(file_in)
reader = utils.locale_csv_reader(file_in)
rows = [row for row in reader]
if len(rows) <= 1:
return rows
@ -395,8 +395,8 @@ def print_and_write(file_out, message):
def print_errors(csv_report, bugs_out):
with open(csv_report, 'r') as file_in:
reader = csv.reader(file_in)
with codecs.open(csv_report, 'r', encoding=utils.LOCALE) as file_in:
reader = utils.locale_csv_reader(file_in)
reader.next() # first line is header, skip it
errors = filter(
@ -404,7 +404,7 @@ def print_errors(csv_report, bugs_out):
reader
)
with open(bugs_out, 'w') as file_out:
with codecs.open(bugs_out, 'w', encoding=utils.LOCALE) as file_out:
text_errors_list = []
for row in errors:
filename = row[utils.CSV_INDEX_FILENAME]
@ -419,9 +419,9 @@ def print_errors(csv_report, bugs_out):
utils.build_source_context(filename,
utils.TERMINAL_FORMATTER,
int(line)))
source_context = str(indenter)
source_context = unicode(indenter)
text_errors_list.append(
'{0}:{1}: {2}: {3}\n {4}\n{5}'.format(
u'{0}:{1}: {2}: {3}\n {4}\n{5}'.format(
filename,
line,
kind.lower(),
@ -674,7 +674,7 @@ class Infer:
def update_stats_with_warnings(self, csv_report):
with open(csv_report, 'r') as file_in:
reader = csv.reader(file_in)
reader = utils.locale_csv_reader(file_in)
rows = [row for row in reader][1:]
for row in rows:
key = row[utils.CSV_INDEX_TYPE]
@ -722,7 +722,8 @@ class Infer:
# capture and compile mode do not create proc_stats.json
if os.path.isfile(proc_stats_path):
with open(proc_stats_path, 'r') as proc_stats_file:
with codecs.open(proc_stats_path, 'r',
encoding=utils.LOCALE) as proc_stats_file:
proc_stats = json.load(proc_stats_file)
self.stats['int'].update(proc_stats)
@ -741,7 +742,7 @@ class Infer:
}
stats_path = os.path.join(self.args.infer_out, utils.STATS_FILENAME)
with open(stats_path, 'w') as stats_file:
with codecs.open(stats_path, 'w', encoding=utils.LOCALE) as stats_file:
json.dump(self.stats, stats_file, indent=2)

@ -11,10 +11,12 @@ from __future__ import print_function
from __future__ import unicode_literals
import argparse
import codecs
import csv
import fnmatch
import gzip
import json
import locale
import logging
import os
import re
@ -30,9 +32,12 @@ import tempfile
import time
LOCALE = locale.getpreferredencoding()
# this assumes that this file lives in infer/lib/python/infer/ and the binaries
# are in infer/bin/
INFER_PYTHON_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
INFER_PYTHON_DIRECTORY = os.path.dirname(os.path.realpath(__file__)
.decode(LOCALE))
INFER_INFER_DIRECTORY = os.path.join(INFER_PYTHON_DIRECTORY,
os.pardir, os.pardir, os.pardir)
INFER_ROOT_DIRECTORY = os.path.join(INFER_INFER_DIRECTORY, os.pardir)
@ -46,7 +51,7 @@ ANNOT_PROCESSOR_JAR = os.path.join(JAVA_LIB_DIRECTORY, 'processor.jar')
WRAPPERS_DIRECTORY = os.path.join(LIB_DIRECTORY, 'wrappers')
XCODE_WRAPPERS_DIRECTORY = os.path.join(LIB_DIRECTORY, 'xcode_wrappers')
DEFAULT_INFER_OUT = os.path.join(os.getcwd(), 'infer-out')
DEFAULT_INFER_OUT = os.path.join(os.getcwd().decode(LOCALE), 'infer-out')
CSV_PERF_FILENAME = 'performances.csv'
STATS_FILENAME = 'stats.json'
PROC_STATS_FILENAME = 'proc_stats.json'
@ -132,6 +137,14 @@ if "check_output" not in dir(subprocess):
subprocess.check_output = f
# csv.reader() doesn't support utf-8. Do not use csv.reader(). Use
# this instead.
def locale_csv_reader(iterable, dialect='excel', **kwargs):
    """Generator wrapping csv.reader() that decodes every cell.

    Python 2's csv module yields byte strings and cannot cope with
    multi-byte encodings, hence the module-level warning to use this
    instead of csv.reader() directly.  Each row is re-emitted with all
    cells decoded to unicode using the preferred locale encoding.
    """
    rows = csv.reader(iterable, dialect=dialect, **kwargs)
    for row in rows:
        # NOTE(review): Python-2-only — the `unicode` builtin does not
        # exist on Python 3, where csv.reader already yields str.
        yield [unicode(cell, LOCALE) for cell in row]
def configure_logging(debug, quiet=False):
"""Configures the default logger. This can be called only once and has to
be called before any logging is done.
@ -164,7 +177,7 @@ def get_cmd_in_bin_dir(binary_name):
def write_cmd_streams_to_file(logfile, cmd=None, out=None, err=None):
with open(logfile, 'w') as log_filedesc:
with codecs.open(logfile, 'w', encoding=LOCALE) as log_filedesc:
if cmd:
log_filedesc.write(' '.join(cmd) + '\n')
if err is not None:
@ -424,14 +437,19 @@ class Indenter(str):
self.text += '\n'
    def add(self, x):
        """Append *x* to the buffer, indenting every line it contains.

        Accepts bytes or unicode; byte strings are decoded with the
        preferred locale first (Python-2-only: `unicode` / `.decode`).
        Lines are joined with '\n' and prefixed with the current indent.
        """
        if type(x) != unicode:
            x = x.decode(LOCALE)
        lines = x.splitlines()
        # indent_get() yields the prefix for the current nesting level.
        indent = self.indent_get()
        lines = [indent + l for l in lines]
        self.text += '\n'.join(lines)
def __str__(self):
    def __unicode__(self):
        # Unicode view of the accumulated, indented text (py2 protocol).
        return self.text
    def __str__(self):
        # Python-2-only bridge: byte-string view, encoded with the
        # preferred locale so printing non-ASCII text does not crash.
        return unicode(self).encode(LOCALE)
def syntax_highlighting(source_name, mode, s):
if pygments is None or mode == PLAIN_FORMATTER:
@ -454,14 +472,14 @@ def build_source_context(source_name, mode, report_line):
n_length = len(str(end_line))
line_number = 1
s = ''
with open(source_name) as source_file:
with codecs.open(source_name, 'r', encoding=LOCALE) as source_file:
for line in source_file:
if start_line <= line_number <= end_line:
num = str(line_number).zfill(n_length)
caret = ' '
if line_number == report_line:
caret = '> '
s += num + '. ' + caret + line
s += u'%s. %s%s' % (num, caret, line)
line_number += 1
return syntax_highlighting(source_name, mode, s)

Loading…
Cancel
Save