From b9c9f208926a687208a909196a86893940a4272b Mon Sep 17 00:00:00 2001 From: Jules Villard Date: Fri, 22 Apr 2016 10:10:46 -0700 Subject: [PATCH] get encoding from `locale.getdefaultlocale()` Summary: This enables controlling the encoding chosen by infer via the usual environment variables. For instance: ``` LC_ALL="C" infer ... # sets CODESET to "ascii" LC_ALL="en_US.UTF-8" infer ... # sets CODESET to "UTF-8" ``` This gives an easy solution to #320: run `LC_ALL="en_US.UTF-8" infer ...`. Right now the only solution is to edit the Python scripts by hand instead! Reviewed By: jberdine Differential Revision: D3207573 fb-gh-sync-id: 62d5b98 fbshipit-source-id: 62d5b98 --- infer/lib/python/infer | 5 +++++ infer/lib/python/inferTraceBugs | 4 ++-- infer/lib/python/inferlib/config.py | 10 ++++++---- infer/lib/python/inferlib/issues.py | 7 ++++--- infer/lib/python/inferlib/source.py | 6 +++--- infer/lib/python/inferlib/utils.py | 14 +++++++------- 6 files changed, 27 insertions(+), 19 deletions(-) diff --git a/infer/lib/python/infer b/infer/lib/python/infer index 41435c49b..6eb3a7161 100755 --- a/infer/lib/python/infer +++ b/infer/lib/python/infer @@ -8,6 +8,7 @@ from __future__ import unicode_literals import argparse import imp import json +import locale import logging import os import platform @@ -138,6 +139,10 @@ def main(): logging.info('PATH=%s', os.getenv('PATH')) logging.info('SHELL=%s', os.getenv('SHELL')) logging.info('PWD=%s', os.getenv('PWD')) + logging.info('output of locale.getdefaultlocale(): %s', + str(locale.getdefaultlocale())) + logging.info('encoding we chose in the end: %s', + config.CODESET) capture_exitcode = imported_module.gen_instance(args, cmd).capture() if capture_exitcode != os.EX_OK: diff --git a/infer/lib/python/inferTraceBugs b/infer/lib/python/inferTraceBugs index 5e9ba4bda..d782bc1c2 100755 --- a/infer/lib/python/inferTraceBugs +++ b/infer/lib/python/inferTraceBugs @@ -345,7 +345,7 @@ def generate_html_report(args, reports): for bug in sel: 
bug_trace_path = path_of_bug_number(traces_dir, i) with codecs.open(bug_trace_path, 'w', - encoding=config.LOCALE, + encoding=config.CODESET, errors='xmlcharrefreplace') as bug_trace_file: bug_trace_file.write(html_bug_trace(args, bug, i)) i += 1 @@ -353,7 +353,7 @@ def generate_html_report(args, reports): remote_source_template = get_remote_source_template() bug_list_path = os.path.join(html_dir, 'index.html') with codecs.open(bug_list_path, 'w', - encoding=config.LOCALE, + encoding=config.CODESET, errors='xmlcharrefreplace') as bug_list_file: bug_list_file.write(html_list_of_bugs(args, remote_source_template, diff --git a/infer/lib/python/inferlib/config.py b/infer/lib/python/inferlib/config.py index b99f449a7..ab0404ddd 100644 --- a/infer/lib/python/inferlib/config.py +++ b/infer/lib/python/inferlib/config.py @@ -14,13 +14,15 @@ import locale import os -LOCALE = locale.getpreferredencoding() - +locale.setlocale(locale.LC_ALL, '') +CODESET = locale.getlocale(locale.LC_CTYPE)[1] +if CODESET is None: + CODESET = 'ascii' # this assumes that this file lives in infer/lib/python/infer/ and the binaries # are in infer/bin/ INFER_PYTHON_DIRECTORY = os.path.dirname(os.path.realpath(__file__) - .decode(LOCALE)) + .decode(CODESET)) INFER_INFER_DIRECTORY = os.path.join(INFER_PYTHON_DIRECTORY, os.pardir, os.pardir, os.pardir) INFER_ROOT_DIRECTORY = os.path.join(INFER_INFER_DIRECTORY, os.pardir) @@ -34,7 +36,7 @@ ANNOT_PROCESSOR_NAMES = 'com.facebook.infer.annotprocess.CollectSuppressWarnings WRAPPERS_DIRECTORY = os.path.join(LIB_DIRECTORY, 'wrappers') XCODE_WRAPPERS_DIRECTORY = os.path.join(LIB_DIRECTORY, 'xcode_wrappers') -DEFAULT_INFER_OUT = os.path.join(os.getcwd().decode(LOCALE), 'infer-out') +DEFAULT_INFER_OUT = os.path.join(os.getcwd().decode(CODESET), 'infer-out') CSV_PERF_FILENAME = 'performances.csv' STATS_FILENAME = 'stats.json' PROC_STATS_FILENAME = 'proc_stats.json' diff --git a/infer/lib/python/inferlib/issues.py b/infer/lib/python/inferlib/issues.py index 
b16810518..82856e2f3 100644 --- a/infer/lib/python/inferlib/issues.py +++ b/infer/lib/python/inferlib/issues.py @@ -269,11 +269,12 @@ def print_and_save_errors(json_report, bugs_out, xml_out): plain_out = _text_of_report_list(errors, formatter=colorize.PLAIN_FORMATTER) with codecs.open(bugs_out, 'w', - encoding=config.LOCALE, errors='replace') as file_out: + encoding=config.CODESET, errors='replace') as file_out: file_out.write(plain_out) if xml_out is not None: with codecs.open(xml_out, 'w', - encoding=config.LOCALE, errors='replace') as file_out: + encoding=config.CODESET, + errors='replace') as file_out: file_out.write(_pmd_xml_of_issues(errors)) @@ -323,7 +324,7 @@ def _pmd_xml_of_issues(issues): violation.text = issue[JSON_INDEX_QUALIFIER] file_node.append(violation) root.append(file_node) - return etree.tostring(root, pretty_print=True, encoding=config.LOCALE) + return etree.tostring(root, pretty_print=True, encoding=config.CODESET) def _sort_and_uniq_rows(l): diff --git a/infer/lib/python/inferlib/source.py b/infer/lib/python/inferlib/source.py index 868f3b6b8..9f4546130 100644 --- a/infer/lib/python/inferlib/source.py +++ b/infer/lib/python/inferlib/source.py @@ -44,7 +44,7 @@ class Indenter(str): def add(self, x): if type(x) != unicode: - x = x.decode(config.LOCALE) + x = x.decode(config.CODESET) lines = x.splitlines() indent = self.indent_get() lines = [indent + l for l in lines] @@ -55,7 +55,7 @@ class Indenter(str): return self.text def __str__(self): - return unicode(self).encode(config.LOCALE) + return unicode(self).encode(config.CODESET) def build_source_context(source_name, mode, report_line): @@ -67,7 +67,7 @@ def build_source_context(source_name, mode, report_line): line_number = 1 excerpt = '' with codecs.open(source_name, 'r', - encoding=config.LOCALE, errors="replace") as source_file: + encoding=config.CODESET, errors="replace") as source_file: # avoid going past the end of the file for line in source_file: if start_line <= line_number <= 
end_line: diff --git a/infer/lib/python/inferlib/utils.py b/infer/lib/python/inferlib/utils.py index 877e43e28..ea384a62e 100644 --- a/infer/lib/python/inferlib/utils.py +++ b/infer/lib/python/inferlib/utils.py @@ -55,7 +55,7 @@ if 'check_output' not in dir(subprocess): def locale_csv_reader(iterable, dialect='excel', **kwargs): rows = csv.reader(iterable, dialect=dialect, **kwargs) for row in rows: - yield [unicode(cell, config.LOCALE) for cell in row] + yield [unicode(cell, config.CODESET) for cell in row] def configure_logging(args): @@ -89,8 +89,8 @@ def get_cmd_in_bin_dir(binary_name): def load_json_from_path(path, errors='replace'): with codecs.open(path, 'r', - encoding=config.LOCALE, errors=errors) as file_in: - return json.load(file_in, encoding=config.LOCALE) + encoding=config.CODESET, errors=errors) as file_in: + return json.load(file_in, encoding=config.CODESET) def dump_json_to_path( @@ -99,10 +99,10 @@ def dump_json_to_path( cls=None, indent=2, # customized separators=None, - encoding=config.LOCALE, # customized + encoding=config.CODESET, # customized default=None, sort_keys=False, **kw): with codecs.open(path, 'w', - encoding=config.LOCALE, errors='replace') as file_out: + encoding=config.CODESET, errors='replace') as file_out: json.dump(data, file_out, skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, cls=cls, indent=indent, separators=separators, encoding=encoding, @@ -288,11 +288,11 @@ def get_plural(_str, count): def decode(s, errors='replace'): - return s.decode(encoding=config.LOCALE, errors=errors) + return s.decode(encoding=config.CODESET, errors=errors) def encode(u, errors='replace'): - return u.encode(encoding=config.LOCALE, errors=errors) + return u.encode(encoding=config.CODESET, errors=errors) def stdout(s, errors='replace'):