[python] more UTF-8 fun

Summary: Make sure `inferTraceBugs` works with non-ASCII characters. Harden the code a bit more on the way. Fixes #592.

Reviewed By: mbouaziz

Differential Revision: D4659016

fbshipit-source-id: 79f7a80
9 years ago · ee9a2aa38c
parent 2c2626359c
commit ee9a2aa38c
3 changed files with 15 additions and 7 deletions
--- a/infer/lib/python/inferTraceBugs
+++ b/infer/lib/python/inferTraceBugs
@@ -123,6 +123,9 @@ class Tracer(object):
    def build_report(self, report):
        self.build_trace(report[issues.JSON_INDEX_TRACE])
    def __unicode__(self):
        return unicode(self.indenter)
    def __str__(self):
        return str(self.indenter)
@@ -278,7 +281,7 @@ def html_bug_trace(args, report, bug_id):
    bug_trace += '%s\n' % issues.text_of_report(report)
    tracer = Tracer(args)
    tracer.build_report(report)
-    bug_trace += str(tracer)
+    bug_trace += unicode(tracer)
    return bug_trace
@@ -389,7 +392,7 @@ def main():
    tracer = Tracer(args, max_level)
    tracer.build_report(report)
-    utils.stdout(str(tracer))
+    utils.stdout(unicode(tracer))
 if __name__ == '__main__':
--- a/infer/lib/python/inferlib/source.py
+++ b/infer/lib/python/inferlib/source.py
@@ -13,14 +13,14 @@ from __future__ import unicode_literals
 import codecs
 import os
-from . import colorize, config
+from . import colorize, config, utils
 BASE_INDENT = 2
 # how many lines of context around each report
 SOURCE_CONTEXT = 2
-class Indenter(str):
+class Indenter(unicode):
    def __init__(self):
        super(Indenter, self).__init__()
        self.text = ''
@@ -46,7 +46,7 @@ class Indenter(str):
    def add(self, x):
        if type(x) != unicode:
-            x = x.decode(config.CODESET)
+            x = utils.decode(x)
        lines = x.splitlines()
        indent = self.indent_get()
        lines = [indent + l for l in lines]
@@ -57,7 +57,7 @@ class Indenter(str):
        return self.text
    def __str__(self):
-        return unicode(self).encode(config.CODESET)
+        return utils.encode(unicode(self))
 def build_source_context(source_name, mode, report_line):
--- a/infer/tests/build_systems/utf8_in_procname/Makefile
+++ b/infer/tests/build_systems/utf8_in_procname/Makefile
@@ -17,11 +17,16 @@ SOURCES = ../codetoanalyze/make/utf8_in_function_names.c
 include $(TESTS_DIR)/clang.make
-infer-out/report.json: $(CLANG_DEPS) $(SOURCES) $(HEADERS)
+infer-out/report.json: $(CLANG_DEPS) $(INFERTRACEBUGS_BIN) $(SOURCES) $(HEADERS) $(MAKEFILE_LIST)
 #	set non-utf8-supporting locale
 	LC_ALL=C; \
 	$(call silent_on_success,\
 	  $(INFER_BIN) --check-duplicate-symbols $(INFER_OPTIONS) -a $(ANALYZER) -- clang $(CLANG_OPTIONS) $(SOURCES) 2>duplicates.txt)
 	grep "DUPLICATE_SYMBOLS" duplicates.txt; test $$? -ne 0
 #	make sure inferTraceBugs is immune to UTF-8
 	$(call silent_on_success, $(INFERTRACEBUGS_BIN) --max-level max --select 0)
 	$(call silent_on_success, $(INFERTRACEBUGS_BIN) --html)
 	[ -f infer-out/report.html/index.html ]
 #	run again to check that infer manages to delete the results directory
 	LC_ALL=C; \
 	$(call silent_on_success,\