You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
533 lines
22 KiB
533 lines
22 KiB
# epydoc -- Marked-up Representations for Python Values
|
|
#
|
|
# Copyright (C) 2005 Edward Loper
|
|
# Author: Edward Loper <edloper@loper.org>
|
|
# URL: <http://epydoc.sf.net>
|
|
#
|
|
# $Id: apidoc.py 1448 2007-02-11 00:05:34Z dvarrazzo $
|
|
|
|
"""
|
|
Syntax highlighter for Python values. Currently provides special
|
|
colorization support for:
|
|
|
|
- lists, tuples, sets, frozensets, dicts
|
|
- numbers
|
|
- strings
|
|
- compiled regexps
|
|
|
|
The highlighter also takes care of line-wrapping, and automatically
|
|
stops generating repr output as soon as it has exceeded the specified
|
|
number of lines (which should make it faster than pprint for large
|
|
values). It does I{not} bother to do automatic cycle detection,
|
|
because maxlines is typically around 5, so it's really not worth it.
|
|
|
|
The syntax-highlighted output is encoded using a
|
|
L{ParsedEpytextDocstring}, which can then be used to generate output in
|
|
a variety of formats.
|
|
"""
|
|
__docformat__ = 'epytext en'
|
|
|
|
# Implementation note: we use exact tests for classes (list, etc)
|
|
# rather than using isinstance, because subclasses might override
|
|
# __repr__.
|
|
|
|
import types, re
|
|
import epydoc.apidoc
|
|
from epydoc.util import decode_with_backslashreplace
|
|
from epydoc.util import plaintext_to_html, plaintext_to_latex
|
|
from epydoc.compat import *
|
|
import sre_parse, sre_constants
|
|
|
|
from epydoc.markup.epytext import Element, ParsedEpytextDocstring
|
|
|
|
def is_re_pattern(pyval):
    """
    Return true if C{pyval} is a compiled regular expression object.

    The test is made against the actual type returned by C{re.compile}
    rather than against the type's C{__name__}: the name of that type is
    an interpreter implementation detail (it is not C{'SRE_Pattern'} on
    every Python version), and an unrelated class that merely happens to
    be called C{SRE_Pattern} must not be treated as a regexp.

    @param pyval: Any Python value.
    @return: C{True} if C{pyval} is a compiled regexp; C{False}
        otherwise.
    """
    # re.compile() caches compiled patterns, so looking the type up on
    # every call is cheap.
    return isinstance(pyval, type(re.compile('')))
|
|
|
|
class _ColorizerState:
|
|
"""
|
|
An object uesd to keep track of the current state of the pyval
|
|
colorizer. The L{mark()}/L{restore()} methods can be used to set
|
|
a backup point, and restore back to that backup point. This is
|
|
used by several colorization methods that first try colorizing
|
|
their object on a single line (setting linebreakok=False); and
|
|
then fall back on a multi-line output if that fails. The L{score}
|
|
variable is used to keep track of a 'score', reflecting how good
|
|
we think this repr is. E.g., unhelpful values like '<Foo instance
|
|
at 0x12345>' get low scores. If the score is too low, we'll use
|
|
the parse-derived repr instead.
|
|
"""
|
|
def __init__(self):
|
|
self.result = []
|
|
self.charpos = 0
|
|
self.lineno = 1
|
|
self.linebreakok = True
|
|
|
|
#: How good this represention is?
|
|
self.score = 0
|
|
|
|
def mark(self):
|
|
return (len(self.result), self.charpos,
|
|
self.lineno, self.linebreakok, self.score)
|
|
|
|
def restore(self, mark):
|
|
n, self.charpos, self.lineno, self.linebreakok, self.score = mark
|
|
del self.result[n:]
|
|
|
|
class _Maxlines(Exception):
|
|
"""A control-flow exception that is raised when PyvalColorizer
|
|
exeeds the maximum number of allowed lines."""
|
|
|
|
class _Linebreak(Exception):
|
|
"""A control-flow exception that is raised when PyvalColorizer
|
|
generates a string containing a newline, but the state object's
|
|
linebreakok variable is False."""
|
|
|
|
class ColorizedPyvalRepr(ParsedEpytextDocstring):
    """
    A parsed epytext docstring holding the colorized repr of a Python
    value, together with two pieces of metadata about that repr.

    @ivar score: A score, evaluating how good this repr is.
    @ivar is_complete: True if this colorized repr completely describes
        the object.
    """
    def __init__(self, tree, score, is_complete):
        """
        @param tree: The document tree, handed unchanged to
            C{ParsedEpytextDocstring.__init__}.
        @param score: Stored as L{score}.
        @param is_complete: Stored as L{is_complete}.
        """
        # Explicit base-class call, as in the rest of this file.
        ParsedEpytextDocstring.__init__(self, tree)
        self.score = score
        self.is_complete = is_complete
|
|
|
|
def colorize_pyval(pyval, parse_repr=None, min_score=None,
                   linelen=75, maxlines=5, linebreakok=True, sort=True):
    """
    Convenience wrapper: build a L{PyvalColorizer} from the formatting
    options and use it to colorize a single value.

    @param pyval: Passed to L{PyvalColorizer.colorize}.
    @param parse_repr: Passed to L{PyvalColorizer.colorize}.
    @param min_score: Passed to L{PyvalColorizer.colorize}.
    @param linelen: Passed to the L{PyvalColorizer} constructor.
    @param maxlines: Passed to the L{PyvalColorizer} constructor.
    @param linebreakok: Passed to the L{PyvalColorizer} constructor.
    @param sort: Passed to the L{PyvalColorizer} constructor.
    @return: Whatever L{PyvalColorizer.colorize} returns.
    """
    colorizer = PyvalColorizer(linelen, maxlines, linebreakok, sort)
    return colorizer.colorize(pyval, parse_repr, min_score)
|
|
|
|
class PyvalColorizer:
    """
    Syntax highlighter for Python values.

    @ivar linelen: Maximum number of characters per output line; longer
        lines are wrapped.
    @ivar maxlines: Maximum number of output lines; output is cut off
        once this would be exceeded.
    @ivar linebreakok: Whether the generated output may contain line
        breaks at all.
    @ivar sort: Whether the elements of sets, frozensets and dicts are
        sorted before being displayed.
    """

    def __init__(self, linelen=75, maxlines=5, linebreakok=True, sort=True):
        self.linelen = linelen
        self.maxlines = maxlines
        self.linebreakok = linebreakok
        self.sort = sort

    #////////////////////////////////////////////////////////////
    # Colorization Tags & other constants
    #////////////////////////////////////////////////////////////

    GROUP_TAG = 'variable-group'     # e.g., "[" and "]"
    COMMA_TAG = 'variable-op'        # The "," that separates elements
    COLON_TAG = 'variable-op'        # The ":" in dictionaries
    CONST_TAG = None                 # None, True, False
    NUMBER_TAG = None                # ints, floats, etc
    QUOTE_TAG = 'variable-quote'     # Quotes around strings.
    STRING_TAG = 'variable-string'   # Body of string literals

    RE_CHAR_TAG = None
    RE_GROUP_TAG = 're-group'
    RE_REF_TAG = 're-ref'
    RE_OP_TAG = 're-op'
    RE_FLAGS_TAG = 're-flags'

    # NOTE: these three elements are shared by every colorizer and by
    # every result they appear in; they must never be modified in place.
    ELLIPSIS = Element('code', u'...', style='variable-ellipsis')
    LINEWRAP = Element('symbol', u'crarr')
    UNKNOWN_REPR = Element('code', u'??', style='variable-unknown')

    GENERIC_OBJECT_RE = re.compile(r'^<.* at 0x[0-9a-f]+>$', re.IGNORECASE)

    ESCAPE_UNICODE = False # should we escape non-ascii unicode chars?

    #////////////////////////////////////////////////////////////
    # Entry Point
    #////////////////////////////////////////////////////////////

    def colorize(self, pyval, parse_repr=None, min_score=None):
        """
        @param pyval: The value to colorize, or C{epydoc.apidoc.UNKNOWN}
            if only C{parse_repr} is available.
        @param parse_repr: A string repr to fall back on.  It is used
            when C{pyval} is C{UNKNOWN}, and also when the repr built
            from C{pyval} scores below C{min_score}.
        @param min_score: The lowest acceptable score for the repr built
            from C{pyval}, or C{None} to accept any score.
        @return: A L{ColorizedPyvalRepr} describing the given pyval.
        """
        UNKNOWN = epydoc.apidoc.UNKNOWN
        # Create an object to keep track of the colorization.
        state = _ColorizerState()
        state.linebreakok = self.linebreakok
        # Colorize the value.  If we reach maxlines, then add on an
        # ellipsis marker and call it a day.
        try:
            if pyval is not UNKNOWN:
                self._colorize(pyval, state)
            elif parse_repr not in (None, UNKNOWN):
                self._output(parse_repr, None, state)
            else:
                state.result.append(PyvalColorizer.UNKNOWN_REPR)
            is_complete = True
        except (_Maxlines, _Linebreak):
            if self.linebreakok:
                state.result.append('\n')
                state.result.append(self.ELLIPSIS)
            else:
                # Single-line output: drop a dangling line-wrap marker,
                # then make room for the ellipsis.
                if state.result and state.result[-1] is self.LINEWRAP:
                    state.result.pop()
                self._trim_result(state.result, 3)
                state.result.append(self.ELLIPSIS)
            is_complete = False
        # If we didn't score high enough, then try again, this time
        # using only the parse-derived repr.
        if (pyval is not UNKNOWN and parse_repr not in (None, UNKNOWN)
            and min_score is not None and state.score < min_score):
            return self.colorize(UNKNOWN, parse_repr)
        # Put it all together.
        tree = Element('epytext', *state.result)
        return ColorizedPyvalRepr(tree, state.score, is_complete)

    def _colorize(self, pyval, state):
        """
        Dispatch on the exact type of C{pyval} and append its colorized
        repr to C{state}.  Exact type tests are used (rather than
        C{isinstance}) because subclasses might override C{__repr__}.
        Every value that is colorized adds one to C{state.score}.
        """
        pyval_type = type(pyval)
        state.score += 1

        if pyval is None or pyval is True or pyval is False:
            self._output(unicode(pyval), self.CONST_TAG, state)
        elif pyval_type in (int, float, long, types.ComplexType):
            self._output(unicode(pyval), self.NUMBER_TAG, state)
        elif pyval_type is str:
            self._colorize_str(pyval, state, '', 'string-escape')
        elif pyval_type is unicode:
            if self.ESCAPE_UNICODE:
                self._colorize_str(pyval, state, 'u', 'unicode-escape')
            else:
                self._colorize_str(pyval, state, 'u', None)
        elif pyval_type is list:
            self._multiline(self._colorize_iter, pyval, state, '[', ']')
        elif pyval_type is tuple:
            self._multiline(self._colorize_iter, pyval, state, '(', ')')
        elif pyval_type is set:
            self._multiline(self._colorize_iter, self._sort(pyval),
                            state, 'set([', '])')
        elif pyval_type is frozenset:
            self._multiline(self._colorize_iter, self._sort(pyval),
                            state, 'frozenset([', '])')
        elif pyval_type is dict:
            self._multiline(self._colorize_dict, self._sort(pyval.items()),
                            state, '{', '}')
        elif is_re_pattern(pyval):
            self._colorize_re(pyval, state)
        else:
            # Fall back on the object's own repr.  This runs arbitrary
            # user code, so any failure (other than the user hitting
            # ctrl-C) is deliberately swallowed and heavily penalized.
            try:
                pyval_repr = repr(pyval)
                if not isinstance(pyval_repr, (str, unicode)):
                    pyval_repr = unicode(pyval_repr)
                pyval_repr_ok = True
            except KeyboardInterrupt:
                raise
            except:
                pyval_repr_ok = False
                state.score -= 100

            if pyval_repr_ok:
                # Reprs like '<Foo instance at 0x12345>' aren't helpful.
                if self.GENERIC_OBJECT_RE.match(pyval_repr):
                    state.score -= 5
                self._output(pyval_repr, None, state)
            else:
                state.result.append(self.UNKNOWN_REPR)

    def _sort(self, items):
        """
        @return: C{sorted(items)} if sorting is enabled and succeeds;
            otherwise C{items} itself, unchanged.
        """
        if not self.sort: return items
        try: return sorted(items)
        except KeyboardInterrupt: raise
        except: return items

    def _trim_result(self, result, num_chars):
        """
        Remove (up to) the last C{num_chars} characters of text from the
        end of the output list C{result}, in place.  Pieces that become
        empty are removed from the list.

        The shared marker elements (L{ELLIPSIS}, L{LINEWRAP} and
        L{UNKNOWN_REPR}) are treated as atomic: they are removed whole
        and counted by the length of their text.  They are class-level
        objects, so shortening their text in place would corrupt every
        later use of the marker.
        """
        while num_chars > 0:
            if not result: return
            last = result[-1]
            if isinstance(last, Element):
                assert len(last.children) == 1
                if (last is self.ELLIPSIS or last is self.LINEWRAP or
                    last is self.UNKNOWN_REPR):
                    result.pop()
                    num_chars -= len(last.children[0])
                    continue
                trim = min(num_chars, len(last.children[0]))
                # (When trim is 0 the text is already empty, and the
                # slice below leaves it empty.)
                last.children[0] = last.children[0][:-trim]
                if not last.children[0]: result.pop()
                num_chars -= trim
            else:
                trim = min(num_chars, len(last))
                result[-1] = last[:-trim]
                if not result[-1]: result.pop()
                num_chars -= trim

    #////////////////////////////////////////////////////////////
    # Object Colorization Functions
    #////////////////////////////////////////////////////////////

    def _multiline(self, func, pyval, state, *args):
        """
        Helper for container-type colorizers.  First, try calling
        C{func(pyval, state, *args)} with linebreakok set to false;
        and if that fails, then try again with it set to true.

        @raise _Linebreak: If the single-line attempt fails and line
            breaks were not allowed to begin with.
        """
        linebreakok = state.linebreakok
        mark = state.mark()

        try:
            state.linebreakok = False
            func(pyval, state, *args)
            state.linebreakok = linebreakok

        except _Linebreak:
            if not linebreakok:
                raise
            # Undo the failed attempt (this also resets linebreakok to
            # the value it had when the mark was taken) and redo it.
            state.restore(mark)
            func(pyval, state, *args)

    def _colorize_iter(self, pyval, state, prefix, suffix):
        """
        Output the elements of the iterable C{pyval}, separated by
        commas and enclosed between C{prefix} and C{suffix}.  If line
        breaks are allowed, each element after the first starts a new
        line, indented to line up with the first element.
        """
        self._output(prefix, self.GROUP_TAG, state)
        indent = state.charpos
        for i, elt in enumerate(pyval):
            if i>=1:
                if state.linebreakok:
                    self._output(',', self.COMMA_TAG, state)
                    self._output('\n'+' '*indent, None, state)
                else:
                    self._output(', ', self.COMMA_TAG, state)
            self._colorize(elt, state)
        self._output(suffix, self.GROUP_TAG, state)

    def _colorize_dict(self, items, state, prefix, suffix):
        """
        Output the C{(key, value)} pairs in C{items} as C{key: value}
        entries, separated by commas and enclosed between C{prefix} and
        C{suffix}.  Line breaking works as in L{_colorize_iter}.
        """
        self._output(prefix, self.GROUP_TAG, state)
        indent = state.charpos
        for i, (key, val) in enumerate(items):
            if i>=1:
                if state.linebreakok:
                    self._output(',', self.COMMA_TAG, state)
                    self._output('\n'+' '*indent, None, state)
                else:
                    self._output(', ', self.COMMA_TAG, state)
            self._colorize(key, state)
            self._output(': ', self.COLON_TAG, state)
            self._colorize(val, state)
        self._output(suffix, self.GROUP_TAG, state)

    def _colorize_str(self, pyval, state, prefix, encoding):
        """
        Output the string C{pyval} as a quoted string literal.

        @param prefix: Text placed before the opening quote (e.g.
            C{'u'}).
        @param encoding: Name of the codec used to escape each line of
            the body (e.g. C{'string-escape'}), or a false value to
            output the body as-is.
        """
        # Decide which quote to use.
        if '\n' in pyval and state.linebreakok: quote = "'''"
        else: quote = "'"
        # Divide the string into lines.
        if state.linebreakok:
            lines = pyval.split('\n')
        else:
            lines = [pyval]
        # Open quote.
        self._output(prefix+quote, self.QUOTE_TAG, state)
        # Body
        for i, line in enumerate(lines):
            if i>0: self._output('\n', None, state)
            if encoding: line = line.encode(encoding)
            self._output(line, self.STRING_TAG, state)
        # Close quote.
        self._output(quote, self.QUOTE_TAG, state)

    def _colorize_re(self, pyval, state):
        """
        Output the compiled regexp C{pyval} as a C{re.compile(r'...')}
        expression, with its flags rendered inline as C{(?...)} and the
        pattern reconstructed from its parse tree.
        """
        # Extract the flag & pattern from the regexp.
        pat, flags = pyval.pattern, pyval.flags
        # If the pattern is a string, decode it to unicode.
        if isinstance(pat, str):
            pat = decode_with_backslashreplace(pat)
        # Parse the regexp pattern.
        tree = sre_parse.parse(pat, flags)
        # Map group numbers to group names.
        groups = dict([(num,name) for (name,num) in
                       tree.pattern.groupdict.items()])
        # Colorize it!
        self._output("re.compile(r'", None, state)
        self._colorize_re_flags(tree.pattern.flags, state)
        self._colorize_re_tree(tree, state, True, groups)
        self._output("')", None, state)

    def _colorize_re_flags(self, flags, state):
        """
        If any flag bits are set in C{flags}, output them as an inline
        C{(?...)} group, using the flag letters from C{sre_parse.FLAGS}
        in sorted order.
        """
        if flags:
            flags = [c for (c,n) in sorted(sre_parse.FLAGS.items())
                     if (n&flags)]
            flags = '(?%s)' % ''.join(flags)
            self._output(flags, self.RE_FLAGS_TAG, state)

    def _colorize_re_tree(self, tree, state, noparen, groups):
        """
        Recursively output regexp source text for a parsed pattern.

        @param tree: A sequence of C{(op, args)} pairs, such as the
            value returned by C{sre_parse.parse}.
        @param noparen: If false, and C{tree} contains more than one
            element, then the output is enclosed in parentheses.
        @param groups: A dictionary mapping group numbers to group
            names.
        @raise ValueError: If a branch, category or position element has
            an unexpected argument.
        """
        assert noparen in (True, False)
        if len(tree) > 1 and not noparen:
            self._output('(', self.RE_GROUP_TAG, state)
        for elt in tree:
            op = elt[0]
            args = elt[1]

            if op == sre_constants.LITERAL:
                c = unichr(args)
                # Add any appropriate escaping.
                if c in '.^$\\*+?{}[]|()\'': c = '\\'+c
                elif c == '\t': c = '\\t'
                elif c == '\r': c = '\\r'
                elif c == '\n': c = '\\n'
                elif c == '\f': c = '\\f'
                elif c == '\v': c = '\\v'
                elif ord(c) > 0xffff: c = r'\U%08x' % ord(c)
                elif ord(c) > 0xff: c = r'\u%04x' % ord(c)
                elif ord(c)<32 or ord(c)>=127: c = r'\x%02x' % ord(c)
                self._output(c, self.RE_CHAR_TAG, state)

            elif op == sre_constants.ANY:
                self._output('.', self.RE_CHAR_TAG, state)

            elif op == sre_constants.BRANCH:
                if args[0] is not None:
                    raise ValueError('Branch expected None arg but got %s'
                                     % args[0])
                for i, item in enumerate(args[1]):
                    if i > 0:
                        self._output('|', self.RE_OP_TAG, state)
                    self._colorize_re_tree(item, state, True, groups)

            elif op == sre_constants.IN:
                # A set containing just one category (e.g. \d) is shown
                # without the enclosing brackets.
                if (len(args) == 1 and args[0][0] == sre_constants.CATEGORY):
                    self._colorize_re_tree(args, state, False, groups)
                else:
                    self._output('[', self.RE_GROUP_TAG, state)
                    self._colorize_re_tree(args, state, True, groups)
                    self._output(']', self.RE_GROUP_TAG, state)

            elif op == sre_constants.CATEGORY:
                if args == sre_constants.CATEGORY_DIGIT: val = r'\d'
                elif args == sre_constants.CATEGORY_NOT_DIGIT: val = r'\D'
                elif args == sre_constants.CATEGORY_SPACE: val = r'\s'
                elif args == sre_constants.CATEGORY_NOT_SPACE: val = r'\S'
                elif args == sre_constants.CATEGORY_WORD: val = r'\w'
                elif args == sre_constants.CATEGORY_NOT_WORD: val = r'\W'
                else: raise ValueError('Unknown category %s' % args)
                self._output(val, self.RE_CHAR_TAG, state)

            elif op == sre_constants.AT:
                if args == sre_constants.AT_BEGINNING_STRING: val = r'\A'
                elif args == sre_constants.AT_BEGINNING: val = r'^'
                elif args == sre_constants.AT_END: val = r'$'
                elif args == sre_constants.AT_BOUNDARY: val = r'\b'
                elif args == sre_constants.AT_NON_BOUNDARY: val = r'\B'
                elif args == sre_constants.AT_END_STRING: val = r'\Z'
                else: raise ValueError('Unknown position %s' % args)
                self._output(val, self.RE_CHAR_TAG, state)

            elif op in (sre_constants.MAX_REPEAT, sre_constants.MIN_REPEAT):
                minrpt = args[0]
                maxrpt = args[1]
                if maxrpt == sre_constants.MAXREPEAT:
                    if minrpt == 0:   val = '*'
                    elif minrpt == 1: val = '+'
                    else: val = '{%d,}' % (minrpt)
                elif minrpt == 0:
                    if maxrpt == 1: val = '?'
                    else: val = '{,%d}' % (maxrpt)
                elif minrpt == maxrpt:
                    val = '{%d}' % (maxrpt)
                else:
                    val = '{%d,%d}' % (minrpt, maxrpt)
                # A trailing '?' marks the repeat as non-greedy.
                if op == sre_constants.MIN_REPEAT:
                    val += '?'

                self._colorize_re_tree(args[2], state, False, groups)
                self._output(val, self.RE_OP_TAG, state)

            elif op == sre_constants.SUBPATTERN:
                if args[0] is None:
                    self._output('(?:', self.RE_GROUP_TAG, state)
                elif args[0] in groups:
                    self._output('(?P<', self.RE_GROUP_TAG, state)
                    self._output(groups[args[0]], self.RE_REF_TAG, state)
                    self._output('>', self.RE_GROUP_TAG, state)
                elif isinstance(args[0], (int, long)):
                    # This is cheating:
                    self._output('(', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?P<', self.RE_GROUP_TAG, state)
                    self._output(args[0], self.RE_REF_TAG, state)
                    self._output('>', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.GROUPREF:
                self._output('\\%d' % args, self.RE_REF_TAG, state)

            elif op == sre_constants.RANGE:
                self._colorize_re_tree( ((sre_constants.LITERAL, args[0]),),
                                        state, False, groups )
                self._output('-', self.RE_OP_TAG, state)
                self._colorize_re_tree( ((sre_constants.LITERAL, args[1]),),
                                        state, False, groups )

            elif op == sre_constants.NEGATE:
                self._output('^', self.RE_OP_TAG, state)

            elif op == sre_constants.ASSERT:
                # A positive direction is a lookahead; otherwise it is
                # a lookbehind.
                if args[0] > 0:
                    self._output('(?=', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?<=', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.ASSERT_NOT:
                if args[0] > 0:
                    self._output('(?!', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?<!', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.NOT_LITERAL:
                self._output('[^', self.RE_GROUP_TAG, state)
                self._colorize_re_tree( ((sre_constants.LITERAL, args),),
                                        state, False, groups )
                self._output(']', self.RE_GROUP_TAG, state)
            else:
                # This module's import block does not bind `log`, so it
                # is imported here, where it is needed.
                from epydoc import log
                # `elt` is a tuple, so it has to be wrapped in a
                # 1-tuple to be formatted as a single value.
                log.error("Error colorizing regexp: unknown elt %r" % (elt,))
        if len(tree) > 1 and not noparen:
            self._output(')', self.RE_GROUP_TAG, state)

    #////////////////////////////////////////////////////////////
    # Output function
    #////////////////////////////////////////////////////////////

    def _output(self, s, tag, state):
        """
        Add the string `s` to the result list, tagging its contents
        with tag `tag`.  Any lines that go beyond `self.linelen` will
        be line-wrapped.  If the total number of lines exceeds
        `self.maxlines`, then raise a `_Maxlines` exception.

        @raise _Maxlines: If starting a new line would exceed
            `self.maxlines`.
        @raise _Linebreak: If a new line has to be started while
            `state.linebreakok` is false.
        """
        # Make sure the string is unicode.
        if isinstance(s, str):
            s = decode_with_backslashreplace(s)

        # Split the string into segments.  The first segment is the
        # content to add to the current line, and the remaining
        # segments are new lines.
        segments = s.split('\n')

        # NOTE: the line-wrapping branch below inserts into `segments`
        # while it is being iterated over; this is intentional.
        for i, segment in enumerate(segments):
            # If this isn't the first segment, then add a newline to
            # split it from the previous segment.
            if i > 0:
                if (state.lineno+1) > self.maxlines:
                    raise _Maxlines()
                if not state.linebreakok:
                    raise _Linebreak()
                state.result.append(u'\n')
                state.lineno += 1
                state.charpos = 0

            # If the segment fits on the current line, then just call
            # markup to tag it, and store the result.
            if state.charpos + len(segment) <= self.linelen:
                state.charpos += len(segment)
                if tag:
                    segment = Element('code', segment, style=tag)
                state.result.append(segment)

            # If the segment doesn't fit on the current line, then
            # line-wrap it, and insert the remainder of the line into
            # the segments list that we're iterating over.  (We'll go
            # to the beginning of the next line at the start of the
            # next iteration through the loop.)
            else:
                split = self.linelen-state.charpos
                segments.insert(i+1, segment[split:])
                segment = segment[:split]
                if tag:
                    segment = Element('code', segment, style=tag)
                state.result += [segment, self.LINEWRAP]
|
|
|