# -*- coding: utf-8 -*-
"""
Defines the function `repr2`, which allows for a bit more customization than
`repr` or `pprint`. See the docstring for more details.
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import six
import collections
[docs]def repr2(data, **kwargs):
"""
Makes a pretty and easy-to-doctest string representation!
This is an alternative to repr, and `pprint.pformat` that attempts to be
both more configurable and generate output that is consistent between
python versions.
Notes:
This function has many keyword arguments that can be used to customize
the final representation. For convinience some of the more frequently
used kwargs have short aliases. See `Args` for more details.
Args:
data (object): an arbitrary python object
**kwargs: see `the Kwargs` section
Kwargs:
si, stritems, (bool):
dict/list items use str instead of repr
strkeys, sk (bool):
dict keys use str instead of repr
strvals, sv (bool):
dict values use str instead of repr
nl, newlines (int | bool):
number of top level nestings to place a newline after. If true all
items are followed by newlines regardless of nesting level.
Defaults to 1 for lists and True for dicts.
nobr, nobraces (bool, default=False):
if True, text will not contain outer braces for containers
cbr, compact_brace (bool, default=False):
if True, braces are compactified (i.e. they will not have newlines
placed directly after them, think java / K&R / 1TBS)
trailsep, trailing_sep (bool):
if True, a separator is placed after the last item in a sequence.
By default this is True if there are any `nl > 0`.
explicit (bool, default=False):
changes dict representation from `{k1: v1, ...}` to
`dict(k1=v1, ...)`.
precision (int, default=None):
if specified floats are formatted with this precision
kvsep (str, default=': '):
separator between keys and values
itemsep (str, default=' '):
separator between items
sort (bool):
if True, attempts to sort all unordered collections in the returned
text. NOTE: currently if True this will sort lists, this may not be
a correct thing to do, as such the behavior of this arg is subject
to change.
suppress_small (bool):
passed to `numpy.array2string` for ndarrays
max_line_width (int):
passed to `numpy.array2string` for ndarrays
with_dtype (bool):
only relevant to ndarrays. if True includes the dtype.
Returns:
str: outstr: output string
Notes:
There are also internal kwargs, which should not be used:
_return_info (bool): return information about child context
_root_info (depth): information about parent context
CommandLine:
python -m ubelt.util_format repr2:0
python -m ubelt.util_format repr2:1
Example:
>>> from ubelt.util_format import *
>>> import ubelt as ub
>>> dict_ = {
... 'custom_types': [slice(0, 1, None), 1/3],
... 'nest_dict': {'k1': [1, 2, {3: {4, 5}}],
... 'key2': [1, 2, {3: {4, 5}}],
... 'key3': [1, 2, {3: {4, 5}}],
... },
... 'nest_dict2': {'k': [1, 2, {3: {4, 5}}]},
... 'nested_tuples': [tuple([1]), tuple([2, 3]), frozenset([4, 5, 6])],
... 'one_tup': tuple([1]),
... 'simple_dict': {'spam': 'eggs', 'ham': 'jam'},
... 'simple_list': [1, 2, 'red', 'blue'],
... 'odict': ub.odict([(1, '1'), (2, '2')]),
... }
>>> result = repr2(dict_, nl=3, precision=2); print(result)
>>> result = repr2(dict_, nl=2, precision=2); print(result)
>>> result = repr2(dict_, nl=1, precision=2); print(result)
>>> result = repr2(dict_, nl=1, precision=2, itemsep='', explicit=True); print(result)
>>> result = repr2(dict_, nl=1, precision=2, nobr=1, itemsep='', explicit=True); print(result)
>>> result = repr2(dict_, nl=3, precision=2, cbr=True); print(result)
>>> result = repr2(dict_, nl=3, precision=2, si=True); print(result)
>>> result = repr2(dict_, nl=3, sort=True); print(result)
>>> result = repr2(dict_, nl=3, sort=False, trailing_sep=False); print(result)
>>> result = repr2(dict_, nl=3, sort=False, trailing_sep=False, nobr=True); print(result)
Example:
>>> from ubelt.util_format import *
>>> def _nest(d, w):
... if d == 0:
... return {}
... else:
... return {'n{}'.format(d): _nest(d - 1, w + 1), 'm{}'.format(d): _nest(d - 1, w + 1)}
>>> dict_ = _nest(d=4, w=1)
>>> result = repr2(dict_, nl=6, precision=2, cbr=1)
>>> print('---')
>>> print(result)
>>> result = repr2(dict_, nl=-1, precision=2)
>>> print('---')
>>> print(result)
"""
custom_extensions = kwargs.get('extensions', None)
_return_info = kwargs.get('_return_info', False)
kwargs['_root_info'] = _rectify_root_info(kwargs.get('_root_info', None))
outstr = None
_leaf_info = None
if custom_extensions:
func = custom_extensions.lookup(data)
if func is not None:
outstr = func(data, **kwargs)
if outstr is None:
if isinstance(data, dict):
outstr, _leaf_info = _format_dict(data, **kwargs)
elif isinstance(data, (list, tuple, set, frozenset)):
outstr, _leaf_info = _format_list(data, **kwargs)
if outstr is None:
# check any globally registered functions for special formatters
func = _FORMATTER_EXTENSIONS.lookup(data)
if func is not None:
outstr = func(data, **kwargs)
else:
outstr = _format_object(data, **kwargs)
if _return_info:
_leaf_info = _rectify_leaf_info(_leaf_info)
return outstr, _leaf_info
else:
return outstr
def _rectify_root_info(_root_info):
if _root_info is None:
_root_info = {
'depth': 0,
}
return _root_info
def _rectify_leaf_info(_leaf_info):
if _leaf_info is None:
_leaf_info = {
'max_height': 0,
'min_height': 0,
}
return _leaf_info
_FORMATTER_EXTENSIONS = FormatterExtensions()
_FORMATTER_EXTENSIONS._register_builtin_extensions()
@_FORMATTER_EXTENSIONS.lazy_init.append
def _lazy_init():
try:
# TODO: can we use lazy loading to prevent trying to import numpy until
# some attribute of _FORMATTER_EXTENSIONS is used?
_FORMATTER_EXTENSIONS._register_numpy_extensions()
# TODO: register pandas by default if available
pass
except ImportError: # nocover
pass
def _format_object(val, **kwargs):
stritems = kwargs.get('si', kwargs.get('stritems', False))
strvals = stritems or kwargs.get('sv', kwargs.get('strvals', False))
base_valfunc = six.text_type if strvals else repr
itemstr = base_valfunc(val)
# Remove unicode repr from python2 to agree with python3 output
if six.PY2 and isinstance(val, six.string_types): # nocover
if itemstr.startswith(("u'", 'u"')):
itemstr = itemstr[1:]
return itemstr
def _format_list(list_, **kwargs):
"""
Makes a pretty printable / human-readable string representation of a
sequence. In most cases this string could be evaled.
Args:
list_ (list): input list
**kwargs: nl, newlines, packed, nobr, nobraces, itemsep, trailing_sep,
strvals indent_, precision, use_numpy, with_dtype, force_dtype,
stritems, strkeys, explicit, sort, key_order, maxlen
Returns:
Tuple[str, Dict] : retstr, _leaf_info
Example:
>>> print(_format_list([])[0])
[]
>>> print(_format_list([], nobr=True)[0])
[]
>>> print(_format_list([1], nl=0)[0])
[1]
>>> print(_format_list([1], nobr=True)[0])
1,
"""
kwargs['_root_info'] = _rectify_root_info(kwargs.get('_root_info', None))
kwargs['_root_info']['depth'] += 1
newlines = kwargs.pop('nl', kwargs.pop('newlines', 1))
kwargs['nl'] = _rectify_countdown_or_bool(newlines)
nobraces = kwargs.pop('nobr', kwargs.pop('nobraces', False))
itemsep = kwargs.get('itemsep', ' ')
compact_brace = kwargs.get('cbr', kwargs.get('compact_brace', False))
# kwargs['cbr'] = _rectify_countdown_or_bool(compact_brace)
itemstrs, _leaf_info = _list_itemstrs(list_, **kwargs)
if len(itemstrs) == 0:
nobraces = False # force braces to prevent empty output
is_tuple = isinstance(list_, tuple)
is_set = isinstance(list_, (set, frozenset,))
if nobraces:
lbr, rbr = '', ''
elif is_tuple:
lbr, rbr = '(', ')'
elif is_set:
lbr, rbr = '{', '}'
else:
lbr, rbr = '[', ']'
# Doesn't actually put in trailing comma if on same line
trailing_sep = kwargs.get('trailsep', kwargs.get('trailing_sep', newlines > 0 and len(itemstrs)))
# The trailing separator is always needed for single item tuples
if is_tuple and len(list_) <= 1:
trailing_sep = True
if len(itemstrs) == 0:
newlines = False
retstr = _join_itemstrs(itemstrs, itemsep, newlines, _leaf_info, nobraces,
trailing_sep, compact_brace, lbr, rbr)
return retstr, _leaf_info
def _format_dict(dict_, **kwargs):
"""
Makes a pretty printable / human-readable string representation of a
dictionary. In most cases this string could be evaled.
Args:
dict_ (dict): a dictionary
**kwargs: si, stritems, strkeys, strvals, sk, sv, nl, newlines, nobr,
nobraces, cbr, compact_brace, trailing_sep,
explicit, itemsep, precision, kvsep, sort
Returns:
Tuple[str, Dict] : retstr, _leaf_info
Kwargs:
sort (None): if True, sorts ALL collections and subcollections,
note, collections with undefined orders (e.g. dicts, sets) are
sorted by default. (default = None)
nl (int): preferred alias for newline. can be a countdown variable
(default = None)
explicit (int): can be a countdown variable. if True, uses
dict(a=b) syntax instead of {'a': b}
nobr (bool): removes outer braces (default = False)
"""
kwargs['_root_info'] = _rectify_root_info(kwargs.get('_root_info', None))
kwargs['_root_info']['depth'] += 1
stritems = kwargs.pop('si', kwargs.pop('stritems', False))
if stritems:
kwargs['strkeys'] = True
kwargs['strvals'] = True
kwargs['strkeys'] = kwargs.pop('sk', kwargs.pop('strkeys', False))
kwargs['strvals'] = kwargs.pop('sv', kwargs.pop('strvals', False))
newlines = kwargs.pop('nl', kwargs.pop('newlines', True))
kwargs['nl'] = _rectify_countdown_or_bool(newlines)
nobraces = kwargs.pop('nobr', kwargs.pop('nobraces', False))
compact_brace = kwargs.get('cbr', kwargs.get('compact_brace', False))
# kwargs['cbr'] = _rectify_countdown_or_bool(compact_brace)
# Doesn't actually put in trailing comma if on same line
trailing_sep = kwargs.get('trailsep', kwargs.get('trailing_sep', newlines > 0))
explicit = kwargs.get('explicit', False)
itemsep = kwargs.get('itemsep', ' ')
if len(dict_) == 0:
retstr = 'dict()' if explicit else '{}'
_leaf_info = None
else:
itemstrs, _leaf_info = _dict_itemstrs(dict_, **kwargs)
if nobraces:
lbr, rbr = '', ''
elif explicit:
lbr, rbr = 'dict(', ')'
else:
lbr, rbr = '{', '}'
retstr = _join_itemstrs(itemstrs, itemsep, newlines, _leaf_info, nobraces,
trailing_sep, compact_brace, lbr, rbr)
return retstr, _leaf_info
def _join_itemstrs(itemstrs, itemsep, newlines, _leaf_info, nobraces,
trailing_sep, compact_brace, lbr, rbr):
"""
Joins string-ified items with separators newlines and container-braces.
"""
# positive newlines means start counting from the root
use_newline = newlines > 0
# negative countdown values mean start counting from the leafs
# if compact_brace < 0:
# compact_brace = (-compact_brace) >= _leaf_info['max_height']
if newlines < 0:
use_newline = (-newlines) < _leaf_info['max_height']
if use_newline:
sep = ',\n'
if nobraces:
body_str = sep.join(itemstrs)
if trailing_sep and len(itemstrs) > 0:
body_str += ','
retstr = body_str
else:
if compact_brace:
# Why must we modify the indentation below and not here?
# prefix = ''
# rest = [ub.indent(s, prefix) for s in itemstrs[1:]]
# indented = itemstrs[0:1] + rest
indented = itemstrs
else:
import ubelt as ub
prefix = ' ' * 4
indented = [ub.indent(s, prefix) for s in itemstrs]
body_str = sep.join(indented)
if trailing_sep and len(itemstrs) > 0:
body_str += ','
if compact_brace:
# Why can we modify the indentation here but not above?
braced_body_str = (lbr + body_str.replace('\n', '\n ') + rbr)
else:
braced_body_str = (lbr + '\n' + body_str + '\n' + rbr)
retstr = braced_body_str
else:
sep = ',' + itemsep
body_str = sep.join(itemstrs)
if trailing_sep and len(itemstrs) > 0:
body_str += ','
retstr = (lbr + body_str + rbr)
return retstr
def _dict_itemstrs(dict_, **kwargs):
"""
Create a string representation for each item in a dict.
Example:
>>> from ubelt.util_format import *
>>> dict_ = {'b': .1, 'l': 'st', 'g': 1.0, 's': 10, 'm': 0.9, 'w': .5}
>>> kwargs = {'strkeys': True}
>>> itemstrs, _ = _dict_itemstrs(dict_, **kwargs)
>>> char_order = [p[0] for p in itemstrs]
>>> assert char_order == ['b', 'g', 'l', 'm', 's', 'w']
"""
import ubelt as ub
explicit = kwargs.get('explicit', False)
kwargs['explicit'] = _rectify_countdown_or_bool(explicit)
precision = kwargs.get('precision', None)
kvsep = kwargs.get('kvsep', ': ')
if explicit:
kvsep = '='
def make_item_str(key, val):
if explicit or kwargs.get('strkeys', False):
key_str = six.text_type(key)
else:
key_str = repr2(key, precision=precision, newlines=0)
prefix = key_str + kvsep
kwargs['_return_info'] = True
val_str, _leaf_info = repr2(val, **kwargs)
# If the first line does not end with an open nest char
# (e.g. for ndarrays), otherwise we need to worry about
# residual indentation.
pos = val_str.find('\n')
first_line = val_str if pos == -1 else val_str[:pos]
compact_brace = kwargs.get('cbr', kwargs.get('compact_brace', False))
if compact_brace or not first_line.rstrip().endswith(tuple('([{<')):
rest = '' if pos == -1 else val_str[pos:]
val_str = first_line.lstrip() + rest
if '\n' in prefix:
# Fix issue with keys that span new lines
item_str = prefix + val_str
else:
item_str = ub.hzcat([prefix, val_str])
else:
item_str = prefix + val_str
return item_str, _leaf_info
items = list(six.iteritems(dict_))
_tups = [make_item_str(key, val) for (key, val) in items]
itemstrs = [t[0] for t in _tups]
max_height = max([t[1]['max_height'] for t in _tups]) if _tups else 0
_leaf_info = {
'max_height': max_height + 1,
}
sort = kwargs.get('sort', None)
if sort is None:
# Force ordering on unordered dicts
sort = True
if isinstance(dict_, collections.OrderedDict):
# never sort ordered dicts; they are perfect just the way they are!
sort = False
if sort:
itemstrs = _sort_itemstrs(items, itemstrs)
return itemstrs, _leaf_info
def _list_itemstrs(list_, **kwargs):
"""
Create a string representation for each item in a list.
"""
items = list(list_)
kwargs['_return_info'] = True
_tups = [repr2(item, **kwargs) for item in items]
itemstrs = [t[0] for t in _tups]
max_height = max([t[1]['max_height'] for t in _tups]) if _tups else 0
_leaf_info = {
'max_height': max_height + 1,
}
sort = kwargs.get('sort', None)
if sort is None:
# Force orderings on sets.
sort = isinstance(list_, (set, frozenset))
if sort:
itemstrs = _sort_itemstrs(items, itemstrs)
return itemstrs, _leaf_info
def _sort_itemstrs(items, itemstrs):
"""
Equivalent to `sorted(items)` except if `items` are unorderable, then
string values are used to define an ordering.
"""
# First try to sort items by their normal values
# If that doesnt work, then sort by their string values
import ubelt as ub
try:
# Set ordering is not unique. Sort by strings values instead.
if _peek_isinstance(items, (set, frozenset)):
raise TypeError
sortx = ub.argsort(items)
except TypeError:
sortx = ub.argsort(itemstrs)
itemstrs = [itemstrs[x] for x in sortx]
return itemstrs
def _peek_isinstance(items, types):
return len(items) > 0 and isinstance(items[0], types)
def _rectify_countdown_or_bool(count_or_bool):
"""
used by recursive functions to specify which level to turn a bool on in
counting down yields True, True, ..., False
counting up yields False, False, False, ... True
Args:
count_or_bool (bool or int): if positive and an integer, it will count
down, otherwise it will remain the same.
Returns:
int or bool: count_or_bool_
CommandLine:
python -m utool.util_str --test-_rectify_countdown_or_bool
Example:
>>> from ubelt.util_format import _rectify_countdown_or_bool # NOQA
>>> count_or_bool = True
>>> a1 = (_rectify_countdown_or_bool(2))
>>> a2 = (_rectify_countdown_or_bool(1))
>>> a3 = (_rectify_countdown_or_bool(0))
>>> a4 = (_rectify_countdown_or_bool(-1))
>>> a5 = (_rectify_countdown_or_bool(-2))
>>> a6 = (_rectify_countdown_or_bool(True))
>>> a7 = (_rectify_countdown_or_bool(False))
>>> a8 = (_rectify_countdown_or_bool(None))
>>> result = [a1, a2, a3, a4, a5, a6, a7, a8]
>>> print(result)
[1, 0, 0, -1, -2, True, False, False]
"""
if count_or_bool is True or count_or_bool is False:
count_or_bool_ = count_or_bool
elif isinstance(count_or_bool, int):
if count_or_bool == 0:
return 0
elif count_or_bool > 0:
count_or_bool_ = count_or_bool - 1
else:
# We dont countup negatives anymore
count_or_bool_ = count_or_bool
else:
count_or_bool_ = False
return count_or_bool_