Source code for ubelt.util_format

import re
import six
import math
import collections

# __all__ = [
#     'repr2',
#     '_format_list',
#     '_format_dict',
# ]


[docs]def repr2(val, **kwargs):
    """
    Constructs a "pretty" string representation.

    This is an alternative to repr, and `pprint.pformat` that attempts to be
    both more configurable and generate output that is consistent between
    python versions.

    Args:
        val (object): an arbitrary python object
        **kwargs: si, stritems, strkeys, strvals, sk, sv, nl, newlines, nobr,
                  nobraces, cbr, compact_brace, trailsep, trailing_sep,
                  explicit, itemsep, precision, kvsep, sort

    Returns:
        str: output string

    CommandLine:
        python -m ubelt.util_format repr2:0
        python -m ubelt.util_format repr2:1

    Example:
        >>> from ubelt.util_format import *
        >>> import ubelt as ub
        >>> dict_ = {
        ...     'custom_types': [slice(0, 1, None), 1/3],
        ...     'nest_dict': {'k1': [1, 2, {3: {4, 5}}],
        ...                   'key2': [1, 2, {3: {4, 5}}],
        ...                   'key3': [1, 2, {3: {4, 5}}],
        ...                   },
        ...     'nest_dict2': {'k': [1, 2, {3: {4, 5}}]},
        ...     'nested_tuples': [tuple([1]), tuple([2, 3]), frozenset([4, 5, 6])],
        ...     'one_tup': tuple([1]),
        ...     'simple_dict': {'spam': 'eggs', 'ham': 'jam'},
        ...     'simple_list': [1, 2, 'red', 'blue'],
        ...     'odict': ub.odict([(1, '1'), (2, '2')]),
        ... }
        >>> result = repr2(dict_, nl=3, precision=2); print(result)
        >>> result = repr2(dict_, nl=2, precision=2); print(result)
        >>> result = repr2(dict_, nl=1, precision=2); print(result)
        >>> result = repr2(dict_, nl=1, precision=2, itemsep='', explicit=True); print(result)
        >>> result = repr2(dict_, nl=1, precision=2, nobr=1, itemsep='', explicit=True); print(result)
        >>> result = repr2(dict_, nl=3, precision=2, cbr=True); print(result)
        >>> result = repr2(dict_, nl=3, precision=2, si=True); print(result)
        >>> result = repr2(dict_, nl=3, sort=True); print(result)
        >>> result = repr2(dict_, nl=3, sort=False, trailing_sep=False); print(result)
        >>> result = repr2(dict_, nl=3, sort=False, trailing_sep=False, nobr=True); print(result)

    Example:
        >>> from ubelt.util_format import *
        >>> def _nest(d, w):
        ...     if d == 0:
        ...         return {}
        ...     else:
        ...         return {'n{}'.format(d): _nest(d - 1, w + 1), 'm{}'.format(d): _nest(d - 1, w + 1)}
        >>> dict_ = _nest(d=4, w=1)
        >>> result = repr2(dict_, nl=6, precision=2, cbr=1)
        >>> print('---')
        >>> print(result)
    """
    if isinstance(val, dict):
        return _format_dict(val, **kwargs)
    elif isinstance(val, (list, tuple, set, frozenset)):
        return _format_list(val, **kwargs)
    # check any registered functions for special formatters
    func = _FORMATTER_EXTENSIONS.lookup(val)
    for type, func in _FORMATTER_EXTENSIONS.func_registry.items():
        if isinstance(val, type):
            return func(val, **kwargs)
    # base case
    return _format_object(val, **kwargs)


class _FormatterExtensions(object):
    """
    Singleton helper class for managing non-builtin (e.g. numpy) format types
    """
    # set_types = [set, frozenset]
    # list_types = [list, tuple]
    # dict_types = [dict]
    # custom_types = {
    #     'numpy': [],
    #     'pandas': [],
    # }
    # @classmethod
    # def sequence_types(cls):
    #     return cls.list_types + cls.set_types

    def __init__(self):
        self.func_registry = {}

    def register(self, type):
        """
        Registers a custom formatting function with ub.repr2
        """
        def _decorator(func):
            self.func_registry[type] = func
            return func
        return _decorator

    def lookup(self, data):
        """
        Returns an appropriate function to format `data` if one has been
        registered.
        """
        for type, func in _FORMATTER_EXTENSIONS.func_registry.items():
            if isinstance(data, type):
                return func

    def _register_numpy_extensions(self):
        """
        CommandLine:
            python -m ubelt.util_format _FormatterExtensions._register_numpy_extensions

        Example:
            >>> import sys
            >>> import pytest
            >>> if 'numpy' not in sys.modules:
            ...     raise pytest.skip()
            >>> # xdoctest: +IGNORE_WHITESPACE
            >>> import ubelt as ub
            >>> data = np.array([[.2, 42, 5], [21.2, 3, .4]])
            >>> print(ub.repr2(data))
            np.array([[ 0.2, 42. ,  5. ],
                      [21.2,  3. ,  0.4]], dtype=np.float64)
            >>> print(ub.repr2(data, with_dtype=False))
            np.array([[ 0.2, 42. ,  5. ],
                      [21.2,  3. ,  0.4]])
            >>> print(ub.repr2(data, strvals=True))
            [[ 0.2, 42. ,  5. ],
             [21.2,  3. ,  0.4]]
            >>> data = np.empty((0, 10), dtype=np.float64)
            >>> print(ub.repr2(data, strvals=False))
            np.empty((0, 10), dtype=np.float64)
            >>> print(ub.repr2(data, strvals=True))
            []
            >>> data = np.ma.empty((0, 10), dtype=np.float64)
            >>> print(ub.repr2(data, strvals=False))
            np.ma.empty((0, 10), dtype=np.float64)
        """
        @self.register(np.ndarray)
        def format_ndarray(data, **kwargs):
            strvals = kwargs.get('strvals', False)
            itemsep = kwargs.get('itemsep', ' ')
            precision = kwargs.get('precision', None)
            suppress_small = kwargs.get('supress_small', None)
            max_line_width = kwargs.get('max_line_width', None)
            with_dtype = kwargs.get('with_dtype', not strvals)
            newlines = kwargs.pop('nl', kwargs.pop('newlines', 1))

            # if with_dtype and strvals:
            #     raise ValueError('cannot format with strvals and dtype')

            separator = ',' + itemsep

            if strvals:
                prefix = ''
                suffix = ''
            else:
                modname = type(data).__module__
                # substitute shorthand for numpy module names
                np_nice = 'np'
                modname = re.sub('\\bnumpy\\b', np_nice, modname)
                modname = re.sub('\\bma.core\\b', 'ma', modname)

                class_name = type(data).__name__
                if class_name == 'ndarray':
                    class_name = 'array'

                prefix = modname + '.' + class_name + '('

                if with_dtype:
                    dtype_repr = data.dtype.name
                    # dtype_repr = np.core.arrayprint.dtype_short_repr(data.dtype)
                    suffix = ',{}dtype={}.{})'.format(itemsep, np_nice, dtype_repr)
                else:
                    suffix = ')'

            if not strvals and data.size == 0 and data.shape != (0,):
                # Special case for displaying empty data
                prefix = modname + '.empty('
                body = repr(tuple(map(int, data.shape)))
            else:
                body = np.array2string(data, precision=precision,
                                       separator=separator,
                                       suppress_small=suppress_small,
                                       prefix=prefix,
                                       max_line_width=max_line_width)
            if not newlines:
                # remove newlines if we need to
                body = re.sub('\n *', '', body)
            formatted = prefix + body + suffix
            return formatted

        # Hack, make sure we also register numpy floats
        self.register(np.float32)(self.func_registry[float])

    def _register_builtin_extensions(self):
        @self.register(float)
        def format_float(data, **kwargs):
            precision = kwargs.get('precision', None)
            if precision is None:
                return six.text_type(data)
            else:
                return ('{:.%df}' % precision).format(data)

        @self.register(slice)
        def format_slice(data, **kwargs):
            if kwargs.get('itemsep', ' ') == '':
                return 'slice(%r,%r,%r)' % (data.start, data.stop, data.step)
            else:
                return _format_object(data, **kwargs)

_FORMATTER_EXTENSIONS = _FormatterExtensions()
_FORMATTER_EXTENSIONS._register_builtin_extensions()
try:
    import numpy as np
    _FORMATTER_EXTENSIONS._register_numpy_extensions()
    # TODO: register pandas by default if available
except ImportError:  # nocover
    pass


def _format_object(val, **kwargs):
    stritems = kwargs.get('si', kwargs.get('stritems', False))
    strvals = stritems or kwargs.get('sv', kwargs.get('strvals', False))
    base_valfunc = six.text_type if strvals else repr

    itemstr = base_valfunc(val)

    # Remove unicode repr from python2 to agree with python3 output
    if six.PY2 and isinstance(val, six.string_types):  # nocover
        if itemstr.startswith(("u'", 'u"')):
            itemstr = itemstr[1:]
    return itemstr


def _format_list(list_, **kwargs):
    r"""
    Makes a pretty printable / human-readable string representation of a
    sequence. In most cases this string could be evaled.

    Args:
        list_ (list): input list
        **kwargs: nl, newlines, packed, nobr, nobraces, itemsep, trailing_sep,
            strvals indent_, precision, use_numpy, with_dtype, force_dtype,
            stritems, strkeys, align, explicit, sort, key_order, maxlen

    Returns:
        str: retstr

    Example:
        >>> print(_format_list([]))
        []
        >>> print(_format_list([], nobr=True))
        []
        >>> print(_format_list([1], nl=0))
        [1]
        >>> print(_format_list([1], nobr=True))
        1,
    """
    newlines = kwargs.pop('nl', kwargs.pop('newlines', 1))
    kwargs['nl'] = _rectify_countdown_or_bool(newlines)

    nobraces = kwargs.pop('nobr', kwargs.pop('nobraces', False))

    itemsep = kwargs.get('itemsep', ' ')
    # Doesn't actually put in trailing comma if on same line
    compact_brace = kwargs.get('cbr', kwargs.get('compact_brace', False))

    itemstrs = _list_itemstrs(list_, **kwargs)
    if len(itemstrs) == 0:
        nobraces = False  # force braces to prevent empty output

    is_tuple = isinstance(list_, tuple)
    is_set = isinstance(list_, (set, frozenset,))
    if nobraces:
        lbr, rbr = '', ''
    elif is_tuple:
        lbr, rbr  = '(', ')'
    elif is_set:
        lbr, rbr  = '{', '}'
    else:
        lbr, rbr  = '[', ']'

    trailing_sep = kwargs.get('trailsep', kwargs.get('trailing_sep', newlines > 0 and len(itemstrs)))

    # The trailing separator is always needed for single item tuples
    if is_tuple and len(list_) <= 1:
        trailing_sep = True

    if len(itemstrs) == 0:
        newlines = False

    retstr = _join_itemstrs(itemstrs, itemsep, newlines, nobraces,
                            trailing_sep, compact_brace, lbr, rbr)
    return retstr


def _format_dict(dict_, **kwargs):
    r"""
    Makes a pretty printable / human-readable string representation of a
    dictionary. In most cases this string could be evaled.

    Args:
        dict_ (dict_):  a dictionary
        **kwargs: si, stritems, strkeys, strvals, sk, sv, nl, newlines, nobr,
                  nobraces, cbr, compact_brace, trailing_sep,
                  explicit, itemsep, precision, kvsep, sort

    Kwargs:
        sort (None): if True, sorts ALL collectsions and subcollections,
            note, collections with undefined orders (e.g. dicts, sets) are
            sorted by default. (default = None)
        nl (int): prefered alias for newline. can be a countdown variable
            (default = None)
        explicit (int): can be a countdown variable. if True, uses
            dict(a=b) syntax instead of {'a': b}
        nobr (bool): removes outer braces (default = False)
    """
    stritems = kwargs.pop('si', kwargs.pop('stritems', False))
    if stritems:
        kwargs['strkeys'] = True
        kwargs['strvals'] = True

    kwargs['strkeys'] = kwargs.pop('sk', kwargs.pop('strkeys', False))
    kwargs['strvals'] = kwargs.pop('sv', kwargs.pop('strvals', False))

    newlines = kwargs.pop('nl', kwargs.pop('newlines', True))
    kwargs['nl'] = _rectify_countdown_or_bool(newlines)

    nobraces = kwargs.pop('nobr', kwargs.pop('nobraces', False))

    # Doesn't actually put in trailing comma if on same line
    compact_brace = kwargs.get('cbr', kwargs.get('compact_brace', False))
    trailing_sep = kwargs.get('trailsep', kwargs.get('trailing_sep', newlines > 0))
    explicit = kwargs.get('explicit', False)
    itemsep = kwargs.get('itemsep', ' ')

    if len(dict_) == 0:
        return 'dict()' if explicit else '{}'

    itemstrs = _dict_itemstrs(dict_, **kwargs)

    if nobraces:
        lbr, rbr = '', ''
    elif explicit:
        lbr, rbr = 'dict(', ')'
    else:
        lbr, rbr = '{', '}'

    retstr = _join_itemstrs(itemstrs, itemsep, newlines, nobraces,
                            trailing_sep, compact_brace, lbr, rbr)
    return retstr


def _join_itemstrs(itemstrs, itemsep, newlines, nobraces, trailing_sep,
                   compact_brace, lbr, rbr):
    """
    Joins stringified items with separators newlines and container-braces.
    """
    import ubelt as ub

    if newlines > 0:
        sep = ',\n'
        if nobraces:
            body_str = sep.join(itemstrs)
            if trailing_sep and len(itemstrs) > 0:
                body_str += ','
            retstr = body_str
        else:
            if compact_brace:
                # Why must we modify the indentation below and not here?
                # prefix = ''
                # rest = [ub.indent(s, prefix) for s in itemstrs[1:]]
                # indented = itemstrs[0:1] + rest
                indented = itemstrs
            else:
                prefix = ' ' * 4
                indented = [ub.indent(s, prefix) for s in itemstrs]

            body_str = sep.join(indented)
            if trailing_sep and len(itemstrs) > 0:
                body_str += ','
            if compact_brace:
                # Why can we modify the indentation here but not above?
                braced_body_str = (lbr + body_str.replace('\n', '\n ') + rbr)
            else:
                braced_body_str = (lbr + '\n' + body_str + '\n' + rbr)
            retstr = braced_body_str
    else:
        sep = ',' + itemsep
        body_str = sep.join(itemstrs)
        if trailing_sep and len(itemstrs) > 0:
            body_str += ','
        retstr  = (lbr + body_str +  rbr)
    return retstr


def _dict_itemstrs(dict_, **kwargs):
    """
    Create a string representation for each item in a dict.

    Example:
        >>> from ubelt.util_format import *
        >>> dict_ =  {'b': .1, 'l': 'st', 'g': 1.0, 's': 10, 'm': 0.9, 'w': .5}
        >>> kwargs = {'strkeys': True}
        >>> itemstrs = _dict_itemstrs(dict_, **kwargs)
        >>> char_order = [p[0] for p in itemstrs]
        >>> assert char_order == ['b', 'g', 'l', 'm', 's', 'w']
    """
    import ubelt as ub
    explicit = kwargs.get('explicit', False)
    kwargs['explicit'] = _rectify_countdown_or_bool(explicit)
    precision = kwargs.get('precision', None)
    kvsep = kwargs.get('kvsep', ': ')
    if explicit:
        kvsep = '='

    def make_item_str(key, val):
        if explicit or kwargs.get('strkeys', False):
            key_str = six.text_type(key)
        else:
            key_str = repr2(key, precision=precision)

        prefix = key_str + kvsep
        val_str = repr2(val, **kwargs)

        # If the first line does not end with an open nest char
        # (e.g. for ndarrays), otherwise we need to worry about
        # residual indentation.
        pos = val_str.find('\n')
        first_line = val_str if pos == -1 else val_str[:pos]

        compact_brace = kwargs.get('cbr', kwargs.get('compact_brace', False))

        if compact_brace or not first_line.rstrip().endswith(tuple('([{<')):
            rest = '' if pos == -1 else val_str[pos:]
            val_str = first_line.lstrip() + rest
            # Fix issue with tuple keys (keys that span new lines)
            if '\n' not in prefix:
                item_str = ub.hzcat([prefix, val_str])
            else:
                item_str = prefix + val_str
        else:
            item_str = prefix + val_str
        return item_str

    items = list(six.iteritems(dict_))
    itemstrs = [make_item_str(key, val) for (key, val) in items]

    sort = kwargs.get('sort', None)
    if sort is None:
        # Force ordering on unordered dicts
        sort = True
    if isinstance(dict_, collections.OrderedDict):
        # never sort ordered dicts; they are perfect just the way they are!
        sort = False
    if sort:
        itemstrs = _sort_itemstrs(items, itemstrs)
    return itemstrs


def _list_itemstrs(list_, **kwargs):
    """
    Create a string representation for each item in a list.
    """
    items = list(list_)
    itemstrs = [repr2(item, **kwargs) for item in items]

    sort = kwargs.get('sort', None)
    if sort is None:
        # Force orderings on sets.
        sort = isinstance(list_, (set, frozenset))
    if sort:
        itemstrs = _sort_itemstrs(items, itemstrs)
    return itemstrs


def _sort_itemstrs(items, itemstrs):
    """
    Equivalent to `sorted(items)` except if `items` are unorderable, then
    string values are used to define an ordering.
    """
    # First try to sort items by their normal values
    # If that doesnt work, then sort by their string values
    import ubelt as ub
    try:
        # Set ordering is not unique. Sort by strings values instead.
        if _peek_isinstance(items, (set, frozenset)):
            raise TypeError
        sortx = ub.argsort(items)
    except TypeError:
        sortx = ub.argsort(itemstrs)
    itemstrs = list(ub.take(itemstrs, sortx))
    return itemstrs


def _peek_isinstance(items, types):
    return len(items) > 0 and isinstance(items[0], types)


def _rectify_countdown_or_bool(count_or_bool):
    """
    used by recrusive functions to specify which level to turn a bool on in
    counting down yeilds True, True, ..., False
    conting up yeilds False, False, False, ... True

    Args:
        count_or_bool (bool or int): if positive will count down, if negative
            will count up, if bool will remain same

    Returns:
        int or bool: count_or_bool_

    CommandLine:
        python -m utool.util_str --test-_rectify_countdown_or_bool

    Example:
        >>> from ubelt.util_format import _rectify_countdown_or_bool  # NOQA
        >>> count_or_bool = True
        >>> a1 = (_rectify_countdown_or_bool(2))
        >>> a2 = (_rectify_countdown_or_bool(1))
        >>> a3 = (_rectify_countdown_or_bool(0))
        >>> a4 = (_rectify_countdown_or_bool(-1))
        >>> a5 = (_rectify_countdown_or_bool(-2))
        >>> a6 = (_rectify_countdown_or_bool(True))
        >>> a7 = (_rectify_countdown_or_bool(False))
        >>> a8 = (_rectify_countdown_or_bool(None))
        >>> result = [a1, a2, a3, a4, a5, a6, a7, a8]
        >>> print(result)
        [1, 0, 0, 0, -1, True, False, False]
    """
    if count_or_bool is True or count_or_bool is False:
        count_or_bool_ = count_or_bool
    elif isinstance(count_or_bool, int):
        if count_or_bool == 0:
            return 0
        sign_ =  math.copysign(1, count_or_bool)
        count_or_bool_ = int(count_or_bool - sign_)
    else:
        count_or_bool_ = False
    return count_or_bool_


if __name__ == '__main__':
    r"""
    CommandLine:
        python -m ubelt.util_format
    """
    import xdoctest as xdoc
    xdoc.doctest_module()