# -*- coding: utf-8 -*-
from __future__ import print_function, division, absolute_import, unicode_literals
import sys
import codecs
import unicodedata
import textwrap
from six.moves import cStringIO
import six
# Public API of this module: the names exported by a star-import.
__all__ = [
'CaptureStdout',
'indent',
'codeblock',
'hzcat',
'ensure_unicode',
]
class CaptureStdout(object):
    r"""
    Context manager that captures stdout and stores it in an internal stream

    While the context is active ``sys.stdout`` is replaced by an in-memory
    buffer. On exit the buffered text is stored in ``self.text`` and the
    stream that was active when the context was entered is restored.

    Args:
        enabled (bool): if False, entering and exiting the context does
            nothing and ``text`` stays None (default = True)

    Attributes:
        text (str or None): the captured text, populated on exit
        cap_stdout: the in-memory stream that receives the output
        orig_stdout: the stream that is restored on exit

    CommandLine:
        python -m ubelt.util_str CaptureStdout

    Notes:
        use version in xdoctest?

    Example:
        >>> from ubelt.util_str import *  # NOQA
        >>> self = CaptureStdout(enabled=True)
        >>> print('dont capture the table flip (╯°□°)╯︵ ┻━┻')
        >>> with self:
        >>>     print('capture the heart ♥')
        >>> print('dont capture look of disapproval ಠ_ಠ')
        >>> assert isinstance(self.text, six.text_type)
        >>> assert self.text == 'capture the heart ♥\n', 'failed capture text'
    """

    def __init__(self, enabled=True):
        self.enabled = enabled
        # Initial value only; the stream that is actually restored is
        # re-recorded each time the context is entered.
        self.orig_stdout = sys.stdout
        self.cap_stdout = self._make_stream()
        self.text = None

    @staticmethod
    def _make_stream():
        """Create a fresh in-memory stream that accepts unicode text."""
        stream = cStringIO()
        if six.PY2:
            # The python2 cStringIO is byte based, so wrap it in a utf8 codec
            # http://stackoverflow.com/questions/1817695/stringio-accept-utf8
            codecinfo = codecs.lookup('utf8')
            stream = codecs.StreamReaderWriter(
                stream, codecinfo.streamreader, codecinfo.streamwriter)
        return stream

    def __enter__(self):
        if self.enabled:
            if getattr(self.cap_stdout, 'closed', False):
                # A previous exit closed the buffer; make a new one so the
                # same instance can be used in more than one `with` block.
                self.cap_stdout = self._make_stream()
            if sys.stdout is not self.cap_stdout:
                # Remember the stream that is active *now* (it may have been
                # replaced since construction) so exit restores the right one.
                self.orig_stdout = sys.stdout
            sys.stdout = self.cap_stdout
        return self

    def __exit__(self, type_, value, trace):
        if self.enabled:
            try:
                self.cap_stdout.seek(0)
                text = self.cap_stdout.read()
                if isinstance(text, bytes):
                    # Only decode raw bytes; the codec wrapper used on
                    # python2 already returns decoded text.
                    text = text.decode('utf8')
                self.text = text
            except Exception:  # nocover
                # Best effort: a failure to read the buffer must not mask an
                # exception raised in the with-block; `text` is left as is.
                pass
            finally:
                # Restore stdout first so it is never left hijacked, even if
                # closing the buffer fails.
                sys.stdout = self.orig_stdout
                self.cap_stdout.close()
        if trace is not None:
            return False  # return a falsey value on error
def indent(text, prefix=' '):
    r"""
    Prepends a prefix to every line in a block of text

    Every line is prefixed, including empty ones.

    Args:
        text (str): text to indent
        prefix (str): prefix to add to each line (default = ' ')

    Returns:
        str: indented text

    CommandLine:
        python -m util_str indent

    Example:
        >>> from ubelt.util_str import *  # NOQA
        >>> text = 'Lorem ipsum\ndolor sit amet'
        >>> prefix = ' '
        >>> result = indent(text, prefix)
        >>> assert all(t.startswith(prefix) for t in result.split('\n'))
    """
    prefixed_lines = [prefix + line for line in text.split('\n')]
    return '\n'.join(prefixed_lines)
def codeblock(block_str):
    r"""
    Removes the common indentation and surrounding newlines from a block

    Convenient for templated code written as a multiline string inside an
    indented part of a source file.

    Args:
        block_str (str): typically in the form of a multiline string

    Returns:
        str: the unindented string

    CommandLine:
        python -m ubelt.util_str codeblock

    Example:
        >>> from ubelt.util_str import *  # NOQA
        >>> # Simulate an indented part of code
        >>> if True:
        >>>     # notice the indentation on this will be normal
        >>>     codeblock_version = codeblock(
        ...             '''
        ...             def foo():
        ...                 return 'bar'
        ...             '''
        ...         )
        >>>     # notice the indentation and newlines on this will be odd
        >>>     normal_version = ('''
        ...         def foo():
        ...             return 'bar'
        ...     ''')
        >>> assert normal_version != codeblock_version
        >>> print('Without codeblock')
        >>> print(normal_version)
        >>> print('With codeblock')
        >>> print(codeblock_version)
    """
    # First drop the whitespace shared by all lines ...
    dedented = textwrap.dedent(block_str)
    # ... then trim only newlines (not spaces) from both ends.
    trimmed = dedented.strip('\n')
    return trimmed
def hzcat(args, sep=''):
    """
    Horizontally concatenates strings preserving indentation

    Concats a list of objects ensuring that the next item in the list
    is all the way to the right of any previous items.

    Args:
        args (list): strings to concat
        sep (str): separator (defaults to '')

    Returns:
        str: the concatenated block; an empty string if `args` is empty.
            Trailing spaces are stripped from every output line.

    CommandLine:
        python -m ubelt.util_str hzcat

    Example1:
        >>> import ubelt as ub
        >>> B = ub.repr2([[1, 2], [3, 457]], nl=1, cbr=True, trailsep=False)
        >>> C = ub.repr2([[5, 6], [7, 8]], nl=1, cbr=True, trailsep=False)
        >>> args = ['A = ', B, ' * ', C]
        >>> print(ub.hzcat(args))
        A = [[1, 2],   * [[5, 6],
             [3, 457]]    [7, 8]]

    Example2:
        >>> from ubelt.util_str import *
        >>> import ubelt as ub
        >>> aa = unicodedata.normalize('NFD', 'á')  # a unicode char with len2
        >>> B = ub.repr2([['θ', aa], [aa, aa, aa]], nl=1, si=True, cbr=True, trailsep=False)
        >>> C = ub.repr2([[5, 6], [7, 'θ']], nl=1, si=True, cbr=True, trailsep=False)
        >>> args = ['A', '=', B, '*', C]
        >>> print(ub.hzcat(args, sep='|'))
        A|=|[[θ, á],   |*|[[5, 6],
         | | [á, á, á]]| | [7, θ]]

    Example3:
        >>> from ubelt.util_str import *
        >>> assert hzcat([]) == ''
    """
    # TODO: ensure unicode data works correctly for python2
    # NFC-normalize so a base character plus combining mark counts as one
    # column when measuring widths with `len`.
    args = [unicodedata.normalize('NFC', ensure_unicode(val)) for val in args]
    if not args:
        # Nothing to concatenate (and `max` below would fail on no items)
        return ''
    arglines = [a.split('\n') for a in args]
    height = max(map(len, arglines))
    # Do vertical padding so every item has the same number of lines
    arglines = [lines + [''] * (height - len(lines)) for lines in arglines]
    # Initialize output
    all_lines = [''] * height
    width = 0
    last_index = len(arglines) - 1
    for sx, lines in enumerate(arglines):
        # Concatenate the new string
        all_lines = [prev + line for prev, line in zip(all_lines, lines)]
        # Find the new maximum horizontal width
        width = max(width, max(map(len, all_lines)))
        if sx < last_index:
            # Horizontal padding on all but last iter, so the next item
            # starts in the same column on every line
            all_lines = [line.ljust(width) + sep for line in all_lines]
            width += len(sep)
    # Clean up trailing whitespace
    return '\n'.join(line.rstrip(' ') for line in all_lines)
def ensure_unicode(text):
    r"""
    Returns `text` as a unicode string, decoding bytes as utf8

    Text that is already unicode is returned unchanged. This mostly exists
    for python2 compatibility.

    Args:
        text (str or bytes): the value to convert

    Returns:
        str: the unicode text

    Raises:
        ValueError: if `text` is neither a text nor a binary string

    References:
        http://stackoverflow.com/questions/12561063/python-extract-data-from-file

    Example:
        >>> from ubelt.util_str import *
        >>> assert ensure_unicode('my ünicôdé strįng') == 'my ünicôdé strįng'
        >>> assert ensure_unicode('text1') == 'text1'
        >>> assert ensure_unicode('text1'.encode('utf8')) == 'text1'
        >>> assert ensure_unicode('text1'.encode('utf8')) == 'text1'
        >>> assert (codecs.BOM_UTF8 + 'text»¿'.encode('utf8')).decode('utf8')
    """
    if isinstance(text, six.binary_type):
        # NOTE: a leading utf8 byte-order-mark (codecs.BOM_UTF8) gets no
        # special treatment here. If decoding such input ever becomes a
        # problem, strip the marker from the bytes before decoding.
        return text.decode('utf8')
    if isinstance(text, six.text_type):
        return text
    raise ValueError('unknown input type {!r}'.format(text))  # nocover
if __name__ == '__main__':
    # Run the doctests of this module when it is executed as a script.
    #
    # CommandLine:
    #     python -m ubelt.util_str
    #     python -m ubelt.util_str all
    import xdoctest
    xdoctest.doctest_module()