Skip to content

Commit

Permalink
CLN: move common printing utilities to pandas.io.formats.printing (pan…
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback authored May 29, 2018
1 parent b64e9d5 commit b2eec25
Show file tree
Hide file tree
Showing 2 changed files with 160 additions and 117 deletions.
123 changes: 6 additions & 117 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,17 +57,11 @@
import pandas.core.missing as missing
import pandas.core.algorithms as algos
import pandas.core.sorting as sorting
from pandas.io.formats.printing import pprint_thing
from pandas.io.formats.printing import (
pprint_thing, default_pprint, format_object_summary, format_object_attrs)
from pandas.core.ops import make_invalid_op
from pandas.core.config import get_option
from pandas.core.strings import StringMethods


# simplify
default_pprint = lambda x, max_seq_items=None: \
pprint_thing(x, escape_chars=('\t', '\r', '\n'), quote_strings=True,
max_seq_items=max_seq_items)

__all__ = ['Index']

_unsortable_types = frozenset(('mixed', 'mixed-integer'))
Expand Down Expand Up @@ -1034,133 +1028,28 @@ def _format_space(self):
@property
def _formatter_func(self):
"""
Return the formatted data as a unicode string
Return the formatter function
"""
return default_pprint

def _format_data(self, name=None):
"""
Return the formatted data as a unicode string
"""
from pandas.io.formats.console import get_console_size
from pandas.io.formats.format import _get_adjustment
display_width, _ = get_console_size()
if display_width is None:
display_width = get_option('display.width') or 80
if name is None:
name = self.__class__.__name__

space1 = "\n%s" % (' ' * (len(name) + 1))
space2 = "\n%s" % (' ' * (len(name) + 2))

n = len(self)
sep = ','
max_seq_items = get_option('display.max_seq_items') or n
formatter = self._formatter_func

# do we want to justify (only do so for non-objects)
is_justify = not (self.inferred_type in ('string', 'unicode') or
(self.inferred_type == 'categorical' and
is_object_dtype(self.categories)))

# are we a truncated display
is_truncated = n > max_seq_items

# adj can optionally handle unicode eastern asian width
adj = _get_adjustment()

def _extend_line(s, line, value, display_width, next_line_prefix):

if (adj.len(line.rstrip()) + adj.len(value.rstrip()) >=
display_width):
s += line.rstrip()
line = next_line_prefix
line += value
return s, line

def best_len(values):
if values:
return max(adj.len(x) for x in values)
else:
return 0

if n == 0:
summary = '[], '
elif n == 1:
first = formatter(self[0])
summary = '[%s], ' % first
elif n == 2:
first = formatter(self[0])
last = formatter(self[-1])
summary = '[%s, %s], ' % (first, last)
else:

if n > max_seq_items:
n = min(max_seq_items // 2, 10)
head = [formatter(x) for x in self[:n]]
tail = [formatter(x) for x in self[-n:]]
else:
head = []
tail = [formatter(x) for x in self]

# adjust all values to max length if needed
if is_justify:

# however, if we are not truncated and we are only a single
# line, then don't justify
if (is_truncated or
not (len(', '.join(head)) < display_width and
len(', '.join(tail)) < display_width)):
max_len = max(best_len(head), best_len(tail))
head = [x.rjust(max_len) for x in head]
tail = [x.rjust(max_len) for x in tail]

summary = ""
line = space2

for i in range(len(head)):
word = head[i] + sep + ' '
summary, line = _extend_line(summary, line, word,
display_width, space2)

if is_truncated:
# remove trailing space of last line
summary += line.rstrip() + space2 + '...'
line = space2

for i in range(len(tail) - 1):
word = tail[i] + sep + ' '
summary, line = _extend_line(summary, line, word,
display_width, space2)

# last value: no sep added + 1 space of width used for trailing ','
summary, line = _extend_line(summary, line, tail[-1],
display_width - 2, space2)
summary += line
summary += '],'

if len(summary) > (display_width):
summary += space1
else: # one row
summary += ' '

# remove initial space
summary = '[' + summary[len(space2):]

return summary
return format_object_summary(self, self._formatter_func,
is_justify=is_justify, name=name)

def _format_attrs(self):
"""
Return a list of tuples of the (attr,formatted_value)
"""
attrs = []
attrs.append(('dtype', "'%s'" % self.dtype))
if self.name is not None:
attrs.append(('name', default_pprint(self.name)))
max_seq_items = get_option('display.max_seq_items') or len(self)
if len(self) > max_seq_items:
attrs.append(('length', len(self)))
return attrs
return format_object_attrs(self)

def to_series(self, index=None, name=None):
"""
Expand Down
154 changes: 154 additions & 0 deletions pandas/io/formats/printing.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,3 +261,157 @@ class TableSchemaFormatter(BaseFormatter):
# unregister tableschema mime-type
if mimetype in formatters:
formatters[mimetype].enabled = False


def default_pprint(x, max_seq_items=None):
    """
    Format ``x`` with the default pretty-printing settings.

    Thin wrapper around ``pprint_thing`` that always passes
    ``escape_chars=('\\t', '\\r', '\\n')`` and ``quote_strings=True``.

    Parameters
    ----------
    x : object
        the thing to format
    max_seq_items : int, optional
        forwarded unchanged to ``pprint_thing``

    Returns
    -------
    the value returned by ``pprint_thing`` for these arguments
    """
    # a named function (rather than a lambda bound to a name) gives a
    # meaningful __name__ in tracebacks and a place for this docstring
    return pprint_thing(x, escape_chars=('\t', '\r', '\n'),
                        quote_strings=True, max_seq_items=max_seq_items)


def format_object_summary(obj, formatter, is_justify=True, name=None):
    """
    Return the formatted obj as a unicode string

    The elements are rendered with ``formatter``, wrapped to the console
    width and, when ``len(obj)`` exceeds the ``display.max_seq_items``
    option, truncated to the leading and trailing elements with a
    ``...`` marker in between.

    Parameters
    ----------
    obj : object
        must be iterable, support ``len`` and support ``__getitem__``
        (integer positions and slices)
    formatter : callable
        string formatter for an element
    is_justify : boolean
        should justify the display
    name : name, optional
        defaults to the class name of the obj

    Returns
    -------
    summary string
    """
    from pandas.io.formats.console import get_console_size
    from pandas.io.formats.format import _get_adjustment

    display_width, _ = get_console_size()
    if display_width is None:
        display_width = get_option('display.width') or 80
    if name is None:
        name = obj.__class__.__name__

    # continuation-line prefixes: space2 aligns wrapped values under the
    # first value, space1 is appended after a multi-line summary
    space1 = "\n%s" % (' ' * (len(name) + 1))
    space2 = "\n%s" % (' ' * (len(name) + 2))

    n = len(obj)
    sep = ','
    max_seq_items = get_option('display.max_seq_items') or n

    # are we a truncated display
    is_truncated = n > max_seq_items

    # adj can optionally handle unicode eastern asian width
    adj = _get_adjustment()

    def _extend_line(s, line, value, display_width, next_line_prefix):
        # flush the current line into ``s`` and start a new one when
        # appending ``value`` would reach the display width
        if (adj.len(line.rstrip()) + adj.len(value.rstrip()) >=
                display_width):
            s += line.rstrip()
            line = next_line_prefix
        line += value
        return s, line

    def best_len(values):
        if values:
            return max(adj.len(x) for x in values)
        else:
            return 0

    if n == 0:
        summary = '[], '
    elif n == 1:
        first = formatter(obj[0])
        summary = '[%s], ' % first
    elif n == 2:
        first = formatter(obj[0])
        last = formatter(obj[-1])
        summary = '[%s, %s], ' % (first, last)
    else:

        if is_truncated:
            n_shown = min(max_seq_items // 2, 10)
            if n_shown == 0:
                # max_seq_items == 1: ``obj[-0:]`` is ``obj[0:]`` and would
                # select every element, so show only the last one
                head = []
                tail = [formatter(x) for x in obj[-1:]]
            else:
                head = [formatter(x) for x in obj[:n_shown]]
                tail = [formatter(x) for x in obj[-n_shown:]]
        else:
            head = []
            tail = [formatter(x) for x in obj]

        # adjust all values to max length if needed
        if is_justify:

            # however, if we are not truncated and we are only a single
            # line, then don't justify
            if (is_truncated or
                    not (len(', '.join(head)) < display_width and
                         len(', '.join(tail)) < display_width)):
                max_len = max(best_len(head), best_len(tail))
                head = [x.rjust(max_len) for x in head]
                tail = [x.rjust(max_len) for x in tail]

        summary = ""
        line = space2

        for value in head:
            word = value + sep + ' '
            summary, line = _extend_line(summary, line, word,
                                         display_width, space2)

        if is_truncated:
            # remove trailing space of last line
            summary += line.rstrip() + space2 + '...'
            line = space2

        for value in tail[:-1]:
            word = value + sep + ' '
            summary, line = _extend_line(summary, line, word,
                                         display_width, space2)

        # last value: no sep added + 1 space of width used for trailing ','
        summary, line = _extend_line(summary, line, tail[-1],
                                     display_width - 2, space2)
        summary += line
        summary += '],'

        if len(summary) > (display_width):
            summary += space1
        else:  # one row
            summary += ' '

        # remove initial space
        summary = '[' + summary[len(space2):]

    return summary


def format_object_attrs(obj):
    """
    Collect the (attr, formatted_value) pairs for the common attrs
    of an object: dtype, name and length.

    * ``dtype`` is included when ``obj`` has a ``dtype`` attribute
    * ``name`` is included when ``obj.name`` exists and is not None
    * ``length`` is included when ``len(obj)`` exceeds the
      ``display.max_seq_items`` option

    Parameters
    ----------
    obj : object
        must support ``len``

    Returns
    -------
    list of (str, object) tuples
    """
    pairs = []

    if hasattr(obj, 'dtype'):
        quoted_dtype = "'{}'".format(obj.dtype)
        pairs.append(('dtype', quoted_dtype))

    label = getattr(obj, 'name', None)
    if label is not None:
        pairs.append(('name', default_pprint(label)))

    # a falsy option means "no limit": the threshold then equals the
    # length itself and the length entry is never added
    size = len(obj)
    threshold = get_option('display.max_seq_items') or size
    if size > threshold:
        pairs.append(('length', size))

    return pairs

0 comments on commit b2eec25

Please sign in to comment.