Skip to content

Commit

Permalink
add info method to dataset (#1176)
Browse files Browse the repository at this point in the history
add info method to dataset
  • Loading branch information
Joe Hamman authored Dec 23, 2016
1 parent bb12c69 commit 8192190
Show file tree
Hide file tree
Showing 8 changed files with 88 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ install:
- python setup.py install

script:
- py.test xarray --cov=xarray --cov-report term-missing
- py.test xarray --cov=xarray --cov-report term-missing --verbose

after_success:
- coveralls
1 change: 1 addition & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ Dataset methods
Dataset.load
Dataset.chunk
Dataset.filter_by_attrs
Dataset.info

DataArray methods
-----------------
Expand Down
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,10 @@ Enhancements
plots (:issue:`897`). See :ref:`plotting.figsize` for more details.
By `Stephan Hoyer <https://github.com/shoyer>`_ and
`Fabien Maussion <https://github.com/fmaussion>`_.
- New :py:meth:`~Dataset.info` method to summarize ``Dataset`` variables
and attributes. The method prints to a buffer (e.g. ``stdout``) with output
similar to what the command line utility ``ncdump -h`` produces (:issue:`1150`).
By `Joe Hamman <https://github.com/jhamman>`_.

Bug fixes
~~~~~~~~~
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[wheel]
universal = 1

[pytest]
[tool:pytest]
python_files=test_*.py
41 changes: 40 additions & 1 deletion xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from collections import Mapping
from numbers import Number

import sys

import numpy as np
import pandas as pd

Expand All @@ -25,10 +27,10 @@
from .variable import (Variable, as_variable, IndexVariable, broadcast_variables)
from .pycompat import (iteritems, basestring, OrderedDict,
dask_array_type, range)
from .formatting import ensure_valid_repr
from .combine import concat
from .options import OPTIONS


# list of attributes of pd.DatetimeIndex that are ndarrays of time info
_DATETIMEINDEX_COMPONENTS = ['year', 'month', 'day', 'hour', 'minute',
'second', 'microsecond', 'nanosecond', 'date',
Expand Down Expand Up @@ -802,6 +804,43 @@ def to_netcdf(self, path=None, mode='w', format=None, group=None,
def __unicode__(self):
# The text representation is produced entirely by formatting.dataset_repr.
return formatting.dataset_repr(self)

def info(self, buf=None):
    """
    Concise summary of a Dataset's variables and attributes.

    Writes a text description of the dataset's dimensions, variables
    (with their dtype, dimensions and attributes) and global attributes,
    laid out similarly to the output of ``ncdump -h``.

    Parameters
    ----------
    buf : writable buffer, defaults to sys.stdout
        Object with a ``write`` method that accepts text. The summary is
        written with a single ``write`` call and does not end with a
        trailing newline.

    See Also
    --------
    pandas.DataFrame.info
    ncdump : netCDF's ncdump
    """
    if buf is None:  # pragma: no cover
        buf = sys.stdout

    lines = [u'xarray.Dataset {', u'dimensions:']
    for name, size in self.dims.items():
        lines.append(u'\t{name} = {size} ;'.format(name=name, size=size))

    # The leading newline yields a blank separator line once joined.
    lines.append(u'\nvariables:')
    for name, da in self.variables.items():
        dims = u', '.join(da.dims)
        lines.append(u'\t{type} {name}({dims}) ;'.format(
            type=da.dtype, name=name, dims=dims))
        # Per-variable attributes are nested one tab deeper.
        for k, v in da.attrs.items():
            lines.append(u'\t\t{name}:{k} = {v} ;'.format(
                name=name, k=k, v=v))

    lines.append(u'\n// global attributes:')
    for k, v in self.attrs.items():
        lines.append(u'\t:{k} = {v} ;'.format(k=k, v=v))
    lines.append(u'}')

    buf.write(u'\n'.join(lines))

@property
def chunks(self):
"""Block dimensions for this dataset's data or None if it's not a dask
Expand Down
4 changes: 3 additions & 1 deletion xarray/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from .options import OPTIONS
from .pycompat import (
PY2, unicode_type, bytes_type, dask_array_type, OrderedDict)
PY2, unicode_type, bytes_type, dask_array_type, OrderedDict, basestring)


def pretty_print(x, numchars):
Expand Down Expand Up @@ -87,6 +87,7 @@ def first_n_items(x, n_desired):
x = x[indexer]
return np.asarray(x).flat[:n_desired]


def last_item(x):
"""Returns the last item of an array"""
if x.size == 0:
Expand All @@ -96,6 +97,7 @@ def last_item(x):
indexer = (slice(-1, None), ) * x.ndim
return np.array(x[indexer], ndmin=1)


def format_timestamp(t):
"""Cast given object to a Timestamp and return a nicely formatted string"""
# Timestamp is only valid for 1678 to 2262
Expand Down
37 changes: 37 additions & 0 deletions xarray/test/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import dask.array as da
except ImportError:
pass
from io import StringIO

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -190,6 +191,42 @@ def test_unicode_data(self):
actual = unicode_type(data)
self.assertEqual(expected, actual)

def test_info(self):
# Checks that Dataset.info writes the expected ncdump-style summary to a
# caller-supplied buffer, including unicode and plain string attributes.
ds = create_test_data(seed=123)
ds = ds.drop('dim3') # string type prints differently in PY2 vs PY3
ds.attrs['unicode_attr'] = u'ba®'
ds.attrs['string_attr'] = 'bar'

# Capture the output in memory instead of printing to stdout.
buf = StringIO()
ds.info(buf=buf)

# NOTE(review): info() emits tab-indented entries and blank separator
# lines before "variables:" and "// global attributes:"; confirm the
# expected literal below retains that whitespace.
expected = dedent(u'''\
xarray.Dataset {
dimensions:
dim1 = 8 ;
dim2 = 9 ;
dim3 = 10 ;
time = 20 ;
variables:
datetime64[ns] time(time) ;
float64 dim2(dim2) ;
float64 var1(dim1, dim2) ;
var1:foo = variable ;
float64 var2(dim1, dim2) ;
var2:foo = variable ;
float64 var3(dim3, dim1) ;
var3:foo = variable ;
int64 numbers(dim3) ;
// global attributes:
:unicode_attr = ba® ;
:string_attr = bar ;
}''')
actual = buf.getvalue()
self.assertEqual(expected, actual)
buf.close()

def test_constructor(self):
x1 = ('x', 2 * np.arange(100))
x2 = ('x', np.arange(1000))
Expand Down
3 changes: 1 addition & 2 deletions xarray/test/test_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ def test_format_items(self):
actual = ' '.join(formatting.format_items(item))
self.assertEqual(expected, actual)


def test_format_array_flat(self):
actual = formatting.format_array_flat(np.arange(100), 13)
expected = '0 1 2 3 4 ...'
Expand Down Expand Up @@ -126,7 +125,7 @@ def test_format_timestamp_out_of_bounds(self):
expected = '1300-12-01'
result = formatting.format_timestamp(date)
self.assertEqual(result, expected)

date = datetime(2300, 12, 1)
expected = '2300-12-01'
result = formatting.format_timestamp(date)
Expand Down

0 comments on commit 8192190

Please sign in to comment.