Skip to content

Commit

Permalink
Avoid loading any data for reprs (pydata#7203)
Browse files Browse the repository at this point in the history
* Avoid loading any data for reprs

* Update test_formatting.py

* Update whats-new.rst

* Update doc/whats-new.rst

Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>

* specify dtype to avoid os issues

* Update xarray/tests/test_formatting.py

* Update whats-new.rst

Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
  • Loading branch information
Illviljan and max-sixty authored Oct 28, 2022
1 parent 7ff2b04 commit fc9026b
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 5 deletions.
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ New Features
Breaking changes
~~~~~~~~~~~~~~~~

- ``repr(ds)`` may show a different result than before, because small,
lazy data is no longer loaded to build it. Use ``ds.head().load()`` to see just a sample
of the data. (:issue:`6722`, :pull:`7203`).
By `Jimmy Westling <https://github.com/illviljan>`_.
- Many arguments of plotmethods have been made keyword-only.
- ``xarray.plot.plot`` module renamed to ``xarray.plot.dataarray_plot`` to prevent
shadowing of the ``plot`` method. (:issue:`6949`, :pull:`7052`).
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,7 @@ def short_data_repr(array):
return short_numpy_repr(array)
elif is_duck_array(internal_data):
return limit_lines(repr(array.data), limit=40)
elif array._in_memory or array.size < 1e5:
elif array._in_memory:
return short_numpy_repr(array)
else:
# internal xarray array type
Expand Down
34 changes: 30 additions & 4 deletions xarray/tests/test_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,17 +575,28 @@ def test_large_array_repr_length() -> None:

@requires_netCDF4
def test_repr_file_collapsed(tmp_path) -> None:
arr = xr.DataArray(np.arange(300), dims="test")
arr.to_netcdf(tmp_path / "test.nc", engine="netcdf4")
arr_to_store = xr.DataArray(np.arange(300, dtype=np.int64), dims="test")
arr_to_store.to_netcdf(tmp_path / "test.nc", engine="netcdf4")

with xr.open_dataarray(tmp_path / "test.nc") as arr, xr.set_options(
display_expand_data=False
):
actual = formatting.array_repr(arr)
actual = repr(arr)
expected = dedent(
"""\
<xarray.DataArray (test: 300)>
array([ 0, 1, 2, ..., 297, 298, 299])
[300 values with dtype=int64]
Dimensions without coordinates: test"""
)

assert actual == expected

arr_loaded = arr.compute()
actual = arr_loaded.__repr__()
expected = dedent(
"""\
<xarray.DataArray (test: 300)>
0 1 2 3 4 5 6 7 8 9 10 11 12 ... 288 289 290 291 292 293 294 295 296 297 298 299
Dimensions without coordinates: test"""
)

Expand Down Expand Up @@ -699,3 +710,18 @@ def test__element_formatter(n_elements: int = 100) -> None:
)
actual = intro + values
assert expected == actual


def test_lazy_array_wont_compute() -> None:
    """Check that the text and HTML reprs do not convert lazy data to numpy."""
    from xarray.core.indexing import LazilyIndexedArray

    class LazilyIndexedArrayNotComputable(LazilyIndexedArray):
        """Lazily indexed array whose ``__array__`` conversion always raises."""

        def __array__(self, dtype=None):
            raise NotImplementedError("Computing this array is not possible.")

    lazy_data = LazilyIndexedArrayNotComputable(np.array([1, 2]))
    data_array = xr.DataArray(lazy_data)

    # Both calls raise NotImplementedError if building the repr converts
    # the wrapped data to a numpy array.
    repr(data_array)
    data_array._repr_html_()

0 comments on commit fc9026b

Please sign in to comment.