Skip to content

Commit 4aef8f9

Browse files
gcaria and mathause authored
Add to_pandas method for Dataset and related test (#5247)
Co-authored-by: Mathias Hauser <mathias.hauser@env.ethz.ch>
1 parent 1c198a1 commit 4aef8f9

File tree

4 files changed

+45
-0
lines changed

4 files changed

+45
-0
lines changed

doc/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,7 @@ Dataset methods
683683
open_rasterio
684684
open_zarr
685685
Dataset.to_netcdf
686+
Dataset.to_pandas
686687
Dataset.to_zarr
687688
save_mfdataset
688689
Dataset.to_array

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ v0.17.1 (unreleased)
2323
New Features
2424
~~~~~~~~~~~~
2525

26+
- Add :py:meth:`Dataset.to_pandas` (:pull:`5247`)
27+
By `Giacomo Caria <https://github.com/gcaria>`_.
2628
- Add :py:meth:`DataArray.plot.surface` which wraps matplotlib's `plot_surface` to make
2729
surface plots (:issue:`#2235` :issue:`#5084` :pull:`5101`).
2830
- Allow passing multiple arrays to :py:meth:`Dataset.__setitem__` (:pull:`5216`).

xarray/core/dataset.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5093,6 +5093,27 @@ def _normalize_dim_order(
50935093

50945094
return ordered_dims
50955095

5096+
def to_pandas(self) -> Union[pd.Series, pd.DataFrame]:
    """Convert this dataset into a pandas object without changing the number of dimensions.

    The type of the returned object depends on the number of Dataset
    dimensions:

    * 0D -> `pandas.Series`
    * 1D -> `pandas.DataFrame`

    Only works for Datasets with 1 or fewer dimensions.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        For a 0D dataset, a Series with one entry per ``(name, variable)``
        pair yielded by ``self.items()``, each value obtained through
        ``variable.item()``. For a 1D dataset, the result of
        ``self.to_dataframe()``.

    Raises
    ------
    ValueError
        If the dataset has more than one dimension.
    """
    # Look the dimension count up once; it drives every branch below.
    n_dims = len(self.dims)
    if n_dims == 0:
        return pd.Series({name: var.item() for name, var in self.items()})
    if n_dims == 1:
        return self.to_dataframe()
    raise ValueError(
        f"cannot convert Datasets with {n_dims} dimensions into "
        "pandas objects without changing the number of dimensions. "
        "Please use Dataset.to_dataframe() instead."
    )
5116+
50965117
def _to_dataframe(self, ordered_dims: Mapping[Hashable, int]):
50975118
columns = [k for k in self.variables if k not in self.dims]
50985119
data = [

xarray/tests/test_dataset.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1768,6 +1768,27 @@ def test_broadcast_like(self):
17681768

17691769
assert_identical(original2.broadcast_like(original1), expected2)
17701770

1771+
def test_to_pandas(self):
    """Check Dataset.to_pandas for 0D, 1D and 2D datasets."""
    # 0D -> series
    actual = Dataset({"a": 1, "b": 2}).to_pandas()
    expected = pd.Series([1, 2], ["a", "b"])
    # assert_series_equal also checks the index labels and dtype; a plain
    # array comparison would only look at the values.
    pd.testing.assert_series_equal(actual, expected)

    # 1D -> dataframe
    x = np.random.randn(10)
    y = np.random.randn(10)
    t = list("abcdefghij")
    ds = Dataset({"a": ("t", x), "b": ("t", y), "t": ("t", t)})
    actual = ds.to_pandas()
    expected = ds.to_dataframe()
    assert expected.equals(actual), (expected, actual)

    # 2D -> error
    x2d = np.random.randn(10, 10)
    y2d = np.random.randn(10, 10)
    with pytest.raises(ValueError, match=r"cannot convert Datasets"):
        Dataset({"a": (["t", "r"], x2d), "b": (["t", "r"], y2d)}).to_pandas()
1791+
17711792
def test_reindex_like(self):
17721793
data = create_test_data()
17731794
data["letters"] = ("dim3", 10 * ["a"])

0 commit comments

Comments
 (0)