From 4aef8f9ff823cc1c5fa4e837acbfe2483110716b Mon Sep 17 00:00:00 2001
From: Giacomo Caria <44147817+gcaria@users.noreply.github.com>
Date: Tue, 4 May 2021 15:55:59 +0200
Subject: [PATCH] Add to_pandas method for Dataset and related test (#5247)

Co-authored-by: Mathias Hauser
---
 doc/api.rst                  |  1 +
 doc/whats-new.rst            |  2 ++
 xarray/core/dataset.py       | 21 +++++++++++++++++++++
 xarray/tests/test_dataset.py | 21 +++++++++++++++++++++
 4 files changed, 45 insertions(+)

diff --git a/doc/api.rst b/doc/api.rst
index da78307a349..7c01a8af0f1 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -683,6 +683,7 @@ Dataset methods
    open_rasterio
    open_zarr
    Dataset.to_netcdf
+   Dataset.to_pandas
    Dataset.to_zarr
    save_mfdataset
    Dataset.to_array
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 2b3e398634c..9b02af5a6a8 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -23,6 +23,8 @@ v0.17.1 (unreleased)
 
 New Features
 ~~~~~~~~~~~~
+- Add :py:meth:`Dataset.to_pandas` (:pull:`5247`)
+  By `Giacomo Caria <https://github.com/gcaria>`_.
 - Add :py:meth:`DataArray.plot.surface` which wraps matplotlib's `plot_surface` to make
   surface plots (:issue:`#2235` :issue:`#5084` :pull:`5101`).
 - Allow passing multiple arrays to :py:meth:`Dataset.__setitem__` (:pull:`5216`).
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 811357c265d..2755fb61f18 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -5093,6 +5093,27 @@ def _normalize_dim_order(
 
         return ordered_dims
 
+    def to_pandas(self) -> Union[pd.Series, pd.DataFrame]:
+        """Convert this dataset into a pandas object without changing the number of dimensions.
+
+        The type of the returned object depends on the number of Dataset
+        dimensions:
+
+        * 0D -> `pandas.Series`
+        * 1D -> `pandas.DataFrame`
+
+        Only works for Datasets with 1 or fewer dimensions.
+        """
+        if len(self.dims) == 0:
+            return pd.Series({k: v.item() for k, v in self.items()})
+        if len(self.dims) == 1:
+            return self.to_dataframe()
+        raise ValueError(
+            "cannot convert Datasets with %s dimensions into "
+            "pandas objects without changing the number of dimensions. "
+            "Please use Dataset.to_dataframe() instead." % len(self.dims)
+        )
+
     def _to_dataframe(self, ordered_dims: Mapping[Hashable, int]):
         columns = [k for k in self.variables if k not in self.dims]
         data = [
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index b1269369f21..1e526a3787f 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -1768,6 +1768,27 @@ def test_broadcast_like(self):
 
         assert_identical(original2.broadcast_like(original1), expected2)
 
+    def test_to_pandas(self):
+        # 0D -> series
+        actual = Dataset({"a": 1, "b": 2}).to_pandas()
+        expected = pd.Series([1, 2], ["a", "b"])
+        assert expected.equals(actual), (expected, actual)
+
+        # 1D -> dataframe
+        x = np.random.randn(10)
+        y = np.random.randn(10)
+        t = list("abcdefghij")
+        ds = Dataset({"a": ("t", x), "b": ("t", y), "t": ("t", t)})
+        actual = ds.to_pandas()
+        expected = ds.to_dataframe()
+        assert expected.equals(actual), (expected, actual)
+
+        # 2D -> error
+        x2d = np.random.randn(10, 10)
+        y2d = np.random.randn(10, 10)
+        with pytest.raises(ValueError, match=r"cannot convert Datasets"):
+            Dataset({"a": (["t", "r"], x2d), "b": (["t", "r"], y2d)}).to_pandas()
+
     def test_reindex_like(self):
         data = create_test_data()
         data["letters"] = ("dim3", 10 * ["a"])