Add to_pandas method for Dataset and related test (#5247)

gcaria · mathause · web-flow · commit 4aef8f9ff823 · 2021-05-04T15:55:59.000+02:00
Co-authored-by: Mathias Hauser <mathias.hauser@env.ethz.ch>
diff --git a/doc/api.rst b/doc/api.rst
@@ -683,6 +683,7 @@ Dataset methods
    open_rasterio
    open_zarr
    Dataset.to_netcdf
+   Dataset.to_pandas
    Dataset.to_zarr
    save_mfdataset
    Dataset.to_array
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -23,6 +23,8 @@ v0.17.1 (unreleased)
 New Features
 ~~~~~~~~~~~~
 
+- Add :py:meth:`Dataset.to_pandas` (:pull:`5247`).
+  By `Giacomo Caria <https://github.com/gcaria>`_.
 - Add :py:meth:`DataArray.plot.surface` which wraps matplotlib's `plot_surface` to make
   surface plots (:issue:`#2235` :issue:`#5084` :pull:`5101`).
 - Allow passing multiple arrays to :py:meth:`Dataset.__setitem__` (:pull:`5216`).
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -5093,6 +5093,27 @@ def _normalize_dim_order(
 
         return ordered_dims
 
+    def to_pandas(self) -> Union[pd.Series, pd.DataFrame]:
+        """Convert this dataset into a pandas object without changing the number of dimensions.
+
+        The type of the returned object depends on the number of Dataset
+        dimensions:
+
+        * 0D -> `pandas.Series`
+        * 1D -> `pandas.DataFrame`
+
+        Only works for Datasets with 1 or fewer dimensions.
+        """
+        if len(self.dims) == 0:
+            return pd.Series({k: v.item() for k, v in self.items()})
+        if len(self.dims) == 1:
+            return self.to_dataframe()
+        raise ValueError(
+            "cannot convert Datasets with %s dimensions into "
+            "pandas objects without changing the number of dimensions. "
+            "Please use Dataset.to_dataframe() instead." % len(self.dims)
+        )
+
     def _to_dataframe(self, ordered_dims: Mapping[Hashable, int]):
         columns = [k for k in self.variables if k not in self.dims]
         data = [
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
@@ -1768,6 +1768,27 @@ def test_broadcast_like(self):
 
         assert_identical(original2.broadcast_like(original1), expected2)
 
+    def test_to_pandas(self):
+        # 0D -> series
+        actual = Dataset({"a": 1, "b": 2}).to_pandas()
+        expected = pd.Series([1, 2], ["a", "b"])
+        assert_array_equal(actual, expected)
+
+        # 1D -> dataframe
+        x = np.random.randn(10)
+        y = np.random.randn(10)
+        t = list("abcdefghij")
+        ds = Dataset({"a": ("t", x), "b": ("t", y), "t": ("t", t)})
+        actual = ds.to_pandas()
+        expected = ds.to_dataframe()
+        assert expected.equals(actual), (expected, actual)
+
+        # 2D -> error
+        x2d = np.random.randn(10, 10)
+        y2d = np.random.randn(10, 10)
+        with pytest.raises(ValueError, match=r"cannot convert Datasets"):
+            Dataset({"a": (["t", "r"], x2d), "b": (["t", "r"], y2d)}).to_pandas()
+
     def test_reindex_like(self):
         data = create_test_data()
         data["letters"] = ("dim3", 10 * ["a"])