Skip to content

ENH: Preserve attrs in to_dataframe() #5335

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ New Features
By `Jimmy Westling <https://github.com/illviljan>`_.
- Raise more informative error when decoding time variables with invalid reference dates.
(:issue:`5199`, :pull:`5288`). By `Giacomo Caria <https://github.com/gcaria>`_.
- Preserve attrs in ``to_dataframe()`` (:issue:`5327`, :pull:`5335`). By `Alan Snow <https://github.com/snowman2>`_.

Breaking changes
~~~~~~~~~~~~~~~~
Expand Down
2 changes: 2 additions & 0 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2714,7 +2714,9 @@ def to_dataframe(
ordered_dims = ds._normalize_dim_order(dim_order=dim_order)

df = ds._to_dataframe(ordered_dims)
attrs = df[unique_name].attrs
df.columns = [name if c == unique_name else c for c in df.columns]
df[name].attrs = attrs
return df

def to_series(self) -> pd.Series:
Expand Down
7 changes: 6 additions & 1 deletion xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5182,7 +5182,12 @@ def _to_dataframe(self, ordered_dims: Mapping[Hashable, int]):
for k in columns
]
index = self.coords.to_index([*ordered_dims])
return pd.DataFrame(dict(zip(columns, data)), index=index)
pdf = pd.DataFrame(dict(zip(columns, data)), index=index)
# add attributes to dataframe
pdf.attrs = self.attrs
for column in columns:
pdf[column].attrs = self[column].attrs
return pdf

def to_dataframe(self, dim_order: List[Hashable] = None) -> pd.DataFrame:
"""Convert this dataset into a pandas.DataFrame.
Expand Down
15 changes: 15 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -3681,6 +3681,21 @@ def test_to_dataframe_0length(self):
assert len(actual) == 0
assert_array_equal(actual.index.names, list("ABC"))

def test_to_dataframe__attrs(self):
    """Check that ``DataArray.to_dataframe()`` carries attrs onto the column.

    The frame-level ``attrs`` are expected to stay empty, while the column
    named after the array is expected to hold the array's own attrs.
    """
    expected_attrs = {
        "long_name": "Description of data array",
        "_FillValue": -1,
    }
    y_coord = np.arange(1, 6)
    x_coord = np.arange(2, 7)
    arr = DataArray(
        np.zeros((5, 5)),
        name="test",
        dims=("y", "x"),
        coords={"y": y_coord, "x": x_coord},
        # pass a copy so the expectation below is an independent object
        attrs=dict(expected_attrs),
    )

    frame = arr.to_dataframe()

    # nothing is attached to the frame itself in this test
    assert frame.attrs == {}
    # the data column is looked up by the array's name
    assert frame["test"].attrs == expected_attrs

def test_to_pandas_name_matches_coordinate(self):
# coordinate with same name as array
arr = DataArray([1, 2, 3], dims="x", name="x")
Expand Down
23 changes: 23 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4205,6 +4205,29 @@ def test_to_and_from_dataframe(self):
expected = pd.DataFrame([[]], index=idx)
assert expected.equals(actual), (expected, actual)

def test_to_dataframe__attrs(self):
    """Check that ``Dataset.to_dataframe()`` carries attrs onto the frame.

    Dataset-level attrs are expected on the frame, and each variable's attrs
    on its column; a variable without attrs is expected to give ``{}``.
    """
    a_attrs = {"long_name": "Description of data array", "_FillValue": -1}
    ds_attrs = {"test": "test"}
    # "a" carries attrs, "b" deliberately has none
    data_vars = {
        "a": ("t", [1], dict(a_attrs)),
        "b": ("t", [1]),
    }
    ds = Dataset(data_vars, attrs=dict(ds_attrs))

    frame = ds.to_dataframe()

    assert frame.attrs == ds_attrs
    assert frame["a"].attrs == a_attrs
    assert frame["b"].attrs == {}

def test_from_dataframe_categorical(self):
cat = pd.CategoricalDtype(
categories=["foo", "bar", "baz", "qux", "quux", "corge"]
Expand Down