Skip to content

Raise UserWarning when rename creates a new dimension coord #6999

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ Bug fixes
By `András Gunyhó <https://github.com/mgunyho>`_.
- Avoid use of random numbers in `test_weighted.test_weighted_operations_nonequal_coords` (:issue:`6504`, :pull:`6961`).
By `Luke Conibear <https://github.com/lukeconibear>`_.
- Raise a ``UserWarning`` when renaming a coordinate or a dimension creates a
non-indexed dimension coordinate, and suggest the user creating an index
either with ``swap_dims`` or ``set_index`` (:issue:`6607`, :pull:`6999`). By
`Benoît Bovy <https://github.com/benbovy>`_.
- Use ``keep_attrs=True`` in grouping and resampling operations by default (:issue:`7012`).
This means :py:attr:`Dataset.attrs` and :py:attr:`DataArray.attrs` are now preserved by default.
By `Deepak Cherian <https://github.com/dcherian>`_.
Expand Down
4 changes: 2 additions & 2 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2032,11 +2032,11 @@ def rename(
if utils.is_dict_like(new_name_or_name_dict) or new_name_or_name_dict is None:
# change dims/coords
name_dict = either_dict_or_kwargs(new_name_or_name_dict, names, "rename")
dataset = self._to_temp_dataset().rename(name_dict)
dataset = self._to_temp_dataset()._rename(name_dict)
return self._from_temp_dataset(dataset)
if utils.hashable(new_name_or_name_dict) and names:
# change name + dims/coords
dataset = self._to_temp_dataset().rename(names)
dataset = self._to_temp_dataset()._rename(names)
dataarray = self._from_temp_dataset(dataset)
return dataarray._replace(name=new_name_or_name_dict)
# only change name
Expand Down
55 changes: 43 additions & 12 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3560,6 +3560,48 @@ def _rename_all(

return variables, coord_names, dims, indexes

def _rename(
self: T_Dataset,
name_dict: Mapping[Any, Hashable] | None = None,
**names: Hashable,
) -> T_Dataset:
"""Also used internally by DataArray so that the warning (if any)
is raised at the right stack level.
"""
name_dict = either_dict_or_kwargs(name_dict, names, "rename")
for k in name_dict.keys():
if k not in self and k not in self.dims:
raise ValueError(
f"cannot rename {k!r} because it is not a "
"variable or dimension in this dataset"
)

create_dim_coord = False
new_k = name_dict[k]

if k in self.dims and new_k in self._coord_names:
coord_dims = self._variables[name_dict[k]].dims
if coord_dims == (k,):
create_dim_coord = True
elif k in self._coord_names and new_k in self.dims:
coord_dims = self._variables[k].dims
if coord_dims == (new_k,):
create_dim_coord = True

if create_dim_coord:
warnings.warn(
f"rename {k!r} to {name_dict[k]!r} does not create an index "
"anymore. Try using swap_dims instead or use set_index "
"after rename to create an indexed coordinate.",
UserWarning,
stacklevel=3,
)

variables, coord_names, dims, indexes = self._rename_all(
name_dict=name_dict, dims_dict=name_dict
)
return self._replace(variables, coord_names, dims=dims, indexes=indexes)

def rename(
self: T_Dataset,
name_dict: Mapping[Any, Hashable] | None = None,
Expand Down Expand Up @@ -3588,18 +3630,7 @@ def rename(
Dataset.rename_dims
DataArray.rename
"""
name_dict = either_dict_or_kwargs(name_dict, names, "rename")
for k in name_dict.keys():
if k not in self and k not in self.dims:
raise ValueError(
f"cannot rename {k!r} because it is not a "
"variable or dimension in this dataset"
)

variables, coord_names, dims, indexes = self._rename_all(
name_dict=name_dict, dims_dict=name_dict
)
return self._replace(variables, coord_names, dims=dims, indexes=indexes)
return self._rename(name_dict=name_dict, **names)

def rename_dims(
self: T_Dataset,
Expand Down
17 changes: 17 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1742,6 +1742,23 @@ def test_rename(self) -> None:
)
assert_identical(renamed_all, expected_all)

def test_rename_dimension_coord_warnings(self) -> None:
# create a dimension coordinate by renaming a dimension or coordinate
# should raise a warning (no index created)
da = DataArray([0, 0], coords={"x": ("y", [0, 1])}, dims="y")

with pytest.warns(
UserWarning, match="rename 'x' to 'y' does not create an index.*"
):
da.rename(x="y")

da = xr.DataArray([0, 0], coords={"y": ("x", [0, 1])}, dims="x")

with pytest.warns(
UserWarning, match="rename 'x' to 'y' does not create an index.*"
):
da.rename(x="y")

def test_init_value(self) -> None:
expected = DataArray(
np.full((3, 4), 3), dims=["x", "y"], coords=[range(3), range(4)]
Expand Down
17 changes: 17 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2892,6 +2892,23 @@ def test_rename_dimension_coord(self) -> None:
actual_2 = original.rename_dims({"x": "x_new"})
assert "x" in actual_2.xindexes

def test_rename_dimension_coord_warnings(self) -> None:
# create a dimension coordinate by renaming a dimension or coordinate
# should raise a warning (no index created)
ds = Dataset(coords={"x": ("y", [0, 1])})

with pytest.warns(
UserWarning, match="rename 'x' to 'y' does not create an index.*"
):
ds.rename(x="y")

ds = Dataset(coords={"y": ("x", [0, 1])})

with pytest.warns(
UserWarning, match="rename 'x' to 'y' does not create an index.*"
):
ds.rename(x="y")

def test_rename_multiindex(self) -> None:
mindex = pd.MultiIndex.from_tuples([([1, 2]), ([3, 4])], names=["a", "b"])
original = Dataset({}, {"x": mindex})
Expand Down