Skip to content

Deprecate the multi-index dimension coordinate #8143

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import pytest

from xarray import set_options


def pytest_addoption(parser):
"""Add command-line flags for pytest."""
Expand Down Expand Up @@ -39,3 +41,10 @@ def add_standard_imports(doctest_namespace, tmpdir):

# always switch to the temporary directory, so files get written there
tmpdir.chdir()


@pytest.fixture(scope="function", params=[True, False])
def mindex_dim_coord(request):
set_options(future_no_mindex_dim_coord=not request.param)
yield request.param
set_options(future_no_mindex_dim_coord=request.param)
14 changes: 12 additions & 2 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5008,6 +5008,15 @@ def reorder_levels(
Another dataset, with this dataset's data but replaced
coordinates.
"""
warnings.warn(
"The `reorder_levels` method will be removed in the future, "
"when dimension coordinates with a PandasMultiIndex are removed.\n"
"Instead of `ds.reorder_levels(x=['two', 'one'])`, you could do:\n"
"`new_midx = ds.xindexes['one'].index.reorder_levels(['two', 'one'])`\n"
"`ds.assign_coords(xr.Coordinates.from_pandas_multiindex(new_midx, 'x'))`",
FutureWarning,
stacklevel=2,
)
dim_order = either_dict_or_kwargs(dim_order, dim_order_kwargs, "reorder_levels")
variables = self._variables.copy()
indexes = dict(self._indexes)
Expand Down Expand Up @@ -5132,7 +5141,8 @@ def _stack_once(

if len(product_vars) == len(dims):
idx = index_cls.stack(product_vars, new_dim)
new_indexes[new_dim] = idx
if not OPTIONS["future_no_mindex_dim_coord"]:
new_indexes[new_dim] = idx
new_indexes.update({k: idx for k in product_vars})
idx_vars = idx.create_variables(product_vars)
# keep consistent multi-index coordinate order
Expand Down Expand Up @@ -5779,7 +5789,7 @@ def drop_vars(
for var in names:
maybe_midx = self._indexes.get(var, None)
if isinstance(maybe_midx, PandasMultiIndex):
idx_coord_names = set(maybe_midx.index.names + [maybe_midx.dim])
idx_coord_names = set(self.xindexes.get_all_coords(var))
idx_other_names = idx_coord_names - set(names)
other_names.update(idx_other_names)
if other_names:
Expand Down
9 changes: 8 additions & 1 deletion xarray/core/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
PandasIndexingAdapter,
PandasMultiIndexingAdapter,
)
from xarray.core.options import OPTIONS
from xarray.core.utils import (
Frozen,
emit_user_level_warning,
Expand Down Expand Up @@ -1123,7 +1124,13 @@ def create_variables(
variables = {}

index_vars: IndexVars = {}
for name in (self.dim,) + self.index.names:

if OPTIONS["future_no_mindex_dim_coord"]:
names = self.index.names
else:
names = (self.dim,) + self.index.names

for name in names:
if name == self.dim:
level = None
dtype = None
Expand Down
8 changes: 8 additions & 0 deletions xarray/core/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
"display_default_indexes",
"enable_cftimeindex",
"file_cache_maxsize",
"future_no_mindex_dim_coord",
"keep_attrs",
"warn_for_unclosed_files",
"use_bottleneck",
Expand All @@ -48,6 +49,7 @@ class T_Options(TypedDict):
display_default_indexes: Literal["default", True, False]
enable_cftimeindex: bool
file_cache_maxsize: int
future_no_mindex_dim_coord: bool
keep_attrs: Literal["default", True, False]
warn_for_unclosed_files: bool
use_bottleneck: bool
Expand All @@ -70,6 +72,7 @@ class T_Options(TypedDict):
"display_default_indexes": False,
"enable_cftimeindex": True,
"file_cache_maxsize": 128,
"future_no_mindex_dim_coord": False,
"keep_attrs": "default",
"warn_for_unclosed_files": False,
"use_bottleneck": True,
Expand Down Expand Up @@ -98,6 +101,7 @@ def _positive_integer(value: int) -> bool:
"display_default_indexes": lambda choice: choice in [True, False, "default"],
"enable_cftimeindex": lambda value: isinstance(value, bool),
"file_cache_maxsize": _positive_integer,
"future_no_mindex_dim_coord": lambda value: isinstance(value, bool),
"keep_attrs": lambda choice: choice in [True, False, "default"],
"use_bottleneck": lambda value: isinstance(value, bool),
"use_flox": lambda value: isinstance(value, bool),
Expand Down Expand Up @@ -218,6 +222,10 @@ class set_options:
global least-recently-usage cached. This should be smaller than
your system's per-process file descriptor limit, e.g.,
``ulimit -n`` on Linux.
future_no_mindex_dim_coord : bool, default: False
If True, no dimension coordinate with tuple values is be created
for a pandas multi-index. This is currently deactivated but
it will be the default behavior in the future.
keep_attrs : {"default", True, False}
Whether to keep attributes on xarray Datasets/dataarrays after
operations. Can be
Expand Down
116 changes: 70 additions & 46 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from copy import copy, deepcopy
from io import StringIO
from textwrap import dedent
from typing import Any, Literal
from typing import Any, Literal, cast

import numpy as np
import pandas as pd
Expand All @@ -32,7 +32,7 @@
from xarray.core import dtypes, indexing, utils
from xarray.core.common import duck_array_ops, full_like
from xarray.core.coordinates import Coordinates, DatasetCoordinates
from xarray.core.indexes import Index, PandasIndex
from xarray.core.indexes import Index, PandasIndex, PandasMultiIndex
from xarray.core.pycompat import array_type, integer_types
from xarray.core.utils import is_scalar
from xarray.testing import _assert_internal_invariants
Expand Down Expand Up @@ -335,43 +335,58 @@ def test_repr(self) -> None:
data = Dataset(attrs={"foo": "bar" * 1000})
assert len(repr(data)) < 1000

def test_repr_multiindex(self) -> None:
def test_repr_multiindex(self, mindex_dim_coord) -> None:
data = create_test_multiindex()
expected = dedent(
"""\
<xarray.Dataset>
Dimensions: (x: 4)
Coordinates:
* x (x) object MultiIndex
* level_1 (x) object 'a' 'a' 'b' 'b'
* level_2 (x) int64 1 2 1 2
Data variables:
*empty*"""
)
actual = "\n".join(x.rstrip() for x in repr(data).split("\n"))
print(actual)
assert expected == actual
if mindex_dim_coord:
expected = dedent(
"""\
<xarray.Dataset>
Dimensions: (x: 4)
Coordinates:
* x (x) object MultiIndex
* level_1 (x) object 'a' 'a' 'b' 'b'
* level_2 (x) int64 1 2 1 2
Data variables:
*empty*"""
)
else:
expected = dedent(
"""\
<xarray.Dataset>
Dimensions: (x: 4)
Coordinates:
* level_1 (x) object 'a' 'a' 'b' 'b'
* level_2 (x) int64 1 2 1 2
Dimensions without coordinates: x
Data variables:
*empty*"""
)

# verify that long level names are not truncated
mindex = pd.MultiIndex.from_product(
[["a", "b"], [1, 2]], names=("a_quite_long_level_name", "level_2")
)
data = Dataset({}, {"x": mindex})
expected = dedent(
"""\
<xarray.Dataset>
Dimensions: (x: 4)
Coordinates:
* x (x) object MultiIndex
* a_quite_long_level_name (x) object 'a' 'a' 'b' 'b'
* level_2 (x) int64 1 2 1 2
Data variables:
*empty*"""
)
actual = "\n".join(x.rstrip() for x in repr(data).split("\n"))
print(actual)
assert expected == actual

if not mindex_dim_coord:
# verify that long level names are not truncated
mindex = pd.MultiIndex.from_product(
[["a", "b"], [1, 2]], names=("a_quite_long_level_name", "level_2")
)
data = Dataset({}, {"x": mindex})
expected = dedent(
"""\
<xarray.Dataset>
Dimensions: (x: 4)
Coordinates:
* a_quite_long_level_name (x) object 'a' 'a' 'b' 'b'
* level_2 (x) int64 1 2 1 2
Dimensions without coordinates: x
Data variables:
*empty*"""
)
actual = "\n".join(x.rstrip() for x in repr(data).split("\n"))
print(actual)
assert expected == actual

def test_repr_period_index(self) -> None:
data = create_test_data(seed=456)
data.coords["time"] = pd.period_range("2000-01-01", periods=20, freq="B")
Expand Down Expand Up @@ -2644,8 +2659,9 @@ def test_drop_variables(self) -> None:
assert_identical(expected, actual)

def test_drop_multiindex_level(self) -> None:
data = create_test_multiindex()
expected = data.drop_vars(["x", "level_1", "level_2"])
with set_options(future_no_mindex_dim_coord=True):
data = create_test_multiindex()
expected = data.drop_vars(["level_1", "level_2"])
with pytest.warns(DeprecationWarning):
actual = data.drop_vars("level_1")
assert_identical(expected, actual)
Expand Down Expand Up @@ -3377,15 +3393,15 @@ def test_expand_dims_kwargs_python36plus(self) -> None:
)
assert_identical(other_way_expected, other_way)

def test_set_index(self) -> None:
def test_set_index(self, mindex_dim_coord) -> None:
expected = create_test_multiindex()
mindex = expected["x"].to_index()
mindex = cast(PandasMultiIndex, expected.xindexes["level_1"]).index
indexes = [mindex.get_level_values(n) for n in mindex.names]
coords = {idx.name: ("x", idx) for idx in indexes}
ds = Dataset({}, coords=coords)

obj = ds.set_index(x=mindex.names)
assert_identical(obj, expected)
assert_identical(obj, expected, check_default_indexes=False)

# ensure pre-existing indexes involved are removed
# (level_2 should be a coordinate with no index)
Expand Down Expand Up @@ -3434,14 +3450,17 @@ def test_set_index_deindexed_coords(self) -> None:
)
assert_identical(actual, expected)

def test_reset_index(self) -> None:
def test_reset_index(self, mindex_dim_coord) -> None:
ds = create_test_multiindex()
mindex = ds["x"].to_index()
mindex = cast(PandasMultiIndex, ds.xindexes["level_1"]).index
indexes = [mindex.get_level_values(n) for n in mindex.names]
coords = {idx.name: ("x", idx) for idx in indexes}
expected = Dataset({}, coords=coords)

obj = ds.reset_index("x")
if mindex_dim_coord:
obj = ds.reset_index("x")
else:
obj = ds.reset_index(["level_1", "level_2"])
assert_identical(obj, expected, check_default_indexes=False)
assert len(obj.xindexes) == 0

Expand Down Expand Up @@ -3475,12 +3494,15 @@ def test_reset_index_drop_dims(self) -> None:
],
)
def test_reset_index_drop_convert(
self, arg, drop, dropped, converted, renamed
self, arg, drop, dropped, converted, renamed, mindex_dim_coord
) -> None:
# regressions https://github.com/pydata/xarray/issues/6946 and
# https://github.com/pydata/xarray/issues/6989
# check that multi-index dimension or level coordinates are dropped, converted
# from IndexVariable to Variable or renamed to dimension as expected
if arg == "x" or "x" in arg and not mindex_dim_coord:
return

midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("foo", "bar"))
ds = xr.Dataset(coords={"x": midx})
reset = ds.reset_index(arg, drop=drop)
Expand All @@ -3494,20 +3516,22 @@ def test_reset_index_drop_convert(

def test_reorder_levels(self) -> None:
ds = create_test_multiindex()
mindex = ds["x"].to_index()
mindex = cast(PandasMultiIndex, ds.xindexes["level_1"]).index
midx = mindex.reorder_levels(["level_2", "level_1"])
expected = Dataset({}, coords={"x": midx})

# check attrs propagated
ds["level_1"].attrs["foo"] = "bar"
expected["level_1"].attrs["foo"] = "bar"

reindexed = ds.reorder_levels(x=["level_2", "level_1"])
with pytest.warns(FutureWarning, match=r"will be removed"):
reindexed = ds.reorder_levels(x=["level_2", "level_1"])
assert_identical(reindexed, expected)

ds = Dataset({}, coords={"x": [1, 2]})
with pytest.raises(ValueError, match=r"has no MultiIndex"):
ds.reorder_levels(x=["level_1", "level_2"])
with pytest.warns(FutureWarning, match=r"will be removed"):
with pytest.raises(ValueError, match=r"has no MultiIndex"):
ds.reorder_levels(x=["level_1", "level_2"])

def test_set_xindex(self) -> None:
ds = Dataset(
Expand Down