Skip to content

Commit

Permalink
Internal refactor of label-based data selection (pydata#5322)
Browse files Browse the repository at this point in the history
* xindexes also returns multi-index levels as keys

* wip: move label selection into PandasIndex

add Index.query() method
split pd.Index vs. pd.MultiIndex logic

* Revert "xindexes also returns multi-index levels as keys"

This reverts commit 261fb78.

Let's keep this for later. There are too many places in Xarray that
assume that xindexes keys are dimension names.

* fix broken tests

* remove old code + move/update tests

* remove duplicate function

* add PandasMultiIndex class + refactor query impl

* remove PandasIndex.from_variables for now

Add it later in the refactoring when it will be needed elsewhere (e.g., in ``set_index``).

* fix broken tests

Is this what we want?

* prevent loading values for xarray objs in slice

* update what's new
  • Loading branch information
benbovy authored Jun 8, 2021
1 parent da0489f commit 9daf9b1
Show file tree
Hide file tree
Showing 10 changed files with 395 additions and 297 deletions.
7 changes: 7 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ Internal Changes
(:pull:`5433`)
By `Maximilian Roos <https://github.com/max-sixty>`_.

- Explicit indexes refactor: add a ``xarray.Index.query()`` method in which
one may eventually provide a custom implementation of label-based data
selection (not ready yet for public use). Also refactor the internal,
pandas-specific implementation into ``PandasIndex.query()`` and
``PandasMultiIndex.query()`` (:pull:`5322`).
By `Benoit Bovy <https://github.com/benbovy>`_.

.. _whats-new.0.18.2:

v0.18.2 (19 May 2021)
Expand Down
5 changes: 2 additions & 3 deletions xarray/core/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@
import pandas as pd

from . import dtypes
from .indexes import Index, PandasIndex
from .indexing import get_indexer_nd
from .indexes import Index, PandasIndex, get_indexer_nd, wrap_pandas_index
from .utils import is_dict_like, is_full_slice, maybe_coerce_to_str, safe_cast_to_index
from .variable import IndexVariable, Variable

Expand Down Expand Up @@ -561,7 +560,7 @@ def reindex_variables(
"from that to be indexed along {:s}".format(str(indexer.dims), dim)
)

target = new_indexes[dim] = PandasIndex(safe_cast_to_index(indexers[dim]))
target = new_indexes[dim] = wrap_pandas_index(safe_cast_to_index(indexers[dim]))

if dim in indexes:
# TODO (benbovy - flexible indexes): support other indexes than pd.Index?
Expand Down
10 changes: 8 additions & 2 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,13 @@
)
from .dataset import Dataset, split_indexes
from .formatting import format_item
from .indexes import Index, Indexes, PandasIndex, default_indexes, propagate_indexes
from .indexes import (
Index,
Indexes,
default_indexes,
propagate_indexes,
wrap_pandas_index,
)
from .indexing import is_fancy_indexer
from .merge import PANDAS_TYPES, MergeError, _extract_indexes_from_coords
from .options import OPTIONS, _get_keep_attrs
Expand Down Expand Up @@ -1005,7 +1011,7 @@ def copy(self, deep: bool = True, data: Any = None) -> "DataArray":
# TODO: benbovy: flexible indexes: support all xarray indexes (not just pandas.Index)
# xarray Index needs a copy method.
indexes = {
k: PandasIndex(v.to_pandas_index().copy(deep=deep))
k: wrap_pandas_index(v.to_pandas_index().copy(deep=deep))
for k, v in self._indexes.items()
}
return self._replace(variable, coords, indexes=indexes)
Expand Down
13 changes: 7 additions & 6 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,13 @@
Index,
Indexes,
PandasIndex,
PandasMultiIndex,
default_indexes,
isel_variable_and_index,
propagate_indexes,
remove_unused_levels_categories,
roll_index,
wrap_pandas_index,
)
from .indexing import is_fancy_indexer
from .merge import (
Expand Down Expand Up @@ -3284,10 +3286,9 @@ def _rename_indexes(self, name_dict, dims_set):
continue
if isinstance(index, pd.MultiIndex):
new_names = [name_dict.get(k, k) for k in index.names]
new_index = index.rename(names=new_names)
indexes[new_name] = PandasMultiIndex(index.rename(names=new_names))
else:
new_index = index.rename(new_name)
indexes[new_name] = PandasIndex(new_index)
indexes[new_name] = PandasIndex(index.rename(new_name))
return indexes

def _rename_all(self, name_dict, dims_dict):
Expand Down Expand Up @@ -3516,7 +3517,7 @@ def swap_dims(
if new_index.nlevels == 1:
# make sure index name matches dimension name
new_index = new_index.rename(k)
indexes[k] = PandasIndex(new_index)
indexes[k] = wrap_pandas_index(new_index)
else:
var = v.to_base_variable()
var.dims = dims
Expand Down Expand Up @@ -3789,7 +3790,7 @@ def reorder_levels(
raise ValueError(f"coordinate {dim} has no MultiIndex")
new_index = index.reorder_levels(order)
variables[dim] = IndexVariable(coord.dims, new_index)
indexes[dim] = PandasIndex(new_index)
indexes[dim] = PandasMultiIndex(new_index)

return self._replace(variables, indexes=indexes)

Expand Down Expand Up @@ -3817,7 +3818,7 @@ def _stack_once(self, dims, new_dim):
coord_names = set(self._coord_names) - set(dims) | {new_dim}

indexes = {k: v for k, v in self.xindexes.items() if k not in dims}
indexes[new_dim] = PandasIndex(idx)
indexes[new_dim] = wrap_pandas_index(idx)

return self._replace_with_new_dims(
variables, coord_names=coord_names, indexes=indexes
Expand Down
Loading

0 comments on commit 9daf9b1

Please sign in to comment.