Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API: MultiIndex.names|codes|levels returns tuples #57042

Merged
merged 13 commits into from
Feb 7, 2024
Merged
9 changes: 0 additions & 9 deletions doc/source/user_guide/groupby.rst
Original file line number Diff line number Diff line change
Expand Up @@ -137,15 +137,6 @@ We could naturally group by either the ``A`` or ``B`` columns, or both:

``df.groupby('A')`` is just syntactic sugar for ``df.groupby(df['A'])``.

If we also have a MultiIndex on columns ``A`` and ``B``, we can group by all
the columns except the one we specify:

.. ipython:: python

df2 = df.set_index(["A", "B"])
grouped = df2.groupby(level=df2.index.names.difference(["B"]))
grouped.sum()

The above GroupBy will split the DataFrame on its index (rows). To split by columns, first do
a transpose:

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor
Other API changes
^^^^^^^^^^^^^^^^^
- 3rd party ``py.path`` objects are no longer explicitly supported in IO methods. Use :py:class:`pathlib.Path` objects instead (:issue:`57091`)
- :attr:`MultiIndex.codes`, :attr:`MultiIndex.levels`, and :attr:`MultiIndex.names` now return a ``tuple`` instead of a ``FrozenList`` (:issue:`53531`)
-

.. ---------------------------------------------------------------------------
Expand Down
11 changes: 7 additions & 4 deletions pandas/_libs/index.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ import numpy as np

from pandas._typing import npt

from pandas import MultiIndex
from pandas import (
Index,
MultiIndex,
)
from pandas.core.arrays import ExtensionArray

multiindex_nulls_shift: int
Expand Down Expand Up @@ -70,13 +73,13 @@ class MaskedUInt8Engine(MaskedIndexEngine): ...
class MaskedBoolEngine(MaskedUInt8Engine): ...

class BaseMultiIndexCodesEngine:
levels: list[np.ndarray]
levels: tuple[np.ndarray]
offsets: np.ndarray # ndarray[uint64_t, ndim=1]

def __init__(
self,
levels: list[np.ndarray], # all entries hashable
labels: list[np.ndarray], # all entries integer-dtyped
levels: tuple[Index, ...], # all entries hashable
labels: tuple[np.ndarray], # all entries integer-dtyped
offsets: np.ndarray, # np.ndarray[np.uint64, ndim=1]
) -> None: ...
def get_indexer(self, target: npt.NDArray[np.object_]) -> npt.NDArray[np.intp]: ...
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -5698,7 +5698,7 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde
idx = cast(MultiIndex, idx)
levels = list(idx.levels) + [lev]
codes = [np.repeat(x, nqs) for x in idx.codes] + [np.tile(lev_codes, len(idx))]
mi = MultiIndex(levels=levels, codes=codes, names=idx.names + [None])
mi = MultiIndex(levels=levels, codes=codes, names=list(idx.names) + [None])
else:
nidx = len(idx)
idx_codes = coerce_indexer_dtype(np.arange(nidx), idx)
Expand Down
31 changes: 16 additions & 15 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,6 @@
disallow_ndim_indexing,
is_valid_positional_slice,
)
from pandas.core.indexes.frozen import FrozenList
from pandas.core.missing import clean_reindex_fill_method
from pandas.core.ops import get_op_result_name
from pandas.core.ops.invalid import make_invalid_op
Expand Down Expand Up @@ -1767,8 +1766,8 @@ def _get_default_index_names(

return names

def _get_names(self) -> FrozenList:
return FrozenList((self.name,))
def _get_names(self) -> tuple[Hashable | None, ...]:
return (self.name,)

def _set_names(self, values, *, level=None) -> None:
"""
Expand Down Expand Up @@ -1866,7 +1865,7 @@ def set_names(self, names, *, level=None, inplace: bool = False) -> Self | None:
('python', 2019),
( 'cobra', 2018),
( 'cobra', 2019)],
names=['species', 'year'])
names=('species', 'year'))

When renaming levels with a dict, levels can not be passed.

Expand All @@ -1875,7 +1874,7 @@ def set_names(self, names, *, level=None, inplace: bool = False) -> Self | None:
('python', 2019),
( 'cobra', 2018),
( 'cobra', 2019)],
names=['snake', 'year'])
names=('snake', 'year'))
"""
if level is not None and not isinstance(self, ABCMultiIndex):
raise ValueError("Level must be None for non-MultiIndex")
Expand Down Expand Up @@ -1959,19 +1958,19 @@ def rename(self, name, inplace: bool = False) -> Self | None:

>>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
... [2018, 2019]],
... names=['kind', 'year'])
... names=('kind', 'year'))
>>> idx
MultiIndex([('python', 2018),
('python', 2019),
( 'cobra', 2018),
( 'cobra', 2019)],
names=['kind', 'year'])
names=('kind', 'year'))
>>> idx.rename(['species', 'year'])
MultiIndex([('python', 2018),
('python', 2019),
( 'cobra', 2018),
( 'cobra', 2019)],
names=['species', 'year'])
names=('species', 'year'))
>>> idx.rename('species')
Traceback (most recent call last):
TypeError: Must pass list-like as `names`.
Expand Down Expand Up @@ -2135,22 +2134,22 @@ def droplevel(self, level: IndexLabel = 0):
>>> mi
MultiIndex([(1, 3, 5),
(2, 4, 6)],
names=['x', 'y', 'z'])
names=('x', 'y', 'z'))

>>> mi.droplevel()
MultiIndex([(3, 5),
(4, 6)],
names=['y', 'z'])
names=('y', 'z'))

>>> mi.droplevel(2)
MultiIndex([(1, 3),
(2, 4)],
names=['x', 'y'])
names=('x', 'y'))

>>> mi.droplevel('z')
MultiIndex([(1, 3),
(2, 4)],
names=['x', 'y'])
names=('x', 'y'))

>>> mi.droplevel(['x', 'y'])
Index([5, 6], dtype='int64', name='z')
Expand Down Expand Up @@ -4865,7 +4864,9 @@ def _join_level(
"""
from pandas.core.indexes.multi import MultiIndex

def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]:
def _get_leaf_sorter(
labels: tuple[np.ndarray, ...] | list[np.ndarray]
) -> npt.NDArray[np.intp]:
"""
Returns sorter for the inner most level while preserving the
order of higher levels.
Expand Down Expand Up @@ -6627,7 +6628,7 @@ def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
MultiIndex([(1, 'red'),
(2, 'blue'),
(3, 'green')],
names=['number', 'color'])
names=('number', 'color'))

Check whether the strings in the 'color' level of the MultiIndex
are in a list of colors.
Expand Down Expand Up @@ -7608,7 +7609,7 @@ def ensure_index_from_sequences(sequences, names=None) -> Index:
>>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"])
MultiIndex([('a', 'a'),
('a', 'b')],
names=['L1', 'L2'])
names=('L1', 'L2'))

See Also
--------
Expand Down
120 changes: 0 additions & 120 deletions pandas/core/indexes/frozen.py

This file was deleted.

Loading
Loading