Skip to content

ENH/POC: ExtensionIndex for arbitrary EAs #37869

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Merge branch 'master' of https://github.com/pandas-dev/pandas into enh-ei
  • Loading branch information
jbrockmendel committed Jan 15, 2021
commit e6e7f0752cc7468004c9f8c96c936f1802ca40eb
46 changes: 8 additions & 38 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,11 +309,12 @@ def __new__(
if klass is not Index:
return klass(data, dtype=dtype, copy=copy, name=name, **kwargs)

from pandas.core.indexes.extension import ExtensionIndex

ea_cls = dtype.construct_array_type()
data = ea_cls._from_sequence(data, dtype=dtype, copy=copy)
data = np.asarray(data, dtype=object)
disallow_kwargs(kwargs)
return Index._simple_new(data, name=name)
return ExtensionIndex._simple_new(data, name=name)

elif is_ea_or_datetimelike_dtype(data_dtype):
klass = cls._dtype_to_subclass(data_dtype)
Expand All @@ -323,45 +324,14 @@ def __new__(
return result.astype(dtype, copy=False)
return result

return _maybe_asobject(dtype, IntervalIndex, data, copy, name, **kwargs)

elif is_datetime64_any_dtype(data_dtype) or is_datetime64_any_dtype(dtype):
# Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
from pandas import DatetimeIndex

return _maybe_asobject(dtype, DatetimeIndex, data, copy, name, **kwargs)

elif is_timedelta64_dtype(data_dtype) or is_timedelta64_dtype(dtype):
# Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
from pandas import TimedeltaIndex

return _maybe_asobject(dtype, TimedeltaIndex, data, copy, name, **kwargs)

elif is_period_dtype(data_dtype) or is_period_dtype(dtype):
# Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
from pandas import PeriodIndex

return _maybe_asobject(dtype, PeriodIndex, data, copy, name, **kwargs)

# extension dtype
elif is_extension_array_dtype(data_dtype) or is_extension_array_dtype(dtype):
if not (dtype is None or is_object_dtype(dtype)):
# coerce to the provided dtype
ea_cls = dtype.construct_array_type()
data = ea_cls._from_sequence(data, dtype=dtype, copy=False)
else:
data = extract_array(data, extract_numpy=True)
if type(data).__name__ == "PandasArray":
# We're doing the test that patches PandasArray to not be
# recognized as EA
data = data._ndarray
return Index(data, dtype=object, copy=copy, name=name, **kwargs)
disallow_kwargs(kwargs)
if data_dtype == object:
data = np.array(data, dtype=object, copy=copy)
return Index._simple_new(data, name=name)

from pandas.core.indexes.extension import ExtensionIndex

obj = ExtensionIndex._simple_new(data, name=name)
# TODO: need to handle maybe_asobject
return obj
return ExtensionIndex._simple_new(data, name=name) # TODO: copy?

# index-like
elif isinstance(data, (np.ndarray, Index, ABCSeries)):
Expand Down
8 changes: 1 addition & 7 deletions pandas/core/indexes/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import numpy as np

from pandas._libs import index as libindex, lib
from pandas._typing import Label
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import cache_readonly, doc
Expand Down Expand Up @@ -279,12 +278,7 @@ def _check_indexing_method(self, method):
if method is None:
return

if method in ["bfill", "backfill", "pad", "ffill", "nearest"]:
raise NotImplementedError(
f"method {method} not yet implemented for {type(self).__name__}"
)

raise ValueError("Invalid fill method")
# ---------------------------------------------------------------------

def _get_engine_target(self) -> np.ndarray:
return np.asarray(self._data)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/integer/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def test_construct_index(all_data, dropna):
else:
other = all_data

arr = integer_array(other, dtype=all_data.dtype)
arr = pd.array(other, dtype=all_data.dtype)
result = pd.Index(arr)
expected = pd.core.indexes.extension.ExtensionIndex._simple_new(arr)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -1107,7 +1107,7 @@ def test_apply_to_nullable_integer_returns_float(values, function):
idx = Index(pd_array([1, 2, 3]), name="a")
assert isinstance(idx, ExtensionIndex)
assert idx.dtype == "Int64"
expected = DataFrame({"b": arr}, index=idx)
expected = DataFrame({"b": arr}, index=idx).astype("Float64")

groups = DataFrame(values, dtype="Int64").groupby("a")

Expand Down
You are viewing a condensed version of this merge commit. You can view the full changes here.