Skip to content

ENH: NumericIndex for any numpy int/uint/float dtype #41153

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 55 commits into from
Aug 5, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
9739f85
ENH: Add NumIndex for indices of any numeric type
topper-123 Apr 25, 2021
88a7858
fix various issues reported by the CI
topper-123 Apr 25, 2021
7ccb1b7
fix test failure
topper-123 Apr 26, 2021
b45500d
Make (Int|UInt|Float)64Index inherit from NumIndex
topper-123 Apr 27, 2021
5ef35f5
fix errors
topper-123 Apr 27, 2021
d8f6c22
Add more numeric tests for NumIndex
topper-123 May 2, 2021
1c65a0b
fixups
topper-123 May 2, 2021
f3e13aa
fix exact='equiv'
topper-123 May 6, 2021
4e17485
add more comprehensive tests
topper-123 May 10, 2021
c1e801d
fixes
topper-123 May 10, 2021
aa0cea7
addresses comments (move _format_native_types, assert_index_equal etc.)
topper-123 May 10, 2021
1f5f922
remove from public namespace
topper-123 May 10, 2021
132ce44
rename to NumericIndex
topper-123 May 10, 2021
058cd2e
fixes
topper-123 May 10, 2021
1c7f23f
fix test
topper-123 May 10, 2021
07a097c
ENH: Add NumIndex for indices of any numeric type
topper-123 Apr 25, 2021
ff6cfb4
fix test failure
topper-123 Apr 26, 2021
fe7b97c
Add more numeric tests for NumIndex
topper-123 May 2, 2021
86f3960
fixups
topper-123 May 2, 2021
2424c0d
fix exact='equiv'
topper-123 May 6, 2021
a515bba
add more comprehensive tests
topper-123 May 10, 2021
341fc2f
remove from public namespace
topper-123 May 10, 2021
c2d8884
fix test
topper-123 May 10, 2021
5a56b1a
add back numeric tests
topper-123 May 11, 2021
a497d57
fix comments
topper-123 May 12, 2021
6557689
fix comments part II
topper-123 May 12, 2021
5bc4c2c
_is_num_index -> _is_numeric_index + Index.union
topper-123 May 14, 2021
84bf540
makeNumIndex -> makeNumericIndex and refactor makeIntIndex etc.
topper-123 May 14, 2021
69953b4
fix errors
topper-123 May 20, 2021
b4be77d
rebase after #41472
topper-123 May 20, 2021
bb42e2d
small clean-up
topper-123 May 20, 2021
bafa9b3
small clean-up II
topper-123 May 20, 2021
35b0e71
small clean-up III
topper-123 May 20, 2021
ed4730b
small clean-up IV
topper-123 May 20, 2021
6a32788
small clean-up V
topper-123 May 20, 2021
47e208c
fix bug
topper-123 May 20, 2021
d6a03a0
fix failures
topper-123 May 21, 2021
ec003ed
cleanups
topper-123 May 21, 2021
7ddee71
change _is_numeric_index to be an attribute
topper-123 May 21, 2021
2bb282f
minor clean-ups
topper-123 May 21, 2021
c1633fb
fix not-allowed parameter
topper-123 May 21, 2021
9c7d57b
fix _should_fallback_to_positional
topper-123 May 22, 2021
f6dccc1
clean-ups after rebasing
topper-123 Jun 4, 2021
3630fc7
more clean-ups
topper-123 Jun 4, 2021
bfe6895
add cleanups
topper-123 Jun 5, 2021
8532ddb
fix TestApi failure
topper-123 Jun 5, 2021
186de8e
more precise tests
topper-123 Jun 5, 2021
ead8f57
update tests
topper-123 Jun 21, 2021
2a850ea
update asserters doc string
topper-123 Jun 21, 2021
d04da70
update tests/common.py
topper-123 Jun 21, 2021
4b8385c
cleanups
topper-123 Jun 23, 2021
1f52f8b
simplify _ensure_dtype
topper-123 Jul 24, 2021
951c5f7
make attribute name clearer
topper-123 Jul 25, 2021
7c7c0dd
address comments
topper-123 Jul 28, 2021
bb72c68
add TODO
topper-123 Jul 29, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
rebase after #41472
  • Loading branch information
topper-123 committed Jul 31, 2021
commit b4be77d86a339d5d63933bd94e4faf5e907b47bb
1 change: 1 addition & 0 deletions pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
RangeIndex,
Float64Index,
MultiIndex,
NumericIndex,
IntervalIndex,
TimedeltaIndex,
DatetimeIndex,
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/index.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import warnings

from pandas.core.indexes.api import ( # noqa:F401
BaseNumericIndex,
CategoricalIndex,
DatetimeIndex,
Float64Index,
Expand All @@ -10,6 +9,7 @@
IntervalIndex,
MultiIndex,
NaT,
NumericIndex,
PeriodIndex,
RangeIndex,
TimedeltaIndex,
Expand Down
2 changes: 0 additions & 2 deletions pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from pandas.core.indexes.interval import IntervalIndex
from pandas.core.indexes.multi import MultiIndex
from pandas.core.indexes.numeric import (
BaseNumericIndex,
Float64Index,
Int64Index,
NumericIndex,
Expand All @@ -46,7 +45,6 @@
"Index",
"MultiIndex",
"NumericIndex",
"BaseNumericIndex",
"Float64Index",
"Int64Index",
"CategoricalIndex",
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2441,16 +2441,17 @@ def _is_numeric_index(cls) -> bool:

Used to check if an operation should return NumericIndex or plain Index.
"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should just be a property on the index itself, no? Why is this needed at all?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I've changed it to an attribute on the index classes.

from pandas.core.indexes.numeric import (
from pandas import (
Float64Index,
Int64Index,
NumericIndex,
RangeIndex,
UInt64Index,
)

if not issubclass(cls, NumericIndex):
return False
elif issubclass(cls, (Int64Index, UInt64Index, Float64Index)):
elif issubclass(cls, (RangeIndex, Int64Index, UInt64Index, Float64Index)):
return False
else:
return True
Expand Down
66 changes: 60 additions & 6 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,13 +212,52 @@ def _ensure_dtype(
else:
return dtype

def __contains__(self, key) -> bool:
"""
Check if key is a float and has a decimal. If it has, return False.
"""
if not is_integer_dtype(self.dtype):
return super().__contains__(key)

hash(key)
try:
if is_float(key) and int(key) != key:
# otherwise the `key in self._engine` check casts e.g. 1.1 -> 1
return False
return key in self._engine
except (OverflowError, TypeError, ValueError):
return False

@doc(Index.astype)
def astype(self, dtype, copy=True):
if is_float_dtype(self.dtype):
dtype = pandas_dtype(dtype)
if needs_i8_conversion(dtype):
raise TypeError(
f"Cannot convert Float64Index to dtype {dtype}; integer "
"values are required for conversion"
)
elif is_integer_dtype(dtype) and not is_extension_array_dtype(dtype):
# TODO(jreback); this can change once we have an EA Index type
# GH 13149
arr = astype_nansafe(self._values, dtype=dtype)
if isinstance(self, Float64Index):
return Int64Index(arr, name=self.name)
else:
return NumericIndex(arr, name=self.name, dtype=dtype)

return super().astype(dtype, copy=copy)

# ----------------------------------------------------------------
# Indexing Methods

@cache_readonly
@doc(Index._should_fallback_to_positional)
def _should_fallback_to_positional(self) -> bool:
return False
if self.inferred_type == "floating":
return False

return super()._should_fallback_to_positional()

@doc(Index._convert_slice_indexer)
def _convert_slice_indexer(self, key: slice, kind: str):
Expand All @@ -239,6 +278,21 @@ def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):
# we will try to coerce to integers
return self._maybe_cast_indexer(label)

@doc(Index._convert_arr_indexer)
def _convert_arr_indexer(self, keyarr) -> np.ndarray:
if is_unsigned_integer_dtype(self.dtype):
# Cast the indexer to uint64 if possible so that the values returned
# from indexing are also uint64.
dtype = None
if is_integer_dtype(keyarr) or (
lib.infer_dtype(keyarr, skipna=False) == "integer"
):
dtype = np.dtype(np.uint64)

return com.asarray_tuplesafe(keyarr, dtype=dtype)

return super()._convert_arr_indexer(keyarr)

# ----------------------------------------------------------------

@doc(Index._shallow_copy)
Expand Down Expand Up @@ -270,13 +324,13 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
return is_numeric_dtype(dtype)

@classmethod
def _assert_safe_casting(cls, data, subarr):
def _assert_safe_casting(cls, data, subarr) -> None:
"""
Subclasses need to override this only if the process of casting data
from some accepted dtype to the internal dtype(s) bears the risk of
truncation (e.g. float to int).
Ensure incoming data can be represented with matching signed-ness.
"""
pass
if is_integer_dtype(subarr.dtype):
if not np.array_equal(data, subarr):
raise TypeError("Unsafe NumPy casting, you must explicitly cast")

@property
def _is_all_dates(self) -> bool:
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import maybe_extract_name
from pandas.core.indexes.numeric import (
BaseNumericIndex,
Float64Index,
Int64Index,
NumericIndex,
)
from pandas.core.ops.common import unpack_zerodim_and_defer

Expand All @@ -57,7 +57,7 @@
_empty_range = range(0)


class RangeIndex(BaseNumericIndex):
class RangeIndex(NumericIndex):
"""
Immutable Index implementing a monotonic integer range.

Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/indexes/test_numpy_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
)
import pandas._testing as tm
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
from pandas.core.indexes.numeric import BaseNumericIndex
from pandas.core.indexes.numeric import NumericIndex


@pytest.mark.parametrize(
Expand Down Expand Up @@ -49,7 +49,7 @@ def test_numpy_ufuncs_basic(index, func):
with tm.external_error_raised((TypeError, AttributeError)):
with np.errstate(all="ignore"):
func(index)
elif isinstance(index, BaseNumericIndex):
elif isinstance(index, NumericIndex):
# coerces to float (e.g. np.sin)
with np.errstate(all="ignore"):
result = func(index)
Expand Down Expand Up @@ -94,7 +94,7 @@ def test_numpy_ufuncs_other(index, func, request):
with tm.external_error_raised(TypeError):
func(index)

elif isinstance(index, BaseNumericIndex):
elif isinstance(index, NumericIndex):
# Results in bool array
result = func(index)
assert isinstance(result, np.ndarray)
Expand Down
15 changes: 0 additions & 15 deletions pandas/tests/indexes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,7 @@

COMPATIBLE_INCONSISTENT_PAIRS = [
(np.float64, np.int64),
(np.float64, np.int32),
(np.float64, np.int16),
(np.float64, np.int8),
(np.float64, np.uint64),
(np.float64, np.uint32),
(np.float64, np.uint16),
(np.float64, np.uint8),
(np.float32, np.int64),
(np.float32, np.int32),
(np.float32, np.int16),
(np.float32, np.int8),
(np.float32, np.uint64),
(np.float32, np.uint32),
(np.float32, np.uint16),
(np.float32, np.uint8),
(np.float32, np.float64),
]


Expand Down