Skip to content

Commit a4a2aa5

Browse files
committed
ENH: Add NumIndex for indexes of any numeric type
1 parent cc75b68 commit a4a2aa5

File tree

19 files changed

+280
-26
lines changed

19 files changed

+280
-26
lines changed

doc/source/whatsnew/v1.3.0.rst

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,30 @@ including other versions of pandas.
2323
Enhancements
2424
~~~~~~~~~~~~
2525

26+
.. _whatsnew_130.numindex:
27+
28+
NumIndex: New Index type that can hold all numpy numeric dtypes
29+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
30+
31+
We've added :class:`NumIndex`, an index type whose dtype can be any of int64/32/16/8,
32+
uint64/32/16/8 or float64/32 numpy dtypes. For example:
33+
34+
.. ipython:: python
35+
36+
idx = pd.NumIndex(range(1_000), dtype="uint16")
37+
idx
38+
ser = pd.Series(range(1_000), index=idx)
39+
ser
40+
41+
To use ``NumIndex``, you need to instantiate it directly, as e.g.
42+
``pd.Index(range(1_000), dtype="uint16")`` will return a ``UInt64Index`` for reasons
43+
of backwards compatibility.
44+
45+
``NumIndex`` will become the default index for numeric indexes in Pandas 2.0 and
46+
``Int64Index``, ``UInt64Index`` and ``Float64Index`` will be deprecated in a
47+
future version of Pandas and removed in version 2.0.
48+
49+
2650
.. _whatsnew_130.read_csv_json_http_headers:
2751

2852
Custom HTTP(s) headers when reading csv or json files

pandas/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
UInt64Index,
7979
RangeIndex,
8080
Float64Index,
81+
NumIndex,
8182
MultiIndex,
8283
IntervalIndex,
8384
TimedeltaIndex,

pandas/_libs/join.pyx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,9 @@ ctypedef fused join_t:
263263
int16_t
264264
int32_t
265265
int64_t
266+
uint8_t
267+
uint16_t
268+
uint32_t
266269
uint64_t
267270

268271

pandas/_testing/__init__.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
is_period_dtype,
3434
is_sequence,
3535
is_timedelta64_dtype,
36+
pandas_dtype,
3637
)
3738

3839
import pandas as pd
@@ -44,6 +45,7 @@
4445
Index,
4546
IntervalIndex,
4647
MultiIndex,
48+
NumIndex,
4749
RangeIndex,
4850
Series,
4951
bdate_range,
@@ -309,6 +311,20 @@ def makeFloatIndex(k=10, name=None):
309311
return Index(values * (10 ** np.random.randint(0, 9)), name=name)
310312

311313

314+
def makeNumIndex(k=10, name=None, *, dtype):
    """
    Build a ``NumIndex`` of length ``k`` holding values of the given dtype.

    Parameters
    ----------
    k : int, default 10
        Number of elements in the returned index.
    name : object, optional
        Name passed through to the index.
    dtype : str or dtype
        Keyword-only. Anything accepted by ``pandas_dtype``; its ``kind``
        must be signed integer ("i"), unsigned integer ("u") or float ("f").

    Returns
    -------
    NumIndex

    Raises
    ------
    NotImplementedError
        If ``dtype`` is not a signed integer, unsigned integer or float dtype.
    """
    dtype = pandas_dtype(dtype)
    kind = dtype.kind
    if kind == "i":
        return NumIndex(list(range(k)), name=name, dtype=dtype)
    if kind == "u":
        # Start at 2 ** (bits - 1), i.e. one past the largest value of the
        # same-width signed dtype, so the values need the unsigned range.
        start_num = 2 ** (dtype.itemsize * 8 - 1)
        return NumIndex([start_num + i for i in range(k)], name=name, dtype=dtype)
    if kind == "f":
        # Keep the draw order (k samples first, then the single offset) so
        # results under a fixed seed match the previous implementation.
        values = np.sort(np.random.random_sample(k)) - np.random.random_sample(1)
        return NumIndex(values, name=name, dtype=dtype)
    raise NotImplementedError(f"makeNumIndex does not support dtype {dtype}")
326+
327+
312328
def makeDateIndex(k: int = 10, freq="B", name=None, **kwargs) -> DatetimeIndex:
313329
dt = datetime(2000, 1, 1)
314330
dr = bdate_range(dt, periods=k, freq=freq, name=name)

pandas/_testing/asserters.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -265,8 +265,9 @@ def assert_index_equal(
265265
right : Index
266266
exact : bool or {'equiv'}, default 'equiv'
267267
Whether to check the Index class, dtype and inferred_type
268-
are identical. If 'equiv', then RangeIndex can be substituted for
269-
Int64Index as well.
268+
are identical. If 'equiv', RangeIndex can be substituted for
269+
Int64Index and signed integer dtypes will be equivalent to each other, unsigned
270+
integer to each other and float dtypes equivalent to each other.
270271
check_names : bool, default True
271272
Whether to check the names attribute.
272273
check_less_precise : bool or int, default False
@@ -313,7 +314,10 @@ def _check_types(left, right, obj="Index"):
313314
assert_class_equal(left, right, exact=exact, obj=obj)
314315

315316
# Skip exact dtype checking when `check_categorical` is False
316-
if check_categorical:
317+
if check_categorical and "categorical" in (
318+
left.inferred_type,
319+
right.inferred_type,
320+
):
317321
assert_attr_equal("dtype", left, right, obj=obj)
318322

319323
# allow string-like to have different inferred_types

pandas/conftest.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,16 @@ def _create_mi_with_dt64tz_level():
471471
"uint": tm.makeUIntIndex(100),
472472
"range": tm.makeRangeIndex(100),
473473
"float": tm.makeFloatIndex(100),
474+
"num_int64": tm.makeNumIndex(100, dtype="int64"),
475+
"num_int32": tm.makeNumIndex(100, dtype="int32"),
476+
"num_int16": tm.makeNumIndex(100, dtype="int16"),
477+
"num_int8": tm.makeNumIndex(100, dtype="int8"),
478+
"num_uint64": tm.makeNumIndex(100, dtype="uint64"),
479+
"num_uint32": tm.makeNumIndex(100, dtype="uint32"),
480+
"num_uint16": tm.makeNumIndex(100, dtype="uint16"),
481+
"num_uint8": tm.makeNumIndex(100, dtype="uint8"),
482+
"num_float64": tm.makeNumIndex(100, dtype="float64"),
483+
"num_float32": tm.makeNumIndex(100, dtype="float32"),
474484
"bool": tm.makeBoolIndex(10),
475485
"categorical": tm.makeCategoricalIndex(100),
476486
"interval": tm.makeIntervalIndex(100),
@@ -522,7 +532,10 @@ def index_flat(request):
522532
params=[
523533
key
524534
for key in indices_dict
525-
if key not in ["int", "uint", "range", "empty", "repeats"]
535+
if not (
536+
key in ["int", "uint", "range", "empty", "repeats"]
537+
or key.startswith("num_")
538+
)
526539
and not isinstance(indices_dict[key], MultiIndex)
527540
]
528541
)

pandas/core/algorithms.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,11 @@ def _ensure_data(values: ArrayLike) -> tuple[np.ndarray, DtypeObj]:
143143
# until our algos support uint8 directly (see TODO)
144144
return np.asarray(values).astype("uint64"), np.dtype("bool")
145145
elif is_signed_integer_dtype(values):
146-
return ensure_int64(values), np.dtype("int64")
146+
return ensure_int64(values), values.dtype
147147
elif is_unsigned_integer_dtype(values):
148-
return ensure_uint64(values), np.dtype("uint64")
148+
return ensure_uint64(values), values.dtype
149149
elif is_float_dtype(values):
150-
return ensure_float64(values), np.dtype("float64")
150+
return ensure_float64(values), values.dtype
151151
elif is_complex_dtype(values):
152152

153153
# ignore the fact that we are casting to float

pandas/core/api.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
Int64Index,
5858
IntervalIndex,
5959
MultiIndex,
60+
NumIndex,
6061
PeriodIndex,
6162
RangeIndex,
6263
TimedeltaIndex,

pandas/core/dtypes/generic.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ def _check(cls, inst) -> bool:
100100
"rangeindex",
101101
"float64index",
102102
"uint64index",
103+
"numindex",
103104
"multiindex",
104105
"datetimeindex",
105106
"timedeltaindex",

pandas/core/indexes/api.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
Float64Index,
2626
Int64Index,
2727
NumericIndex,
28+
NumIndex,
2829
UInt64Index,
2930
)
3031
from pandas.core.indexes.period import PeriodIndex
@@ -46,6 +47,7 @@
4647
__all__ = [
4748
"Index",
4849
"MultiIndex",
50+
"NumIndex",
4951
"NumericIndex",
5052
"Float64Index",
5153
"Int64Index",

0 commit comments

Comments
 (0)