Skip to content

Commit 8d54acf

Browse files
andersy005, Illviljan, pre-commit-ci[bot], dcherian
authored
copy the dtypes module to the namedarray package. (#8250)
* move dtypes module to namedarray
* keep original dtypes
* revert utils changes
* Update xarray/namedarray/dtypes.py

  Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com>
* Apply suggestions from code review

  Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com>
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci
* fix missing imports
* update typing
* fix return types
* type fixes
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci
* type fixes

---------

Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
1 parent 36fe917 commit 8d54acf

File tree

3 files changed

+227
-1
lines changed

3 files changed

+227
-1
lines changed

xarray/namedarray/core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ def imag(self) -> Self:
267267
"""
268268
return self._replace(data=self.data.imag)
269269

270-
def __dask_tokenize__(self) -> Hashable | None:
270+
def __dask_tokenize__(self) -> Hashable:
271271
# Use v.data, instead of v._data, in order to cope with the wrappers
272272
# around NetCDF and the like
273273
from dask.base import normalize_token

xarray/namedarray/dtypes.py

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
from __future__ import annotations
2+
3+
import functools
4+
import sys
5+
from typing import Any, Literal
6+
7+
if sys.version_info >= (3, 10):
8+
from typing import TypeGuard
9+
else:
10+
from typing_extensions import TypeGuard
11+
12+
import numpy as np
13+
14+
from xarray.namedarray import utils
15+
16+
# Used as a sentinel value to indicate a dtype-appropriate NA value.
17+
NA = utils.ReprObject("<NA>")
18+
19+
20+
@functools.total_ordering
class AlwaysGreaterThan:
    """Sentinel that compares greater than any other object.

    ``__gt__`` unconditionally returns True and all instances of the class
    compare equal to each other; ``functools.total_ordering`` derives the
    remaining rich comparisons from those two methods.
    """

    def __gt__(self, other: Any) -> Literal[True]:
        return True

    def __eq__(self, other: Any) -> bool:
        return isinstance(other, type(self))

    def __hash__(self) -> int:
        # Defining __eq__ without __hash__ sets __hash__ to None, which would
        # make the sentinel unusable in sets and as a dict key. Every instance
        # compares equal, so every instance must share the same hash.
        return hash(type(self))
27+
28+
29+
@functools.total_ordering
class AlwaysLessThan:
    """Sentinel that compares less than any other object.

    ``__lt__`` unconditionally returns True and all instances of the class
    compare equal to each other; ``functools.total_ordering`` derives the
    remaining rich comparisons from those two methods.
    """

    def __lt__(self, other: Any) -> Literal[True]:
        return True

    def __eq__(self, other: Any) -> bool:
        return isinstance(other, type(self))

    def __hash__(self) -> int:
        # Defining __eq__ without __hash__ sets __hash__ to None, which would
        # make the sentinel unusable in sets and as a dict key. Every instance
        # compares equal, so every instance must share the same hash.
        return hash(type(self))
36+
37+
38+
# Equivalents of np.inf (-np.inf) for object dtype
39+
INF = AlwaysGreaterThan()
40+
NINF = AlwaysLessThan()
41+
42+
43+
# Pairs of types that, if both found, should be promoted to object dtype
44+
# instead of following NumPy's own type-promotion rules. These type promotion
45+
# rules match pandas instead. For reference, see the NumPy type hierarchy:
46+
# https://numpy.org/doc/stable/reference/arrays.scalars.html
47+
PROMOTE_TO_OBJECT: tuple[tuple[type[np.generic], type[np.generic]], ...] = (
48+
(np.number, np.character), # numpy promotes to character
49+
(np.bool_, np.character), # numpy promotes to character
50+
(np.bytes_, np.str_), # numpy promotes to unicode
51+
)
52+
53+
54+
def maybe_promote(dtype: np.dtype[np.generic]) -> tuple[np.dtype[np.generic], Any]:
    """Promote a dtype so that it can represent missing values.

    A simpler equivalent of pandas.core.common._maybe_promote.

    Parameters
    ----------
    dtype : np.dtype

    Returns
    -------
    dtype : Promoted dtype that can hold missing values.
    fill_value : Valid missing value for the promoted dtype.
    """
    # N.B. these casting rules should match pandas
    promoted: np.typing.DTypeLike
    missing: Any
    if np.issubdtype(dtype, np.floating):
        promoted, missing = dtype, np.nan
    elif np.issubdtype(dtype, np.timedelta64):
        # See https://github.com/numpy/numpy/issues/10685
        # np.timedelta64 is a subclass of np.integer, so this branch has to
        # come before the np.integer one.
        promoted, missing = dtype, np.timedelta64("NaT")
    elif np.issubdtype(dtype, np.integer):
        # Integers of at most two bytes go to float32, wider ones to float64.
        promoted = np.float64 if dtype.itemsize > 2 else np.float32
        missing = np.nan
    elif np.issubdtype(dtype, np.complexfloating):
        promoted, missing = dtype, np.nan + np.nan * 1j
    elif np.issubdtype(dtype, np.datetime64):
        promoted, missing = dtype, np.datetime64("NaT")
    else:
        # Everything else falls back to object dtype with NaN as the marker.
        promoted, missing = object, np.nan

    result_dtype = np.dtype(promoted)
    # Cast the fill value through the promoted dtype's scalar type.
    return result_dtype, result_dtype.type(missing)
94+
95+
96+
NAT_TYPES = {np.datetime64("NaT").dtype, np.timedelta64("NaT").dtype}
97+
98+
99+
def get_fill_value(dtype: np.dtype[np.generic]) -> Any:
    """Look up the missing-value marker appropriate for a dtype.

    Parameters
    ----------
    dtype : np.dtype

    Returns
    -------
    fill_value : Missing value corresponding to this dtype.
    """
    # maybe_promote returns (promoted dtype, fill value); only the latter is
    # needed here.
    return maybe_promote(dtype)[1]
112+
113+
114+
def get_pos_infinity(
    dtype: np.dtype[np.generic], max_for_int: bool = False
) -> float | complex | AlwaysGreaterThan:
    """Return an appropriate positive infinity for this dtype.

    Parameters
    ----------
    dtype : np.dtype
    max_for_int : bool
        Return np.iinfo(dtype).max instead of np.inf

    Returns
    -------
    fill_value : positive infinity value corresponding to this dtype.
        ``np.inf`` for floating dtypes, ``np.inf`` or the integer maximum
        for integer dtypes, a complex number with infinite real and
        imaginary parts for complex dtypes, and the ``INF`` sentinel for
        everything else.
    """
    if issubclass(dtype.type, np.floating):
        return np.inf

    if issubclass(dtype.type, np.integer):
        return np.iinfo(dtype.type).max if max_for_int else np.inf
    if issubclass(dtype.type, np.complexfloating):
        # Build the value with complex(): the expression
        # ``np.inf + 1j * np.inf`` evaluates ``1j * inf`` to ``nan+infj``
        # (0 * inf is NaN), which silently produces a NaN real part.
        return complex(np.inf, np.inf)

    return INF
138+
139+
140+
def get_neg_infinity(
    dtype: np.dtype[np.generic], min_for_int: bool = False
) -> float | complex | AlwaysLessThan:
    """Return an appropriate negative infinity for this dtype.

    Parameters
    ----------
    dtype : np.dtype
    min_for_int : bool
        Return np.iinfo(dtype).min instead of -np.inf

    Returns
    -------
    fill_value : negative infinity value corresponding to this dtype.
        ``-np.inf`` for floating dtypes, ``-np.inf`` or the integer minimum
        for integer dtypes, a complex number with negatively infinite real
        and imaginary parts for complex dtypes, and the ``NINF`` sentinel
        for everything else.
    """
    if issubclass(dtype.type, np.floating):
        return -np.inf

    if issubclass(dtype.type, np.integer):
        return np.iinfo(dtype.type).min if min_for_int else -np.inf
    if issubclass(dtype.type, np.complexfloating):
        # Build the value with complex(): the expression
        # ``-np.inf - 1j * np.inf`` evaluates ``1j * inf`` to ``nan+infj``
        # (0 * inf is NaN), which silently produces a NaN real part.
        return complex(-np.inf, -np.inf)

    return NINF
164+
165+
166+
def is_datetime_like(
167+
dtype: np.dtype[np.generic],
168+
) -> TypeGuard[np.datetime64 | np.timedelta64]:
169+
"""Check if a dtype is a subclass of the numpy datetime types"""
170+
return np.issubdtype(dtype, np.datetime64) or np.issubdtype(dtype, np.timedelta64)
171+
172+
173+
def result_type(
    *arrays_and_dtypes: np.typing.ArrayLike | np.typing.DTypeLike,
) -> np.dtype[np.generic]:
    """Like np.result_type, but with type promotion rules matching pandas.

    Examples of changed behavior:
    number + string -> object (not string)
    bytes + unicode -> object (not unicode)

    Parameters
    ----------
    *arrays_and_dtypes : list of arrays and dtypes
        The dtype is extracted from both numpy and dask arrays.

    Returns
    -------
    numpy.dtype for the result.
    """
    # Scalar type of every input, resolved one argument at a time.
    scalar_types = {np.result_type(item).type for item in arrays_and_dtypes}

    def _has(base: type[np.generic]) -> bool:
        # True when at least one input's scalar type derives from ``base``.
        return any(issubclass(candidate, base) for candidate in scalar_types)

    # If both members of any listed pair are present, override NumPy's own
    # promotion and fall back to object dtype.
    if any(_has(left) and _has(right) for left, right in PROMOTE_TO_OBJECT):
        return np.dtype(object)

    return np.result_type(*arrays_and_dtypes)

xarray/namedarray/utils.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import importlib
44
import sys
5+
from collections.abc import Hashable
56
from enum import Enum
67
from typing import TYPE_CHECKING, Any, Final, Protocol, TypeVar
78

@@ -134,3 +135,29 @@ def to_0d_object_array(
134135
result = np.empty((), dtype=object)
135136
result[()] = value
136137
return result
138+
139+
140+
class ReprObject:
    """Sentinel wrapper whose ``repr`` is exactly the string it was built with."""

    __slots__ = ("_value",)

    _value: str

    def __init__(self, value: str):
        self._value = value

    def __repr__(self) -> str:
        return self._value

    def __eq__(self, other: ReprObject | Any) -> bool:
        # TODO: What type can other be? ArrayLike?
        # Anything that is not a ReprObject is never equal.
        if not isinstance(other, ReprObject):
            return False
        return self._value == other._value

    def __hash__(self) -> int:
        # Hash on the same (class, value) pair that the dask token uses.
        identity = (type(self), self._value)
        return hash(identity)

    def __dask_tokenize__(self) -> Hashable:
        # Imported lazily so dask is only needed when tokenizing.
        from dask.base import normalize_token

        identity = (type(self), self._value)
        return normalize_token(identity)  # type: ignore[no-any-return]

0 commit comments

Comments
 (0)