Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add first-class dtype utilities #8308

Merged
merged 31 commits into from
Jun 16, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
e5ab00c
Create new types module that aliases pandas.api.types.
vyasr May 19, 2021
5586652
Move is_categorical_dtype to new location.
vyasr May 19, 2021
bfe2723
Move a bunch more functions into the new module.
vyasr May 19, 2021
8727754
Add docstrings and apply pydocstyle.
vyasr May 20, 2021
65e0198
Add tests of is_categorical_dtype.
vyasr May 20, 2021
8b3b822
Add tests of is_numeric_dtype and is_integer_dtype and fix bugs.
vyasr May 20, 2021
52786bb
Test is_integer.
vyasr May 20, 2021
0e852e9
Add more systematic list of test cases.
vyasr May 20, 2021
6f6e05e
Import IntervalDtype into top-level namespace.
vyasr May 20, 2021
0933c13
Test is_string_dtype.
vyasr May 20, 2021
bbfdeae
Test datetime using simple wrapper of pandas.
vyasr May 20, 2021
eb4298e
Use wrapper for is_integer_dtype.
vyasr May 20, 2021
d5648fc
Add tests of cudf types.
vyasr May 20, 2021
a889711
Add explicit test of pandas agreement and do some cleanup.
vyasr May 20, 2021
e154d4e
Simplify is_scalar.
vyasr May 20, 2021
6911063
Address some obvious test failures, either by fixing or with TODOs wh…
vyasr May 20, 2021
5032eef
Clean up some comments.
vyasr May 20, 2021
380f44b
Combine previously introduced types API with current new more compreh…
vyasr Jun 8, 2021
f1b00c5
Import annotations.
vyasr Jun 8, 2021
6b88a23
Define a new function for internal use to distinguish decimals from o…
vyasr Jun 8, 2021
e05c2bc
Fix docstring to satisfy pydocstyle.
vyasr Jun 8, 2021
2c966ed
Partial replacement of usage of pd.api.types.
vyasr Jun 8, 2021
719274d
Simplify is_categorical_dtype.
vyasr Jun 8, 2021
c8b62aa
Change is_scalar to False for 0d arrays, and add a new utility for in…
vyasr Jun 9, 2021
b097c2c
Exclude interval from struct tests for now.
vyasr Jun 9, 2021
7e36d09
Remove more uses of pd.api.types.
vyasr Jun 9, 2021
f63146e
Remove aliasing.
vyasr Jun 9, 2021
afbdfb2
Merge remote-tracking branch 'origin/branch-21.08' into refactor/type_apis
vyasr Jun 14, 2021
55a47ef
Merge remote-tracking branch 'origin/branch-21.08' into refactor/type_apis
vyasr Jun 15, 2021
d2b8710
Fix circular import issues.
vyasr Jun 15, 2021
e61765a
Merge branch 'refactor/type_apis' of github.com:vyasr/cudf into refactor/type_apis
vyasr Jun 15, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add tests of is_numeric_dtype and is_integer_dtype and fix bugs.
  • Loading branch information
vyasr committed Jun 8, 2021
commit 8b3b82261fa85d99a834d631e0670edffe9770ab
36 changes: 24 additions & 12 deletions python/cudf/cudf/api/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import datetime as dt
from collections.abc import Sequence
from inspect import isclass
from numbers import Number

import cupy as cp
Expand All @@ -16,6 +17,7 @@

import cudf
from cudf._lib.scalar import DeviceScalar
from cudf.core.dtypes import _BaseDtype


def is_categorical_dtype(obj):
Expand Down Expand Up @@ -92,13 +94,21 @@ def is_numeric_dtype(obj):
bool
Whether or not the array or dtype is of a numeric dtype.
"""
# TODO: we should handle objects with a `.dtype` attribute,
# e.g., arrays, here.
try:
dtype = np.dtype(obj)
except TypeError:
return False
return dtype.kind in "biuf"
if isclass(obj):
if issubclass(obj, cudf.Decimal64Dtype):
return True
if issubclass(obj, _BaseDtype):
return False
else:
if isinstance(obj, cudf.Decimal64Dtype) or isinstance(
getattr(obj, "dtype", None), cudf.Decimal64Dtype
):
return True
if isinstance(obj, _BaseDtype) or isinstance(
getattr(obj, "dtype", None), _BaseDtype
):
return False
return pd_types.is_numeric_dtype(obj)


"""
Expand Down Expand Up @@ -128,11 +138,13 @@ def is_integer_dtype(obj):
bool
Whether or not the array or dtype is of an integer dtype.
"""
try:
dtype = np.dtype(obj)
except TypeError:
return pd.api.types.is_integer_dtype(obj)
return dtype.kind in "iu"
if (
(isclass(obj) and issubclass(obj, _BaseDtype))
or isinstance(obj, _BaseDtype)
or isinstance(getattr(obj, "dtype", None), _BaseDtype)
):
return False
return pd.api.types.is_integer_dtype(obj)


def is_integer(obj):
Expand Down
94 changes: 94 additions & 0 deletions python/cudf/cudf/tests/test_api_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
"obj, expect",
(
(True, False),
(bool, False),
(np.bool_, False),
(np.int_, False),
(np.float64, False),
(complex, False),
(np.complex128, False),
(bool(1), False),
(np.bool_(1), False),
(np.int_(1), False),
(np.float64(1), False),
Expand Down Expand Up @@ -50,3 +52,95 @@
)
def test_is_categorical_dtype(obj, expect):
assert types.is_categorical_dtype(obj) == expect


@pytest.mark.parametrize(
    "obj, expect",
    (
        # Base Python and NumPy type objects (classes).
        (True, False),
        (bool, True),
        (np.bool_, True),
        (np.int_, True),
        (np.float64, True),
        (complex, True),
        (np.complex128, True),
        # Scalar instances of the types above.
        (bool(1), False),
        (np.bool_(1), True),
        (np.int_(1), True),
        (np.float64(1), True),
        (complex(1), False),
        (np.complex128(1), True),
        # NumPy dtype instances.
        (np.dtype("int"), True),
        (np.dtype("float"), True),
        (np.dtype("complex"), True),
        # Datetime and timedelta values / series.
        (np.datetime64("2005-02-25T03:30"), False),
        (cudf.Series(["2005-02-25T03:30"], dtype="datetime64[s]"), False),
        (cudf.Series([1000], dtype="timedelta64[s]"), False),
        (pd.Series(["2005-02-25T03:30"], dtype="datetime64[s]"), False),
        # Plain Python scalars and strings.
        (1, False),
        (1.0, False),
        ("hello", False),
        (cudf.Series("hello"), False),
        # Categorical dtypes and series.
        (cudf.CategoricalDtype, False),
        (cudf.CategoricalDtype("a"), False),
        (cudf.Series(["a"], dtype="category"), False),
        # Decimal: the only cudf extension dtype in this table that is
        # expected to be reported as numeric.
        (cudf.Series([1, 2], dtype=cudf.Decimal64Dtype(5, 2)), True),
        (cudf.Decimal64Dtype(5, 2), True),
        (cudf.Decimal64Dtype, True),
        # pandas categorical types.
        (pd.core.dtypes.dtypes.CategoricalDtypeType, False),
        (pd.CategoricalDtype, False),
        # List and struct series / dtypes.
        (cudf.Series([[1, 2], [3, 4, 5]]), False),
        (cudf.Series([{"a": 1, "b": 2}, {"c": 3}]), False),
        (cudf.StructDtype, False),
        (cudf.ListDtype, False),
    ),
)
def test_is_numeric_dtype(obj, expect):
    """Check ``types.is_numeric_dtype(obj)`` against the expected result.

    Parameters
    ----------
    obj : object
        A type, dtype, scalar, or series passed to ``is_numeric_dtype``.
    expect : bool
        The result ``is_numeric_dtype`` is expected to return for ``obj``.
    """
    assert types.is_numeric_dtype(obj) == expect


@pytest.mark.parametrize(
    "obj, expect",
    (
        # Base Python and NumPy type objects (classes).
        (True, False),
        (bool, False),
        (np.bool_, False),
        (np.int_, True),
        (np.float64, False),
        (complex, False),
        (np.complex128, False),
        # Scalar instances of the types above.
        (bool(1), False),
        (np.bool_(1), False),
        (np.int_(1), True),
        (np.float64(1), False),
        (complex(1), False),
        (np.complex128(1), False),
        # NumPy dtype instances.
        (np.dtype("int"), True),
        (np.dtype("float"), False),
        (np.dtype("complex"), False),
        # Datetime and timedelta values / series.
        (np.datetime64("2005-02-25T03:30"), False),
        (cudf.Series(["2005-02-25T03:30"], dtype="datetime64[s]"), False),
        (cudf.Series([1000], dtype="timedelta64[s]"), False),
        (pd.Series(["2005-02-25T03:30"], dtype="datetime64[s]"), False),
        # Plain Python scalars and strings.
        (1, False),
        (1.0, False),
        ("hello", False),
        (cudf.Series("hello"), False),
        # Categorical dtypes and series.
        (cudf.CategoricalDtype, False),
        (cudf.CategoricalDtype("a"), False),
        (cudf.Series(["a"], dtype="category"), False),
        # Decimal dtypes and series are not expected to count as integer.
        (cudf.Series([1, 2], dtype=cudf.Decimal64Dtype(5, 2)), False),
        (cudf.Decimal64Dtype(5, 2), False),
        (cudf.Decimal64Dtype, False),
        # pandas categorical types.
        (pd.core.dtypes.dtypes.CategoricalDtypeType, False),
        (pd.CategoricalDtype, False),
        # List and struct series / dtypes.
        (cudf.Series([[1, 2], [3, 4, 5]]), False),
        (cudf.Series([{"a": 1, "b": 2}, {"c": 3}]), False),
        (cudf.StructDtype, False),
        (cudf.ListDtype, False),
    ),
)
def test_is_integer_dtype(obj, expect):
    """Check ``types.is_integer_dtype(obj)`` against the expected result.

    Parameters
    ----------
    obj : object
        A type, dtype, scalar, or series passed to ``is_integer_dtype``.
    expect : bool
        The result ``is_integer_dtype`` is expected to return for ``obj``.
    """
    assert types.is_integer_dtype(obj) == expect