Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PERF: fastpaths in is_foo_dtype checks #33400

Merged
merged 5 commits into from
Apr 17, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pandas/_libs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
"Timedelta",
"Timestamp",
"iNaT",
"Interval",
]


from pandas._libs.interval import Interval
from pandas._libs.tslibs import (
NaT,
NaTType,
Expand Down
28 changes: 16 additions & 12 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,15 @@ from pandas._libs.khash cimport (
kh_get_str_starts_item, kh_destroy_str_starts, kh_resize_str_starts)

from pandas.core.dtypes.common import (
is_categorical_dtype,
is_integer_dtype, is_float_dtype,
is_bool_dtype, is_object_dtype,
is_datetime64_dtype,
pandas_dtype, is_extension_array_dtype)
is_cat_dtype,
is_integer_dtype,
is_float_dtype,
is_bool_dtype,
is_object_dtype,
is_dt64_dtype,
pandas_dtype,
is_ea_dtype,
)
from pandas.core.arrays import Categorical
from pandas.core.dtypes.concat import union_categoricals
import pandas.io.common as icom
Expand Down Expand Up @@ -1064,7 +1068,7 @@ cdef class TextReader:

# don't try to upcast EAs
try_upcast = upcast_na and na_count > 0
if try_upcast and not is_extension_array_dtype(col_dtype):
if try_upcast and not is_ea_dtype(col_dtype):
col_res = _maybe_upcast(col_res)

if col_res is None:
Expand Down Expand Up @@ -1140,7 +1144,7 @@ cdef class TextReader:
bint user_dtype,
kh_str_starts_t *na_hashset,
object na_flist):
if is_categorical_dtype(dtype):
if is_cat_dtype(dtype):
# TODO: I suspect that _categorical_convert could be
# optimized when dtype is an instance of CategoricalDtype
codes, cats, na_count = _categorical_convert(
Expand All @@ -1153,7 +1157,7 @@ cdef class TextReader:
cats, codes, dtype, true_values=true_values)
return cat, na_count

elif is_extension_array_dtype(dtype):
elif is_ea_dtype(dtype):
result, na_count = self._string_convert(i, start, end, na_filter,
na_hashset)
array_type = dtype.construct_array_type()
Expand Down Expand Up @@ -1223,7 +1227,7 @@ cdef class TextReader:
elif is_object_dtype(dtype):
return self._string_convert(i, start, end, na_filter,
na_hashset)
elif is_datetime64_dtype(dtype):
elif is_dt64_dtype(dtype):
raise TypeError(f"the dtype {dtype} is not supported "
f"for parsing, pass this column "
f"using parse_dates instead")
Expand Down Expand Up @@ -2035,19 +2039,19 @@ def _concatenate_chunks(list chunks):
arrs = [chunk.pop(name) for chunk in chunks]
# Check each arr for consistent types.
dtypes = {a.dtype for a in arrs}
numpy_dtypes = {x for x in dtypes if not is_categorical_dtype(x)}
numpy_dtypes = {x for x in dtypes if not is_cat_dtype(x)}
if len(numpy_dtypes) > 1:
common_type = np.find_common_type(numpy_dtypes, [])
if common_type == np.object:
warning_columns.append(str(name))

dtype = dtypes.pop()
if is_categorical_dtype(dtype):
if is_cat_dtype(dtype):
sort_categories = isinstance(dtype, str)
result[name] = union_categoricals(arrs,
sort_categories=sort_categories)
else:
if is_extension_array_dtype(dtype):
if is_ea_dtype(dtype):
array_type = dtype.construct_array_type()
result[name] = array_type._concat_same_type(arrs)
else:
Expand Down
37 changes: 36 additions & 1 deletion pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import numpy as np

from pandas._libs import algos
from pandas._libs import Interval, Period, algos
from pandas._libs.tslibs import conversion
from pandas._typing import ArrayLike, DtypeObj

Expand Down Expand Up @@ -1522,6 +1522,41 @@ def is_extension_array_dtype(arr_or_dtype) -> bool:
return isinstance(dtype, ExtensionDtype) or registry.find(dtype) is not None


def is_ea_dtype(dtype) -> bool:
    """
    Fastpath check for whether ``dtype`` is an ExtensionDtype instance.

    Only an ``isinstance`` test is performed; strings and other dtype-like
    inputs are not looked up in the registry, so they return False.

    Parameters
    ----------
    dtype : object
        The object to check.

    Returns
    -------
    bool
        True if ``dtype`` is an instance of ExtensionDtype.
    """
    if isinstance(dtype, ExtensionDtype):
        return True
    return False


def is_dt64_dtype(dtype) -> bool:
    """
    Fastpath check for a numpy datetime64 dtype object.

    Parameters
    ----------
    dtype : object
        The object to check.

    Returns
    -------
    bool
        True if ``dtype`` is an ``np.dtype`` whose ``kind`` is "M".
    """
    if not isinstance(dtype, np.dtype):
        # Anything that is not an np.dtype instance is rejected outright.
        return False
    return dtype.kind == "M"


def is_dt64tz_dtype(dtype) -> bool:
    """
    Fastpath check for an ExtensionDtype object with datetime kind.

    Parameters
    ----------
    dtype : object
        The object to check.

    Returns
    -------
    bool
        True if ``dtype`` is an ExtensionDtype whose ``kind`` is "M".
    """
    if not isinstance(dtype, ExtensionDtype):
        # numpy dtypes, strings, etc. never qualify here.
        return False
    return dtype.kind == "M"


def is_dt64_any_dtype(dtype) -> bool:
    """
    Fastpath check for any dtype object with datetime kind.

    Parameters
    ----------
    dtype : object
        The object to check.

    Returns
    -------
    bool
        True if ``dtype`` is an ``np.dtype`` or an ExtensionDtype and its
        ``kind`` is "M".
    """
    is_dtype_obj = isinstance(dtype, np.dtype) or isinstance(dtype, ExtensionDtype)
    if not is_dtype_obj:
        return False
    return dtype.kind == "M"


def is_td64_dtype(dtype) -> bool:
    """
    Fastpath check for a numpy timedelta64 dtype object.

    Parameters
    ----------
    dtype : object
        The object to check.

    Returns
    -------
    bool
        True if ``dtype`` is an ``np.dtype`` whose ``kind`` is "m".
    """
    if not isinstance(dtype, np.dtype):
        # Anything that is not an np.dtype instance is rejected outright.
        return False
    return dtype.kind == "m"


def is_period_dtype_obj(dtype) -> bool:
    """
    Fastpath check for an ExtensionDtype object whose scalar type is Period.

    Parameters
    ----------
    dtype : object
        The object to check.

    Returns
    -------
    bool
        True if ``dtype`` is an ExtensionDtype and ``dtype.type`` is Period.
    """
    if not isinstance(dtype, ExtensionDtype):
        return False
    scalar_type = dtype.type
    return scalar_type is Period


def is_interval_dtype_obj(dtype) -> bool:
    """
    Fastpath check for an ExtensionDtype object whose scalar type is Interval.

    Parameters
    ----------
    dtype : object
        The object to check.

    Returns
    -------
    bool
        True if ``dtype`` is an ExtensionDtype and ``dtype.type`` is Interval.
    """
    if not isinstance(dtype, ExtensionDtype):
        return False
    scalar_type = dtype.type
    return scalar_type is Interval


def is_cat_dtype(dtype) -> bool:
    """
    Fastpath check for a categorical dtype object.

    Parameters
    ----------
    dtype : object
        The object to check.

    Returns
    -------
    bool
        True if ``dtype`` is an ExtensionDtype whose ``name`` is "category".
    """
    if not isinstance(dtype, ExtensionDtype):
        # Plain strings such as "category" are not dtype objects.
        return False
    return dtype.name == "category"


def is_complex_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is of a complex dtype.
Expand Down