Skip to content

Commit

Permalink
PERF: lib.Validator iteration (pandas-dev#44495)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Nov 17, 2021
1 parent 0e8ff31 commit 6240b1f
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 8 deletions.
18 changes: 14 additions & 4 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1704,10 +1704,15 @@ cdef class Validator:
     cdef bint _validate(self, ndarray values) except -1:
         cdef:
             Py_ssize_t i
-            Py_ssize_t n = self.n
+            Py_ssize_t n = values.size
+            flatiter it = PyArray_IterNew(values)

         for i in range(n):
-            if not self.is_valid(values[i]):
+            # The PyArray_GETITEM and PyArray_ITER_NEXT are faster
+            # equivalents to `val = values[i]`
+            val = PyArray_GETITEM(values, PyArray_ITER_DATA(it))
+            PyArray_ITER_NEXT(it)
+            if not self.is_valid(val):
                 return False

         return True
Expand All @@ -1717,10 +1722,15 @@ cdef class Validator:
     cdef bint _validate_skipna(self, ndarray values) except -1:
         cdef:
             Py_ssize_t i
-            Py_ssize_t n = self.n
+            Py_ssize_t n = values.size
+            flatiter it = PyArray_IterNew(values)

         for i in range(n):
-            if not self.is_valid_skipna(values[i]):
+            # The PyArray_GETITEM and PyArray_ITER_NEXT are faster
+            # equivalents to `val = values[i]`
+            val = PyArray_GETITEM(values, PyArray_ITER_DATA(it))
+            PyArray_ITER_NEXT(it)
+            if not self.is_valid_skipna(val):
                 return False

         return True
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,9 +318,7 @@ def __init__(self, values, copy=False):

     def _validate(self):
         """Validate that we only store NA or strings."""
-        if len(self._ndarray) and not lib.is_string_array(
-            self._ndarray.ravel("K"), skipna=True
-        ):
+        if len(self._ndarray) and not lib.is_string_array(self._ndarray, skipna=True):
             raise ValueError("StringArray requires a sequence of strings or pandas.NA")
         if self._ndarray.dtype != "object":
             raise ValueError(
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,5 +447,5 @@ def is_inferred_bool_dtype(arr: ArrayLike) -> bool:
     if dtype == np.dtype(bool):
         return True
     elif dtype == np.dtype("object"):
-        return lib.is_bool_array(arr.ravel("K"))
+        return lib.is_bool_array(arr)
     return False
2 changes: 2 additions & 0 deletions pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -1429,9 +1429,11 @@ def test_other_dtypes_for_array(self, func):
         func = getattr(lib, func)
         arr = np.array(["foo", "bar"])
         assert not func(arr)
+        assert not func(arr.reshape(2, 1))

         arr = np.array([1, 2])
         assert not func(arr)
+        assert not func(arr.reshape(2, 1))

     def test_date(self):

Expand Down

0 comments on commit 6240b1f

Please sign in to comment.