
CLN: assorted cleanups #29314


Merged
merged 25 commits into from Nov 2, 2019

Changes from all commits
25 commits
a1da4cf  CLN  (jbrockmendel, Oct 29, 2019)
26fb16e  CLN  (jbrockmendel, Oct 29, 2019)
250d035  types  (jbrockmendel, Oct 29, 2019)
e1871ca  use memoryview  (jbrockmendel, Oct 29, 2019)
38d5d1b  Merge branch 'master' of https://github.com/pandas-dev/pandas into re…  (jbrockmendel, Oct 29, 2019)
aec1426  Merge branch 'master' of https://github.com/pandas-dev/pandas into re…  (jbrockmendel, Oct 29, 2019)
1d31843  REF to suppress build warnings  (jbrockmendel, Oct 29, 2019)
f6ad22a  Merge branch 'master' of https://github.com/pandas-dev/pandas into re…  (jbrockmendel, Oct 29, 2019)
02c08cc  Merge branch 'master' of https://github.com/pandas-dev/pandas into re…  (jbrockmendel, Oct 29, 2019)
e84e145  Merge branch 'master' of https://github.com/pandas-dev/pandas into re…  (jbrockmendel, Oct 29, 2019)
05fe729  revert edits in build_warns  (jbrockmendel, Oct 29, 2019)
fee9f09  cleanup  (jbrockmendel, Oct 29, 2019)
ed40c6c  Merge branch 'master' of https://github.com/pandas-dev/pandas into re…  (jbrockmendel, Oct 30, 2019)
9b97c33  Merge branch 'master' of https://github.com/pandas-dev/pandas into re…  (jbrockmendel, Oct 30, 2019)
1cd954f  Merge branch 'master' of https://github.com/pandas-dev/pandas into re…  (jbrockmendel, Oct 30, 2019)
f545feb  Merge branch 'master' of https://github.com/pandas-dev/pandas into re…  (jbrockmendel, Oct 31, 2019)
85400f6  Merge branch 'master' of https://github.com/pandas-dev/pandas into re…  (jbrockmendel, Oct 31, 2019)
37d0c0e  add types  (jbrockmendel, Oct 31, 2019)
f4b45b1  docstring fixups  (jbrockmendel, Oct 31, 2019)
a5154c4  Merge branch 'master' of https://github.com/pandas-dev/pandas into re…  (jbrockmendel, Oct 31, 2019)
e6284dd  Merge branch 'master' of https://github.com/pandas-dev/pandas into re…  (jbrockmendel, Oct 31, 2019)
366bef1  BLD: prevent build warnings  (jbrockmendel, Oct 31, 2019)
c27fdf0  CLN: remove leftover file, implement helper  (jbrockmendel, Nov 1, 2019)
5c2b43b  privatize  (jbrockmendel, Nov 1, 2019)
83c26a1  lint fixup  (jbrockmendel, Nov 1, 2019)
87 changes: 26 additions & 61 deletions pandas/_libs/algos.pyx
@@ -379,28 +379,34 @@ ctypedef fused algos_t:
uint8_t


def _validate_limit(nobs: int, limit=None) -> int:
if limit is None:
Contributor: can you add a doc-string

Member Author: yes, next pass

lim = nobs
else:
if not util.is_integer_object(limit):
raise ValueError('Limit must be an integer')
if limit < 1:
raise ValueError('Limit must be greater than 0')
lim = limit

return lim
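
On the docstring request above: a minimal sketch of what such a docstring might say, assuming numpydoc style; the wording is hypothetical, since the author deferred it to a later pass.

```python
def _validate_limit(nobs: int, limit=None) -> int:
    """
    Validate the `limit` keyword for the fill routines.

    Parameters
    ----------
    nobs : int
        Number of observations; used as the limit when `limit` is None.
    limit : int, optional
        Maximum number of consecutive values to fill.

    Returns
    -------
    int
        The validated limit.

    Raises
    ------
    ValueError
        If `limit` is not an integer or is less than 1.
    """
    ...
```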


@cython.boundscheck(False)
@cython.wraparound(False)
def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[int64_t, ndim=1] indexer
algos_t cur, next
algos_t cur, next_val
int lim, fill_count = 0

nleft = len(old)
nright = len(new)
indexer = np.empty(nright, dtype=np.int64)
indexer[:] = -1

if limit is None:
lim = nright
else:
if not util.is_integer_object(limit):
raise ValueError('Limit must be an integer')
if limit < 1:
raise ValueError('Limit must be greater than 0')
lim = limit
lim = _validate_limit(nright, limit)

if nleft == 0 or nright == 0 or new[nright - 1] < old[0]:
return indexer
@@ -426,9 +432,9 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
j += 1
break

next = old[i + 1]
next_val = old[i + 1]

while j < nright and cur <= new[j] < next:
while j < nright and cur <= new[j] < next_val:
if new[j] == cur:
indexer[j] = i
elif fill_count < lim:
@@ -438,16 +444,14 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):

fill_count = 0
i += 1
cur = next
cur = next_val

return indexer


@cython.boundscheck(False)
@cython.wraparound(False)
def pad_inplace(algos_t[:] values,
const uint8_t[:] mask,
limit=None):
def pad_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
cdef:
Py_ssize_t i, N
algos_t val
@@ -459,14 +463,7 @@ def pad_inplace(algos_t[:] values,
if N == 0:
return

if limit is None:
lim = N
else:
if not util.is_integer_object(limit):
raise ValueError('Limit must be an integer')
if limit < 1:
raise ValueError('Limit must be greater than 0')
lim = limit
lim = _validate_limit(N, limit)

val = values[0]
for i in range(N):
@@ -482,9 +479,7 @@

@cython.boundscheck(False)
@cython.wraparound(False)
def pad_2d_inplace(algos_t[:, :] values,
const uint8_t[:, :] mask,
limit=None):
def pad_2d_inplace(algos_t[:, :] values, const uint8_t[:, :] mask, limit=None):
cdef:
Py_ssize_t i, j, N, K
algos_t val
@@ -496,14 +491,7 @@ def pad_2d_inplace(algos_t[:, :] values,
if N == 0:
return

if limit is None:
lim = N
else:
if not util.is_integer_object(limit):
raise ValueError('Limit must be an integer')
if limit < 1:
raise ValueError('Limit must be greater than 0')
lim = limit
lim = _validate_limit(N, limit)

for j in range(K):
fill_count = 0
@@ -559,14 +547,7 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
indexer = np.empty(nright, dtype=np.int64)
indexer[:] = -1

if limit is None:
lim = nright
else:
if not util.is_integer_object(limit):
raise ValueError('Limit must be an integer')
if limit < 1:
raise ValueError('Limit must be greater than 0')
lim = limit
lim = _validate_limit(nright, limit)

if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]:
return indexer
@@ -612,9 +593,7 @@

@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_inplace(algos_t[:] values,
const uint8_t[:] mask,
limit=None):
def backfill_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
cdef:
Py_ssize_t i, N
algos_t val
@@ -626,14 +605,7 @@ def backfill_inplace(algos_t[:] values,
if N == 0:
return

if limit is None:
lim = N
else:
if not util.is_integer_object(limit):
raise ValueError('Limit must be an integer')
if limit < 1:
raise ValueError('Limit must be greater than 0')
lim = limit
lim = _validate_limit(N, limit)

val = values[N - 1]
for i in range(N - 1, -1, -1):
@@ -663,14 +635,7 @@ def backfill_2d_inplace(algos_t[:, :] values,
if N == 0:
return

if limit is None:
lim = N
else:
if not util.is_integer_object(limit):
raise ValueError('Limit must be an integer')
if limit < 1:
raise ValueError('Limit must be greater than 0')
lim = limit
lim = _validate_limit(N, limit)

for j in range(K):
fill_count = 0
2 changes: 1 addition & 1 deletion pandas/_libs/algos_common_helper.pxi.in
@@ -23,7 +23,7 @@ def diff_2d(ndarray[diff_t, ndim=2] arr,
ndarray[out_t, ndim=2] out,
Py_ssize_t periods, int axis):
cdef:
Py_ssize_t i, j, sx, sy
Py_ssize_t i, j, sx, sy, start, stop

# Disable for unsupported dtype combinations,
# see https://github.com/cython/cython/issues/2646
6 changes: 0 additions & 6 deletions pandas/_libs/groupby.pxd

This file was deleted.

7 changes: 7 additions & 0 deletions pandas/_libs/groupby.pyx
@@ -27,6 +27,13 @@ _int64_max = np.iinfo(np.int64).max

cdef float64_t NaN = <float64_t>np.NaN

cdef enum InterpolationEnumType:
INTERPOLATION_LINEAR,
INTERPOLATION_LOWER,
INTERPOLATION_HIGHER,
INTERPOLATION_NEAREST,
INTERPOLATION_MIDPOINT


cdef inline float64_t median_linear(float64_t* a, int n) nogil:
cdef:
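
The InterpolationEnumType enum added above lets the interpolation keyword be resolved once, outside the hot loop, rather than by repeated string comparisons. A hedged sketch of that pattern follows; the helper name and the if/elif dispatch are illustrative, not the merged implementation.

```cython
cdef InterpolationEnumType _interpolation_from_str(str interpolation) except *:
    # Illustrative helper: map the user-facing keyword to the C enum once,
    # so the quantile loop only ever branches on an integer.
    if interpolation == "linear":
        return INTERPOLATION_LINEAR
    elif interpolation == "lower":
        return INTERPOLATION_LOWER
    elif interpolation == "higher":
        return INTERPOLATION_HIGHER
    elif interpolation == "nearest":
        return INTERPOLATION_NEAREST
    elif interpolation == "midpoint":
        return INTERPOLATION_MIDPOINT
    raise ValueError(f"Invalid interpolation: {interpolation}")
```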
5 changes: 3 additions & 2 deletions pandas/_libs/lib.pyx
@@ -698,8 +698,7 @@ def generate_bins_dt64(ndarray[int64_t] values, const int64_t[:] binner,

@cython.boundscheck(False)
@cython.wraparound(False)
def get_level_sorter(const int64_t[:] label,
const int64_t[:] starts):
def get_level_sorter(const int64_t[:] label, const int64_t[:] starts):
"""
argsort for a single level of a multi-index, keeping the order of higher
levels unchanged. `starts` points to starts of same-key indices w.r.t
@@ -1677,6 +1676,7 @@ cpdef bint is_datetime64_array(ndarray values):
return validator.validate(values)


# TODO: only non-here use is in test
Member: If so, I'm okay with removing in this PR.

Member Author: Poor wording on my part. This function is used inside lib.pyx, but its only non-Cython use is in tests. So the option is to make it cdef instead of cpdef at the cost of not testing it directly.

Member: Ah okay. Thanks for clarifying that.

def is_datetime_with_singletz_array(values: ndarray) -> bool:
"""
Check values have the same tzinfo attribute.
@@ -1720,6 +1720,7 @@ cdef class AnyTimedeltaValidator(TimedeltaValidator):
return is_timedelta(value)


# TODO: only non-here use is in test
Member: Same

cpdef bint is_timedelta_or_timedelta64_array(ndarray values):
""" infer with timedeltas and/or nat/none """
cdef:
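
For context on the cdef/cpdef trade-off discussed in the review comments above, a small standalone sketch (toy module, not pandas code): a cpdef function gets both a fast C entry point and a Python wrapper, so the test suite can call it directly, while a cdef function is visible only to other Cython code.

```cython
# toy.pyx -- illustrative only
cdef int _only_from_cython(int x):
    # cdef: no Python wrapper is generated, so `import toy` (and pytest)
    # cannot reach this function directly.
    return x * 2

cpdef int callable_from_python(int x):
    # cpdef: callable at C speed from other Cython code, and also exposed
    # as toy.callable_from_python for direct testing.
    return _only_from_cython(x) + 1
```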
10 changes: 6 additions & 4 deletions pandas/_libs/parsers.pyx
@@ -278,7 +278,7 @@ cdef class TextReader:
object true_values, false_values
object handle
bint na_filter, keep_default_na, verbose, has_usecols, has_mi_columns
int64_t parser_start
uint64_t parser_start
list clocks
char *c_encoding
kh_str_starts_t *false_set
@@ -710,11 +710,11 @@ cdef class TextReader:
# header is now a list of lists, so field_count should use header[0]

cdef:
Py_ssize_t i, start, field_count, passed_count, unnamed_count # noqa
Py_ssize_t i, start, field_count, passed_count, unnamed_count
char *word
object name, old_name
int status
int64_t hr, data_line
uint64_t hr, data_line
char *errors = "strict"
StringPath path = _string_path(self.c_encoding)

@@ -1015,12 +1015,14 @@ cdef class TextReader:
else:
end = min(start + rows, self.parser.lines)

# FIXME: dont leave commented-out
# # skip footer
# if footer > 0:
# end -= footer

num_cols = -1
for i in range(self.parser.lines):
# Py_ssize_t cast prevents build warning
for i in range(<Py_ssize_t>self.parser.lines):
num_cols = (num_cols < self.parser.line_fields[i]) * \
self.parser.line_fields[i] + \
(num_cols >= self.parser.line_fields[i]) * num_cols
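
On the `<Py_ssize_t>` cast noted in the last hunk: the loop index is a signed Py_ssize_t while the bound appears to be an unsigned field, and comparing the two in the generated C is presumably what the compiler flags (e.g. -Wsign-compare); casting the bound keeps both sides signed. A minimal sketch of that pattern under this assumption, with toy names:

```cython
from libc.stdint cimport uint64_t


def _count_lines_demo():
    cdef:
        Py_ssize_t i, total = 0
        uint64_t n_lines = 100   # toy stand-in for self.parser.lines

    # Without the cast, Cython emits a comparison between the signed index
    # and the unsigned bound, which gcc/clang report as a build warning.
    for i in range(<Py_ssize_t>n_lines):
        total += 1
    return total
```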