-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: Rolling rank #43338
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
ENH: Rolling rank #43338
Changes from all commits
Commits
Show all changes
18 commits
Select commit
Hold shift + click to select a range
3ebf8c0
ENH: rolling rank
gsiano ce754f7
ENH: rolling rank
gsiano f13a720
Merge branch 'rolling_rank' of github.com:gsiano/pandas into rolling_…
gsiano 874c980
ENH: rolling rank
gsiano 4d06ba3
ENH: rolling rank
gsiano 1308208
ENH: rolling rank
gsiano 4caa51b
ENH: rolling rank
gsiano f2ee5b2
ENH: rolling rank - rank methods
gsiano b135f1e
ENH: rolling rank - `ascending` flag
gsiano fda85b4
ENH: rolling rank
gsiano e692ce3
ENH: rolling rank - reorder parameter list
gsiano 6b23fc0
ENH: rolling rank - address pre-commit errors
gsiano 5f7d319
ENH: rolling rank
gsiano 63d37c5
ENH: rolling rank - fix pre-commit errors
gsiano ba468c6
ENH: rolling rank
gsiano e078119
Merge branch 'master' into rolling_rank
gsiano bb7005f
ENH: rolling rank
gsiano 1470c7b
ENH: rolling rank
gsiano File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,8 @@ import cython | |
from libc.math cimport round | ||
from libcpp.deque cimport deque | ||
|
||
from pandas._libs.algos cimport TiebreakEnumType | ||
|
||
import numpy as np | ||
|
||
cimport numpy as cnp | ||
|
@@ -50,6 +52,8 @@ cdef extern from "../src/skiplist.h": | |
double skiplist_get(skiplist_t*, int, int*) nogil | ||
int skiplist_insert(skiplist_t*, double) nogil | ||
int skiplist_remove(skiplist_t*, double) nogil | ||
int skiplist_rank(skiplist_t*, double) nogil | ||
int skiplist_min_rank(skiplist_t*, double) nogil | ||
|
||
cdef: | ||
float32_t MINfloat32 = np.NINF | ||
|
@@ -795,7 +799,7 @@ def roll_median_c(const float64_t[:] values, ndarray[int64_t] start, | |
val = values[j] | ||
if notnan(val): | ||
nobs += 1 | ||
err = skiplist_insert(sl, val) != 1 | ||
err = skiplist_insert(sl, val) == -1 | ||
if err: | ||
break | ||
|
||
|
@@ -806,7 +810,7 @@ def roll_median_c(const float64_t[:] values, ndarray[int64_t] start, | |
val = values[j] | ||
if notnan(val): | ||
nobs += 1 | ||
err = skiplist_insert(sl, val) != 1 | ||
err = skiplist_insert(sl, val) == -1 | ||
if err: | ||
break | ||
|
||
|
@@ -1139,6 +1143,122 @@ def roll_quantile(const float64_t[:] values, ndarray[int64_t] start, | |
return output | ||
|
||
|
||
rolling_rank_tiebreakers = { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. possible to unify these with the same in algos.pyx? |
||
"average": TiebreakEnumType.TIEBREAK_AVERAGE, | ||
"min": TiebreakEnumType.TIEBREAK_MIN, | ||
"max": TiebreakEnumType.TIEBREAK_MAX, | ||
} | ||
|
||
|
||
def roll_rank(const float64_t[:] values, ndarray[int64_t] start, | ||
ndarray[int64_t] end, int64_t minp, bint percentile, | ||
str method, bint ascending) -> np.ndarray: | ||
""" | ||
O(N log(window)) implementation using skip list | ||
|
||
derived from roll_quantile | ||
""" | ||
cdef: | ||
Py_ssize_t i, j, s, e, N = len(values), idx | ||
float64_t rank_min = 0, rank = 0 | ||
int64_t nobs = 0, win | ||
float64_t val | ||
skiplist_t *skiplist | ||
float64_t[::1] output | ||
TiebreakEnumType rank_type | ||
|
||
try: | ||
rank_type = rolling_rank_tiebreakers[method] | ||
except KeyError: | ||
raise ValueError(f"Method '{method}' is not supported") | ||
|
||
is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( | ||
start, end | ||
) | ||
# we use the Fixed/Variable Indexer here as the | ||
# actual skiplist ops outweigh any window computation costs | ||
output = np.empty(N, dtype=np.float64) | ||
|
||
win = (end - start).max() | ||
if win == 0: | ||
output[:] = NaN | ||
return np.asarray(output) | ||
skiplist = skiplist_init(<int>win) | ||
if skiplist == NULL: | ||
raise MemoryError("skiplist_init failed") | ||
|
||
with nogil: | ||
for i in range(N): | ||
s = start[i] | ||
e = end[i] | ||
|
||
if i == 0 or not is_monotonic_increasing_bounds: | ||
if not is_monotonic_increasing_bounds: | ||
nobs = 0 | ||
skiplist_destroy(skiplist) | ||
skiplist = skiplist_init(<int>win) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
# setup | ||
for j in range(s, e): | ||
val = values[j] if ascending else -values[j] | ||
if notnan(val): | ||
nobs += 1 | ||
rank = skiplist_insert(skiplist, val) | ||
if rank == -1: | ||
raise MemoryError("skiplist_insert failed") | ||
if rank_type == TiebreakEnumType.TIEBREAK_AVERAGE: | ||
# The average rank of `val` is the sum of the ranks of all | ||
# instances of `val` in the skip list divided by the number | ||
# of instances. The sum of consecutive integers from 1 to N | ||
# is N * (N + 1) / 2. | ||
# The sum of the ranks is the sum of integers from the | ||
# lowest rank to the highest rank, which is the sum of | ||
# integers from 1 to the highest rank minus the sum of | ||
# integers from 1 to one less than the lowest rank. | ||
rank_min = skiplist_min_rank(skiplist, val) | ||
rank = (((rank * (rank + 1) / 2) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
- ((rank_min - 1) * rank_min / 2)) | ||
/ (rank - rank_min + 1)) | ||
elif rank_type == TiebreakEnumType.TIEBREAK_MIN: | ||
rank = skiplist_min_rank(skiplist, val) | ||
else: | ||
rank = NaN | ||
|
||
else: | ||
# calculate deletes | ||
for j in range(start[i - 1], s): | ||
val = values[j] if ascending else -values[j] | ||
if notnan(val): | ||
skiplist_remove(skiplist, val) | ||
nobs -= 1 | ||
|
||
# calculate adds | ||
for j in range(end[i - 1], e): | ||
val = values[j] if ascending else -values[j] | ||
if notnan(val): | ||
nobs += 1 | ||
rank = skiplist_insert(skiplist, val) | ||
if rank == -1: | ||
raise MemoryError("skiplist_insert failed") | ||
if rank_type == TiebreakEnumType.TIEBREAK_AVERAGE: | ||
rank_min = skiplist_min_rank(skiplist, val) | ||
rank = (((rank * (rank + 1) / 2) | ||
- ((rank_min - 1) * rank_min / 2)) | ||
/ (rank - rank_min + 1)) | ||
elif rank_type == TiebreakEnumType.TIEBREAK_MIN: | ||
rank = skiplist_min_rank(skiplist, val) | ||
else: | ||
rank = NaN | ||
if nobs >= minp: | ||
output[i] = rank / nobs if percentile else rank | ||
else: | ||
output[i] = NaN | ||
|
||
skiplist_destroy(skiplist) | ||
|
||
return np.asarray(output) | ||
|
||
|
||
def roll_apply(object obj, | ||
ndarray[int64_t] start, ndarray[int64_t] end, | ||
int64_t minp, | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.