-
-
Notifications
You must be signed in to change notification settings - Fork 18.8k
BUG: Preserve key order when using loc on MultiIndex DataFrame #28933
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
2753c79
af5c678
dd53a91
97b952d
3fa3c6d
8b5ec48
fb33627
2c6195f
d911110
81edea5
f1407f1
4c667a7
ee89a33
c18d60d
1717a14
2ab8e30
edde717
82e5109
3367109
025d304
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
Test whether the result of the loc function needs to be sorted so that the rows are returned in the same order as the indexer. If not, skip the sort to improve performance.
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3028,6 +3028,7 @@ def _update_indexer(idxr, indexer=indexer): | |
return indexer | ||
return indexer & idxr | ||
|
||
need_sort = False | ||
for i, k in enumerate(seq): | ||
|
||
if com.is_bool_indexer(k): | ||
|
@@ -3039,12 +3040,21 @@ def _update_indexer(idxr, indexer=indexer): | |
# a collection of labels to include from this level (these | ||
# are or'd) | ||
indexers = None | ||
start_pos = 0 | ||
for x in k: | ||
try: | ||
idxrs = _convert_to_indexer( | ||
self._get_level_indexer(x, level=i, indexer=indexer) | ||
) | ||
indexers = idxrs if indexers is None else indexers | idxrs | ||
|
||
if not need_sort: | ||
next_key_pos = self.levels[i].get_loc(x) | ||
if next_key_pos < start_pos: | ||
need_sort = True | ||
else: | ||
start_pos = next_key_pos | ||
|
||
except KeyError: | ||
|
||
# ignore not founds | ||
|
@@ -3082,26 +3092,28 @@ def _update_indexer(idxr, indexer=indexer): | |
if indexer is None: | ||
return Int64Index([])._ndarray_values | ||
|
||
# Generate tuples of keys by wich to order the results | ||
keys = tuple() | ||
for i, k in enumerate(seq): | ||
if com.is_bool_indexer(k): | ||
new_order = np.arange(n)[indexer] | ||
elif is_list_like(k): | ||
# Generate a map with all level codes as sorted initially | ||
key_order_map = np.ones(len(self.levels[i]), dtype=np.uint64) * len( | ||
self.levels[i] | ||
) | ||
# Set order as given in the indexer list | ||
for p, e in enumerate(k): | ||
if e in self.levels[i]: | ||
key_order_map[self.levels[i].get_loc(e)] = p | ||
new_order = key_order_map[self.codes[i][indexer]] | ||
else: | ||
# For all other case, use the same order as the level | ||
new_order = np.arange(n)[indexer] | ||
keys = (new_order,) + keys | ||
if len(keys) > 0: | ||
# Generate tuples of keys by which to order the results | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is really complex and adds quite a bit of code. Please take another look to simplify greatly. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sure thing, will do. I did have a simpler solution, but the performance hit was really high. |
||
if need_sort: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you just check is_lexsorted? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is not the same thing. The index may or may not be lexsorted, but what I want to know here is whether the keys given to .loc are in the same order as the index (see line 3058); if not, I reorder the result in indexer so that its order reflects the order of the given keys. |
||
keys = tuple() | ||
for i, k in enumerate(seq): | ||
if com.is_bool_indexer(k): | ||
new_order = np.arange(n)[indexer] | ||
elif is_list_like(k): | ||
# Generate a map with all level codes as sorted initially | ||
key_order_map = np.ones(len(self.levels[i]), dtype=np.uint64) * len( | ||
self.levels[i] | ||
) | ||
# Set order as given in the indexer list | ||
for p, e in enumerate(k): | ||
if e in self.levels[i]: | ||
key_order_map[self.levels[i].get_loc(e)] = p | ||
new_order = key_order_map[self.codes[i][indexer]] | ||
# Testing if the sort order of the result should be modified | ||
else: | ||
# For all other case, use the same order as the level | ||
new_order = np.arange(n)[indexer] | ||
keys = (new_order,) + keys | ||
|
||
ind = np.lexsort(keys) | ||
indexer = indexer[ind] | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.