-
-
Notifications
You must be signed in to change notification settings - Fork 18.8k
ENH: Support EAs in Series.unstack #23284
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
ced299f
3b63fcb
756dde9
90f84ef
942db1b
36a4450
ee330d6
2fcaf4d
4f46364
e9498a1
72b5a0d
f6b2050
4d679cb
ff7aba7
91587cb
49bdb50
cf8ed73
5902b5b
17d3002
a75806a
2397e89
8ed7c73
b23234c
29a6bb1
19b7cfa
254fe52
2d78d42
a9e6263
ca286f7
2f28638
967c674
f6aa4b9
32bc3de
56e5f2f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
# -*- coding: utf-8 -*- | ||
import copy | ||
import functools | ||
import warnings | ||
import inspect | ||
import re | ||
|
@@ -1434,7 +1434,7 @@ def equals(self, other): | |
return False | ||
return array_equivalent(self.values, other.values) | ||
|
||
def _unstack(self, unstacker_func, new_columns): | ||
def _unstack(self, unstacker_func, new_columns, n_rows, fill_value): | ||
"""Return a list of unstacked blocks of self | ||
|
||
Parameters | ||
|
@@ -1443,6 +1443,10 @@ def _unstack(self, unstacker_func, new_columns): | |
Partially applied unstacker. | ||
new_columns : Index | ||
All columns of the unstacked BlockManager. | ||
n_rows : int | ||
Only used in ExtensionBlock.unstack | ||
fill_value : int | ||
Only used in ExtensionBlock.unstack | ||
|
||
Returns | ||
------- | ||
|
@@ -1736,7 +1740,7 @@ def _slice(self, slicer): | |
def _try_cast_result(self, result, dtype=None): | ||
return result | ||
|
||
def _unstack(self, unstacker_func, new_columns): | ||
def _unstack(self, unstacker_func, new_columns, n_rows, fill_value): | ||
"""Return a list of unstacked blocks of self | ||
|
||
Parameters | ||
|
@@ -1745,6 +1749,10 @@ def _unstack(self, unstacker_func, new_columns): | |
Partially applied unstacker. | ||
new_columns : Index | ||
All columns of the unstacked BlockManager. | ||
n_rows : int | ||
Only used in ExtensionBlock.unstack | ||
fill_value : int | ||
Only used in ExtensionBlock.unstack | ||
|
||
Returns | ||
------- | ||
|
@@ -1756,18 +1764,28 @@ def _unstack(self, unstacker_func, new_columns): | |
# NonConsolidatable blocks can have a single item only, so we return | ||
# one block per item | ||
unstacker = unstacker_func(self.values.T) | ||
new_items = unstacker.get_new_columns() | ||
new_placement = new_columns.get_indexer(new_items) | ||
new_values, mask = unstacker.get_new_values() | ||
|
||
mask = mask.any(0) | ||
new_placement, new_values, mask = self._get_unstack_items( | ||
unstacker, new_columns | ||
) | ||
|
||
new_values = new_values.T[mask] | ||
new_placement = new_placement[mask] | ||
|
||
blocks = [self.make_block_same_class(vals, [place]) | ||
for vals, place in zip(new_values, new_placement)] | ||
return blocks, mask | ||
|
||
@staticmethod | ||
def _get_unstack_items(unstacker, new_columns): | ||
# shared with ExtensionBlock | ||
new_items = unstacker.get_new_columns() | ||
new_placement = new_columns.get_indexer(new_items) | ||
new_values, mask = unstacker.get_new_values() | ||
|
||
mask = mask.any(0) | ||
return new_placement, new_values, mask | ||
|
||
|
||
class ExtensionBlock(NonConsolidatableMixIn, Block): | ||
"""Block for holding extension types. | ||
|
@@ -1955,32 +1973,21 @@ def shift(self, periods, axis=0): | |
def _ftype(self): | ||
return getattr(self.values, '_pandas_ftype', Block._ftype) | ||
|
||
def _unstack(self, unstacker_func, new_columns): | ||
# I wonder if this is supported | ||
fill_value = unstacker_func.keywords['fill_value'] | ||
unstacker_func = copy.deepcopy(unstacker_func) | ||
unstacker_func.keywords['fill_value'] = -1 | ||
|
||
# just get the index. Can maybe avoid this? | ||
dummy_unstacker = unstacker_func(np.empty((0, 0))) | ||
|
||
dummy_arr = np.arange(len(dummy_unstacker.index)) | ||
def _unstack(self, unstacker_func, new_columns, n_rows, fill_value): | ||
dummy_arr = np.arange(n_rows) | ||
Comment: Can you add a docstring (or does it share the inherited one)? Reply: The inherited one is OK. I'm going to leave a comment explaining why we override. |
||
dummy_unstacker = functools.partial(unstacker_func, fill_value=-1) | ||
unstacker = dummy_unstacker(dummy_arr) | ||
|
||
unstacker = unstacker_func(dummy_arr) | ||
new_items = unstacker.get_new_columns() | ||
new_placement = new_columns.get_indexer(new_items) | ||
new_values, mask = unstacker.get_new_values() | ||
mask = mask.any(0) | ||
|
||
new_values = [ | ||
self.values.take(indices, allow_fill=True, | ||
fill_value=fill_value) | ||
for indices in new_values.T | ||
] | ||
new_placement, new_values, mask = self._get_unstack_items( | ||
unstacker, new_columns | ||
Comment: Wouldn't this work generically for all unstacking (e.g. what if you made this the super method)? Reply: I think this is slower in general. It's necessarily slower for NumPy types, since you have to do the reshaping / unstack on the ndarray of positions anyway. The hope is that the cost of the additional `take` is small relative to the unstack itself. Working on benchmarks now. |
||
) | ||
|
||
blocks = [ | ||
self.make_block_same_class(vals, [place]) | ||
for vals, place in zip(new_values, new_placement) | ||
self.make_block_same_class( | ||
self.values.take(indices, allow_fill=True, | ||
fill_value=fill_value), | ||
[place]) | ||
for indices, place in zip(new_values.T, new_placement) | ||
] | ||
return blocks, mask | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.