-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
REGR: fix op(frame, frame2) with reindex #31679
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4afc6d7
38974f3
1eeac12
0012d04
06d206c
ce23635
1eba891
abc73f8
409ffd6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,7 @@ | |
""" | ||
import datetime | ||
import operator | ||
from typing import Optional, Set, Tuple, Union | ||
from typing import TYPE_CHECKING, Optional, Set, Tuple, Union | ||
|
||
import numpy as np | ||
|
||
|
@@ -61,6 +61,9 @@ | |
rxor, | ||
) | ||
|
||
if TYPE_CHECKING: | ||
from pandas import DataFrame # noqa:F401 | ||
|
||
# ----------------------------------------------------------------------------- | ||
# constants | ||
ARITHMETIC_BINOPS: Set[str] = { | ||
|
@@ -703,6 +706,58 @@ def to_series(right): | |
return left, right | ||
|
||
|
||
def _should_reindex_frame_op( | ||
left: "DataFrame", right, axis, default_axis: int, fill_value, level | ||
) -> bool: | ||
""" | ||
Check if this is an operation between DataFrames that will need to reindex. | ||
""" | ||
assert isinstance(left, ABCDataFrame) | ||
|
||
if not isinstance(right, ABCDataFrame): | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return False | ||
|
||
if fill_value is None and level is None and axis is default_axis: | ||
# TODO: any other cases we should handle here? | ||
cols = left.columns.intersection(right.columns) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. isn't this just There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yah, but for certain types of indexes (e.g. RangeIndex) intersection is optimized. Usually won't be enough to matter, but still There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you know if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, fair point. yeah we likley just need some more asv's around this (followup) |
||
if not (cols.equals(left.columns) and cols.equals(right.columns)): | ||
return True | ||
|
||
return False | ||
|
||
|
||
def _frame_arith_method_with_reindex( | ||
left: "DataFrame", right: "DataFrame", op | ||
) -> "DataFrame": | ||
""" | ||
For DataFrame-with-DataFrame operations that require reindexing, | ||
operate only on shared columns, then reindex. | ||
|
||
Parameters | ||
---------- | ||
left : DataFrame | ||
right : DataFrame | ||
op : binary operator | ||
|
||
Returns | ||
------- | ||
DataFrame | ||
""" | ||
# GH#31623, only operate on shared columns | ||
cols = left.columns.intersection(right.columns) | ||
|
||
new_left = left[cols] | ||
new_right = right[cols] | ||
result = op(new_left, new_right) | ||
|
||
# Do the join on the columns instead of using _align_method_FRAME | ||
# to avoid constructing two potentially large/sparse DataFrames | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a nice side-effect of this bugfix. |
||
join_columns, _, _ = left.columns.join( | ||
right.columns, how="outer", level=None, return_indexers=True | ||
) | ||
return result.reindex(join_columns, axis=1) | ||
|
||
|
||
def _arith_method_FRAME(cls, op, special): | ||
str_rep = _get_opstr(op) | ||
op_name = _get_op_name(op, special) | ||
|
@@ -720,6 +775,9 @@ def _arith_method_FRAME(cls, op, special): | |
@Appender(doc) | ||
def f(self, other, axis=default_axis, level=None, fill_value=None): | ||
|
||
if _should_reindex_frame_op(self, other, axis, default_axis, fill_value, level): | ||
return _frame_arith_method_with_reindex(self, other, op) | ||
|
||
self, other = _align_method_FRAME(self, other, axis, flex=True, level=level) | ||
|
||
if isinstance(other, ABCDataFrame): | ||
|
Uh oh!
There was an error while loading. Please reload this page.