-
-
Notifications
You must be signed in to change notification settings - Fork 18.8k
REGR: preserve reindexed array object (instead of creating new array) for concat with all-NA array #47762
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
REGR: preserve reindexed array object (instead of creating new array) for concat with all-NA array #47762
Changes from 1 commit
e89624f
7bc3b64
a5ae544
40e1009
e5e19b8
c42f54e
d76aa6f
28d72a3
3679d63
2e0e0a8
12d8480
83b1ea9
2da56d5
bfba49c
7f25c24
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from pandas.tests.extension.array_with_attr.array import ( | ||
FloatAttrArray, | ||
FloatAttrDtype, | ||
make_data, | ||
) | ||
|
||
__all__ = ["FloatAttrArray", "FloatAttrDtype", "make_data"] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
""" | ||
Test extension array that has custom attribute information (not stored on the dtype). | ||
|
||
""" | ||
from __future__ import annotations | ||
|
||
import numbers | ||
|
||
import numpy as np | ||
|
||
from pandas._typing import type_t | ||
|
||
from pandas.core.dtypes.base import ExtensionDtype | ||
|
||
import pandas as pd | ||
from pandas.core.arrays import ExtensionArray | ||
|
||
|
||
class FloatAttrDtype(ExtensionDtype): | ||
type = int | ||
name = "int_attr" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these should be float? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It doesn't matter for the test, but yet that's less confusing ;) |
||
na_value = np.nan | ||
|
||
@classmethod | ||
def construct_array_type(cls) -> type_t[FloatAttrArray]: | ||
""" | ||
Return the array type associated with this dtype. | ||
|
||
Returns | ||
------- | ||
type | ||
""" | ||
return FloatAttrArray | ||
|
||
|
||
class FloatAttrArray(ExtensionArray): | ||
dtype = FloatAttrDtype() | ||
__array_priority__ = 1000 | ||
|
||
def __init__(self, values, attr=None) -> None: | ||
if not isinstance(values, np.ndarray): | ||
raise TypeError("Need to pass a numpy array of float64 dtype as values") | ||
if not values.dtype == "float64": | ||
raise TypeError("Need to pass a numpy array of float64 dtype as values") | ||
self.data = values | ||
self.attr = attr | ||
|
||
@classmethod | ||
def _from_sequence(cls, scalars, dtype=None, copy=False): | ||
data = np.array(scalars, dtype="float64", copy=copy) | ||
return cls(data) | ||
|
||
def __getitem__(self, item): | ||
if isinstance(item, numbers.Integral): | ||
return self.data[item] | ||
else: | ||
# slice, list-like, mask | ||
item = pd.api.indexers.check_array_indexer(self, item) | ||
return type(self)(self.data[item], self.attr) | ||
|
||
def __len__(self) -> int: | ||
return len(self.data) | ||
|
||
def isna(self): | ||
return np.isnan(self.data) | ||
|
||
def take(self, indexer, allow_fill=False, fill_value=None): | ||
from pandas.api.extensions import take | ||
|
||
data = self.data | ||
if allow_fill and fill_value is None: | ||
fill_value = self.dtype.na_value | ||
|
||
result = take(data, indexer, fill_value=fill_value, allow_fill=allow_fill) | ||
return type(self)(result, self.attr) | ||
|
||
def copy(self): | ||
return type(self)(self.data.copy(), self.attr) | ||
simonjayhawkins marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
@classmethod | ||
def _concat_same_type(cls, to_concat): | ||
data = np.concatenate([x.data for x in to_concat]) | ||
attr = to_concat[0].attr if len(to_concat) else None | ||
return cls(data, attr) | ||
|
||
|
||
def make_data(): | ||
return np.arange(100, dtype="float64") |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import numpy as np | ||
import pytest | ||
|
||
import pandas as pd | ||
import pandas._testing as tm | ||
from pandas.tests.extension.array_with_attr import ( | ||
FloatAttrArray, | ||
FloatAttrDtype, | ||
make_data, | ||
) | ||
|
||
|
||
@pytest.fixture | ||
def dtype(): | ||
return FloatAttrDtype() | ||
|
||
|
||
@pytest.fixture | ||
def data(): | ||
return FloatAttrArray(make_data()) | ||
|
||
|
||
def test_concat_with_all_na(data): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we don't use the fixture |
||
# https://github.com/pandas-dev/pandas/issues/28840 | ||
# | ||
arr = FloatAttrArray(np.array([np.nan, np.nan], dtype="float64"), attr="test") | ||
df1 = pd.DataFrame({"col": arr, "key": [0, 1]}) | ||
df2 = pd.DataFrame({"key": [0, 1], "col2": [1, 2]}) | ||
|
||
result = pd.merge(df1, df2, on="key") | ||
expected = pd.DataFrame({"col": arr, "key": [0, 1], "col2": [1, 2]}) | ||
tm.assert_frame_equal(result, expected) | ||
assert result["col"].array.attr == "test" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this didn't need to change? (just a leftover from the code move?)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, good catch