Description
from pandas.api.extensions import ExtensionDtype,ExtensionArray, ExtensionScalarOpsMixin
class FP:
    """Minimal placeholder scalar used as the element type of the example array.

    Instances carry no state; the class exists only so the extension dtype
    has a scalar type to point at.
    """

    def __add__(self, other):
        """Return a new instance as the result of ``self + other``.

        The operand is ignored because instances hold no data.  Evaluating
        ``self + other`` here would call this method again and never
        terminate (``RecursionError``), so a fresh instance is built instead.
        """
        return type(self)()
@pd.api.extensions.register_extension_dtype
class FPType(ExtensionDtype):
    """Extension dtype registered under the name ``'fp'`` with ``FP`` scalars."""

    name = 'fp'
    type = FP

    @classmethod
    def construct_from_string(cls, string):
        """Build the dtype from its string alias.

        Returns a new instance when ``string`` equals ``cls.name``; any other
        value raises ``TypeError``.
        """
        # Guard clause: reject everything that is not exactly our alias.
        if string != cls.name:
            template = "Cannot construct a '{}' from '{}'"
            raise TypeError(template.format(cls, string))
        return cls()
# collections.abc.Sequence -> data is sequence, _reduce is possible
class FPArray(ExtensionArray, ExtensionScalarOpsMixin, collections.abc.Sequence):
    """Illustrative extension array that stores its elements in a NumPy array.

    The values live in ``self.data``.  The class also derives from
    ``collections.abc.Sequence`` so that ``isinstance(arr, Sequence)`` holds.
    """

    _dtype = FPType()

    @property
    def dtype(self):
        """Return the shared ``FPType`` instance describing this array."""
        return self._dtype

    def __init__(self, values):
        """Store ``values`` as a NumPy array.

        ``np.asarray`` is used so an input that is already an ndarray is
        kept as is rather than copied.
        """
        self.data = np.asarray(values)

    def __len__(self):
        """Return the number of stored elements."""
        return len(self.data)

    def __getitem__(self, item):
        """Return one element, or a new ``FPArray`` for a multi-element selection.

        Indexing the backing array with a slice, mask or index list yields an
        ndarray; that is wrapped again so callers keep the extension type.
        """
        result = self.data[item]
        if isinstance(result, np.ndarray):
            return type(self)(result)
        return result

    def take(self, indexer, allow_fill=False, fill_value=None):
        """Return a new ``FPArray`` with the elements at positions ``indexer``.

        Parameters
        ----------
        indexer : sequence of int
            Positions to take.
        allow_fill : bool, default False
            When True, ``-1`` in ``indexer`` marks a missing value that is
            filled with ``fill_value``; when False, negative positions count
            from the end as in NumPy.
        fill_value : any, optional
            Fill value for missing positions; defaults to ``dtype.na_value``.
        """
        # Local import keeps the block self-contained.
        from pandas.api.extensions import take

        if allow_fill and fill_value is None:
            fill_value = self.dtype.na_value
        result = take(self.data, indexer, allow_fill=allow_fill,
                      fill_value=fill_value)
        return type(self)(result)

    def __contains__(self, item):
        """Delegate the membership test to the backing NumPy array."""
        return item in self.data

    def __iter__(self):
        """Yield the stored elements in order."""
        yield from self.data

    def __reversed__(self):
        """Return an iterator over the stored elements in reverse order."""
        return reversed(self.data)

    def _reduce(self, name, skipna=True, **kwargs):
        """Illustrative reduction hook.

        ``name``, ``skipna`` and ``kwargs`` are ignored: every reduction
        returns the sum of the elements after adding 100 to each.
        """
        return sum(self.data+100)
The ExtensionArray itself is only for illustration and is not the problem. The problematic part is:
# Reproduction: build a four-element FPArray and pass it straight to DataFrame.
arr = FPArray([FP() for _ in range(4)])
df = pd.DataFrame(arr)
When execution reaches line 444 of pandas/core/frame.py (https://github.com/pandas-dev/pandas/blob/master/pandas/core/frame.py#L444):
if not isinstance(data, abc.Sequence):
data = list(data)
Here `data` refers to my FPArray. If `data` is not an `abc.Sequence`, my ExtensionArray is cast to a list.
Once my ExtensionArray has been cast to a list, it eventually ends up as a normal pandas Series.
The solution should be to make ExtensionArray a subclass of `abc.Sequence`.
INSTALLED VERSIONS
commit : None
python : 3.7.4.final.0
python-bits : 64
OS : Windows
OS-release : 10
machine : AMD64
processor : Intel64 Family 6 Model 94 Stepping 3, GenuineIntel
byteorder : little
LC_ALL : None
LANG : None
LOCALE : None.None
pandas : 0.25.1
numpy : 1.17.2
pytz : 2019.2
dateutil : 2.8.0
pip : 19.2.3
setuptools : 41.2.0
Cython : None
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : None
IPython : None
pandas_datareader: None
bs4 : None
bottleneck : None
fastparquet : None
gcsfs : None
lxml.etree : None
matplotlib : None
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : None
pytables : None
s3fs : None
scipy : None
sqlalchemy : None
tables : None
xarray : None
xlrd : None
xlwt : None
xlsxwriter : None