Skip to content

[Python] test_extension_to_pandas_storage_type fails with NotImplementedError: extension<test.period<PeriodTypeWithToPandasDtype>> #35321

@raulcd

Description

@raulcd

Describe the bug, including details regarding any error messages, version, and platform.

The pandas nightly tests and release verification jobs are failing.

This is due to the following tests failing:

test_extension_to_pandas_storage_type[registered_period_type0]
test_extension_to_pandas_storage_type[registered_period_type1]
test_extension_to_pandas_storage_type[registered_period_type2]

This started happening after pandas 2.0.1 was released: https://pypi.org/project/pandas/#history

The full error:

=================================== FAILURES ===================================
________ test_extension_to_pandas_storage_type[registered_period_type0] ________

registered_period_type = (PeriodType(DataType(int64)), <class 'pyarrow.lib.ExtensionArray'>)

    @pytest.mark.pandas
    def test_extension_to_pandas_storage_type(registered_period_type):
        period_type, _ = registered_period_type
        np_arr = np.array([1, 2, 3, 4], dtype='i8')
        storage = pa.array([1, 2, 3, 4], pa.int64())
        arr = pa.ExtensionArray.from_storage(period_type, storage)
    
        if isinstance(period_type, PeriodTypeWithToPandasDtype):
            pandas_dtype = period_type.to_pandas_dtype()
        else:
            pandas_dtype = np_arr.dtype
    
        # Test arrays
        result = arr.to_pandas()
        assert result.dtype == pandas_dtype
    
        # Test chunked arrays
        chunked_arr = pa.chunked_array([arr])
        result = chunked_arr.to_numpy()
        assert result.dtype == np_arr.dtype
    
        result = chunked_arr.to_pandas()
        assert result.dtype == pandas_dtype
    
        # Test Table.to_pandas
        data = [
            pa.array([1, 2, 3, 4]),
            pa.array(['foo', 'bar', None, None]),
            pa.array([True, None, True, False]),
            arr
        ]
        my_schema = pa.schema([('f0', pa.int8()),
                               ('f1', pa.string()),
                               ('f2', pa.bool_()),
                               ('ext', period_type)])
        table = pa.Table.from_arrays(data, schema=my_schema)
        result = table.to_pandas()
        assert result["ext"].dtype == pandas_dtype
    
        import pandas as pd
        if Version(pd.__version__) > Version("2.0.0"):
    
            # Check the usage of types_mapper
>           result = table.to_pandas(types_mapper=pd.ArrowDtype)

opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/tests/test_extension_type.py:1302: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
pyarrow/array.pxi:852: in pyarrow.lib._PandasConvertible.to_pandas
    ???
pyarrow/table.pxi:4114: in pyarrow.lib.Table._to_pandas
    ???
opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:820: in table_to_blockmanager
    blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:1170: in _table_to_blocks
    return [_reconstruct_block(item, columns, extension_columns)
opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:1170: in <listcomp>
    return [_reconstruct_block(item, columns, extension_columns)
opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:781: in _reconstruct_block
    block = _int.make_block(pd_ext_arr, placement=placement)
opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/internals/api.py:73: in make_block
    if is_datetime64tz_dtype(values.dtype) or is_period_dtype(values.dtype):
opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/dtypes/common.py:415: in is_period_dtype
    return arr_or_dtype.type is Period
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = extension<test.period<PeriodType>>[pyarrow]

    @property
    def type(self):
        """
        Returns associated scalar type.
        """
        pa_type = self.pyarrow_dtype
        if pa.types.is_integer(pa_type):
            return int
        elif pa.types.is_floating(pa_type):
            return float
        elif pa.types.is_string(pa_type) or pa.types.is_large_string(pa_type):
            return str
        elif (
            pa.types.is_binary(pa_type)
            or pa.types.is_fixed_size_binary(pa_type)
            or pa.types.is_large_binary(pa_type)
        ):
            return bytes
        elif pa.types.is_boolean(pa_type):
            return bool
        elif pa.types.is_duration(pa_type):
            if pa_type.unit == "ns":
                return Timedelta
            else:
                return timedelta
        elif pa.types.is_timestamp(pa_type):
            if pa_type.unit == "ns":
                return Timestamp
            else:
                return datetime
        elif pa.types.is_date(pa_type):
            return date
        elif pa.types.is_time(pa_type):
            return time
        elif pa.types.is_decimal(pa_type):
            return Decimal
        elif pa.types.is_dictionary(pa_type):
            # TODO: Potentially change this & CategoricalDtype.type to
            #  something more representative of the scalar
            return CategoricalDtypeType
        elif pa.types.is_list(pa_type) or pa.types.is_large_list(pa_type):
            return list
        elif pa.types.is_map(pa_type):
            return dict
        elif pa.types.is_null(pa_type):
            # TODO: None? pd.NA? pa.null?
            return type(pa_type)
        else:
>           raise NotImplementedError(pa_type)
E           NotImplementedError: extension<test.period<PeriodType>>

opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/arrays/arrow/dtype.py:148: NotImplementedError
________ test_extension_to_pandas_storage_type[registered_period_type1] ________

registered_period_type = (PeriodTypeWithClass(DataType(int64)), <class 'pyarrow.tests.test_extension_type.PeriodArray'>)

    @pytest.mark.pandas
    def test_extension_to_pandas_storage_type(registered_period_type):
        period_type, _ = registered_period_type
        np_arr = np.array([1, 2, 3, 4], dtype='i8')
        storage = pa.array([1, 2, 3, 4], pa.int64())
        arr = pa.ExtensionArray.from_storage(period_type, storage)
    
        if isinstance(period_type, PeriodTypeWithToPandasDtype):
            pandas_dtype = period_type.to_pandas_dtype()
        else:
            pandas_dtype = np_arr.dtype
    
        # Test arrays
        result = arr.to_pandas()
        assert result.dtype == pandas_dtype
    
        # Test chunked arrays
        chunked_arr = pa.chunked_array([arr])
        result = chunked_arr.to_numpy()
        assert result.dtype == np_arr.dtype
    
        result = chunked_arr.to_pandas()
        assert result.dtype == pandas_dtype
    
        # Test Table.to_pandas
        data = [
            pa.array([1, 2, 3, 4]),
            pa.array(['foo', 'bar', None, None]),
            pa.array([True, None, True, False]),
            arr
        ]
        my_schema = pa.schema([('f0', pa.int8()),
                               ('f1', pa.string()),
                               ('f2', pa.bool_()),
                               ('ext', period_type)])
        table = pa.Table.from_arrays(data, schema=my_schema)
        result = table.to_pandas()
        assert result["ext"].dtype == pandas_dtype
    
        import pandas as pd
        if Version(pd.__version__) > Version("2.0.0"):
    
            # Check the usage of types_mapper
>           result = table.to_pandas(types_mapper=pd.ArrowDtype)

opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/tests/test_extension_type.py:1302: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
pyarrow/array.pxi:852: in pyarrow.lib._PandasConvertible.to_pandas
    ???
pyarrow/table.pxi:4114: in pyarrow.lib.Table._to_pandas
    ???
opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:820: in table_to_blockmanager
    blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:1170: in _table_to_blocks
    return [_reconstruct_block(item, columns, extension_columns)
opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:1170: in <listcomp>
    return [_reconstruct_block(item, columns, extension_columns)
opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:781: in _reconstruct_block
    block = _int.make_block(pd_ext_arr, placement=placement)
opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/internals/api.py:73: in make_block
    if is_datetime64tz_dtype(values.dtype) or is_period_dtype(values.dtype):
opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/dtypes/common.py:415: in is_period_dtype
    return arr_or_dtype.type is Period
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = extension<test.period<PeriodTypeWithClass>>[pyarrow]

    @property
    def type(self):
        """
        Returns associated scalar type.
        """
        pa_type = self.pyarrow_dtype
        if pa.types.is_integer(pa_type):
            return int
        elif pa.types.is_floating(pa_type):
            return float
        elif pa.types.is_string(pa_type) or pa.types.is_large_string(pa_type):
            return str
        elif (
            pa.types.is_binary(pa_type)
            or pa.types.is_fixed_size_binary(pa_type)
            or pa.types.is_large_binary(pa_type)
        ):
            return bytes
        elif pa.types.is_boolean(pa_type):
            return bool
        elif pa.types.is_duration(pa_type):
            if pa_type.unit == "ns":
                return Timedelta
            else:
                return timedelta
        elif pa.types.is_timestamp(pa_type):
            if pa_type.unit == "ns":
                return Timestamp
            else:
                return datetime
        elif pa.types.is_date(pa_type):
            return date
        elif pa.types.is_time(pa_type):
            return time
        elif pa.types.is_decimal(pa_type):
            return Decimal
        elif pa.types.is_dictionary(pa_type):
            # TODO: Potentially change this & CategoricalDtype.type to
            #  something more representative of the scalar
            return CategoricalDtypeType
        elif pa.types.is_list(pa_type) or pa.types.is_large_list(pa_type):
            return list
        elif pa.types.is_map(pa_type):
            return dict
        elif pa.types.is_null(pa_type):
            # TODO: None? pd.NA? pa.null?
            return type(pa_type)
        else:
>           raise NotImplementedError(pa_type)
E           NotImplementedError: extension<test.period<PeriodTypeWithClass>>

opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/arrays/arrow/dtype.py:148: NotImplementedError
________ test_extension_to_pandas_storage_type[registered_period_type2] ________

registered_period_type = (PeriodTypeWithToPandasDtype(DataType(int64)), <class 'pyarrow.lib.ExtensionArray'>)

    @pytest.mark.pandas
    def test_extension_to_pandas_storage_type(registered_period_type):
        period_type, _ = registered_period_type
        np_arr = np.array([1, 2, 3, 4], dtype='i8')
        storage = pa.array([1, 2, 3, 4], pa.int64())
        arr = pa.ExtensionArray.from_storage(period_type, storage)
    
        if isinstance(period_type, PeriodTypeWithToPandasDtype):
            pandas_dtype = period_type.to_pandas_dtype()
        else:
            pandas_dtype = np_arr.dtype
    
        # Test arrays
        result = arr.to_pandas()
        assert result.dtype == pandas_dtype
    
        # Test chunked arrays
        chunked_arr = pa.chunked_array([arr])
        result = chunked_arr.to_numpy()
        assert result.dtype == np_arr.dtype
    
        result = chunked_arr.to_pandas()
        assert result.dtype == pandas_dtype
    
        # Test Table.to_pandas
        data = [
            pa.array([1, 2, 3, 4]),
            pa.array(['foo', 'bar', None, None]),
            pa.array([True, None, True, False]),
            arr
        ]
        my_schema = pa.schema([('f0', pa.int8()),
                               ('f1', pa.string()),
                               ('f2', pa.bool_()),
                               ('ext', period_type)])
        table = pa.Table.from_arrays(data, schema=my_schema)
        result = table.to_pandas()
        assert result["ext"].dtype == pandas_dtype
    
        import pandas as pd
        if Version(pd.__version__) > Version("2.0.0"):
    
            # Check the usage of types_mapper
>           result = table.to_pandas(types_mapper=pd.ArrowDtype)

opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/tests/test_extension_type.py:1302: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
pyarrow/array.pxi:852: in pyarrow.lib._PandasConvertible.to_pandas
    ???
pyarrow/table.pxi:4114: in pyarrow.lib.Table._to_pandas
    ???
opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:820: in table_to_blockmanager
    blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:1170: in _table_to_blocks
    return [_reconstruct_block(item, columns, extension_columns)
opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:1170: in <listcomp>
    return [_reconstruct_block(item, columns, extension_columns)
opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:781: in _reconstruct_block
    block = _int.make_block(pd_ext_arr, placement=placement)
opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/internals/api.py:73: in make_block
    if is_datetime64tz_dtype(values.dtype) or is_period_dtype(values.dtype):
opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/dtypes/common.py:415: in is_period_dtype
    return arr_or_dtype.type is Period
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = extension<test.period<PeriodTypeWithToPandasDtype>>[pyarrow]

    @property
    def type(self):
        """
        Returns associated scalar type.
        """
        pa_type = self.pyarrow_dtype
        if pa.types.is_integer(pa_type):
            return int
        elif pa.types.is_floating(pa_type):
            return float
        elif pa.types.is_string(pa_type) or pa.types.is_large_string(pa_type):
            return str
        elif (
            pa.types.is_binary(pa_type)
            or pa.types.is_fixed_size_binary(pa_type)
            or pa.types.is_large_binary(pa_type)
        ):
            return bytes
        elif pa.types.is_boolean(pa_type):
            return bool
        elif pa.types.is_duration(pa_type):
            if pa_type.unit == "ns":
                return Timedelta
            else:
                return timedelta
        elif pa.types.is_timestamp(pa_type):
            if pa_type.unit == "ns":
                return Timestamp
            else:
                return datetime
        elif pa.types.is_date(pa_type):
            return date
        elif pa.types.is_time(pa_type):
            return time
        elif pa.types.is_decimal(pa_type):
            return Decimal
        elif pa.types.is_dictionary(pa_type):
            # TODO: Potentially change this & CategoricalDtype.type to
            #  something more representative of the scalar
            return CategoricalDtypeType
        elif pa.types.is_list(pa_type) or pa.types.is_large_list(pa_type):
            return list
        elif pa.types.is_map(pa_type):
            return dict
        elif pa.types.is_null(pa_type):
            # TODO: None? pd.NA? pa.null?
            return type(pa_type)
        else:
>           raise NotImplementedError(pa_type)
E           NotImplementedError: extension<test.period<PeriodTypeWithToPandasDtype>>

opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/arrays/arrow/dtype.py:148: NotImplementedError

Component(s)

Python

Metadata

Metadata

Type

No type

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions