diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index a0205a8d64cb7..6fe15133914da 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -202,6 +202,7 @@ Other Enhancements - ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method. Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`). - :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`). +- ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) .. _whatsnew_0230.api_breaking: diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 2ec35889d6a7a..1eb87aa99fd1e 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -710,7 +710,8 @@ def __eq__(self, other): # None should match any subtype return True else: - return self.subtype == other.subtype + from pandas.core.dtypes.common import is_dtype_equal + return is_dtype_equal(self.subtype, other.subtype) @classmethod def is_dtype(cls, dtype): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index baf80173d7362..58b1bdb3f55ea 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -20,7 +20,8 @@ is_scalar, is_float, is_number, - is_integer) + is_integer, + pandas_dtype) from pandas.core.indexes.base import ( Index, _ensure_index, default_pprint, _index_shared_docs) @@ -699,8 +700,16 @@ def copy(self, deep=False, name=None): @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True): - if is_interval_dtype(dtype): - return self.copy() if copy else self + dtype = pandas_dtype(dtype) + if is_interval_dtype(dtype) and dtype != self.dtype: + try: + new_left = self.left.astype(dtype.subtype) + new_right = self.right.astype(dtype.subtype) + except TypeError: + msg = ('Cannot convert {dtype} to {new_dtype}; subtypes are ' + 'incompatible') + raise TypeError(msg.format(dtype=self.dtype, new_dtype=dtype)) + return self._shallow_copy(new_left, new_right) return super(IntervalIndex, self).astype(dtype, copy=copy) @cache_readonly diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 692fb3271cfda..d800a7b92b559 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -534,6 +534,12 @@ def test_equality(self): assert not is_dtype_equal(IntervalDtype('int64'), IntervalDtype('float64')) + # invalid subtype comparisons do not raise when directly compared + dtype1 = IntervalDtype('float64') + dtype2 = IntervalDtype('datetime64[ns, US/Eastern]') + assert dtype1 != dtype2 + assert dtype2 != dtype1 + @pytest.mark.parametrize('subtype', [ None, 'interval', 'Interval', 'int64', 'uint64', 'float64', 'complex128', 'datetime64', 'timedelta64', PeriodDtype('Q')]) diff --git a/pandas/tests/indexes/interval/test_astype.py b/pandas/tests/indexes/interval/test_astype.py new file mode 100644 index 0000000000000..b3a4bfa878c3f --- /dev/null +++ b/pandas/tests/indexes/interval/test_astype.py @@ -0,0 +1,209 @@ +from __future__ import division + +import pytest +import numpy as np +from pandas import ( + Index, + IntervalIndex, + interval_range, + CategoricalIndex, + Timestamp, + Timedelta, + NaT) +from pandas.core.dtypes.dtypes import CategoricalDtype, IntervalDtype +import pandas.util.testing as tm + + +class Base(object): + """Tests common to IntervalIndex with any subtype""" + + def test_astype_idempotent(self, index): + result = index.astype('interval') + tm.assert_index_equal(result, index) + + result = index.astype(index.dtype) + tm.assert_index_equal(result, index) + + def test_astype_object(self, index): + result = index.astype(object) + expected = Index(index.values, dtype='object') + tm.assert_index_equal(result, expected) + assert not result.equals(index) + + def test_astype_category(self, index): + result = index.astype('category') + expected = CategoricalIndex(index.values) + tm.assert_index_equal(result, expected) + + result = index.astype(CategoricalDtype()) + tm.assert_index_equal(result, expected) + + # non-default params + categories = index.dropna().unique().values[:-1] + dtype = CategoricalDtype(categories=categories, ordered=True) + result = index.astype(dtype) + expected = CategoricalIndex( + index.values, categories=categories, ordered=True) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('dtype', [ + 'int64', 'uint64', 'float64', 'complex128', 'period[M]', + 'timedelta64', 'timedelta64[ns]', 'datetime64', 'datetime64[ns]', + 'datetime64[ns, US/Eastern]']) + def test_astype_cannot_cast(self, index, dtype): + msg = 'Cannot cast IntervalIndex to dtype' + with tm.assert_raises_regex(TypeError, msg): + index.astype(dtype) + + def test_astype_invalid_dtype(self, index): + msg = 'data type "fake_dtype" not understood' + with tm.assert_raises_regex(TypeError, msg): + index.astype('fake_dtype') + + +class TestIntSubtype(Base): + """Tests specific to IntervalIndex with integer-like subtype""" + + indexes = [ + IntervalIndex.from_breaks(np.arange(-10, 11, dtype='int64')), + IntervalIndex.from_breaks( + np.arange(100, dtype='uint64'), closed='left'), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize('subtype', [ + 'float64', 'datetime64[ns]', 'timedelta64[ns]']) + def test_subtype_conversion(self, index, subtype): + dtype = IntervalDtype(subtype) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays(index.left.astype(subtype), + index.right.astype(subtype), + closed=index.closed) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('subtype_start, subtype_end', [ + ('int64', 'uint64'), ('uint64', 'int64')]) + def test_subtype_integer(self, subtype_start, subtype_end): + index = IntervalIndex.from_breaks(np.arange(100, dtype=subtype_start)) + dtype = IntervalDtype(subtype_end) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays(index.left.astype(subtype_end), + index.right.astype(subtype_end), + closed=index.closed) + tm.assert_index_equal(result, expected) + + @pytest.mark.xfail(reason='GH 15832') + def test_subtype_integer_errors(self): + # int64 -> uint64 fails with negative values + index = interval_range(-10, 10) + dtype = IntervalDtype('uint64') + with pytest.raises(ValueError): + index.astype(dtype) + + +class TestFloatSubtype(Base): + """Tests specific to IntervalIndex with float subtype""" + + indexes = [ + interval_range(-10.0, 10.0, closed='neither'), + IntervalIndex.from_arrays([-1.5, np.nan, 0., 0., 1.5], + [-0.5, np.nan, 1., 1., 3.], + closed='both'), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize('subtype', ['int64', 'uint64']) + def test_subtype_integer(self, subtype): + index = interval_range(0.0, 10.0) + dtype = IntervalDtype(subtype) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays(index.left.astype(subtype), + index.right.astype(subtype), + closed=index.closed) + tm.assert_index_equal(result, expected) + + # raises with NA + msg = 'Cannot convert NA to integer' + with tm.assert_raises_regex(ValueError, msg): + index.insert(0, np.nan).astype(dtype) + + @pytest.mark.xfail(reason='GH 15832') + def test_subtype_integer_errors(self): + # float64 -> uint64 fails with negative values + index = interval_range(-10.0, 10.0) + dtype = IntervalDtype('uint64') + with pytest.raises(ValueError): + index.astype(dtype) + + # float64 -> integer-like fails with non-integer valued floats + index = interval_range(0.0, 10.0, freq=0.25) + dtype = IntervalDtype('int64') + with pytest.raises(ValueError): + index.astype(dtype) + + dtype = IntervalDtype('uint64') + with pytest.raises(ValueError): + index.astype(dtype) + + @pytest.mark.parametrize('subtype', ['datetime64[ns]', 'timedelta64[ns]']) + def test_subtype_datetimelike(self, index, subtype): + dtype = IntervalDtype(subtype) + msg = 'Cannot convert .* to .*; subtypes are incompatible' + with tm.assert_raises_regex(TypeError, msg): + index.astype(dtype) + + +class TestDatetimelikeSubtype(Base): + """Tests specific to IntervalIndex with datetime-like subtype""" + + indexes = [ + interval_range(Timestamp('2018-01-01'), periods=10, closed='neither'), + interval_range(Timestamp('2018-01-01'), periods=10).insert(2, NaT), + interval_range(Timestamp('2018-01-01', tz='US/Eastern'), periods=10), + interval_range(Timedelta('0 days'), periods=10, closed='both'), + interval_range(Timedelta('0 days'), periods=10).insert(2, NaT), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize('subtype', ['int64', 'uint64']) + def test_subtype_integer(self, index, subtype): + dtype = IntervalDtype(subtype) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays(index.left.astype(subtype), + index.right.astype(subtype), + closed=index.closed) + tm.assert_index_equal(result, expected) + + def test_subtype_float(self, index): + dtype = IntervalDtype('float64') + msg = 'Cannot convert .* to .*; subtypes are incompatible' + with tm.assert_raises_regex(TypeError, msg): + index.astype(dtype) + + def test_subtype_datetimelike(self): + # datetime -> timedelta raises + dtype = IntervalDtype('timedelta64[ns]') + msg = 'Cannot convert .* to .*; subtypes are incompatible' + + index = interval_range(Timestamp('2018-01-01'), periods=10) + with tm.assert_raises_regex(TypeError, msg): + index.astype(dtype) + + index = interval_range(Timestamp('2018-01-01', tz='CET'), periods=10) + with tm.assert_raises_regex(TypeError, msg): + index.astype(dtype) + + # timedelta -> datetime raises + dtype = IntervalDtype('datetime64[ns]') + index = interval_range(Timedelta('0 days'), periods=10) + with tm.assert_raises_regex(TypeError, msg): + index.astype(dtype) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index b6d49c9e7ba19..9895ee06a22c0 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -415,26 +415,6 @@ def test_equals(self, closed): np.arange(5), closed=other_closed) assert not expected.equals(expected_other_closed) - def test_astype(self, closed): - idx = self.create_index(closed=closed) - result = idx.astype(object) - tm.assert_index_equal(result, Index(idx.values, dtype='object')) - assert not idx.equals(result) - assert idx.equals(IntervalIndex.from_intervals(result)) - - result = idx.astype('interval') - tm.assert_index_equal(result, idx) - assert result.equals(idx) - - @pytest.mark.parametrize('dtype', [ - np.int64, np.float64, 'period[M]', 'timedelta64', 'datetime64[ns]', - 'datetime64[ns, US/Eastern]']) - def test_astype_errors(self, closed, dtype): - idx = self.create_index(closed=closed) - msg = 'Cannot cast IntervalIndex to dtype' - with tm.assert_raises_regex(TypeError, msg): - idx.astype(dtype) - @pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series]) def test_where(self, closed, klass): idx = self.create_index(closed=closed)