Skip to content

Commit

Permalink
ENH: Add IntervalDtype support to IntervalIndex.astype (pandas-dev#19231)
Browse files Browse the repository at this point in the history
  • Loading branch information
jschendel authored and jreback committed Jan 14, 2018
1 parent 53be520 commit 787ab55
Show file tree
Hide file tree
Showing 6 changed files with 230 additions and 24 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ Other Enhancements
- ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method.
Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`).
- :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`).
- ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`).

.. _whatsnew_0230.api_breaking:

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,8 @@ def __eq__(self, other):
# None should match any subtype
return True
else:
return self.subtype == other.subtype
from pandas.core.dtypes.common import is_dtype_equal
return is_dtype_equal(self.subtype, other.subtype)

@classmethod
def is_dtype(cls, dtype):
Expand Down
15 changes: 12 additions & 3 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
is_scalar,
is_float,
is_number,
is_integer)
is_integer,
pandas_dtype)
from pandas.core.indexes.base import (
Index, _ensure_index,
default_pprint, _index_shared_docs)
Expand Down Expand Up @@ -699,8 +700,16 @@ def copy(self, deep=False, name=None):

@Appender(_index_shared_docs['astype'])
def astype(self, dtype, copy=True):
    # No docstring here on purpose: the Appender decorator above supplies
    # the shared 'astype' documentation.

    # pandas_dtype is applied first so every check below sees its result
    # rather than the raw argument.
    dtype = pandas_dtype(dtype)

    # Only an interval dtype that differs from the current one needs the
    # endpoint conversion; every other target goes to the parent class.
    subtype_change = is_interval_dtype(dtype) and dtype != self.dtype
    if not subtype_change:
        return super(IntervalIndex, self).astype(dtype, copy=copy)

    try:
        # convert both endpoint indexes to the requested subtype
        left = self.left.astype(dtype.subtype)
        right = self.right.astype(dtype.subtype)
    except TypeError:
        # re-raise with a message that names both interval dtypes
        template = ('Cannot convert {dtype} to {new_dtype}; subtypes are '
                    'incompatible')
        raise TypeError(
            template.format(dtype=self.dtype, new_dtype=dtype))
    return self._shallow_copy(left, right)

@cache_readonly
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/dtypes/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,12 @@ def test_equality(self):
assert not is_dtype_equal(IntervalDtype('int64'),
IntervalDtype('float64'))

# invalid subtype comparisons do not raise when directly compared
dtype1 = IntervalDtype('float64')
dtype2 = IntervalDtype('datetime64[ns, US/Eastern]')
assert dtype1 != dtype2
assert dtype2 != dtype1

@pytest.mark.parametrize('subtype', [
None, 'interval', 'Interval', 'int64', 'uint64', 'float64',
'complex128', 'datetime64', 'timedelta64', PeriodDtype('Q')])
Expand Down
209 changes: 209 additions & 0 deletions pandas/tests/indexes/interval/test_astype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
from __future__ import division

import pytest
import numpy as np
from pandas import (
Index,
IntervalIndex,
interval_range,
CategoricalIndex,
Timestamp,
Timedelta,
NaT)
from pandas.core.dtypes.dtypes import CategoricalDtype, IntervalDtype
import pandas.util.testing as tm


class Base(object):
    """astype checks shared by every IntervalIndex subtype.

    Each test takes an ``index`` argument; the concrete subclasses in this
    module define the fixture that provides it.
    """

    def test_astype_idempotent(self, index):
        # both the generic 'interval' alias and the index's own dtype
        # must give back an index equal to the input
        for target in ('interval', index.dtype):
            same = index.astype(target)
            tm.assert_index_equal(same, index)

    def test_astype_object(self, index):
        as_object = index.astype(object)
        wanted = Index(index.values, dtype='object')
        tm.assert_index_equal(as_object, wanted)
        # the object-dtype result must not compare equal to the source
        assert not as_object.equals(index)

    def test_astype_category(self, index):
        # default categorical dtype, given as a string alias
        by_alias = index.astype('category')
        default_expected = CategoricalIndex(index.values)
        tm.assert_index_equal(by_alias, default_expected)

        # default categorical dtype, given as a dtype instance
        by_instance = index.astype(CategoricalDtype())
        tm.assert_index_equal(by_instance, default_expected)

        # explicit categories (last unique value dropped) and ordering
        cats = index.dropna().unique().values[:-1]
        ordered_dtype = CategoricalDtype(categories=cats, ordered=True)
        by_ordered = index.astype(ordered_dtype)
        ordered_expected = CategoricalIndex(
            index.values, categories=cats, ordered=True)
        tm.assert_index_equal(by_ordered, ordered_expected)

    @pytest.mark.parametrize('dtype', [
        'int64', 'uint64', 'float64', 'complex128', 'period[M]',
        'timedelta64', 'timedelta64[ns]', 'datetime64', 'datetime64[ns]',
        'datetime64[ns, US/Eastern]'])
    def test_astype_cannot_cast(self, index, dtype):
        # non-interval targets listed above must be rejected
        with tm.assert_raises_regex(
                TypeError, 'Cannot cast IntervalIndex to dtype'):
            index.astype(dtype)

    def test_astype_invalid_dtype(self, index):
        # an unknown dtype string must raise
        with tm.assert_raises_regex(
                TypeError, 'data type "fake_dtype" not understood'):
            index.astype('fake_dtype')


class TestIntSubtype(Base):
    """astype checks for IntervalIndex with int64 / uint64 endpoints"""

    indexes = [
        IntervalIndex.from_breaks(np.arange(-10, 11, dtype='int64')),
        IntervalIndex.from_breaks(
            np.arange(100, dtype='uint64'), closed='left'),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        # each parametrized run receives one entry of ``indexes``
        return request.param

    @pytest.mark.parametrize('subtype', [
        'float64', 'datetime64[ns]', 'timedelta64[ns]'])
    def test_subtype_conversion(self, index, subtype):
        converted = index.astype(IntervalDtype(subtype))

        # expected: convert each side separately and reassemble
        left = index.left.astype(subtype)
        right = index.right.astype(subtype)
        wanted = IntervalIndex.from_arrays(
            left, right, closed=index.closed)
        tm.assert_index_equal(converted, wanted)

    @pytest.mark.parametrize('subtype_start, subtype_end', [
        ('int64', 'uint64'), ('uint64', 'int64')])
    def test_subtype_integer(self, subtype_start, subtype_end):
        source = IntervalIndex.from_breaks(
            np.arange(100, dtype=subtype_start))
        converted = source.astype(IntervalDtype(subtype_end))

        left = source.left.astype(subtype_end)
        right = source.right.astype(subtype_end)
        wanted = IntervalIndex.from_arrays(
            left, right, closed=source.closed)
        tm.assert_index_equal(converted, wanted)

    @pytest.mark.xfail(reason='GH 15832')
    def test_subtype_integer_errors(self):
        # negative int64 endpoints should be rejected by a uint64 target
        with_negatives = interval_range(-10, 10)
        unsigned = IntervalDtype('uint64')
        with pytest.raises(ValueError):
            with_negatives.astype(unsigned)


class TestFloatSubtype(Base):
    """astype checks for IntervalIndex with float endpoints"""

    indexes = [
        interval_range(-10.0, 10.0, closed='neither'),
        IntervalIndex.from_arrays([-1.5, np.nan, 0., 0., 1.5],
                                  [-0.5, np.nan, 1., 1., 3.],
                                  closed='both'),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        # each parametrized run receives one entry of ``indexes``
        return request.param

    @pytest.mark.parametrize('subtype', ['int64', 'uint64'])
    def test_subtype_integer(self, subtype):
        floats = interval_range(0.0, 10.0)
        target = IntervalDtype(subtype)
        converted = floats.astype(target)

        # expected: convert each side separately and reassemble
        left = floats.left.astype(subtype)
        right = floats.right.astype(subtype)
        wanted = IntervalIndex.from_arrays(
            left, right, closed=floats.closed)
        tm.assert_index_equal(converted, wanted)

        # an index containing NA must be rejected by an integer target
        with tm.assert_raises_regex(
                ValueError, 'Cannot convert NA to integer'):
            floats.insert(0, np.nan).astype(target)

    @pytest.mark.xfail(reason='GH 15832')
    def test_subtype_integer_errors(self):
        # negative float endpoints should be rejected by a uint64 target
        with_negatives = interval_range(-10.0, 10.0)
        unsigned = IntervalDtype('uint64')
        with pytest.raises(ValueError):
            with_negatives.astype(unsigned)

        # non-integer valued endpoints should be rejected by either
        # integer-like target
        fractional = interval_range(0.0, 10.0, freq=0.25)
        for name in ('int64', 'uint64'):
            target = IntervalDtype(name)
            with pytest.raises(ValueError):
                fractional.astype(target)

    @pytest.mark.parametrize('subtype', ['datetime64[ns]', 'timedelta64[ns]'])
    def test_subtype_datetimelike(self, index, subtype):
        target = IntervalDtype(subtype)
        pattern = 'Cannot convert .* to .*; subtypes are incompatible'
        with tm.assert_raises_regex(TypeError, pattern):
            index.astype(target)


class TestDatetimelikeSubtype(Base):
    """astype checks for IntervalIndex with datetime-like endpoints"""

    indexes = [
        interval_range(Timestamp('2018-01-01'), periods=10, closed='neither'),
        interval_range(Timestamp('2018-01-01'), periods=10).insert(2, NaT),
        interval_range(Timestamp('2018-01-01', tz='US/Eastern'), periods=10),
        interval_range(Timedelta('0 days'), periods=10, closed='both'),
        interval_range(Timedelta('0 days'), periods=10).insert(2, NaT),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        # each parametrized run receives one entry of ``indexes``
        return request.param

    @pytest.mark.parametrize('subtype', ['int64', 'uint64'])
    def test_subtype_integer(self, index, subtype):
        converted = index.astype(IntervalDtype(subtype))

        # expected: convert each side separately and reassemble
        left = index.left.astype(subtype)
        right = index.right.astype(subtype)
        wanted = IntervalIndex.from_arrays(
            left, right, closed=index.closed)
        tm.assert_index_equal(converted, wanted)

    def test_subtype_float(self, index):
        float_dtype = IntervalDtype('float64')
        pattern = 'Cannot convert .* to .*; subtypes are incompatible'
        with tm.assert_raises_regex(TypeError, pattern):
            index.astype(float_dtype)

    def test_subtype_datetimelike(self):
        pattern = 'Cannot convert .* to .*; subtypes are incompatible'

        # datetime endpoints (naive, then tz-aware) -> timedelta subtype
        to_timedelta = IntervalDtype('timedelta64[ns]')

        naive = interval_range(Timestamp('2018-01-01'), periods=10)
        with tm.assert_raises_regex(TypeError, pattern):
            naive.astype(to_timedelta)

        aware = interval_range(Timestamp('2018-01-01', tz='CET'), periods=10)
        with tm.assert_raises_regex(TypeError, pattern):
            aware.astype(to_timedelta)

        # timedelta endpoints -> datetime subtype
        to_datetime = IntervalDtype('datetime64[ns]')
        deltas = interval_range(Timedelta('0 days'), periods=10)
        with tm.assert_raises_regex(TypeError, pattern):
            deltas.astype(to_datetime)
20 changes: 0 additions & 20 deletions pandas/tests/indexes/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,26 +415,6 @@ def test_equals(self, closed):
np.arange(5), closed=other_closed)
assert not expected.equals(expected_other_closed)

def test_astype(self, closed):
    index = self.create_index(closed=closed)

    # object target: equal to an object-dtype Index of the same values,
    # not equal to the source, but convertible back to it
    as_object = index.astype(object)
    tm.assert_index_equal(as_object, Index(index.values, dtype='object'))
    assert not index.equals(as_object)
    assert index.equals(IntervalIndex.from_intervals(as_object))

    # 'interval' target: gives back an index equal to the source
    as_interval = index.astype('interval')
    tm.assert_index_equal(as_interval, index)
    assert as_interval.equals(index)

@pytest.mark.parametrize('dtype', [
    np.int64, np.float64, 'period[M]', 'timedelta64', 'datetime64[ns]',
    'datetime64[ns, US/Eastern]'])
def test_astype_errors(self, closed, dtype):
    # every non-interval target listed above must be rejected
    index = self.create_index(closed=closed)
    with tm.assert_raises_regex(
            TypeError, 'Cannot cast IntervalIndex to dtype'):
        index.astype(dtype)

@pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series])
def test_where(self, closed, klass):
idx = self.create_index(closed=closed)
Expand Down

0 comments on commit 787ab55

Please sign in to comment.