From b64e9d5bf17888667bff8f37411d71fd45603891 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 28 May 2018 18:42:54 -0700 Subject: [PATCH] Implement integer array add/sub for datetimelike indexes (#19959) --- doc/source/whatsnew/v0.24.0.txt | 7 +++ pandas/core/indexes/datetimelike.py | 63 ++++++++++++++++--- .../indexes/datetimes/test_arithmetic.py | 48 +++++++++++++- .../tests/indexes/period/test_arithmetic.py | 25 ++++++++ .../indexes/timedeltas/test_arithmetic.py | 39 ++++++++++++ 5 files changed, 170 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 079766f0bc635..e931450cb5c01 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -22,6 +22,13 @@ Other Enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. _whatsnew_0240.api.datetimelike: + +Datetimelike API Changes +^^^^^^^^^^^^^^^^^^^^^^^^ + +- For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with non-``None`` ``freq`` attribute, addition or subtraction of integer-dtyped array or ``Index`` will return an object of the same class (:issue:`19959`) + .. _whatsnew_0240.api.other: Other API Changes diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 158b272384ae8..c7cb245263df8 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -12,7 +12,7 @@ import numpy as np -from pandas._libs import lib, iNaT, NaT +from pandas._libs import lib, iNaT, NaT, Timedelta from pandas._libs.tslibs.period import Period from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds from pandas._libs.tslibs.timestamps import round_ns @@ -34,6 +34,7 @@ is_string_dtype, is_datetime64_dtype, is_datetime64tz_dtype, + is_datetime64_any_dtype, is_period_dtype, is_timedelta64_dtype) from pandas.core.dtypes.generic import ( @@ -814,6 +815,46 @@ def _addsub_offset_array(self, other, op): kwargs['freq'] = 'infer' return self._constructor(res_values, **kwargs) + def _addsub_int_array(self, other, op): + """ + Add or subtract array-like of integers equivalent to applying + `shift` pointwise. + + Parameters + ---------- + other : Index, np.ndarray + integer-dtype + op : {operator.add, operator.sub} + + Returns + ------- + result : same class as self + """ + assert op in [operator.add, operator.sub] + if is_period_dtype(self): + # easy case for PeriodIndex + if op is operator.sub: + other = -other + res_values = checked_add_with_arr(self.asi8, other, + arr_mask=self._isnan) + res_values = res_values.view('i8') + res_values[self._isnan] = iNaT + return self._from_ordinals(res_values, freq=self.freq) + + elif self.freq is None: + # GH#19123 + raise NullFrequencyError("Cannot shift with no freq") + + elif isinstance(self.freq, Tick): + # easy case where we can convert to timedelta64 operation + td = Timedelta(self.freq) + return op(self, td * other) + + # We should only get here with DatetimeIndex; dispatch + # to _addsub_offset_array + assert not is_timedelta64_dtype(self) + return op(self, np.array(other) * self.freq) + @classmethod def _add_datetimelike_methods(cls): """ @@ -822,8 +863,6 @@ def _add_datetimelike_methods(cls): """ def __add__(self, other): - from pandas import DateOffset - other = lib.item_from_zerodim(other) if isinstance(other, (ABCSeries, ABCDataFrame)): return NotImplemented @@ -853,9 +892,8 @@ def __add__(self, other): elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): # DatetimeIndex, ndarray[datetime64] return self._add_datelike(other) - elif is_integer_dtype(other) and self.freq is None: - # GH#19123 - raise NullFrequencyError("Cannot shift with no freq") + elif is_integer_dtype(other): + result = self._addsub_int_array(other, operator.add) elif is_float_dtype(other): # Explicitly catch invalid dtypes raise TypeError("cannot add {dtype}-dtype to {cls}" @@ -915,14 +953,12 @@ def __sub__(self, other): elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): # DatetimeIndex, ndarray[datetime64] result = self._sub_datelike(other) + elif is_integer_dtype(other): + result = self._addsub_int_array(other, operator.sub) elif isinstance(other, Index): raise TypeError("cannot subtract {cls} and {typ}" .format(cls=type(self).__name__, typ=type(other).__name__)) - elif is_integer_dtype(other) and self.freq is None: - # GH#19123 - raise NullFrequencyError("Cannot shift with no freq") - elif is_float_dtype(other): # Explicitly catch invalid dtypes raise TypeError("cannot subtract {dtype}-dtype from {cls}" @@ -948,6 +984,13 @@ def __rsub__(self, other): # we need to wrap in DatetimeIndex and flip the operation from pandas import DatetimeIndex return DatetimeIndex(other) - self + elif (is_datetime64_any_dtype(self) and hasattr(other, 'dtype') and + not is_datetime64_any_dtype(other)): + # GH#19959 datetime - datetime is well-defined as timedelta, + # but any other type - datetime is not well-defined. + raise TypeError("cannot subtract {cls} from {typ}" + .format(cls=type(self).__name__, + typ=type(other).__name__)) return -(self - other) cls.__rsub__ = __rsub__ diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py index 4ef7997a53b85..eff2872a1cff3 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -367,6 +367,49 @@ def test_dti_isub_int(self, tz, one): rng -= one tm.assert_index_equal(rng, expected) + # ------------------------------------------------------------- + # __add__/__sub__ with integer arrays + + @pytest.mark.parametrize('freq', ['H', 'D']) + @pytest.mark.parametrize('box', [np.array, pd.Index]) + def test_dti_add_intarray_tick(self, box, freq): + # GH#19959 + dti = pd.date_range('2016-01-01', periods=2, freq=freq) + other = box([4, -1]) + expected = DatetimeIndex([dti[n] + other[n] for n in range(len(dti))]) + result = dti + other + tm.assert_index_equal(result, expected) + result = other + dti + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('freq', ['W', 'M', 'MS', 'Q']) + @pytest.mark.parametrize('box', [np.array, pd.Index]) + def test_dti_add_intarray_non_tick(self, box, freq): + # GH#19959 + dti = pd.date_range('2016-01-01', periods=2, freq=freq) + other = box([4, -1]) + expected = DatetimeIndex([dti[n] + other[n] for n in range(len(dti))]) + with tm.assert_produces_warning(PerformanceWarning): + result = dti + other + tm.assert_index_equal(result, expected) + with tm.assert_produces_warning(PerformanceWarning): + result = other + dti + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('box', [np.array, pd.Index]) + def test_dti_add_intarray_no_freq(self, box): + # GH#19959 + dti = pd.DatetimeIndex(['2016-01-01', 'NaT', '2017-04-05 06:07:08']) + other = box([9, 4, -1]) + with pytest.raises(NullFrequencyError): + dti + other + with pytest.raises(NullFrequencyError): + other + dti + with pytest.raises(NullFrequencyError): + dti - other + with pytest.raises(TypeError): + other - dti + # ------------------------------------------------------------- # DatetimeIndex.shift is used in integer addition @@ -528,7 +571,7 @@ def test_dti_sub_tdi(self, tz): result = dti - tdi.values tm.assert_index_equal(result, expected) - msg = 'cannot perform __neg__ with this index type:' + msg = 'cannot subtract DatetimeIndex from' with tm.assert_raises_regex(TypeError, msg): tdi.values - dti @@ -553,7 +596,8 @@ def test_dti_isub_tdi(self, tz): tm.assert_index_equal(result, expected) msg = '|'.join(['cannot perform __neg__ with this index type:', - 'ufunc subtract cannot use operands with types']) + 'ufunc subtract cannot use operands with types', + 'cannot subtract DatetimeIndex from']) with tm.assert_raises_regex(TypeError, msg): tdi.values -= dti diff --git a/pandas/tests/indexes/period/test_arithmetic.py b/pandas/tests/indexes/period/test_arithmetic.py index c75fdd35a974c..aea019d910fe0 100644 --- a/pandas/tests/indexes/period/test_arithmetic.py +++ b/pandas/tests/indexes/period/test_arithmetic.py @@ -449,6 +449,31 @@ def test_pi_sub_isub_offset(self): rng -= pd.offsets.MonthEnd(5) tm.assert_index_equal(rng, expected) + # --------------------------------------------------------------- + # __add__/__sub__ with integer arrays + + @pytest.mark.parametrize('box', [np.array, pd.Index]) + @pytest.mark.parametrize('op', [operator.add, ops.radd]) + def test_pi_add_intarray(self, box, op): + # GH#19959 + pi = pd.PeriodIndex([pd.Period('2015Q1'), pd.Period('NaT')]) + other = box([4, -1]) + result = op(pi, other) + expected = pd.PeriodIndex([pd.Period('2016Q1'), pd.Period('NaT')]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('box', [np.array, pd.Index]) + def test_pi_sub_intarray(self, box): + # GH#19959 + pi = pd.PeriodIndex([pd.Period('2015Q1'), pd.Period('NaT')]) + other = box([4, -1]) + result = pi - other + expected = pd.PeriodIndex([pd.Period('2014Q1'), pd.Period('NaT')]) + tm.assert_index_equal(result, expected) + + with pytest.raises(TypeError): + other - pi + # --------------------------------------------------------------- # Timedelta-like (timedelta, timedelta64, Timedelta, Tick) # TODO: Some of these are misnomers because of non-Tick DateOffsets diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py index 9035434046ccb..786ff5cde1806 100644 --- a/pandas/tests/indexes/timedeltas/test_arithmetic.py +++ b/pandas/tests/indexes/timedeltas/test_arithmetic.py @@ -542,6 +542,45 @@ def test_tdi_isub_int(self, one): rng -= one tm.assert_index_equal(rng, expected) + # ------------------------------------------------------------- + # __add__/__sub__ with integer arrays + + @pytest.mark.parametrize('box', [np.array, pd.Index]) + def test_tdi_add_integer_array(self, box): + # GH#19959 + rng = timedelta_range('1 days 09:00:00', freq='H', periods=3) + other = box([4, 3, 2]) + expected = TimedeltaIndex(['1 day 13:00:00'] * 3) + result = rng + other + tm.assert_index_equal(result, expected) + result = other + rng + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('box', [np.array, pd.Index]) + def test_tdi_sub_integer_array(self, box): + # GH#19959 + rng = timedelta_range('9H', freq='H', periods=3) + other = box([4, 3, 2]) + expected = TimedeltaIndex(['5H', '7H', '9H']) + result = rng - other + tm.assert_index_equal(result, expected) + result = other - rng + tm.assert_index_equal(result, -expected) + + @pytest.mark.parametrize('box', [np.array, pd.Index]) + def test_tdi_addsub_integer_array_no_freq(self, box): + # GH#19959 + tdi = TimedeltaIndex(['1 Day', 'NaT', '3 Hours']) + other = box([14, -1, 16]) + with pytest.raises(NullFrequencyError): + tdi + other + with pytest.raises(NullFrequencyError): + other + tdi + with pytest.raises(NullFrequencyError): + tdi - other + with pytest.raises(NullFrequencyError): + other - tdi + # ------------------------------------------------------------- # Binary operations TimedeltaIndex and timedelta-like