Skip to content

BUG/ENH: cleanup for Timestamp arithmetic #8916

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
shoyer wants to merge 4 commits (source and target branch names were not captured)
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
WIP: terrible hack to attempt to fix dt comparisons
  • Loading branch information
shoyer committed Dec 6, 2014
commit ef97b8dc8f75f5c6d88eec3a5d7606810e43b35e
31 changes: 19 additions & 12 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5004,32 +5004,39 @@ def check(df,df2):

df = DataFrame(np.random.randint(10, size=(10, 2)), columns=['a', 'b'])
df2 = DataFrame({'a': date_range('20010101', periods=len(df)), 'b': date_range('20100101', periods=len(df))})
check(df,df2)
check(df, df2)
# check(df, pd.Timestamp('2000-01-01'))
# check(df2, 123)

def test_timestamp_compare(self):
# make sure we can compare Timestamps on the right AND left hand side
# GH4982
df = DataFrame({'dates1': date_range('20010101', periods=10),
'dates2': date_range('20010102', periods=10),
'intcol': np.random.randint(1000000000, size=10),
'floatcol': np.random.randn(10),
'stringcol': list(tm.rands(10))})
df.loc[np.random.rand(len(df)) > 0.5, 'dates2'] = pd.NaT
'dates2': date_range('20010101', periods=10)})
df.loc[::2, 'dates2'] = pd.NaT
ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq',
'ne': 'ne'}
for left, right in ops.items():
left_f = getattr(operator, left)
right_f = getattr(operator, right)

nat_cmp_value = True if left != 'ne' else False

# no nats
expected = left_f(df, Timestamp('20010109'))
result = right_f(Timestamp('20010109'), df)
tm.assert_frame_equal(result, expected)
ts = Timestamp('20010109')
expected = DataFrame(left_f(df.values, ts), columns=df.columns)
left_result = left_f(df, ts)
right_result = right_f(ts, df)
tm.assert_frame_equal(left_result, expected)
tm.assert_frame_equal(right_result, expected)

# nats
expected = left_f(df, Timestamp('nat'))
result = right_f(Timestamp('nat'), df)
tm.assert_frame_equal(result, expected)
values = (np.zeros if left != 'ne' else np.ones)((10, 2), bool)
expected = DataFrame(values, columns=df.columns)
left_result = left_f(df, Timestamp('nat'))
right_result = right_f(Timestamp('nat'), df)
tm.assert_frame_equal(left_result, expected)
tm.assert_frame_equal(right_result, expected)

def test_modulo(self):

Expand Down
5 changes: 5 additions & 0 deletions pandas/tseries/tests/test_tslib.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,11 @@ def test_ops_ndarray(self):
if LooseVersion(np.__version__) >= '1.8':
self.assert_numpy_array_equal(other - ts, -expected)

tsz = Timestamp('2000-01-01', tz='EST')
self.assertRaises(ValueError, lambda: ts > tsz)
self.assertRaises(ValueError,
lambda: pd.to_datetime(['2000-01-02']).values > tsz)

def test_ops_notimplemented(self):
class Other:
pass
Expand Down
44 changes: 30 additions & 14 deletions pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -724,8 +724,6 @@ _reverse_ops[Py_GT] = Py_LT
_reverse_ops[Py_GE] = Py_LE


cdef str _NDIM_STRING = "ndim"

# This is PITA. Because we inherit from datetime, which has very specific
# construction requirements, we need to do object instantiation in python
# (see Timestamp class above). This will serve as a C extension type that
Expand All @@ -746,10 +744,12 @@ cdef class _Timestamp(datetime):
int ndim

if isinstance(other, _Timestamp):
# print '_timestamp'
if isinstance(other, _NaT):
return _cmp_nat_dt(other, self, _reverse_ops[op])
ots = other
elif isinstance(other, datetime):
# print 'datetime'
if self.nanosecond == 0:
val = self.to_datetime()
return PyObject_RichCompareBool(val, other, op)
Expand All @@ -759,17 +759,33 @@ cdef class _Timestamp(datetime):
except ValueError:
return self._compare_outside_nanorange(other, op)
elif isinstance(other, np.datetime64):
return PyObject_RichCompareBool(self, Timestamp(other), op)
# print 'convert dt64'
return PyObject_RichCompare(self, Timestamp(other), op)
elif hasattr(other, 'dtype'):
# print 'dtype', type(other), other.dtype, other
if self.tz is None and self.offset is None:
# allow comparison to ndarrays; use the reverse op because it's
# necessary when comparing to pd.Series
return PyObject_RichCompare(other, self.to_datetime64(),
_reverse_ops[op])
# TODO: somehow trigger normal numpy broadcasting rules even though
# we set __array_priority__ > ndarray.__array_priority__
return NotImplemented
# This terrible hack lets us invoke normal numpy broadcasting rules
# even though we set __array_priority__ >
# ndarray.__array_priority__ (for the benefit of arithmetic)
# return NotImplemented
elif self.__array_priority__ == 0:
# print 'priority == 0'
return NotImplemented
else:
# print 'priority set to 0'
# print(self.__array_priority__)
new_obj = Timestamp(self.value, self.offset, self.tz)
new_obj.__array_priority__ = 0
new_obj._allow_cmp_int_dtype = True
return PyObject_RichCompare(other, new_obj, _reverse_ops[op])
elif hasattr(self, '_allow_cmp_int_dtype') and isinstance(other, long):
ots = other = Timestamp(other)
else:
# print 'not implemented', type(other), other, self.__array_priority__
return NotImplemented

self._assert_tzawareness_compat(other)
Expand Down Expand Up @@ -917,7 +933,7 @@ cdef class _NaT(_Timestamp):
return hash(self.value)

# less than np.ndarray
__array_priority__ = -1
__array_priority__ = 0

def __richcmp__(_NaT self, object other, int op):
cdef int ndim = getattr(other, 'ndim', -1)
Expand Down Expand Up @@ -1519,19 +1535,19 @@ cdef class _Timedelta(timedelta):
_Timedelta ots
int ndim

if isinstance(other, _Timedelta):
if isinstance(other, _NaT):
return _cmp_nat_dt(other, self, _reverse_ops[op])
if isinstance(other, _NaT):
return NotImplemented
elif isinstance(other, _Timedelta):
ots = other
elif isinstance(other, timedelta):
ots = Timedelta(other)
elif isinstance(other, np.timedelta64):
elif isinstance(other, (timedelta, np.timedelta64)):
return PyObject_RichCompareBool(self, Timedelta(other), op)
elif hasattr(other, 'dtype'):
# allow comparison to ndarrays; use the reverse op because it's
# necessary when comparing to pd.Series
return PyObject_RichCompare(other, self.to_datetime64(),
return PyObject_RichCompare(other, self.to_timedelta64(),
_reverse_ops[op])
else:
return NotImplemented
return _cmp_scalar(self.value, ots.value, op)

def _ensure_components(_Timedelta self):
Expand Down