Skip to content

Commit

Permalink
BUG/CLN: datetimelike Index.equals may return True with non-Index
Browse files Browse the repository at this point in the history
closes #13107

Author: sinhrks <sinhrks@gmail.com>

Closes #13986 from sinhrks/dti_equals and squashes the following commits:

580151a [sinhrks] BUG/CLN: move .equals to DatetimeIndexOpsMixin
  • Loading branch information
sinhrks authored and jreback committed Sep 3, 2016
1 parent e9c5c2d commit 4488f18
Show file tree
Hide file tree
Showing 18 changed files with 177 additions and 84 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1461,6 +1461,7 @@ Bug Fixes
- Bug in operations on ``NaT`` returning ``float`` instead of ``datetime64[ns]`` (:issue:`12941`)
- Bug in ``Series`` flexible arithmetic methods (like ``.add()``) raises ``ValueError`` when ``axis=None`` (:issue:`13894`)
- Bug in ``DataFrame.to_csv()`` with ``MultiIndex`` columns in which a stray empty line was added (:issue:`6618`)
- Bug in ``DatetimeIndex.equals()``, ``TimedeltaIndex.equals()`` and ``PeriodIndex.equals()`` where ``True`` could be returned when the input is not an ``Index`` but contains the same values (:issue:`13107`)


- Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`)
Expand Down
11 changes: 9 additions & 2 deletions pandas/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1605,8 +1605,15 @@ def equals(self, other):
if not isinstance(other, Index):
return False

return array_equivalent(_values_from_object(self),
_values_from_object(other))
if is_object_dtype(self) and not is_object_dtype(other):
# if other is not object, use other's logic for coercion
return other.equals(self)

try:
return array_equivalent(_values_from_object(self),
_values_from_object(other))
except:
return False

def identical(self, other):
"""Similar to equals, but check that other comparable attributes are
Expand Down
3 changes: 3 additions & 0 deletions pandas/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,9 @@ def equals(self, other):
if self.is_(other):
return True

if not isinstance(other, Index):
return False

try:
other = self._is_dtype_compat(other)
return array_equivalent(self._data, other)
Expand Down
4 changes: 4 additions & 0 deletions pandas/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1436,6 +1436,7 @@ def reindex(self, target, method=None, level=None, limit=None,
return_indexers=True,
keep_order=False)
else:
target = _ensure_index(target)
if self.equals(target):
indexer = None
else:
Expand Down Expand Up @@ -1984,6 +1985,9 @@ def equals(self, other):
if self.is_(other):
return True

if not isinstance(other, Index):
return False

if not isinstance(other, MultiIndex):
return array_equivalent(self._values,
_values_from_object(_ensure_index(other)))
Expand Down
15 changes: 4 additions & 11 deletions pandas/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pandas.types.common import (is_dtype_equal, pandas_dtype,
is_float_dtype, is_object_dtype,
is_integer_dtype, is_scalar)
from pandas.types.missing import array_equivalent, isnull
from pandas.types.missing import isnull
from pandas.core.common import _values_from_object

from pandas import compat
Expand Down Expand Up @@ -160,16 +160,6 @@ def _convert_scalar_indexer(self, key, kind=None):
return (super(Int64Index, self)
._convert_scalar_indexer(key, kind=kind))

def equals(self, other):
    """
    Report whether this index and ``other`` hold the same elements.

    Returns ``True`` immediately when ``self.is_(other)`` holds; otherwise
    the answer is whatever ``array_equivalent`` reports for the underlying
    values of the two operands.
    """
    if not self.is_(other):
        own_values = _values_from_object(self)
        other_values = _values_from_object(other)
        return array_equivalent(own_values, other_values)
    return True

def _wrap_joined_index(self, joined, other):
    """
    Wrap ``joined`` in an ``Int64Index``.

    The result keeps ``self.name`` only when it equals ``other.name``;
    otherwise it is created with ``name=None``.
    """
    if self.name == other.name:
        shared_name = self.name
    else:
        shared_name = None
    return Int64Index(joined, name=shared_name)
Expand Down Expand Up @@ -306,6 +296,9 @@ def equals(self, other):
if self is other:
return True

if not isinstance(other, Index):
return False

# need to compare nans locations and make sure that they are the same
# since nans don't compare equal this is a bit tricky
try:
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,20 @@ def test_delete_base(self):
# either depending on numpy version
result = idx.delete(len(idx))

def test_equals(self):
    # For every index in ``self.indices``: it equals itself, a copy and
    # its object-dtype cast, but not a list / ndarray / Series built
    # from the same values.
    for _, index in compat.iteritems(self.indices):
        self.assertTrue(index.equals(index))
        duplicate = index.copy()
        self.assertTrue(index.equals(duplicate))
        as_object = index.astype(object)
        self.assertTrue(index.equals(as_object))

        as_list = list(index)
        self.assertFalse(index.equals(as_list))
        as_array = np.array(index)
        self.assertFalse(index.equals(as_array))

        if index.nlevels != 1:
            # do not test MultiIndex
            continue
        as_series = pd.Series(index)
        self.assertFalse(index.equals(as_series))

def test_equals_op(self):
# GH9947, GH10637
index_a = self.create_index()
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ def test_astype(self):
casted = self.intIndex.astype('i8')
self.assertEqual(casted.name, 'foobar')

def test_equals(self):
def test_equals_object(self):
# same
self.assertTrue(Index(['a', 'b', 'c']).equals(Index(['a', 'b', 'c'])))

Expand Down
37 changes: 24 additions & 13 deletions pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ def test_ensure_copied_data(self):
result = CategoricalIndex(index.values, copy=False)
self.assertIs(_base(index.values), _base(result.values))

def test_equals(self):
def test_equals_categorical(self):

ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)
ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'],
Expand Down Expand Up @@ -556,19 +556,30 @@ def test_equals(self):

# tests
# make sure that we are testing for category inclusion properly
self.assertTrue(CategoricalIndex(
list('aabca'), categories=['c', 'a', 'b']).equals(list('aabca')))
ci = CategoricalIndex(list('aabca'), categories=['c', 'a', 'b'])
self.assertFalse(ci.equals(list('aabca')))
self.assertFalse(ci.equals(CategoricalIndex(list('aabca'))))
self.assertTrue(ci.equals(ci.copy()))

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
ci = CategoricalIndex(list('aabca'),
categories=['c', 'a', 'b', np.nan])
self.assertFalse(ci.equals(list('aabca')))
self.assertFalse(ci.equals(CategoricalIndex(list('aabca'))))
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
self.assertTrue(CategoricalIndex(
list('aabca'), categories=['c', 'a', 'b', np.nan]).equals(list(
'aabca')))

self.assertFalse(CategoricalIndex(
list('aabca') + [np.nan], categories=['c', 'a', 'b']).equals(list(
'aabca')))
self.assertTrue(CategoricalIndex(
list('aabca') + [np.nan], categories=['c', 'a', 'b']).equals(list(
'aabca') + [np.nan]))
self.assertTrue(ci.equals(ci.copy()))

ci = CategoricalIndex(list('aabca') + [np.nan],
categories=['c', 'a', 'b'])
self.assertFalse(ci.equals(list('aabca')))
self.assertFalse(ci.equals(CategoricalIndex(list('aabca'))))
self.assertTrue(ci.equals(ci.copy()))

ci = CategoricalIndex(list('aabca') + [np.nan],
categories=['c', 'a', 'b'])
self.assertFalse(ci.equals(list('aabca') + [np.nan]))
self.assertFalse(ci.equals(CategoricalIndex(list('aabca') + [np.nan])))
self.assertTrue(ci.equals(ci.copy()))

def test_string_categorical_index_repr(self):
# short
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1266,7 +1266,7 @@ def test_to_hierarchical(self):
def test_bounds(self):
self.index._bounds

def test_equals(self):
def test_equals_multi(self):
self.assertTrue(self.index.equals(self.index))
self.assertTrue(self.index.equal_levels(self.index))

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def test_astype(self):
i = Float64Index([0, 1.1, np.NAN])
self.assertRaises(ValueError, lambda: i.astype(dtype))

def test_equals(self):
def test_equals_numeric(self):

i = Float64Index([1.0, 2.0])
self.assertTrue(i.equals(i))
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ def test_is_monotonic(self):
self.assertTrue(index.is_monotonic_increasing)
self.assertTrue(index.is_monotonic_decreasing)

def test_equals(self):
def test_equals_range(self):
equiv_pairs = [(RangeIndex(0, 9, 2), RangeIndex(0, 10, 2)),
(RangeIndex(0), RangeIndex(1, -1, 3)),
(RangeIndex(1, 2, 3), RangeIndex(1, 3, 4)),
Expand Down
30 changes: 29 additions & 1 deletion pandas/tseries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import numpy as np
from pandas.types.common import (is_integer, is_float,
is_bool_dtype, _ensure_int64,
is_scalar,
is_scalar, is_dtype_equal,
is_list_like)
from pandas.types.generic import (ABCIndex, ABCSeries,
ABCPeriodIndex, ABCIndexClass)
Expand Down Expand Up @@ -109,6 +109,34 @@ def ceil(self, freq):
class DatetimeIndexOpsMixin(object):
""" common ops mixin to support a unified interface datetimelike Index """

def equals(self, other):
    """
    Determines if two Index objects contain the same elements.

    Parameters
    ----------
    other : object
        Candidate to compare against.

    Returns
    -------
    bool
        ``True`` if ``self.is_(other)``, or if ``other`` is an
        ``ABCIndexClass`` instance (converted to ``type(self)`` when it is
        of a different class) whose dtype equals ``self.dtype`` and whose
        ``asi8`` values match.  When ``self`` is an ``ABCPeriodIndex``,
        ``other`` must be one too and share the same ``freq``.
        ``False`` otherwise, including when ``other`` is not an index or
        the conversion fails.
    """
    if self.is_(other):
        return True

    if not isinstance(other, ABCIndexClass):
        return False
    elif not isinstance(other, type(self)):
        try:
            other = type(self)(other)
        except Exception:
            # Conversion is best-effort: any ordinary failure means
            # "not equal".  Catching Exception rather than using a bare
            # ``except:`` lets KeyboardInterrupt / SystemExit propagate.
            return False

    if not is_dtype_equal(self.dtype, other.dtype):
        # dtypes differ, e.g. the two indexes have different timezones
        return False
    elif isinstance(self, ABCPeriodIndex):
        # ToDo: Remove this when PeriodDtype is added
        if not isinstance(other, ABCPeriodIndex):
            return False
        if self.freq != other.freq:
            return False

    return np.array_equal(self.asi8, other.asi8)

def __iter__(self):
# Returns a generator that passes each element of ``self.asi8`` through
# ``self._box_func``; ``self.asi8`` is read when the generator is created,
# while ``self._box_func`` is looked up once per element as it is consumed.
return (self._box_func(v) for v in self.asi8)

Expand Down
20 changes: 0 additions & 20 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1625,26 +1625,6 @@ def is_normalized(self):
def _resolution(self):
return period.resolution(self.asi8, self.tz)

def equals(self, other):
    """
    Determines if two Index objects contain the same elements.

    ``other`` is used as-is when it exposes
    ``inferred_type == 'datetime64'``.  Anything else is coerced with
    ``DatetimeIndex(other)``, unless this index has a non-None ``offset``,
    in which case ``False`` is returned without attempting the coercion.

    Returns
    -------
    bool
        ``True`` if ``self.is_(other)``, or if ``self._has_same_tz(other)``
        holds and the ``asi8`` values are equal; ``False`` otherwise,
        including when the coercion fails.
    """
    if self.is_(other):
        return True

    if (not hasattr(other, 'inferred_type') or
            other.inferred_type != 'datetime64'):
        if self.offset is not None:
            return False
        try:
            other = DatetimeIndex(other)
        except Exception:
            # Coercion is best-effort: any ordinary failure means
            # "not equal".  Catching Exception rather than using a bare
            # ``except:`` lets KeyboardInterrupt / SystemExit propagate.
            return False

    if self._has_same_tz(other):
        return np.array_equal(self.asi8, other.asi8)
    return False

def insert(self, loc, item):
"""
Make new Index inserting new item at location
Expand Down
15 changes: 0 additions & 15 deletions pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,21 +596,6 @@ def _mpl_repr(self):
# how to represent ourselves to matplotlib
return self.asobject.values

def equals(self, other):
    """
    Determines if two Index objects contain the same elements.

    A non-``PeriodIndex`` ``other`` is first coerced with
    ``PeriodIndex(other)``.

    Returns
    -------
    bool
        ``True`` if ``self.is_(other)`` or the ``asi8`` values of both
        operands are equal; ``False`` otherwise, including when the
        coercion fails.
    """
    if self.is_(other):
        return True

    if not isinstance(other, PeriodIndex):
        try:
            other = PeriodIndex(other)
        except Exception:
            # Coercion is best-effort: any ordinary failure means
            # "not equal".  Catching Exception rather than using a bare
            # ``except:`` lets KeyboardInterrupt / SystemExit propagate.
            return False

    # NOTE(review): only ``asi8`` is compared here; ``freq`` is not checked.
    return np.array_equal(self.asi8, other.asi8)

def to_timestamp(self, freq=None, how='start'):
"""
Cast to DatetimeIndex
Expand Down
16 changes: 0 additions & 16 deletions pandas/tseries/tdi.py
Original file line number Diff line number Diff line change
Expand Up @@ -806,22 +806,6 @@ def dtype(self):
def is_all_dates(self):
return True

def equals(self, other):
    """
    Determines if two Index objects contain the same elements.

    ``other`` is used as-is when it exposes
    ``inferred_type == 'timedelta64'``; anything else is first coerced
    with ``TimedeltaIndex(other)``.

    Returns
    -------
    bool
        ``True`` if ``self.is_(other)`` or the ``asi8`` values of both
        operands are equal; ``False`` otherwise, including when the
        coercion fails.
    """
    if self.is_(other):
        return True

    if (not hasattr(other, 'inferred_type') or
            other.inferred_type != 'timedelta64'):
        try:
            other = TimedeltaIndex(other)
        except Exception:
            # Coercion is best-effort: any ordinary failure means
            # "not equal".  Catching Exception rather than using a bare
            # ``except:`` lets KeyboardInterrupt / SystemExit propagate.
            return False

    return np.array_equal(self.asi8, other.asi8)

def insert(self, loc, item):
"""
Make new Index inserting new item at location
Expand Down
Loading

0 comments on commit 4488f18

Please sign in to comment.