Skip to content

Commit

Permalink
BUG: operator equal on Index should behavior similarly to Series
Browse files Browse the repository at this point in the history
  • Loading branch information
mortada committed Jul 16, 2015
1 parent 35c0863 commit d5ff457
Show file tree
Hide file tree
Showing 4 changed files with 182 additions and 56 deletions.
48 changes: 45 additions & 3 deletions doc/source/basics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -240,14 +240,14 @@ way to summarize a boolean result.

.. ipython:: python
(df>0).all()
(df>0).any()
(df > 0).all()
(df > 0).any()
You can reduce to a final boolean value.

.. ipython:: python
(df>0).any().any()
(df > 0).any().any()
You can test if a pandas object is empty, via the :attr:`~DataFrame.empty` property.

Expand Down Expand Up @@ -330,6 +330,48 @@ equality to be True:
df1.equals(df2)
df1.equals(df2.sort())
Comparing array-like objects
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

You can conveniently do element-wise comparisons when comparing a pandas
data structure with a scalar value:

.. ipython:: python
pd.Series(['foo', 'bar', 'baz']) == 'foo'
pd.Index(['foo', 'bar', 'baz']) == 'foo'
Pandas also handles element-wise comparisons between different array-like
objects of the same length:

.. ipython:: python
pd.Series(['foo', 'bar', 'baz']) == pd.Index(['foo', 'bar', 'qux'])
pd.Series(['foo', 'bar', 'baz']) == np.array(['foo', 'bar', 'qux'])
Trying to compare ``Index`` or ``Series`` objects of different lengths will
raise a ``ValueError``:

.. code-block:: python
In [55]: pd.Series(['foo', 'bar', 'baz']) == pd.Series(['foo', 'bar'])
ValueError: Series lengths must match to compare
In [56]: pd.Series(['foo', 'bar', 'baz']) == pd.Series(['foo'])
ValueError: Series lengths must match to compare
Note that this is different from the numpy behavior where a comparison can
be broadcast:

.. ipython:: python
np.array([1, 2, 3]) == np.array([2])
or it can return ``False`` if broadcasting cannot be done:

.. ipython:: python
np.array([1, 2, 3]) == np.array([1, 2])
Combining overlapping data sets
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
71 changes: 71 additions & 0 deletions doc/source/whatsnew/v0.17.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,76 @@ Backwards incompatible API changes

.. _whatsnew_0170.api_breaking:

- Operator equal on Index should behave similarly to Series (:issue:`9947`)

Starting in v0.17.0, comparing ``Index`` objects of different lengths will raise
a ``ValueError``. This is to be consistent with the behavior of ``Series``.

Previous behavior:

.. code-block:: python

In [2]: pd.Index([1, 2, 3]) == pd.Index([1, 4, 5])
Out[2]: array([ True, False, False], dtype=bool)

In [3]: pd.Index([1, 2, 3]) == pd.Index([2])
Out[3]: array([False, True, False], dtype=bool)

In [4]: pd.Index([1, 2, 3]) == pd.Index([1, 2])
Out[4]: False

In [5]: pd.Series([1, 2, 3]) == pd.Series([1, 4, 5])
Out[5]:
0 True
1 False
2 False
dtype: bool

In [6]: pd.Series([1, 2, 3]) == pd.Series([2])
ValueError: Series lengths must match to compare

In [7]: pd.Series([1, 2, 3]) == pd.Series([1, 2])
ValueError: Series lengths must match to compare

New behavior:

.. code-block:: python

In [8]: pd.Index([1, 2, 3]) == pd.Index([1, 4, 5])
Out[8]: array([ True, False, False], dtype=bool)

In [9]: pd.Index([1, 2, 3]) == pd.Index([2])
ValueError: Lengths must match to compare

In [10]: pd.Index([1, 2, 3]) == pd.Index([1, 2])
ValueError: Lengths must match to compare

In [11]: pd.Series([1, 2, 3]) == pd.Series([1, 4, 5])
Out[11]:
0 True
1 False
2 False
dtype: bool

In [12]: pd.Series([1, 2, 3]) == pd.Series([2])
ValueError: Series lengths must match to compare

In [13]: pd.Series([1, 2, 3]) == pd.Series([1, 2])
ValueError: Series lengths must match to compare

Note that this is different from the ``numpy`` behavior where a comparison can
be broadcast:

.. ipython:: python

np.array([1, 2, 3]) == np.array([1])

or it can return ``False`` if broadcasting cannot be done:

.. ipython:: python

np.array([1, 2, 3]) == np.array([1, 2])

.. _whatsnew_0170.api_breaking.other:

Other API Changes
Expand Down Expand Up @@ -149,3 +219,4 @@ Bug Fixes

- Bug in ``Series.plot(kind='hist')`` Y Label not informative (:issue:`10485`)

- Bug in operator equal on Index not being consistent with Series (:issue:`9947`)
3 changes: 3 additions & 0 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2593,6 +2593,9 @@ def _add_comparison_methods(cls):
def _make_compare(op):

def _evaluate_compare(self, other):
if isinstance(other, (np.ndarray, Index, ABCSeries)):
if other.ndim > 0 and len(self) != len(other):
raise ValueError('Lengths must match to compare')
func = getattr(self.values, op)
result = func(np.asarray(other))

Expand Down
116 changes: 63 additions & 53 deletions pandas/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1550,22 +1550,70 @@ def test_groupby(self):
tm.assert_dict_equal(groups, exp)

def test_equals_op(self):
# For issue #9785
# GH9947
index_a = Index(['foo', 'bar', 'baz'])
index_b = Index(['foo', 'bar', 'baz', 'qux'])
# Testing Numpy Results Equivelent
assert_array_equal(
index_a.equals(index_a),
index_a == index_a
)
assert_array_equal(
index_a.equals(index_b),
index_a == index_b,
)
assert_array_equal(
index_b.equals(index_a),
index_b == index_a,
)
index_c = Index(['foo', 'bar', 'qux'])
index_d = Index(['foo'])
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
index_a == index_b
assert_array_equal(index_a == index_a, np.array([True, True, True]))
assert_array_equal(index_a == index_c, np.array([True, True, False]))

# test comparisons with numpy arrays
array_a = np.array(['foo', 'bar', 'baz'])
array_b = np.array(['foo', 'bar', 'baz', 'qux'])
array_c = np.array(['foo', 'bar', 'qux'])
array_d = np.array(['foo'])
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
index_a == array_b
assert_array_equal(index_a == array_a, np.array([True, True, True]))
assert_array_equal(index_a == array_c, np.array([True, True, False]))

# test comparisons with Series
series_a = Series(['foo', 'bar', 'baz'])
series_b = Series(['foo', 'bar', 'baz', 'qux'])
series_c = Series(['foo', 'bar', 'qux'])
series_d = Series(['foo'])
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
index_a == series_b
assert_array_equal(index_a == series_a, np.array([True, True, True]))
assert_array_equal(index_a == series_c, np.array([True, True, False]))

# cases where length is 1 for one of them
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
index_a == index_d
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
index_a == series_d
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
index_a == array_d
with tm.assertRaisesRegexp(ValueError, "Series lengths must match"):
series_a == series_d
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
series_a == array_d

# comparing with scalar should broadcast
assert_array_equal(index_a == 'foo', np.array([True, False, False]))
assert_array_equal(series_a == 'foo', np.array([True, False, False]))
assert_array_equal(array_a == 'foo', np.array([True, False, False]))

# GH9785
# test comparisons of multiindex
from pandas.compat import StringIO
df = pd.read_csv(StringIO('a,b,c\n1,2,3\n4,5,6'), index_col=[0, 1])
assert_array_equal(df.index == df.index, np.array([True, True]))

mi1 = MultiIndex.from_tuples([(1, 2), (4, 5)])
assert_array_equal(df.index == mi1, np.array([True, True]))
mi2 = MultiIndex.from_tuples([(1, 2), (4, 6)])
assert_array_equal(df.index == mi2, np.array([True, False]))
mi3 = MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)])
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
df.index == mi3
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
df.index == index_a
assert_array_equal(index_a == mi3, np.array([False, False, False]))


class TestCategoricalIndex(Base, tm.TestCase):
_holder = CategoricalIndex
Expand Down Expand Up @@ -4815,47 +4863,9 @@ def test_index_name_retained(self):
tm.assert_frame_equal(result, df_expected)

def test_equals_operator(self):
# For issue #9785
# GH9785
self.assertTrue((self.index == self.index).all())

def test_index_compare(self):
# For issue #9785
index_unequal = Index(['foo', 'bar', 'baz'])
index_equal = Index([
('foo', 'one'), ('foo', 'two'), ('bar', 'one'),
('baz', 'two'), ('qux', 'one'), ('qux', 'two')
], tupleize_cols=False)
# Testing Numpy Results Equivelent
assert_array_equal(
index_unequal.equals(self.index),
index_unequal == self.index,
err_msg = 'Index compared with MultiIndex failed',
)
assert_array_equal(
self.index.equals(index_unequal),
self.index == index_unequal,
err_msg = 'MultiIndex compared with Index failed',
)
assert_array_equal(
self.index.equals(index_equal),
self.index == index_equal,
err_msg = 'MultiIndex compared with Similar Index failed',
)
assert_array_equal(
index_equal.equals(self.index),
index_equal == self.index,
err_msg = 'Index compared with Similar MultiIndex failed',
)
# Testing that the result is true for the index_equal case
self.assertTrue(
(self.index == index_equal).all(),
msg='Assert Index compared with Similar MultiIndex match'
)
self.assertTrue(
(index_equal == self.index).all(),
msg='Assert MultiIndex compared with Similar Index match'
)


def test_get_combined_index():
from pandas.core.index import _get_combined_index
Expand Down

0 comments on commit d5ff457

Please sign in to comment.