Skip to content

API: Index.duplicated should return np.array #9112

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 20, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.16.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ Backwards incompatible API changes

.. _whatsnew_0160.api_breaking:

- ``Index.duplicated`` now returns `np.array(dtype=bool)` rather than `Index(dtype=object)` containing `bool` values. (:issue:`8875`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rather

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, thanks for the correction and the merge!


Deprecations
~~~~~~~~~~~~

Expand Down
12 changes: 6 additions & 6 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@


_shared_docs = dict()
_indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='')
_indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='',
duplicated='IndexOpsMixin')


class StringMixin(object):
Expand Down Expand Up @@ -486,14 +487,14 @@ def searchsorted(self, key, side='left'):
@Appender(_shared_docs['drop_duplicates'] % _indexops_doc_kwargs)
def drop_duplicates(self, take_last=False, inplace=False):
duplicated = self.duplicated(take_last=take_last)
result = self[~(duplicated.values).astype(bool)]
result = self[np.logical_not(duplicated)]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK with changing to this, though using (~values).astype(bool) is quite common in the code base; if someone wanted to clean that up, it would be appreciated.

if inplace:
return self._update_inplace(result)
else:
return result

_shared_docs['duplicated'] = (
"""Return boolean %(klass)s denoting duplicate values
"""Return boolean %(duplicated)s denoting duplicate values

Parameters
----------
Expand All @@ -502,7 +503,7 @@ def drop_duplicates(self, take_last=False, inplace=False):

Returns
-------
duplicated : %(klass)s
duplicated : %(duplicated)s
""")

@Appender(_shared_docs['duplicated'] % _indexops_doc_kwargs)
Expand All @@ -513,8 +514,7 @@ def duplicated(self, take_last=False):
return self._constructor(duplicated,
index=self.index).__finalize__(self)
except AttributeError:
from pandas.core.index import Index
return Index(duplicated)
return np.array(duplicated, dtype=bool)

#----------------------------------------------------------------------
# abstracts
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@

_unsortable_types = frozenset(('mixed', 'mixed-integer'))

_index_doc_kwargs = dict(klass='Index', inplace='')
_index_doc_kwargs = dict(klass='Index', inplace='',
duplicated='np.array')


def _try_get_item(x):
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@
klass='Series',
axes_single_arg="{0,'index'}",
inplace="""inplace : boolean, default False
If True, performs operation inplace and returns None."""
If True, performs operation inplace and returns None.""",
duplicated='Series'
)


Expand Down
18 changes: 12 additions & 6 deletions pandas/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,8 +614,10 @@ def test_duplicated_drop_duplicates(self):
continue

# original doesn't have duplicates
expected = Index([False] * len(original))
tm.assert_index_equal(original.duplicated(), expected)
expected = np.array([False] * len(original), dtype=bool)
duplicated = original.duplicated()
tm.assert_numpy_array_equal(duplicated, expected)
self.assertTrue(duplicated.dtype == bool)
result = original.drop_duplicates()
tm.assert_index_equal(result, original)
self.assertFalse(result is original)
Expand All @@ -625,15 +627,19 @@ def test_duplicated_drop_duplicates(self):

# create repeated values, 3rd and 5th values are duplicated
idx = original[list(range(len(original))) + [5, 3]]
expected = Index([False] * len(original) + [True, True])
tm.assert_index_equal(idx.duplicated(), expected)
expected = np.array([False] * len(original) + [True, True], dtype=bool)
duplicated = idx.duplicated()
tm.assert_numpy_array_equal(duplicated, expected)
self.assertTrue(duplicated.dtype == bool)
tm.assert_index_equal(idx.drop_duplicates(), original)

last_base = [False] * len(idx)
last_base[3] = True
last_base[5] = True
expected = Index(last_base)
tm.assert_index_equal(idx.duplicated(take_last=True), expected)
expected = np.array(last_base)
duplicated = idx.duplicated(take_last=True)
tm.assert_numpy_array_equal(duplicated, expected)
self.assertTrue(duplicated.dtype == bool)
tm.assert_index_equal(idx.drop_duplicates(take_last=True),
idx[~np.array(last_base)])

Expand Down
12 changes: 8 additions & 4 deletions pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2075,13 +2075,17 @@ def test_duplicated_drop_duplicates(self):
# GH 4060
idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2 ,3], [1, 1, 1, 1, 2, 2]))

expected = Index([False, False, False, True, False, False])
tm.assert_index_equal(idx.duplicated(), expected)
expected = np.array([False, False, False, True, False, False], dtype=bool)
duplicated = idx.duplicated()
tm.assert_numpy_array_equal(duplicated, expected)
self.assertTrue(duplicated.dtype == bool)
expected = MultiIndex.from_arrays(([1, 2, 3, 2 ,3], [1, 1, 1, 2, 2]))
tm.assert_index_equal(idx.drop_duplicates(), expected)

expected = Index([True, False, False, False, False, False])
tm.assert_index_equal(idx.duplicated(take_last=True), expected)
expected = np.array([True, False, False, False, False, False])
duplicated = idx.duplicated(take_last=True)
tm.assert_numpy_array_equal(duplicated, expected)
self.assertTrue(duplicated.dtype == bool)
expected = MultiIndex.from_arrays(([2, 3, 1, 2 ,3], [1, 1, 1, 2, 2]))
tm.assert_index_equal(idx.drop_duplicates(take_last=True), expected)

Expand Down