Skip to content

ENH: drop function now has errors keyword for non-existing column handling #6736

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 8, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.15.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ API changes
In [3]: cat = pd.Categorical(['a', 'b', 'a'], categories=['a', 'b', 'c'])

In [4]: cat
Out[4]:
Out[4]:
[a, b, a]
Categories (3, object): [a < b < c]

Expand Down
7 changes: 7 additions & 0 deletions doc/source/whatsnew/v0.16.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ Enhancements



- The ``drop`` function now accepts an ``errors`` keyword to suppress the ``ValueError`` raised when any of the labels does not exist in the target data. (:issue:`6736`)

.. ipython:: python

df = DataFrame(np.random.randn(3, 3), columns=['A', 'B', 'C'])
df.drop(['A', 'X'], axis=1, errors='ignore')


.. _whatsnew_0161.api:

Expand Down
8 changes: 5 additions & 3 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1557,7 +1557,7 @@ def reindex_like(self, other, method=None, copy=True, limit=None):

return self.reindex(**d)

def drop(self, labels, axis=0, level=None, inplace=False):
def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'):
"""
Return new object with labels in requested axis removed

Expand All @@ -1569,6 +1569,8 @@ def drop(self, labels, axis=0, level=None, inplace=False):
For MultiIndex
inplace : bool, default False
If True, do operation inplace and return None.
errors : {'ignore', 'raise'}, default 'raise'
If 'ignore', suppress error and only existing labels are dropped.

Returns
-------
Expand All @@ -1582,9 +1584,9 @@ def drop(self, labels, axis=0, level=None, inplace=False):
if level is not None:
if not isinstance(axis, MultiIndex):
raise AssertionError('axis must be a MultiIndex')
new_axis = axis.drop(labels, level=level)
new_axis = axis.drop(labels, level=level, errors=errors)
else:
new_axis = axis.drop(labels)
new_axis = axis.drop(labels, errors=errors)
dropped = self.reindex(**{axis_name: new_axis})
try:
dropped.axes[axis_].set_names(axis.names, inplace=True)
Expand Down
31 changes: 20 additions & 11 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2325,13 +2325,15 @@ def insert(self, loc, item):
(_self[:loc], item_idx, _self[loc:]))
return Index(idx, name=self.name)

def drop(self, labels):
def drop(self, labels, errors='raise'):
"""
Make new Index with passed list of labels deleted

Parameters
----------
labels : array-like
errors : {'ignore', 'raise'}, default 'raise'
If 'ignore', suppress error and only existing labels are dropped.

Returns
-------
Expand All @@ -2341,7 +2343,9 @@ def drop(self, labels):
indexer = self.get_indexer(labels)
mask = indexer == -1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, shouldn't this raise a KeyError? @jorisvandenbossche @shoyer to be consistent with say .loc and other indexers? @hayd

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is kind of a gray area to me. We're not doing actual indexing here. I would probably stick with ValueError, especially to remain consistent with the existing implementation.

if mask.any():
raise ValueError('labels %s not contained in axis' % labels[mask])
if errors != 'ignore':
raise ValueError('labels %s not contained in axis' % labels[mask])
indexer = indexer[~mask]
return self.delete(indexer)

@Appender(_shared_docs['drop_duplicates'] % _index_doc_kwargs)
Expand Down Expand Up @@ -3847,7 +3851,7 @@ def repeat(self, n):
sortorder=self.sortorder,
verify_integrity=False)

def drop(self, labels, level=None):
def drop(self, labels, level=None, errors='raise'):
"""
Make new MultiIndex with passed list of labels deleted

Expand All @@ -3870,19 +3874,24 @@ def drop(self, labels, level=None):
indexer = self.get_indexer(labels)
mask = indexer == -1
if mask.any():
raise ValueError('labels %s not contained in axis'
% labels[mask])
return self.delete(indexer)
if errors != 'ignore':
raise ValueError('labels %s not contained in axis'
% labels[mask])
indexer = indexer[~mask]
except Exception:
pass

inds = []
for label in labels:
loc = self.get_loc(label)
if isinstance(loc, int):
inds.append(loc)
else:
inds.extend(lrange(loc.start, loc.stop))
try:
loc = self.get_loc(label)
if isinstance(loc, int):
inds.append(loc)
else:
inds.extend(lrange(loc.start, loc.stop))
except KeyError:
if errors != 'ignore':
raise

return self.delete(inds)

Expand Down
33 changes: 33 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7409,6 +7409,26 @@ def test_drop_names(self):
self.assertEqual(obj.columns.name, 'second')
self.assertEqual(list(df.columns), ['d', 'e', 'f'])

self.assertRaises(ValueError, df.drop, ['g'])
self.assertRaises(ValueError, df.drop, ['g'], 1)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's give at least one of these an errors='raise' argument (and add it to an existing test that doesn't raise) just for completeness.


# errors = 'ignore'
dropped = df.drop(['g'], errors='ignore')
expected = Index(['a', 'b', 'c'])
self.assert_index_equal(dropped.index, expected)

dropped = df.drop(['b', 'g'], errors='ignore')
expected = Index(['a', 'c'])
self.assert_index_equal(dropped.index, expected)

dropped = df.drop(['g'], axis=1, errors='ignore')
expected = Index(['d', 'e', 'f'])
self.assert_index_equal(dropped.columns, expected)

dropped = df.drop(['d', 'g'], axis=1, errors='ignore')
expected = Index(['e', 'f'])
self.assert_index_equal(dropped.columns, expected)

def test_dropEmptyRows(self):
N = len(self.frame.index)
mat = randn(N)
Expand Down Expand Up @@ -7787,6 +7807,19 @@ def test_drop(self):
assert_frame_equal(simple.drop([0, 1, 3], axis=0), simple.ix[[2], :])
assert_frame_equal(simple.drop([0, 3], axis='index'), simple.ix[[1, 2], :])

self.assertRaises(ValueError, simple.drop, 5)
self.assertRaises(ValueError, simple.drop, 'C', 1)
self.assertRaises(ValueError, simple.drop, [1, 5])
self.assertRaises(ValueError, simple.drop, ['A', 'C'], 1)

# errors = 'ignore'
assert_frame_equal(simple.drop(5, errors='ignore'), simple)
assert_frame_equal(simple.drop([0, 5], errors='ignore'),
simple.ix[[1, 2, 3], :])
assert_frame_equal(simple.drop('C', axis=1, errors='ignore'), simple)
assert_frame_equal(simple.drop(['A', 'C'], axis=1, errors='ignore'),
simple[['B']])

#non-unique - wheee!
nu_df = DataFrame(lzip(range(3), range(-3, 1), list('abc')),
columns=['a', 'a', 'b'])
Expand Down
66 changes: 59 additions & 7 deletions pandas/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1036,20 +1036,43 @@ def check_slice(in_slice, expected):
def test_drop(self):
n = len(self.strIndex)

dropped = self.strIndex.drop(self.strIndex[lrange(5, 10)])
drop = self.strIndex[lrange(5, 10)]
dropped = self.strIndex.drop(drop)
expected = self.strIndex[lrange(5) + lrange(10, n)]
self.assertTrue(dropped.equals(expected))

self.assertRaises(ValueError, self.strIndex.drop, ['foo', 'bar'])
self.assertRaises(ValueError, self.strIndex.drop, ['1', 'bar'])

# errors='ignore'
mixed = drop.tolist() + ['foo']
dropped = self.strIndex.drop(mixed, errors='ignore')
expected = self.strIndex[lrange(5) + lrange(10, n)]
self.assert_index_equal(dropped, expected)

dropped = self.strIndex.drop(['foo', 'bar'], errors='ignore')
expected = self.strIndex[lrange(n)]
self.assert_index_equal(dropped, expected)

dropped = self.strIndex.drop(self.strIndex[0])
expected = self.strIndex[1:]
self.assertTrue(dropped.equals(expected))
self.assert_index_equal(dropped, expected)

ser = Index([1, 2, 3])
dropped = ser.drop(1)
expected = Index([2, 3])
self.assertTrue(dropped.equals(expected))
self.assert_index_equal(dropped, expected)

# errors='ignore'
self.assertRaises(ValueError, ser.drop, [3, 4])

dropped = ser.drop(4, errors='ignore')
expected = Index([1, 2, 3])
self.assert_index_equal(dropped, expected)

dropped = ser.drop([3, 4, 5], errors='ignore')
expected = Index([1, 2])
self.assert_index_equal(dropped, expected)

def test_tuple_union_bug(self):
import pandas
Expand Down Expand Up @@ -3529,21 +3552,50 @@ def test_drop(self):
dropped2 = self.index.drop(index)

expected = self.index[[0, 2, 3, 5]]
self.assertTrue(dropped.equals(expected))
self.assertTrue(dropped2.equals(expected))
self.assert_index_equal(dropped, expected)
self.assert_index_equal(dropped2, expected)

dropped = self.index.drop(['bar'])
expected = self.index[[0, 1, 3, 4, 5]]
self.assertTrue(dropped.equals(expected))
self.assert_index_equal(dropped, expected)

dropped = self.index.drop('foo')
expected = self.index[[2, 3, 4, 5]]
self.assert_index_equal(dropped, expected)

index = MultiIndex.from_tuples([('bar', 'two')])
self.assertRaises(KeyError, self.index.drop, [('bar', 'two')])
self.assertRaises(KeyError, self.index.drop, index)
self.assertRaises(KeyError, self.index.drop, ['foo', 'two'])

# partially correct argument
mixed_index = MultiIndex.from_tuples([('qux', 'one'), ('bar', 'two')])
self.assertRaises(KeyError, self.index.drop, mixed_index)

# errors='ignore'
dropped = self.index.drop(index, errors='ignore')
expected = self.index[[0, 1, 2, 3, 4, 5]]
self.assert_index_equal(dropped, expected)

dropped = self.index.drop(mixed_index, errors='ignore')
expected = self.index[[0, 1, 2, 3, 5]]
self.assert_index_equal(dropped, expected)

dropped = self.index.drop(['foo', 'two'], errors='ignore')
expected = self.index[[2, 3, 4, 5]]
self.assert_index_equal(dropped, expected)

# mixed partial / full drop
dropped = self.index.drop(['foo', ('qux', 'one')])
expected = self.index[[2, 3, 5]]
self.assertTrue(dropped.equals(expected))
self.assert_index_equal(dropped, expected)

# mixed partial / full drop / errors='ignore'
mixed_index = ['foo', ('qux', 'one'), 'two']
self.assertRaises(KeyError, self.index.drop, mixed_index)
dropped = self.index.drop(mixed_index, errors='ignore')
expected = self.index[[2, 3, 5]]
self.assert_index_equal(dropped, expected)

def test_droplevel_with_names(self):
index = self.index[self.index.get_loc('foo')]
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/test_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1984,6 +1984,15 @@ def check_drop(drop_val, axis_number, aliases, expected):
expected = Panel({"One": df})
check_drop('Two', 0, ['items'], expected)

self.assertRaises(ValueError, panel.drop, 'Three')

# errors = 'ignore'
dropped = panel.drop('Three', errors='ignore')
assert_panel_equal(dropped, panel)
dropped = panel.drop(['Two', 'Three'], errors='ignore')
expected = Panel({"One": df})
assert_panel_equal(dropped, expected)

# Major
exp_df = DataFrame({"A": [2], "B": [4]}, index=[1])
expected = Panel({"One": exp_df, "Two": exp_df})
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1954,6 +1954,14 @@ def test_drop(self):
self.assertRaises(ValueError, s.drop, 'bc')
self.assertRaises(ValueError, s.drop, ('a',))

# errors='ignore'
s = Series(range(3),index=list('abc'))
result = s.drop('bc', errors='ignore')
assert_series_equal(result, s)
result = s.drop(['a', 'd'], errors='ignore')
expected = s.ix[1:]
assert_series_equal(result, expected)

# bad axis
self.assertRaises(ValueError, s.drop, 'one', axis='columns')

Expand Down