Skip to content

BUG: Maintain column order with groupby.nth #22811

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 20, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add optional sort parameter to difference method in subclasses
  • Loading branch information
reidy-p committed Nov 10, 2018
commit 351138d71e9bb721779954b23b38f57bcfc1b9b3
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ Other Enhancements
- Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`)
- :func:`~DataFrame.to_parquet` now supports writing a ``DataFrame`` as a directory of parquet files partitioned by a subset of the columns when ``engine = 'pyarrow'`` (:issue:`23283`)
- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexsistent` (:issue:`8917`)
- :meth:`Index.difference` now has an optional ``sort`` parameter to specify whether the results should be sorted if possible (:issue:`17839`)

.. _whatsnew_0240.api_breaking:

Expand Down
16 changes: 13 additions & 3 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2769,10 +2769,18 @@ def intersection(self, other):
return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0,
names=result_names)

def difference(self, other):
def difference(self, other, sort=True):
"""
Compute set difference of two MultiIndex objects

Parameters
----------
other : MultiIndex
sort : bool, default True
Sort the resulting MultiIndex if possible

.. versionadded:: 0.24.0

Returns
-------
diff : MultiIndex
Expand All @@ -2788,8 +2796,10 @@ def difference(self, other):
labels=[[]] * self.nlevels,
names=result_names, verify_integrity=False)

difference = sorted(set(self._ndarray_values) -
set(other._ndarray_values))
difference = set(self._ndarray_values) - set(other._ndarray_values)

if sort:
difference = sorted(difference)

if len(difference) == 0:
return MultiIndex(levels=[[]] * self.nlevels,
Expand Down
30 changes: 20 additions & 10 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1104,15 +1104,17 @@ def test_iadd_string(self):

@pytest.mark.parametrize("second_name,expected", [
(None, None), ('name', 'name')])
def test_difference_name_preservation(self, second_name, expected):
@pytest.mark.parametrize("sort", [
(True, False)])
def test_difference_name_preservation(self, second_name, expected, sort):
# TODO: replace with fixture
first = self.strIndex[5:20]
second = self.strIndex[:10]
answer = self.strIndex[10:20]

first.name = 'name'
second.name = second_name
result = first.difference(second)
result = first.difference(second, sort)

assert tm.equalContents(result, answer)

Expand All @@ -1121,18 +1123,22 @@ def test_difference_name_preservation(self, second_name, expected):
else:
assert result.name == expected

def test_difference_empty_arg(self):
@pytest.mark.parametrize("sort", [
(True, False)])
def test_difference_empty_arg(self, sort):
first = self.strIndex[5:20]
first.name == 'name'
result = first.difference([])
result = first.difference([], sort=sort)

assert tm.equalContents(result, first)
assert result.name == first.name

def test_difference_identity(self):
@pytest.mark.parametrize("sort", [
(True, False)])
def test_difference_identity(self, sort):
first = self.strIndex[5:20]
first.name == 'name'
result = first.difference(first)
result = first.difference(first, sort)

assert len(result) == 0
assert result.name == first.name
Expand Down Expand Up @@ -1181,13 +1187,15 @@ def test_symmetric_difference_non_index(self):
assert tm.equalContents(result, expected)
assert result.name == 'new_name'

def test_difference_type(self):
@pytest.mark.parametrize("sort", [
(True, False)])
def test_difference_type(self, sort):
# GH 20040
# If taking difference of a set and itself, it
# needs to preserve the type of the index
skip_index_keys = ['repeats']
for key, index in self.generate_index_types(skip_index_keys):
result = index.difference(index)
result = index.difference(index, sort)
expected = index.drop(index)
tm.assert_index_equal(result, expected)

Expand Down Expand Up @@ -2401,13 +2409,15 @@ def test_intersection_different_type_base(self, klass):
result = first.intersection(klass(second.values))
assert tm.equalContents(result, second)

def test_difference_base(self):
@pytest.mark.parametrize("sort", [
(True, False)])
def test_difference_base(self, sort):
# (same results for py2 and py3 but sortedness not tested elsewhere)
index = self.create_index()
first = index[:4]
second = index[3:]

result = first.difference(second)
result = first.difference(second, sort)
expected = Index([0, 1, 'a'])
tm.assert_index_equal(result, expected)

Expand Down