Skip to content

Commit f8c566c

Browse files
author
Tom Augspurger
committed
Merge pull request #6849 from TomAugspurger/unstack-nonunique
API/BUG Raise ValueError when stacking nonunique levels
2 parents e95bdce + 4b35ff0 commit f8c566c

File tree

6 files changed

+41
-2
lines changed

6 files changed

+41
-2
lines changed

doc/source/release.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,8 @@ API Changes
149149
- Define and document the order of column vs index names in query/eval (:issue:`6676`)
150150

151151
- ``DataFrame.sort`` now places NaNs at the beginning or end of the sort according to the ``na_position`` parameter. (:issue:`3917`)
152+
- ``stack`` and ``unstack`` now raise a ``ValueError`` when the ``level`` keyword refers
153+
to a non-unique item in the ``Index`` (previously raised a ``KeyError``). (:issue:`6738`)
152154

153155
- all offset operations now return ``Timestamp`` types (rather than datetime), Business/Week frequencies were incorrect (:issue:`4069`)
154156
- ``Series.iteritems()`` is now lazy (returns an iterator rather than a list). This was the documented behavior prior to 0.14. (:issue:`6760`)

doc/source/v0.14.0.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,8 @@ API changes
206206
- ``Panel.shift`` now uses ``NDFrame.shift``. It no longer drops the ``nan`` data and retains its original shape. (:issue:`4867`)
207207

208208
- Added ``nunique`` and ``value_counts`` functions to ``Index`` for counting unique elements. (:issue:`6734`)
209-
209+
- ``stack`` and ``unstack`` now raise a ``ValueError`` when the ``level`` keyword refers
210+
to a non-unique item in the ``Index`` (previously raised a ``KeyError``). (:issue:`6738`)
210211

211212
.. _whatsnew_0140.sql:
212213

pandas/core/index.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2317,6 +2317,13 @@ def _set_names(self, values, validate=True):
23172317
names = property(
23182318
fset=_set_names, fget=_get_names, doc="Names of levels in MultiIndex")
23192319

2320+
def _reference_duplicate_name(self, name):
2321+
"""
2322+
Returns True if the name refered to in self.names is duplicated.
2323+
"""
2324+
# count the times name equals an element in self.names.
2325+
return np.sum(name == np.asarray(self.names)) > 1
2326+
23202327
def _format_native_types(self, **kwargs):
23212328
return self.tolist()
23222329

pandas/core/reshape.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# pylint: disable=E1101,E1103
22
# pylint: disable=W0703,W0622,W0613,W0201
3-
43
from pandas.compat import range, zip
54
from pandas import compat
65
import itertools
@@ -69,6 +68,13 @@ def __init__(self, values, index, level=-1, value_columns=None):
6968
raise ValueError('must pass column labels for multi-column data')
7069

7170
self.index = index
71+
72+
if isinstance(self.index, MultiIndex):
73+
if index._reference_duplicate_name(level):
74+
msg = ("Ambiguous reference to {0}. The index "
75+
"names are not unique.".format(level))
76+
raise ValueError(msg)
77+
7278
self.level = self.index._get_level_number(level)
7379

7480
levels = index.levels
@@ -497,6 +503,12 @@ def stack(frame, level=-1, dropna=True):
497503
stacked : Series
498504
"""
499505
N, K = frame.shape
506+
if isinstance(frame.columns, MultiIndex):
507+
if frame.columns._reference_duplicate_name(level):
508+
msg = ("Ambiguous reference to {0}. The column "
509+
"names are not unique.".format(level))
510+
raise ValueError(msg)
511+
500512
if isinstance(level, int) and level < 0:
501513
level += frame.columns.nlevels
502514

pandas/tests/test_frame.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11387,6 +11387,16 @@ def test_unstack_dtypes(self):
1138711387
expected = Series({'float64' : 2, 'object' : 2})
1138811388
assert_series_equal(result, expected)
1138911389

11390+
def test_unstack_non_unique_index_names(self):
    # Both levels of the index are named 'c1', so referring to a level
    # by that name is ambiguous; a ValueError is expected in each case.
    duplicated_names = ['c1', 'c1']
    ambiguous_index = MultiIndex.from_tuples([('a', 'b'), ('c', 'd')],
                                             names=duplicated_names)
    frame = DataFrame([1, 2], index=ambiguous_index)

    # Duplicated names on the row index.
    with tm.assertRaises(ValueError):
        frame.unstack('c1')

    # Transposing moves the duplicated names onto the columns.
    with tm.assertRaises(ValueError):
        frame.T.stack('c1')
11399+
1139011400
def test_reset_index(self):
1139111401
stacked = self.frame.stack()[::2]
1139211402
stacked = DataFrame({'foo': stacked, 'bar': stacked})

pandas/tests/test_index.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1534,6 +1534,13 @@ def test_names(self):
15341534
level_names = [level.name for level in index.levels]
15351535
self.assertEqual(ind_names, level_names)
15361536

1537+
def test_reference_duplicate_name(self):
    # Same tuples for both cases; only the level names differ.
    tuples = [('a', 'b'), ('c', 'd')]

    # 'x' names both levels, so it is reported as duplicated.
    repeated = MultiIndex.from_tuples(tuples, names=['x', 'x'])
    self.assertTrue(repeated._reference_duplicate_name('x'))

    # 'x' names only one level, so it is not reported as duplicated.
    distinct = MultiIndex.from_tuples(tuples, names=['x', 'y'])
    self.assertFalse(distinct._reference_duplicate_name('x'))
1543+
15371544
def test_astype(self):
15381545
expected = self.index.copy()
15391546
actual = self.index.astype('O')

0 commit comments

Comments
 (0)