Skip to content

Commit

Permalink
ENH: Support resetting index with tuple name
Browse files Browse the repository at this point in the history
closes pandas-dev#16164

Author: Pietro Battiston <me@pietrobattiston.it>

Closes pandas-dev#16165 from toobaz/reix_col_name and squashes the following commits:

9e1bdba [Pietro Battiston] REF: reorganize reinsertion code
3b0bb1f [Pietro Battiston] ENH: Handle tuples shorter than nlevels gracefully
c958de7 [Pietro Battiston] TST: additional test for reset_index with tuple-named index level
e12bca1 [Pietro Battiston] ENH: allow tuple index names to be interpreted as full column keys
6315d07 [Pietro Battiston] REF: Avoid duplication in reset_index() when reinserting index columns
  • Loading branch information
toobaz authored and jreback committed Apr 28, 2017
1 parent b6f65eb commit a7a0574
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 41 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,7 @@ Other Enhancements
- ``Series.interpolate()`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
- Addition of a ``level`` keyword to ``DataFrame/Series.rename`` to rename
labels in the specified level of a MultiIndex (:issue:`4160`).
- ``DataFrame.reset_index()`` will now interpret a tuple ``index.name`` as a key spanning across levels of ``columns``, if ``columns`` is a ``MultiIndex`` (:issue:`16164`)
- ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs <timedeltas.isoformat>` (:issue:`15136`)
- ``.select_dtypes()`` now allows the string ``datetimetz`` to generically select datetimes with tz (:issue:`14910`)
- The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements
Expand Down
68 changes: 32 additions & 36 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3019,44 +3019,40 @@ def _maybe_casted_values(index, labels=None):
if len(level) < len(self.index.levels):
new_index = self.index.droplevel(level)

if not drop:
names = self.index.names
zipped = lzip(self.index.levels, self.index.labels)

multi_col = isinstance(self.columns, MultiIndex)
for i, (lev, lab) in reversed(list(enumerate(zipped))):
col_name = names[i]
if col_name is None:
col_name = 'level_%d' % i

if multi_col:
if col_fill is None:
col_name = tuple([col_name] * self.columns.nlevels)
else:
name_lst = [col_fill] * self.columns.nlevels
lev_num = self.columns._get_level_number(col_level)
name_lst[lev_num] = col_name
col_name = tuple(name_lst)

# to ndarray and maybe infer different dtype
level_values = _maybe_casted_values(lev, lab)
if level is None or i in level:
new_obj.insert(0, col_name, level_values)

elif not drop:
name = self.index.name
if name is None or name == 'index':
name = 'index' if 'index' not in self else 'level_0'
if isinstance(self.columns, MultiIndex):
if col_fill is None:
name = tuple([name] * self.columns.nlevels)
else:
name_lst = [col_fill] * self.columns.nlevels
if not drop:
if isinstance(self.index, MultiIndex):
names = [n if n is not None else ('level_%d' % i)
for (i, n) in enumerate(self.index.names)]
to_insert = lzip(self.index.levels, self.index.labels)
else:
default = 'index' if 'index' not in self else 'level_0'
names = ([default] if self.index.name is None
else [self.index.name])
to_insert = ((self.index, None),)

multi_col = isinstance(self.columns, MultiIndex)
for i, (lev, lab) in reversed(list(enumerate(to_insert))):
name = names[i]
if multi_col:
col_name = (list(name) if isinstance(name, tuple)
else [name])
if col_fill is None:
if len(col_name) not in (1, self.columns.nlevels):
raise ValueError("col_fill=None is incompatible "
"with incomplete column name "
"{}".format(name))
col_fill = col_name[0]

lev_num = self.columns._get_level_number(col_level)
name_lst[lev_num] = name
name_lst = [col_fill] * lev_num + col_name
missing = self.columns.nlevels - len(name_lst)
name_lst += [col_fill] * missing
name = tuple(name_lst)
values = _maybe_casted_values(self.index)
new_obj.insert(0, name, values)

# to ndarray and maybe infer different dtype
level_values = _maybe_casted_values(lev, lab)
if level is None or i in level:
new_obj.insert(0, name, level_values)

new_obj.index = new_index
if not inplace:
Expand Down
44 changes: 39 additions & 5 deletions pandas/tests/test_multilevel.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -2242,16 +2242,50 @@ def test_reset_index_multiindex_columns(self):
levels = [['A', ''], ['B', 'b']]
df = pd.DataFrame([[0, 2], [1, 3]],
columns=pd.MultiIndex.from_tuples(levels))
expected = df.copy()
df.index.name = 'A'
result = df[['B']].reset_index()
tm.assert_frame_equal(result, expected)
result = df[['B']].rename_axis('A').reset_index()
tm.assert_frame_equal(result, df)

# gh-16120: already existing column
with tm.assert_raises_regex(ValueError,
("cannot insert \('A', ''\), "
"already exists")):
df.reset_index()
df.rename_axis('A').reset_index()

# gh-16164: multiindex (tuple) full key
result = df.set_index([('A', '')]).reset_index()
tm.assert_frame_equal(result, df)

# with additional (unnamed) index level
idx_col = pd.DataFrame([[0], [1]],
columns=pd.MultiIndex.from_tuples([('level_0',
'')]))
expected = pd.concat([idx_col, df[[('B', 'b'), ('A', '')]]], axis=1)
result = df.set_index([('B', 'b')], append=True).reset_index()
tm.assert_frame_equal(result, expected)

# with index name which is a too long tuple...
with tm.assert_raises_regex(ValueError,
("Item must have length equal to number "
"of levels.")):
df.rename_axis([('C', 'c', 'i')]).reset_index()

# or too short...
levels = [['A', 'a', ''], ['B', 'b', 'i']]
df2 = pd.DataFrame([[0, 2], [1, 3]],
columns=pd.MultiIndex.from_tuples(levels))
idx_col = pd.DataFrame([[0], [1]],
columns=pd.MultiIndex.from_tuples([('C',
'c',
'ii')]))
expected = pd.concat([idx_col, df2], axis=1)
result = df2.rename_axis([('C', 'c')]).reset_index(col_fill='ii')
tm.assert_frame_equal(result, expected)

# ... which is incompatible with col_fill=None
with tm.assert_raises_regex(ValueError,
("col_fill=None is incompatible with "
"incomplete column name \('C', 'c'\)")):
df2.rename_axis([('C', 'c')]).reset_index(col_fill=None)

def test_set_index_period(self):
# GH 6631
Expand Down

0 comments on commit a7a0574

Please sign in to comment.