Skip to content

Commit 999035e

Browse files
committed
API: Added squeeze keyword to MultiIndex ctors
Adds a new ``squeeze`` keyword to control whether the various MultiIndex constructors should squeeze down to an ``Index`` when all the values are length-1 tuples.

    In [3]: MultiIndex.from_tuples([('a',), ('b',), ('c',)])
    Out[3]: Index(['a', 'b', 'c'], dtype='object')

    In [4]: MultiIndex.from_tuples([('a',), ('b',), ('c',)], squeeze=False)
    Out[4]: MultiIndex(levels=[['a', 'b', 'c']], labels=[[0, 1, 2]])

This is helpful for routines that rely on the MultiIndex constructors always returning a MultiIndex, regardless of the data values (e.g. hash_tuples).
1 parent 929c66f commit 999035e

File tree

3 files changed

+76
-9
lines changed

3 files changed

+76
-9
lines changed

doc/source/whatsnew/v0.21.0.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ Other Enhancements
7777
- :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`)
7878
- :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`)
7979
- :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True``. (:issue:`15972`)
80+
- The various :class:`MultiIndex` constructors have gained a ``squeeze`` keyword to control whether the result is squeezed down to a regular ``Index`` when
81+
the values are all tuples of length one (the default is ``True``, as before) (:issue:`17178`)
8082
- :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`)
8183
- :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`)
8284
- :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`)

pandas/core/indexes/multi.py

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import pandas.core.base as base
3030
from pandas.util._decorators import (Appender, cache_readonly,
3131
deprecate, deprecate_kwarg)
32+
from pandas.util._validators import validate_bool_kwarg
3233
import pandas.core.common as com
3334
import pandas.core.missing as missing
3435
import pandas.core.algorithms as algos
@@ -68,6 +69,11 @@ class MultiIndex(Index):
6869
Copy the meta-data
6970
verify_integrity : boolean, default True
7071
Check that the levels/labels are consistent and valid
72+
squeeze : bool, default True
73+
Whether to squeeze an iterable of length 1 tuples down
74+
to an Index, or return a MultiIndex with a single level
75+
76+
.. versionadded:: 0.21.0
7177
"""
7278

7379
# initialize to zero-length tuples to make everything work
@@ -79,9 +85,11 @@ class MultiIndex(Index):
7985
rename = Index.set_names
8086

8187
def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
82-
copy=False, verify_integrity=True, _set_identity=True,
88+
copy=False, verify_integrity=True, squeeze=True,
89+
_set_identity=True,
8390
name=None, **kwargs):
8491

92+
validate_bool_kwarg(squeeze, 'squeeze')
8593
# compat with Index
8694
if name is not None:
8795
names = name
@@ -91,7 +99,7 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
9199
raise ValueError('Length of levels and labels must be the same.')
92100
if len(levels) == 0:
93101
raise ValueError('Must pass non-zero number of levels/labels')
94-
if len(levels) == 1:
102+
if len(levels) == 1 and squeeze:
95103
if names:
96104
name = names[0]
97105
else:
@@ -1052,7 +1060,7 @@ def lexsort_depth(self):
10521060
return 0
10531061

10541062
@classmethod
1055-
def from_arrays(cls, arrays, sortorder=None, names=None):
1063+
def from_arrays(cls, arrays, sortorder=None, names=None, squeeze=True):
10561064
"""
10571065
Convert arrays to MultiIndex
10581066
@@ -1064,6 +1072,11 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
10641072
sortorder : int or None
10651073
Level of sortedness (must be lexicographically sorted by that
10661074
level)
1075+
squeeze : bool, default True
1076+
Whether to squeeze an iterable of length 1 tuples down
1077+
to an Index, or return a MultiIndex with a single level
1078+
1079+
.. versionadded:: 0.21.0
10671080
10681081
Returns
10691082
-------
@@ -1080,7 +1093,7 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
10801093
MultiIndex.from_product : Make a MultiIndex from cartesian product
10811094
of iterables
10821095
"""
1083-
if len(arrays) == 1:
1096+
if len(arrays) == 1 and squeeze:
10841097
name = None if names is None else names[0]
10851098
return Index(arrays[0], name=name)
10861099

@@ -1097,10 +1110,10 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
10971110
names = [getattr(arr, "name", None) for arr in arrays]
10981111

10991112
return MultiIndex(levels=levels, labels=labels, sortorder=sortorder,
1100-
names=names, verify_integrity=False)
1113+
names=names, verify_integrity=False, squeeze=squeeze)
11011114

11021115
@classmethod
1103-
def from_tuples(cls, tuples, sortorder=None, names=None):
1116+
def from_tuples(cls, tuples, sortorder=None, names=None, squeeze=True):
11041117
"""
11051118
Convert list of tuples to MultiIndex
11061119
@@ -1111,6 +1124,11 @@ def from_tuples(cls, tuples, sortorder=None, names=None):
11111124
sortorder : int or None
11121125
Level of sortedness (must be lexicographically sorted by that
11131126
level)
1127+
squeeze : bool, default True
1128+
Whether to squeeze an iterable of length 1 tuples down
1129+
to an Index, or return a MultiIndex with a single level
1130+
1131+
.. versionadded:: 0.21.0
11141132
11151133
Returns
11161134
-------
@@ -1143,10 +1161,11 @@ def from_tuples(cls, tuples, sortorder=None, names=None):
11431161
else:
11441162
arrays = lzip(*tuples)
11451163

1146-
return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names)
1164+
return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names,
1165+
squeeze=squeeze)
11471166

11481167
@classmethod
1149-
def from_product(cls, iterables, sortorder=None, names=None):
1168+
def from_product(cls, iterables, sortorder=None, names=None, squeeze=True):
11501169
"""
11511170
Make a MultiIndex from the cartesian product of multiple iterables
11521171
@@ -1160,6 +1179,12 @@ def from_product(cls, iterables, sortorder=None, names=None):
11601179
names : list / sequence of strings or None
11611180
Names for the levels in the index.
11621181
1182+
squeeze : bool, default True
1183+
Whether to squeeze an iterable of length 1 tuples down
1184+
to an Index, or return a MultiIndex with a single level
1185+
1186+
.. versionadded:: 0.21.0
1187+
11631188
Returns
11641189
-------
11651190
index : MultiIndex
@@ -1184,7 +1209,8 @@ def from_product(cls, iterables, sortorder=None, names=None):
11841209

11851210
labels, levels = _factorize_from_iterables(iterables)
11861211
labels = cartesian_product(labels)
1187-
return MultiIndex(levels, labels, sortorder=sortorder, names=names)
1212+
return MultiIndex(levels, labels, sortorder=sortorder, names=names,
1213+
squeeze=squeeze)
11881214

11891215
def _sort_levels_monotonic(self):
11901216
"""

pandas/tests/indexes/test_multi.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,45 @@ def test_constructor_single_level(self):
547547
labels=[[0, 1, 2, 3]])
548548
assert single_level.name is None
549549

550+
def test_constructor_single_level_no_squeeze(self):
551+
single_level = MultiIndex(levels=[[('foo',), ('bar',), ('baz',),
552+
('qux',)]],
553+
labels=[[0, 1, 2, 3]], names=['first'],
554+
squeeze=False)
555+
assert isinstance(single_level, MultiIndex)
556+
assert single_level.names == ['first']
557+
558+
def test_from_tuples_no_squeeze(self):
559+
idx = MultiIndex.from_tuples([('a',), ('b',)], names=['name'])
560+
expected = Index(['a', 'b'], name='name')
561+
tm.assert_index_equal(idx, expected)
562+
563+
idx = MultiIndex.from_tuples([('a',), ('b',)], names=['name'],
564+
squeeze=False)
565+
assert isinstance(idx, MultiIndex)
566+
tm.assert_index_equal(idx.levels[0], expected)
567+
568+
def test_from_product_no_squeeze(self):
569+
idx = MultiIndex.from_product([('a',)], names=['a'])
570+
expected = Index(['a'], name='a')
571+
tm.assert_index_equal(idx, expected)
572+
573+
idx = MultiIndex.from_product([('a',)], names=['a'], squeeze=False)
574+
assert idx.names == ['a']
575+
tm.assert_index_equal(idx.levels[0], expected)
576+
577+
def test_from_arrays_no_squeeze(self):
578+
idx = MultiIndex.from_arrays([('a',)], names=['name'])
579+
expected = Index(['a'], name='name')
580+
assert isinstance(idx, Index)
581+
assert not isinstance(idx, MultiIndex)
582+
583+
result = MultiIndex.from_arrays([('a',)], names=['name'],
584+
squeeze=False)
585+
assert isinstance(result, MultiIndex)
586+
assert result.names == ['name']
587+
tm.assert_index_equal(result.levels[0], expected)
588+
550589
def test_constructor_no_levels(self):
551590
tm.assert_raises_regex(ValueError, "non-zero number "
552591
"of levels/labels",

0 commit comments

Comments
 (0)