11# pylint: disable=E1101,E1103,W0232
2+ from collections import OrderedDict
23import datetime
34from sys import getsizeof
45import warnings
1819 is_integer , is_iterator , is_list_like , is_object_dtype , is_scalar ,
1920 pandas_dtype )
2021from pandas .core .dtypes .dtypes import ExtensionDtype , PandasExtensionDtype
22+ from pandas .core .dtypes .generic import ABCDataFrame
2123from pandas .core .dtypes .missing import array_equivalent , isna
2224
2325import pandas .core .algorithms as algos
@@ -125,25 +127,25 @@ class MultiIndex(Index):
125127 Parameters
126128 ----------
127129 levels : sequence of arrays
128- The unique labels for each level
130+ The unique labels for each level.
129131 codes : sequence of arrays
130- Integers for each level designating which label at each location
132+ Integers for each level designating which label at each location.
131133
132134 .. versionadded:: 0.24.0
133135 labels : sequence of arrays
134- Integers for each level designating which label at each location
136+ Integers for each level designating which label at each location.
135137
136138 .. deprecated:: 0.24.0
137139 Use ``codes`` instead
138140 sortorder : optional int
139141 Level of sortedness (must be lexicographically sorted by that
140- level)
142+ level).
141143 names : optional sequence of objects
142- Names for each of the index levels. (name is accepted for compat)
143- copy : boolean , default False
144- Copy the meta-data
145- verify_integrity : boolean , default True
146- Check that the levels/codes are consistent and valid
144+ Names for each of the index levels. (name is accepted for compat).
145+ copy : bool , default False
146+ Copy the meta-data.
147+ verify_integrity : bool , default True
148+ Check that the levels/codes are consistent and valid.
147149
148150 Attributes
149151 ----------
@@ -158,6 +160,7 @@ class MultiIndex(Index):
158160 from_arrays
159161 from_tuples
160162 from_product
163+ from_frame
161164 set_levels
162165 set_codes
163166 to_frame
@@ -175,13 +178,9 @@ class MultiIndex(Index):
175178 MultiIndex.from_product : Create a MultiIndex from the cartesian product
176179 of iterables.
177180 MultiIndex.from_tuples : Convert list of tuples to a MultiIndex.
181+ MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
178182 Index : The base pandas Index type.
179183
180- Notes
181- -----
182- See the `user guide
183- <http://pandas.pydata.org/pandas-docs/stable/advanced.html>`_ for more.
184-
185184 Examples
186185 ---------
187186 A new ``MultiIndex`` is typically constructed using one of the helper
@@ -196,6 +195,11 @@ class MultiIndex(Index):
196195
197196 See further examples for how to construct a MultiIndex in the doc strings
198197 of the mentioned helper methods.
198+
199+ Notes
200+ -----
201+ See the `user guide
202+ <http://pandas.pydata.org/pandas-docs/stable/advanced.html>`_ for more.
199203 """
200204
201205 # initialize to zero-length tuples to make everything work
@@ -288,7 +292,7 @@ def _verify_integrity(self, codes=None, levels=None):
288292 @classmethod
289293 def from_arrays (cls , arrays , sortorder = None , names = None ):
290294 """
291- Convert arrays to MultiIndex
295+ Convert arrays to MultiIndex.
292296
293297 Parameters
294298 ----------
@@ -297,7 +301,9 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
297301 len(arrays) is the number of levels.
298302 sortorder : int or None
299303 Level of sortedness (must be lexicographically sorted by that
300- level)
304+ level).
305+ names : list / sequence of str, optional
306+ Names for the levels in the index.
301307
302308 Returns
303309 -------
@@ -308,11 +314,15 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
308314 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
309315 MultiIndex.from_product : Make a MultiIndex from cartesian product
310316 of iterables.
317+ MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
311318
312319 Examples
313320 --------
314321 >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
315322 >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
323+ MultiIndex(levels=[[1, 2], ['blue', 'red']],
324+ labels=[[0, 0, 1, 1], [1, 0, 1, 0]],
325+ names=['number', 'color'])
316326 """
317327 if not is_list_like (arrays ):
318328 raise TypeError ("Input must be a list / sequence of array-likes." )
@@ -337,31 +347,37 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
337347 @classmethod
338348 def from_tuples (cls , tuples , sortorder = None , names = None ):
339349 """
340- Convert list of tuples to MultiIndex
350+ Convert list of tuples to MultiIndex.
341351
342352 Parameters
343353 ----------
344354 tuples : list / sequence of tuple-likes
345355 Each tuple is the index of one row/column.
346356 sortorder : int or None
347357 Level of sortedness (must be lexicographically sorted by that
348- level)
358+ level).
359+ names : list / sequence of str, optional
360+ Names for the levels in the index.
349361
350362 Returns
351363 -------
352364 index : MultiIndex
353365
354366 See Also
355367 --------
356- MultiIndex.from_arrays : Convert list of arrays to MultiIndex
368+ MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
357369 MultiIndex.from_product : Make a MultiIndex from cartesian product
358- of iterables
370+ of iterables.
371+ MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
359372
360373 Examples
361374 --------
362375 >>> tuples = [(1, u'red'), (1, u'blue'),
363- (2, u'red'), (2, u'blue')]
376+ ... (2, u'red'), (2, u'blue')]
364377 >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color'))
378+ MultiIndex(levels=[[1, 2], ['blue', 'red']],
379+ labels=[[0, 0, 1, 1], [1, 0, 1, 0]],
380+ names=['number', 'color'])
365381 """
366382 if not is_list_like (tuples ):
367383 raise TypeError ('Input must be a list / sequence of tuple-likes.' )
@@ -388,7 +404,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None):
388404 @classmethod
389405 def from_product (cls , iterables , sortorder = None , names = None ):
390406 """
391- Make a MultiIndex from the cartesian product of multiple iterables
407+ Make a MultiIndex from the cartesian product of multiple iterables.
392408
393409 Parameters
394410 ----------
@@ -397,7 +413,7 @@ def from_product(cls, iterables, sortorder=None, names=None):
397413 sortorder : int or None
398414 Level of sortedness (must be lexicographically sorted by that
399415 level).
400- names : list / sequence of strings or None
416+ names : list / sequence of str, optional
401417 Names for the levels in the index.
402418
403419 Returns
@@ -408,16 +424,17 @@ def from_product(cls, iterables, sortorder=None, names=None):
408424 --------
409425 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
410426 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
427+ MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
411428
412429 Examples
413430 --------
414431 >>> numbers = [0, 1, 2]
415- >>> colors = [u 'green', u 'purple']
432+ >>> colors = ['green', 'purple']
416433 >>> pd.MultiIndex.from_product([numbers, colors],
417- names=['number', 'color'])
418- MultiIndex(levels=[[0, 1, 2], [u 'green', u 'purple']],
434+ ... names=['number', 'color'])
435+ MultiIndex(levels=[[0, 1, 2], ['green', 'purple']],
419436 labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
420- names=[u 'number', u 'color'])
437+ names=['number', 'color'])
421438 """
422439 from pandas .core .arrays .categorical import _factorize_from_iterables
423440 from pandas .core .reshape .util import cartesian_product
@@ -431,6 +448,68 @@ def from_product(cls, iterables, sortorder=None, names=None):
431448 codes = cartesian_product (codes )
432449 return MultiIndex (levels , codes , sortorder = sortorder , names = names )
433450
451+ @classmethod
452+ def from_frame (cls , df , sortorder = None , names = None ):
453+ """
454+ Make a MultiIndex from a DataFrame.
455+
456+ .. versionadded:: 0.24.0
457+
458+ Parameters
459+ ----------
460+ df : DataFrame
461+ DataFrame to be converted to MultiIndex.
462+ sortorder : int, optional
463+ Level of sortedness (must be lexicographically sorted by that
464+ level).
465+ names : list-like, optional
466+ If no names are provided, use the column names, or tuple of column
467+ names if the columns is a MultiIndex. If a sequence, overwrite
468+ names with the given sequence.
469+
470+ Returns
471+ -------
472+ MultiIndex
473+ The MultiIndex representation of the given DataFrame.
474+
475+ See Also
476+ --------
477+ MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
478+ MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
479+ MultiIndex.from_product : Make a MultiIndex from cartesian product
480+ of iterables.
481+
482+ Examples
483+ --------
484+ >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'],
485+ ... ['NJ', 'Temp'], ['NJ', 'Precip']],
486+ ... columns=['a', 'b'])
487+ >>> df
488+ a b
489+ 0 HI Temp
490+ 1 HI Precip
491+ 2 NJ Temp
492+ 3 NJ Precip
493+
494+ >>> pd.MultiIndex.from_frame(df)
495+ MultiIndex(levels=[['HI', 'NJ'], ['Precip', 'Temp']],
496+ labels=[[0, 0, 1, 1], [1, 0, 1, 0]],
497+ names=['a', 'b'])
498+
499+ Using explicit names, instead of the column names
500+
501+ >>> pd.MultiIndex.from_frame(df, names=['state', 'observation'])
502+ MultiIndex(levels=[['HI', 'NJ'], ['Precip', 'Temp']],
503+ labels=[[0, 0, 1, 1], [1, 0, 1, 0]],
504+ names=['state', 'observation'])
505+ """
506+ if not isinstance (df , ABCDataFrame ):
507+ raise TypeError ("Input must be a DataFrame" )
508+
509+ column_names , columns = lzip (* df .iteritems ())
510+ names = column_names if names is None else names
511+ return cls .from_arrays (columns , sortorder = sortorder , names = names )
512+
434513 # --------------------------------------------------------------------
435514
436515 @property
@@ -1386,11 +1465,16 @@ def to_frame(self, index=True, name=None):
13861465 else :
13871466 idx_names = self .names
13881467
1389- result = DataFrame ({(name or level ):
1390- self ._get_level_values (level )
1391- for name , level in
1392- zip (idx_names , range (len (self .levels )))},
1393- copy = False )
1468+ # Guarantee resulting column order
1469+ result = DataFrame (
1470+ OrderedDict ([
1471+ ((level if name is None else name ),
1472+ self ._get_level_values (level ))
1473+ for name , level in zip (idx_names , range (len (self .levels )))
1474+ ]),
1475+ copy = False
1476+ )
1477+
13941478 if index :
13951479 result .index = self
13961480 return result
0 commit comments