2323import numpy .ma as ma
2424
2525from pandas .core .common import (isnull , notnull , PandasError , _try_sort ,
26- _default_index , _stringify , _maybe_upcast )
26+ _default_index , _stringify )
2727from pandas .core .daterange import DateRange
2828from pandas .core .generic import NDFrame
2929from pandas .core .index import Index , MultiIndex , NULL_INDEX , _ensure_index
@@ -1638,7 +1638,8 @@ def reindex_like(self, other, method=None, copy=True):
16381638
16391639 truncate = generic .truncate
16401640
1641- def set_index (self , col_or_cols , drop = True , inplace = False ):
1641+ def set_index (self , col_or_cols , drop = True , inplace = False ,
1642+ verify_integrity = True ):
16421643 """
16431644 Set the DataFrame index (row labels) using one or more existing
16441645 columns. By default yields a new object.
@@ -1650,6 +1651,10 @@ def set_index(self, col_or_cols, drop=True, inplace=False):
16501651 Delete columns to be used as the new index
16511652 inplace : boolean, default False
16521653 Modify the DataFrame in place (do not create a new object)
1654+ verify_integrity : boolean, default True
1655+ Check the new index for duplicates. Otherwise defer the check until
1656+ necessary. Setting to False will improve the performance of this
1657+ method
16531658
16541659 Returns
16551660 -------
@@ -1674,8 +1679,8 @@ def set_index(self, col_or_cols, drop=True, inplace=False):
16741679
16751680 index = MultiIndex .from_arrays (arrays , names = cols )
16761681
1677- if not index ._verify_integrity ():
1678- duplicates = index ._get_duplicates ()
1682+ if verify_integrity and not index ._verify_integrity ():
1683+ duplicates = index .get_duplicates ()
16791684 raise Exception ('Index has duplicate keys: %s' % duplicates )
16801685
16811686 # clear up memory usage
@@ -2738,60 +2743,13 @@ def append(self, other, ignore_index=False):
27382743 if not self :
27392744 return other .copy ()
27402745
2741- if ignore_index :
2742- new_index = None
2746+ from pandas .tools .merge import concat
2747+ if isinstance (other , list ):
2748+ to_concat = [self ] + other
27432749 else :
2744- new_index = self .index .append (other .index )
2745- assert (new_index ._verify_integrity ())
2746-
2747- if self .columns .equals (other .columns ):
2748- return self ._append_same_columns (other , new_index )
2749- else :
2750- return self ._append_different_columns (other , new_index )
2751-
2752- def _append_different_columns (self , other , new_index ):
2753- indexer = self .columns .get_indexer (other .columns )
2754-
2755- if not (indexer == - 1 ).any ():
2756- new_columns = self .columns
2757- else :
2758- new_columns = self .columns .union (other .columns )
2759-
2760- new_data = self ._append_column_by_column (other )
2761- return self ._constructor (data = new_data , index = new_index ,
2762- columns = new_columns )
2763-
2764- def _append_same_columns (self , other , new_index ):
2765- if self ._is_mixed_type :
2766- new_data = self ._append_column_by_column (other )
2767- else :
2768- new_data = np .concatenate ((self .values , other .values ), axis = 0 )
2769- return self ._constructor (new_data , index = new_index ,
2770- columns = self .columns )
2771-
2772- def _append_column_by_column (self , other ):
2773- def _concat_missing (values , n ):
2774- values = _maybe_upcast (values )
2775- missing_values = np .empty (n , dtype = values .dtype )
2776- missing_values .fill (np .nan )
2777- return values , missing_values
2778-
2779- new_data = {}
2780- for col in self :
2781- values = self ._get_raw_column (col )
2782- if col in other :
2783- other_values = other ._get_raw_column (col )
2784- else :
2785- values , other_values = _concat_missing (values , len (other ))
2786- new_data [col ] = np .concatenate ((values , other_values ))
2787-
2788- for col in other :
2789- values = other ._get_raw_column (col )
2790- if col not in self :
2791- values , missing_values = _concat_missing (values , len (self ))
2792- new_data [col ] = np .concatenate ((missing_values , values ))
2793-
2794- return new_data
2750+ to_concat = [self , other ]
2751+ return concat (to_concat , ignore_index = ignore_index ,
2752+ verify_integrity = True )
27952753
27962754 def _get_raw_column (self , col ):
27972755 return self ._data .get (col )
@@ -3618,6 +3576,8 @@ def factor_agg(factor, vec, func):
36183576
36193577
36203578def extract_index (data ):
3579+ from pandas .core .index import _union_indexes
3580+
36213581 index = None
36223582 if len (data ) == 0 :
36233583 index = NULL_INDEX
@@ -3663,51 +3623,6 @@ def extract_index(data):
36633623 return _ensure_index (index )
36643624
36653625
3666- def _union_indexes (indexes ):
3667- if len (indexes ) == 0 :
3668- return Index ([])
3669-
3670- if len (indexes ) == 1 :
3671- result = indexes [0 ]
3672- if isinstance (result , list ):
3673- result = Index (sorted (result ))
3674- return result
3675-
3676- indexes , kind = _sanitize_and_check (indexes )
3677-
3678- if kind == 'special' :
3679- result = indexes [0 ]
3680- for other in indexes [1 :]:
3681- result = result .union (other )
3682- return result
3683- elif kind == 'array' :
3684- index = indexes [0 ]
3685- for other in indexes [1 :]:
3686- if not index .equals (other ):
3687- return Index (lib .fast_unique_multiple (indexes ))
3688-
3689- return index
3690- else :
3691- return Index (lib .fast_unique_multiple_list (indexes ))
3692-
3693-
3694- def _sanitize_and_check (indexes ):
3695- kinds = list (set ([type (index ) for index in indexes ]))
3696-
3697- if list in kinds :
3698- if len (kinds ) > 1 :
3699- indexes = [Index (_try_sort (x )) if not isinstance (x , Index ) else x
3700- for x in indexes ]
3701- kinds .remove (list )
3702- else :
3703- return indexes , 'list'
3704-
3705-
3706- if len (kinds ) > 1 or Index not in kinds :
3707- return indexes , 'special'
3708- else :
3709- return indexes , 'array'
3710-
37113626
37123627def _check_data_types (data ):
37133628 have_raw_arrays = False
0 commit comments