@@ -342,12 +342,15 @@ def create_new_level_and_codes_by_mapping(
342342 """
343343 level_to_map_from_idx = ini .names .index (level_to_create_from )
344344 new_level = ini .levels [level_to_map_from_idx ].map (mapper )
345+ # TODO copy paste this section without the mapping
346+ # fast path is an array from zero to length of index
345347 if not new_level .has_duplicates :
346348 # Fast route, can just return new level and codes from level we mapped from
347349 return new_level , ini .codes [level_to_map_from_idx ]
348350
349351 # Slow route: have to update the codes
350352 dup_level = ini .get_level_values (level_to_create_from ).map (mapper )
353+ # TODO these two steps for slow route
351354 new_level = new_level .unique ()
352355 new_codes = new_level .get_indexer (dup_level ) # type: ignore
353356
@@ -700,7 +703,7 @@ def update_levels_from_other(
700703 level_to_create_from = source ,
701704 mapper = updater ,
702705 )
703-
706+ # TODO copy paste code
704707 if level in ini .names :
705708 level_idx = ini .names .index (level )
706709 levels [level_idx ] = new_level
@@ -716,6 +719,28 @@ def update_levels_from_other(
716719 return res
717720
718721
def create_level_from_collection(
    level: str, value: Collection[Any]
) -> tuple[pd.Index[Any], list[int]]:
    """
    Create a new level and its corresponding codes from a collection of values

    Parameters
    ----------
    level
        Name of the level to create

    value
        Values of the level, one per row of the index the level will join

    Returns
    -------
    :
        The new level (an index of unique values named `level`)
        and the codes which map each element of `value`
        to its position in the new level
    """
    new_level: pd.Index[Any] = pd.Index(value, name=level)
    if not new_level.has_duplicates:
        # Fast route: every value is already unique,
        # so the codes are simply 0, 1, ..., n - 1.
        # Return the index we built (not the raw collection)
        # so callers always receive a named pd.Index.
        return new_level, list(range(len(new_level)))

    # Slow route: collapse to the unique values,
    # then look up where each original value sits in them.
    all_values = new_level
    new_level = all_values.unique()
    new_codes = new_level.get_indexer(all_values)  # type: ignore

    return new_level, new_codes
742+
743+
def set_levels(
    ini: pd.MultiIndex, levels_to_set: dict[str, Any | Collection[Any]]
) -> pd.MultiIndex:
    """
    Set levels of a MultiIndex to the provided values

    Levels that already exist in `ini` are replaced in place,
    levels that do not exist are appended after the existing levels.

    Parameters
    ----------
    ini
        Input MultiIndex

    levels_to_set
        Mapping of level names to the values to set.

        If a value is a (non-string) collection,
        it must have the same length as `ini`
        and supplies one value per row.
        Any other value is treated as a scalar
        and is used for every row.

    Returns
    -------
    :
        New MultiIndex with the levels set

    Raises
    ------
    ValueError
        If the length of the values is not equal to the length of the index
    """
    levels = list(ini.levels)
    codes = list(ini.codes)
    names = list(ini.names)
    n_rows = len(ini)

    for level, value in levels_to_set.items():
        if isinstance(value, Collection) and not isinstance(value, str):
            if len(value) != n_rows:
                msg = (
                    f"Length of values for level '{level}' does not "
                    f"match index length: {len(value)} != {len(ini)}"
                )
                raise ValueError(msg)

            new_level, new_codes = create_level_from_collection(
                level=level,
                value=value,
            )

        else:
            # Scalar: a single-element level which every row points to.
            # Handled per level (rather than rebuilding everything from `ini`)
            # so that levels set in earlier iterations are not discarded.
            new_level = pd.Index([value], name=level)
            new_codes = [0] * n_rows

        if level in names:
            # Replacing an existing level
            level_idx = names.index(level)
            levels[level_idx] = new_level
            codes[level_idx] = new_codes
        else:
            # Adding a new level
            levels.append(new_level)
            codes.append(new_codes)
            names.append(level)

    return pd.MultiIndex(levels=levels, codes=codes, names=names)
768815
769816
770817def set_index_levels (
@@ -792,6 +839,10 @@ def set_index_levels(
792839 :
793840 `df` with updates applied to its index
794841 """
842+ if not isinstance (df .index , pd .MultiIndex ):
843+ msg = f"Expected MultiIndex, got { type (df .index )} "
844+ raise TypeError (msg )
845+
795846 if copy :
796847 df = df .copy ()
797848
0 commit comments