Skip to content

Commit fef81bd

Browse files
committed
works but needs clean up
1 parent 8ffae3c commit fef81bd

File tree

1 file changed

+61
-10
lines changed

1 file changed

+61
-10
lines changed

src/pandas_openscm/index_manipulation.py

Lines changed: 61 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -342,12 +342,15 @@ def create_new_level_and_codes_by_mapping(
342342
"""
343343
level_to_map_from_idx = ini.names.index(level_to_create_from)
344344
new_level = ini.levels[level_to_map_from_idx].map(mapper)
345+
# TODO copy paste this section without the mapping
346+
# fast path is an array from zero to length of index
345347
if not new_level.has_duplicates:
346348
# Fast route, can just return new level and codes from level we mapped from
347349
return new_level, ini.codes[level_to_map_from_idx]
348350

349351
# Slow route: have to update the codes
350352
dup_level = ini.get_level_values(level_to_create_from).map(mapper)
353+
# TODO these two steps for slow route
351354
new_level = new_level.unique()
352355
new_codes = new_level.get_indexer(dup_level) # type: ignore
353356

@@ -700,7 +703,7 @@ def update_levels_from_other(
700703
level_to_create_from=source,
701704
mapper=updater,
702705
)
703-
706+
# TODO copy paste code
704707
if level in ini.names:
705708
level_idx = ini.names.index(level)
706709
levels[level_idx] = new_level
@@ -716,6 +719,28 @@ def update_levels_from_other(
716719
return res
717720

718721

722+
def create_level_from_collection(
    level: str, value: Collection[Any]
) -> tuple[pd.Index[Any], list[int] | npt.NDArray[np.integer[Any]]]:
    """
    Create a new level and corresponding codes from a collection of values

    The result is suitable for use as one entry of the `levels` and `codes`
    arguments of [pd.MultiIndex][pandas.MultiIndex].

    Parameters
    ----------
    level
        Name of the level to create

    value
        Values of the level, one per row of the index the level will belong to

    Returns
    -------
    :
        New level (unique values only, named `level`)
        and the codes which map each element of `value`
        to its position in the new level
    """
    all_values = pd.Index(value, name=level)

    if not all_values.has_duplicates:
        # Fast route: every value is already unique,
        # so the values are the level
        # and the codes simply count up from zero.
        return all_values, list(range(len(all_values)))

    # Slow route: collapse the values to the unique set,
    # then look up each original value's position in that set.
    # Look up using the index we already built (rather than the raw `value`)
    # so both sides of the lookup have gone through the same conversion.
    new_level = all_values.unique()
    new_codes = new_level.get_indexer(all_values)

    return new_level, new_codes
742+
743+
719744
def set_levels(
720745
ini: pd.MultiIndex, levels_to_set: dict[str, Any | Collection[Any]]
721746
) -> pd.MultiIndex:
@@ -744,27 +769,49 @@ def set_levels(
744769
ValueError
745770
If the length of the values is not equal to the length of the index
746771
"""
747-
# TODO mypy says this is unreachable
748-
if not isinstance(ini, pd.MultiIndex):
749-
msg = f"Expected MultiIndex, got {type(ini)}"
750-
raise TypeError(msg)
772+
levels: list[pd.Index[Any]] = list(ini.levels)
773+
codes: list[list[int] | npt.NDArray[np.integer[Any]]] = list(ini.codes)
774+
names: list[str] = list(ini.names)
751775

752-
df = ini.to_frame(index=False)
776+
# TODO don't define a variable here, we need it only once
777+
new_names = levels_to_set.keys() # the names for the new levels
778+
new_values = levels_to_set.values() # the values for the new levels
753779

754780
for level, value in levels_to_set.items():
755-
# TODO do we need the isinstance check for strings here?
756781
if isinstance(value, Collection) and not isinstance(value, str):
757782
if len(value) != len(ini):
758783
msg = (
759784
f"Length of values for level '{level}' does not "
760785
f"match index length: {len(value)} != {len(ini)}"
761786
)
762787
raise ValueError(msg)
763-
df[level] = value
788+
789+
new_level, new_codes = create_level_from_collection(
790+
level=level,
791+
value=value,
792+
)
793+
794+
# Are we replacing?
795+
if level in ini.names:
796+
level_idx = ini.names.index(level)
797+
levels[level_idx] = new_level
798+
codes[level_idx] = new_codes
799+
else:
800+
levels.append(new_level)
801+
codes.append(new_codes)
802+
names.append(level)
803+
764804
else:
765-
df[level] = [value] * len(ini)
805+
codes = [
806+
*ini.codes, # type: ignore # not sure why check above isn't working
807+
*([[0] * ini.shape[0]] * len(new_values)), # type: ignore # fix when moving to pandas-openscm
808+
]
809+
levels = [*ini.levels, *[pd.Index([value]) for value in new_values]] # type: ignore # fix when moving to pandas-openscm
810+
names = [*ini.names, *new_names] # type: ignore # fix when moving to pandas-openscm
766811

767-
return pd.MultiIndex.from_frame(df)
812+
res = pd.MultiIndex(levels=levels, codes=codes, names=names)
813+
814+
return res
768815

769816

770817
def set_index_levels(
@@ -792,6 +839,10 @@ def set_index_levels(
792839
:
793840
`df` with updates applied to its index
794841
"""
842+
if not isinstance(df.index, pd.MultiIndex):
843+
msg = f"Expected MultiIndex, got {type(df.index)}"
844+
raise TypeError(msg)
845+
795846
if copy:
796847
df = df.copy()
797848

0 commit comments

Comments
 (0)