Skip to content

Commit a8384fb

Browse files
committed
Finish off testing
1 parent e419123 commit a8384fb

File tree

2 files changed

+219
-14
lines changed

2 files changed

+219
-14
lines changed

src/pandas_openscm/index_manipulation.py

Lines changed: 126 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,53 @@ def create_new_level_and_codes_by_mapping(
405405
return new_level, new_codes
406406

407407

408+
def create_new_level_and_codes_by_mapping_multiple(
409+
ini: pd.MultiIndex,
410+
levels_to_create_from: tuple[str, ...],
411+
mapper: Callable[[Any], Any] | dict[Any, Any] | pd.Series[Any],
412+
) -> tuple[pd.Index[Any], npt.NDArray[np.integer[Any]]]:
413+
"""
414+
Create a new level and associated codes by mapping existing levels
415+
416+
This is a thin function intended for internal use
417+
to handle some slightly tricky logic.
418+
419+
Parameters
420+
----------
421+
ini
422+
Input index
423+
424+
levels_to_create_from
425+
Levels to create the new level from (values are passed to `mapper` in the order the levels appear in `ini`, not the order given here)
426+
427+
mapper
428+
Function to use to map existing levels to new levels
429+
430+
Returns
431+
-------
432+
new_level :
433+
New level
434+
435+
new_codes :
436+
New codes
437+
"""
438+
# You could probably do some optimisation here
439+
# that checks for unique combinations of codes
440+
# for the levels we're using,
441+
# then only applies the mapping to those unique combos
442+
# to reduce the number of evaluations of mapper.
443+
# That feels tricky to get right, so just doing the brute force way for now.
444+
dup_level = ini.droplevel(
445+
ini.names.difference(list(levels_to_create_from)) # type: ignore # pandas-stubs confused
446+
).map(mapper)
447+
448+
# Brute force: get codes from new levels
449+
new_level = dup_level.unique()
450+
new_codes = new_level.get_indexer(dup_level)
451+
452+
return new_level, new_codes
453+
454+
408455
def update_index_levels_func(
409456
df: pd.DataFrame,
410457
updates: Mapping[Any, Callable[[Any], Any] | dict[Any, Any] | pd.Series[Any]],
@@ -629,7 +676,11 @@ def update_index_levels_from_other_func(
629676
def update_levels_from_other(
630677
ini: pd.MultiIndex,
631678
update_sources: dict[
632-
Any, tuple[Any, Callable[[Any], Any] | dict[Any, Any] | pd.Series[Any]]
679+
Any,
680+
tuple[
681+
Any | tuple[Any, ...],
682+
Callable[[Any], Any] | dict[Any, Any] | pd.Series[Any],
683+
],
633684
],
634685
remove_unused_levels: bool = True,
635686
) -> pd.MultiIndex:
@@ -650,13 +701,24 @@ def update_levels_from_other(
650701
Each key is the level to which the updates will be applied
651702
(or the level that will be created if it doesn't already exist).
652703
653-
Each value is a tuple of which the first element
704+
There are two options for the values.
705+
706+
The first is used when only one level is used to update the 'target level'.
707+
In this case, each value is a tuple of which the first element
654708
is the level to use to generate the values (the 'source level')
655709
and the second is mapper of the form used by
656710
[pd.Index.map][pandas.Index.map]
657711
which will be applied to the source level
658712
to update/create the level of interest.
659713
714+
The second is used when a tuple of levels is used to update the 'target level'. In this case, each value is a tuple of which the first element
715+
is the level or levels (if a tuple)
716+
to use to generate the values (the 'source level')
717+
and the second is mapper of the form used by
718+
[pd.Index.map][pandas.Index.map]
719+
which will be applied to the source level
720+
to update/create the level of interest.
721+
660722
remove_unused_levels
661723
Call `ini.remove_unused_levels` before updating the levels
662724
@@ -718,6 +780,19 @@ def update_levels_from_other(
718780
('sa', 'model sa', 'v2', 'km')],
719781
names=['scenario', 'model', 'variable', 'unit'])
720782
>>>
783+
>>> # Create a new level based on multiple existing levels
784+
>>> update_levels_from_other(
785+
... start,
786+
... {
787+
... "model || scenario": (("model", "scenario"), lambda x: " || ".join(x)),
788+
... },
789+
... )
790+
MultiIndex([('sa', 'ma', 'v1', 'kg', 'sa || ma'),
791+
('sb', 'ma', 'v2', 'm', 'sb || ma'),
792+
('sa', 'mb', 'v1', 'kg', 'sa || mb'),
793+
('sa', 'mb', 'v2', 'm', 'sa || mb')],
794+
names=['scenario', 'model', 'variable', 'unit', 'model || scenario'])
795+
>>>
721796
>>> # Both at the same time
722797
>>> update_levels_from_other(
723798
... start,
@@ -731,7 +806,28 @@ def update_levels_from_other(
731806
('sa', 'mb', 'v1', nan, 'Sa'),
732807
('sa', 'mb', 'v2', nan, 'Sa')],
733808
names=['scenario', 'model', 'variable', 'unit', 'title'])
734-
"""
809+
>>>
810+
>>> # Setting with a range of different methods
811+
>>> update_levels_from_other(
812+
... start,
813+
... {
814+
... # callable
815+
... "y-label": (("variable", "unit"), lambda x: f"{x[0]} ({x[1]})"),
816+
... # dict
817+
... "title": ("scenario", {"sa": "Scenario A", "sb": "Delta"}),
818+
... # pd.Series
819+
... "Source": (
820+
... "model",
821+
... pd.Series(["Internal", "External"], index=["ma", "mb"]),
822+
... ),
823+
... },
824+
... )
825+
MultiIndex([('sa', 'ma', 'v1', 'kg', 'v1 (kg)', 'Scenario A', 'Internal'),
826+
('sb', 'ma', 'v2', 'm', 'v2 (m)', 'Delta', 'Internal'),
827+
('sa', 'mb', 'v1', 'kg', 'v1 (kg)', 'Scenario A', 'External'),
828+
('sa', 'mb', 'v2', 'm', 'v2 (m)', 'Scenario A', 'External')],
829+
names=['scenario', 'model', 'variable', 'unit', 'y-label', 'title', 'Source'])
830+
""" # noqa: E501
735831
if remove_unused_levels:
736832
ini = ini.remove_unused_levels() # type: ignore
737833

@@ -740,17 +836,35 @@ def update_levels_from_other(
740836
names: list[str] = list(ini.names)
741837

742838
for level, (source, updater) in update_sources.items():
743-
if source not in ini.names:
744-
msg = (
745-
f"{source} is not available in the index. Available levels: {ini.names}"
839+
if isinstance(source, tuple):
840+
missing_levels = set(source) - set(ini.names)
841+
if missing_levels:
842+
conj = "is" if len(missing_levels) == 1 else "are"
843+
msg = (
844+
f"{missing_levels} {conj} not available in the index. "
845+
f"Available levels: {ini.names}"
846+
)
847+
raise KeyError(msg)
848+
849+
new_level, new_codes = create_new_level_and_codes_by_mapping_multiple(
850+
ini=ini,
851+
levels_to_create_from=source,
852+
mapper=updater,
746853
)
747-
raise KeyError(msg)
748854

749-
new_level, new_codes = create_new_level_and_codes_by_mapping(
750-
ini=ini,
751-
level_to_create_from=source,
752-
mapper=updater,
753-
)
855+
else:
856+
if source not in ini.names:
857+
msg = (
858+
f"{source} is not available in the index. "
859+
f"Available levels: {ini.names}"
860+
)
861+
raise KeyError(msg)
862+
863+
new_level, new_codes = create_new_level_and_codes_by_mapping(
864+
ini=ini,
865+
level_to_create_from=source,
866+
mapper=updater,
867+
)
754868

755869
if level in ini.names:
756870
level_idx = ini.names.index(level)

tests/integration/index_manipulation/test_integration_index_manipulation_update_levels_from_other.py

Lines changed: 93 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,14 +143,62 @@
143143
},
144144
id="multiple-updates-incl-external-func",
145145
),
146+
pytest.param(
147+
pd.MultiIndex.from_tuples(
148+
[
149+
("sa", "va", "kg", 0),
150+
("sb", "vb", "m", -1),
151+
("sa", "va", "kg", -2),
152+
("sa", "vb", "kg", 2),
153+
],
154+
names=["scenario", "variable", "unit", "run_id"],
155+
),
156+
{
157+
"vv": (("scenario", "variable"), lambda x: " - ".join(x)),
158+
"sv": (
159+
("scenario", "variable"),
160+
{
161+
("sa", "va"): "hi",
162+
("sb", "vb"): "bye",
163+
("sa", "vb"): "psi",
164+
},
165+
),
166+
"su": (
167+
("scenario", "unit"),
168+
pd.Series(
169+
["alpha", "beta"],
170+
index=pd.MultiIndex.from_tuples(
171+
[
172+
("sa", "kg"),
173+
("sb", "m"),
174+
],
175+
names=["scenario", "unit"],
176+
),
177+
),
178+
),
179+
"unit": ("unit", lambda x: x.replace("kg", "g").replace("m", "km")),
180+
"u_run_id_abs": (
181+
("unit", "run_id"),
182+
lambda x: f"{x[0]}_{np.abs(x[1])}",
183+
),
184+
},
185+
id="multiple-updates-multiple-sources-incl-dict-series-external-func",
186+
),
146187
),
147188
)
148189
def test_update_levels_from_other(start, update_sources):
149190
res = update_levels_from_other(start, update_sources=update_sources)
150191

151-
exp = start.to_frame(index=False)
192+
# Need this so the order of updates doesn't matter
193+
helper = start.to_frame(index=False)
194+
exp = helper.copy()
152195
for level, (source, mapper) in update_sources.items():
153-
exp[level] = exp[source].map(mapper)
196+
if isinstance(source, tuple):
197+
exp[level] = pd.MultiIndex.from_frame(helper[list(source)]).map(mapper)
198+
199+
else:
200+
exp[level] = helper[source].map(mapper)
201+
154202
exp = pd.MultiIndex.from_frame(exp)
155203

156204
pd.testing.assert_index_equal(res, exp)
@@ -181,6 +229,49 @@ def test_update_levels_from_other_missing_level():
181229
update_levels_from_other(start, update_sources=update_sources)
182230

183231

232+
@pytest.mark.parametrize(
233+
"sources, exp",
234+
(
235+
(
236+
("units", "variable"),
237+
pytest.raises(
238+
KeyError,
239+
match=re.escape(
240+
f"{set(['units'])} is not available in the index. "
241+
f"Available levels: {['scenario', 'variable', 'unit', 'run_id']}"
242+
),
243+
),
244+
),
245+
(
246+
("units", "variables"),
247+
pytest.raises(
248+
KeyError,
249+
match=re.escape(
250+
f"{set(['units', 'variables'])} are not available in the index. "
251+
f"Available levels: {['scenario', 'variable', 'unit', 'run_id']}"
252+
),
253+
),
254+
),
255+
),
256+
)
257+
def test_update_levels_from_other_missing_levels(sources, exp):
258+
start = pd.MultiIndex.from_tuples(
259+
[
260+
("sa", "va", "kg", 0),
261+
("sb", "vb", "m", -1),
262+
("sa", "va", "kg", -2),
263+
("sa", "vb", "kg", 2),
264+
],
265+
names=["scenario", "variable", "unit", "run_id"],
266+
)
267+
update_sources = {
268+
"uu": (sources, lambda x: x),
269+
}
270+
271+
with exp:
272+
update_levels_from_other(start, update_sources=update_sources)
273+
274+
184275
def test_doesnt_trip_over_droped_levels(setup_pandas_accessors):
185276
def update_func(in_v: int) -> int:
186277
if in_v < 0:

0 commit comments

Comments
 (0)