Skip to content

Commit a8c0bde

Browse files
committed
Finish convert-unit_like tests and no-op tests
1 parent 084039e commit a8c0bde

File tree

2 files changed

+423
-72
lines changed

2 files changed

+423
-72
lines changed

src/pandas_openscm/unit_conversion.py

Lines changed: 146 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ def convert_unit_from_target_series(
4242
with an index that contains all the rows in `df`.
4343
4444
unit_level
45-
Level in `df` which holds unit information
45+
Level in `df`'s index which holds unit information
4646
4747
ur
4848
Unit registry to use for the conversion.
@@ -192,7 +192,7 @@ def convert_unit(
192192
For further details, see examples
193193
194194
unit_level
195-
Level in `df` which holds unit information
195+
Level in `df`'s index which holds unit information
196196
197197
Passed to [convert_unit_from_target_series][].
198198
@@ -306,36 +306,167 @@ def convert_unit(
306306
return res
307307

308308

309+
class AmbiguousTargetUnitError(ValueError):
    """
    Raised when `target` provided to `convert_unit_like` gives ambiguous desired units

    This is a subclass of [ValueError][],
    so handlers written for [ValueError][] will also catch it.
    """

    def __init__(self, msg: str) -> None:
        """
        Initialise the error

        Parameters
        ----------
        msg
            Message to provide to the user
        """
        # The message is passed through untouched;
        # composing it is left to the code that raises the error.
        super().__init__(msg)
324+
325+
309326
def convert_unit_like(
310327
df: pd.DataFrame,
311328
target: pd.DataFrame,
312329
df_unit_level: str = "unit",
313330
target_unit_level: str | None = None,
314331
ur: pint.UnitRegistry | None = None,
315332
) -> pd.DataFrame:
333+
"""
334+
Convert units to match another [pd.DataFrame][pandas.DataFrame]
335+
336+
This is essentially a helper function for [convert_unit_from_target_series][].
337+
It implements one set of logic for extracting desired units and tries to be clever,
338+
handling differences in index levels
339+
between `df` and `target` sensibly wherever possible.
340+
341+
If you want behaviour other than what is implemented here,
342+
use [convert_unit_from_target_series][] directly.
343+
344+
Parameters
345+
----------
346+
df
347+
[pd.DataFrame][pandas.DataFrame] whose units should be converted
348+
349+
target
350+
[pd.DataFrame][pandas.DataFrame] whose units should be matched
351+
352+
df_unit_level
353+
Level in `df`'s index which holds unit information
354+
355+
target_unit_level
356+
Level in `target`'s index which holds unit information
357+
358+
If not supplied, we use `df_unit_level`.
359+
360+
ur
361+
Unit registry to use for the conversion.
362+
363+
Passed to [convert_unit_from_target_series][].
364+
365+
Returns
366+
-------
367+
:
368+
`df` with converted units
369+
370+
Examples
371+
--------
372+
>>> import pandas as pd
373+
>>>
374+
>>> start = pd.DataFrame(
375+
... [
376+
... [1010.0, 2010.0, 1150.0],
377+
... [100.1, 100.3, 99.8],
378+
... [0.0011, 0.0012, 0.0013],
379+
... [310_000, 311_000, 310_298],
380+
... ],
381+
... columns=[2020, 2030, 2050],
382+
... index=pd.MultiIndex.from_tuples(
383+
... (
384+
... ("sa", "temperature", "mK"),
385+
... ("sa", "body temperature", "degF"),
386+
... ("sb", "temperature", "kK"),
387+
... ("sb", "body temperature", "mK"),
388+
... ),
389+
... names=["scenario", "variable", "unit"],
390+
... ),
391+
... )
392+
>>>
393+
>>> target = pd.DataFrame(
394+
... [[1.0, 2.0], [1.1, 1.2]],
395+
... columns=[1990.0, 2010.0],
396+
... index=pd.MultiIndex.from_tuples(
397+
... (
398+
... ("temperature", "K"),
399+
... ("body temperature", "degC"),
400+
... ),
401+
... names=["variable", "unit"],
402+
... ),
403+
... )
404+
>>>
405+
>>> convert_unit_like(start, target)
406+
2020 2030 2050
407+
scenario variable unit
408+
sa temperature K 1.010000 2.010000 1.150000
409+
body temperature degC 37.833333 37.944444 37.666667
410+
sb temperature K 1.100000 1.200000 1.300000
411+
body temperature degC 36.850000 37.850000 37.148000
412+
"""
316413
if target_unit_level is None:
317414
target_unit_level_use = df_unit_level
318415
else:
319416
target_unit_level_use = target_unit_level
320417

321-
extra_index_levels_target = target.index.names.difference(df.index.names) # type: ignore # pandas-stubs confused
322-
if extra_index_levels_target:
323-
# if extra index levels in target, drop out the extra
324-
# and see if the target is clear, if not, raise)
325-
raise NotImplementedError
326-
327418
df_units_s = df.index.get_level_values(df_unit_level).to_series(
328419
index=df.index.droplevel(df_unit_level)
329420
)
330-
target_units_s = target.index.get_level_values(target_unit_level_use).to_series(
331-
index=target.index.droplevel(target_unit_level_use)
332-
)
421+
422+
extra_index_levels_target = target.index.names.difference(
423+
[*df.index.names, target_unit_level_use]
424+
) # type: ignore # pandas-stubs confused
425+
if extra_index_levels_target:
426+
# Drop out the extra levels and see if the intended unit is unambiguous
427+
tmp = target.index.droplevel(extra_index_levels_target).drop_duplicates()
428+
target_units_s = tmp.get_level_values(target_unit_level_use).to_series(
429+
index=tmp.droplevel(target_unit_level_use)
430+
)
431+
ambiguous = target_units_s.index.duplicated(keep=False)
432+
if ambiguous.any():
433+
ambiguous_idx = target_units_s[ambiguous].index
434+
if not isinstance(ambiguous_idx, pd.MultiIndex):
435+
ambiguous_idx = pd.MultiIndex.from_arrays(
436+
[ambiguous_idx.values], names=[ambiguous_idx.name]
437+
)
438+
439+
ambiguous_idx = ambiguous_idx.remove_unused_levels()
440+
ambiguous_drivers = target.index[
441+
multi_index_match(target.index, ambiguous_idx)
442+
]
443+
444+
msg = (
445+
f"`df` has {df.index.names=}. "
446+
f"`target` has {target.index.names=}. "
447+
"The index levels in `target` that are also in `df` are "
448+
f"{target_units_s.index.names}. "
449+
"When we only look at these levels, the desired unit looks like:\n"
450+
f"{target_units_s}\n"
451+
"The unit to use isn't unambiguous for the following metadata:\n"
452+
f"{target_units_s[ambiguous]}\n"
453+
"The drivers of this ambiguity "
454+
"are the following metadata levels in `target`\n"
455+
f"{ambiguous_drivers}"
456+
)
457+
raise AmbiguousTargetUnitError(msg)
458+
459+
else:
460+
target_units_s = target.index.get_level_values(target_unit_level_use).to_series(
461+
index=target.index.droplevel(target_unit_level_use)
462+
)
333463

334464
target_units_s, _ = target_units_s.align(df_units_s)
335-
# # Line below should handle missing specs
336-
# target_units_s = multi_index_lookup(target_units_s, df_units_s.index).fillna(
337-
# df_units_s
338-
# )
465+
if target_units_s.isnull().any():
466+
# Fill rows that don't get a spec with their existing units
467+
target_units_s = multi_index_lookup(target_units_s, df_units_s.index).fillna(
468+
df_units_s
469+
)
339470

340471
res = convert_unit_from_target_series(
341472
df=df, desired_unit=target_units_s, unit_level=df_unit_level, ur=ur

0 commit comments

Comments
 (0)