@@ -42,7 +42,7 @@ def convert_unit_from_target_series(
4242 with an index that contains all the rows in `df`.
4343
4444 unit_level
45- Level in `df` which holds unit information
45+ Level in `df`'s index which holds unit information
4646
4747 ur
4848 Unit registry to use for the conversion.
@@ -192,7 +192,7 @@ def convert_unit(
192192 For further details, see examples
193193
194194 unit_level
195- Level in `df` which holds unit information
195+ Level in `df`'s index which holds unit information
196196
197197 Passed to [convert_unit_from_target_series][].
198198
@@ -306,36 +306,167 @@ def convert_unit(
306306 return res
307307
308308
class AmbiguousTargetUnitError(ValueError):
    """
    Error for a `target` whose desired units are ambiguous

    Raised when the `target` provided to `convert_unit_like`
    does not identify a single desired unit.
    """

    def __init__(self, msg: str) -> None:
        """
        Create the error

        Parameters
        ----------
        msg
            Message shown to the user when the error is raised
        """
        # No extra state is stored: the message is simply forwarded to the parent.
        super().__init__(msg)
325+
309326def convert_unit_like (
310327 df : pd .DataFrame ,
311328 target : pd .DataFrame ,
312329 df_unit_level : str = "unit" ,
313330 target_unit_level : str | None = None ,
314331 ur : pint .UnitRegistry | None = None ,
315332) -> pd .DataFrame :
333+ """
334+ Convert units to match another [pd.DataFrame][pandas.DataFrame]
335+
336+ This is essentially a helper function for [convert_unit_from_target_series][].
337+ It implements one set of logic for extracting desired units and tries to be clever,
338+ handling differences in index levels
339+ between `df` and `target` sensibly wherever possible.
340+
341+ If you want behaviour other than what is implemented here,
342+ use [convert_unit_from_target_series][] directly.
343+
344+ Parameters
345+ ----------
346+ df
347+ [pd.DataFrame][pandas.DataFrame] whose units should be converted
348+
349+ target
350+ [pd.DataFrame][pandas.DataFrame] whose units should be matched
351+
352+ df_unit_level
353+ Level in `df`'s index which holds unit information
354+
355+ target_unit_level
356+ Level in `target`'s index which holds unit information
357+
358+ If not supplied, we use `df_unit_level`.
359+
360+ ur
361+ Unit registry to use for the conversion.
362+
363+ Passed to [convert_unit_from_target_series][].
364+
365+ Returns
366+ -------
367+ :
368+ `df` with converted units
369+
370+ Examples
371+ --------
372+ >>> import pandas as pd
373+ >>>
374+ >>> start = pd.DataFrame(
375+ ... [
376+ ... [1010.0, 2010.0, 1150.0],
377+ ... [100.1, 100.3, 99.8],
378+ ... [0.0011, 0.0012, 0.0013],
379+ ... [310_000, 311_000, 310_298],
380+ ... ],
381+ ... columns=[2020, 2030, 2050],
382+ ... index=pd.MultiIndex.from_tuples(
383+ ... (
384+ ... ("sa", "temperature", "mK"),
385+ ... ("sa", "body temperature", "degF"),
386+ ... ("sb", "temperature", "kK"),
387+ ... ("sb", "body temperature", "mK"),
388+ ... ),
389+ ... names=["scenario", "variable", "unit"],
390+ ... ),
391+ ... )
392+ >>>
393+ >>> target = pd.DataFrame(
394+ ... [[1.0, 2.0], [1.1, 1.2]],
395+ ... columns=[1990.0, 2010.0],
396+ ... index=pd.MultiIndex.from_tuples(
397+ ... (
398+ ... ("temperature", "K"),
399+ ... ("body temperature", "degC"),
400+ ... ),
401+ ... names=["variable", "unit"],
402+ ... ),
403+ ... )
404+ >>>
405+ >>> convert_unit_like(start, target)
406+ 2020 2030 2050
407+ scenario variable unit
408+ sa temperature K 1.010000 2.010000 1.150000
409+ body temperature degC 37.833333 37.944444 37.666667
410+ sb temperature K 1.100000 1.200000 1.300000
411+ body temperature degC 36.850000 37.850000 37.148000
412+ """
316413 if target_unit_level is None :
317414 target_unit_level_use = df_unit_level
318415 else :
319416 target_unit_level_use = target_unit_level
320417
321- extra_index_levels_target = target .index .names .difference (df .index .names ) # type: ignore # pandas-stubs confused
322- if extra_index_levels_target :
323- # if extra index levels in target, drop out the extra
324- # and see if the target is clear, if not, raise)
325- raise NotImplementedError
326-
327418 df_units_s = df .index .get_level_values (df_unit_level ).to_series (
328419 index = df .index .droplevel (df_unit_level )
329420 )
330- target_units_s = target .index .get_level_values (target_unit_level_use ).to_series (
331- index = target .index .droplevel (target_unit_level_use )
332- )
421+
422+ extra_index_levels_target = target .index .names .difference (
423+ [* df .index .names , target_unit_level_use ]
424+ ) # type: ignore # pandas-stubs confused
425+ if extra_index_levels_target :
426+ # Drop out the extra levels and see if the intended unit is unambiguous
427+ tmp = target .index .droplevel (extra_index_levels_target ).drop_duplicates ()
428+ target_units_s = tmp .get_level_values (target_unit_level_use ).to_series (
429+ index = tmp .droplevel (target_unit_level_use )
430+ )
431+ ambiguous = target_units_s .index .duplicated (keep = False )
432+ if ambiguous .any ():
433+ ambiguous_idx = target_units_s [ambiguous ].index
434+ if not isinstance (ambiguous_idx , pd .MultiIndex ):
435+ ambiguous_idx = pd .MultiIndex .from_arrays (
436+ [ambiguous_idx .values ], names = [ambiguous_idx .name ]
437+ )
438+
439+ ambiguous_idx = ambiguous_idx .remove_unused_levels ()
440+ ambiguous_drivers = target .index [
441+ multi_index_match (target .index , ambiguous_idx )
442+ ]
443+
444+ msg = (
445+ f"`df` has { df .index .names = } . "
446+ f"`target` has { target .index .names = } . "
447+ "The index levels in `target` that are also in `df` are "
448+ f"{ target_units_s .index .names } . "
449+ "When we only look at these levels, the desired unit looks like:\n "
450+ f"{ target_units_s } \n "
451+ "The unit to use isn't unambiguous for the following metadata:\n "
452+ f"{ target_units_s [ambiguous ]} \n "
453+ "The drivers of this ambiguity "
454+ "are the following metadata levels in `target`\n "
455+ f"{ ambiguous_drivers } "
456+ )
457+ raise AmbiguousTargetUnitError (msg )
458+
459+ else :
460+ target_units_s = target .index .get_level_values (target_unit_level_use ).to_series (
461+ index = target .index .droplevel (target_unit_level_use )
462+ )
333463
334464 target_units_s , _ = target_units_s .align (df_units_s )
335- # # Line below should handle missing specs
336- # target_units_s = multi_index_lookup(target_units_s, df_units_s.index).fillna(
337- # df_units_s
338- # )
465+ if target_units_s .isnull ().any ():
466+ # Fill rows that don't get a spec with their existing units
467+ target_units_s = multi_index_lookup (target_units_s , df_units_s .index ).fillna (
468+ df_units_s
469+ )
339470
340471 res = convert_unit_from_target_series (
341472 df = df , desired_unit = target_units_s , unit_level = df_unit_level , ur = ur
0 commit comments