Fix conflicts and merge
blimlim committed Oct 10, 2024
2 parents 43836d2 + 656d699 commit 1c6619f
Showing 2 changed files with 101 additions and 10 deletions.
33 changes: 33 additions & 0 deletions test/test_um2netcdf.py
@@ -18,6 +18,8 @@
import iris.coords
import iris.exceptions

from netCDF4 import default_fillvals

D_LAT_N96 = 1.25 # Degrees between latitude points on N96 grid
D_LON_N96 = 1.875 # Degrees between longitude points on N96 grid

@@ -1231,3 +1233,34 @@ def test_fix_forecast_reference_time_proleptic_gregorian(forecast_cube,
time_coord):
msg = "Is time.units.calendar == 'proleptic_gregorian' branch & testing required?"
raise NotImplementedError(msg)
@pytest.mark.parametrize(
"cube_data, expected_fill_val",
[
(np.array([1.1, 2.1], dtype="float32"),
np.float32(um2nc.DEFAULT_FILL_VAL_FLOAT)),
(np.array([1.1, 2.1], dtype="float64"),
np.float64(um2nc.DEFAULT_FILL_VAL_FLOAT)),
(np.array([1.1, 2.1], dtype="complex64"),
np.complex64(default_fillvals["c8"])),
(np.array([1, 2], dtype="int32"),
np.int32(default_fillvals["i4"])),
(np.array([1, 2], dtype="int64"),
np.int64(default_fillvals["i8"]))
]
)
def test_fix_fill_value_defaults(cube_data, expected_fill_val):
"""
Check that correct default fill values are found based
on a cube's data's type.
"""
fake_cube = DummyCube(12345, "fake_var", attributes={})
fake_cube.data = cube_data

fill_value = um2nc.fix_fill_value(fake_cube)

assert fill_value == expected_fill_val
# Check new fill value type matches cube's data's type
assert fill_value.dtype == cube_data.dtype

# Check that missing value attribute set to expected fill_value
assert fake_cube.attributes["missing_value"][0] == expected_fill_val
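
For orientation on the parametrized expectations above: the non-float cases come straight from netCDF4's table of default fill values, keyed by the dtype's kind character plus its size in bytes. A minimal standalone sketch of that lookup (plain NumPy and netCDF4 only; the "c8" complex key assumes a netCDF4 build with complex-number support, as the test itself does):

import numpy as np
from netCDF4 import default_fillvals

for name in ("int32", "int64", "complex64"):
    dt = np.dtype(name)
    key = f"{dt.kind}{dt.itemsize}"      # "i4", "i8", "c8"
    print(name, "->", key, "->", default_fillvals[key])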
78 changes: 68 additions & 10 deletions umpost/um2netcdf.py
@@ -22,7 +22,7 @@
import numpy as np
import cftime
import cf_units
-from netCDF4 import default_fillvals
+import netCDF4

import iris.util
import iris.exceptions
@@ -83,6 +83,8 @@
FORECAST_PERIOD = "forecast_period"
TIME = "time"

DEFAULT_FILL_VAL_FLOAT = 1.e20


class PostProcessingError(Exception):
"""Generic class for um2nc specific errors."""
@@ -304,6 +306,70 @@ def fix_latlon_coords(cube, grid_type, dlat, dlon):
fix_lon_coord_name(longitude_coordinate, grid_type, dlon)


def get_default_fill_value(cube):
"""
Get the default fill value for a cube's data's type.

Parameters
----------
cube: Iris cube object.

Returns
-------
fill_value: numpy scalar with type matching cube.data
"""
if cube.data.dtype.kind == 'f':
fill_value = DEFAULT_FILL_VAL_FLOAT

else:
fill_value = netCDF4.default_fillvals[
f"{cube.data.dtype.kind:s}{cube.data.dtype.itemsize:1d}"
]

# NB: the `_FillValue` attribute appears to be converted to match the
# cube data's type externally (likely in the netCDF4 library). It's not
# strictly necessary to do the conversion here. However, we need
# the separate `missing_value` attribute to have the correct type
# and so it's cleaner to set the type for both here.

# NB: The following type conversion approach is similar to the netCDF4
# package:
# https://github.com/Unidata/netcdf4-python/blob/5ccb3bb67ebf2cc803744707ff654b17ac506d99/src/netCDF4/_netCDF4.pyx#L4413
# Update if a cleaner method is found.
return np.array([fill_value], dtype=cube.data.dtype)[0]
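
The one-element-array indexing in the return line above is the dtype-pinning trick: it turns the Python-level fill value into a NumPy scalar whose dtype matches the cube data. A tiny sketch of the behaviour it relies on (plain NumPy, illustrative values only):

import numpy as np

fill = 1.e20                                    # Python float (64-bit)
f32_fill = np.array([fill], dtype="float32")[0]
print(type(fill), type(f32_fill))               # <class 'float'> <class 'numpy.float32'>
print(f32_fill.dtype)                           # float32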


def fix_fill_value(cube, custom_fill_value=None):
"""
Set a cube's missing_value attribute and return the value
for later use.

Parameters
----------
cube: Iris cube object (modified in place).
custom_fill_value: Fill value to use in place of
    defaults (not implemented).

Returns
-------
fill_value: Fill value to use when writing to netCDF.
"""
if custom_fill_value is not None:
msg = "Custom fill values are not currently implemented."
raise NotImplementedError(msg)

# NB: If custom fill values are added, we should check that
# their type matches the cube.data.dtype

fill_value = get_default_fill_value(cube)

# TODO: Is placing the fill value in an array still necessary,
# given the type conversion in get_default_fill_value()?
cube.attributes['missing_value'] = np.array([fill_value],
                                            cube.data.dtype)
return fill_value
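
A minimal usage sketch for the new function, on a real in-memory iris cube rather than the DummyCube test fixture (the umpost.um2netcdf import path is assumed from the file layout in this commit):

import numpy as np
import iris.cube
import umpost.um2netcdf as um2nc

cube = iris.cube.Cube(np.array([1.0, 2.0], dtype="float32"), var_name="tas")
fill_value = um2nc.fix_fill_value(cube)    # sets cube.attributes['missing_value']
print(fill_value, fill_value.dtype)        # 1e+20 float32
print(cube.attributes['missing_value'])    # [1.e+20]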


# TODO: split cube ops into functions, this will likely increase process() workflow steps
def cubewrite(cube, sman, compression, use64bit, verbose):
# TODO: move into process() AND if a new cube is returned, swap into filtered cube list
@@ -313,15 +379,7 @@ def cubewrite(cube, sman, compression, use64bit, verbose):
if not use64bit:
convert_32_bit(cube)

-# Set the missing_value attribute. Use an array to force the type to match
-# the data type
-if cube.data.dtype.kind == 'f':
-    fill_value = 1.e20
-else:
-    # Use netCDF defaults
-    fill_value = default_fillvals['%s%1d' % (cube.data.dtype.kind, cube.data.dtype.itemsize)]
-
-cube.attributes['missing_value'] = np.array([fill_value], cube.data.dtype)
+fill_value = fix_fill_value(cube)

fix_forecast_reference_time(cube)

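The consumer of the returned fill_value sits further down cubewrite, outside the lines shown in this diff. For context only, here is how a fill value is typically consumed when writing with the netCDF4 library directly — a hedged sketch, not the project's actual writer (its sman wrapper is not shown here):

import numpy as np
from netCDF4 import Dataset

fill_value = np.float32(1.e20)             # as returned by fix_fill_value for float32 data
with Dataset("example.nc", "w") as ds:
    ds.createDimension("x", 2)
    var = ds.createVariable("tas", "f4", ("x",), fill_value=fill_value)  # sets _FillValue
    var.missing_value = fill_value         # mirrors the cube attribute set above
    var[:] = np.ma.masked_array([1.0, 2.0], mask=[False, True])  # masked element -> fill value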
