
Commit

IMPRO-2090 metadata tweaks (metoppv#1454)
* Updates standardise

- acceptance test adds missing institution and title attributes to the metadata test
- plugin removes a specific meaningless cell method (point: time)
- CLI docstring updated

* Updates generate-metadata

- mandatory attributes are set to default values if not specified
- unit and acceptance tests updated

* Improves standardise testing coverage for cell methods

* Second review changes:

- rename method
- improve docstring

* Makes setting the mandatory attributes a requirement, rather than falling back to default values.

- New compulsory argument to CLI and plugin
- Raises an error if any of the mandatory attributes are not set
- Updates tests and adds test for raising this error

* Prevents change to mandatory attribute defaults

* Corrects test to pass for the right reason

* Prevents the keyword argument `attributes` from being modified by the plugin
MoseleyS authored May 19, 2021
1 parent 800a442 commit 27b8d27
Showing 9 changed files with 194 additions and 82 deletions.
10 changes: 7 additions & 3 deletions improver/cli/generate_metadata_cube.py
@@ -37,6 +37,7 @@
@cli.clizefy
@cli.with_output
def process(
mandatory_attributes_json: cli.inputjson,
*,
name="air_pressure_at_sea_level",
units=None,
@@ -51,6 +52,9 @@ def process(
""" Generate a cube with metadata only.
Args:
mandatory_attributes_json (Dict):
Specifies the values of the mandatory attributes, title, institution and
source.
name (Optional[str]):
Output variable name, or if creating a probability cube the name of the
underlying variable to which the probability field applies.
@@ -89,7 +93,8 @@ def process(
# Set arguments to pass to generate_metadata function and remove json_input for
# processing contents before adding
generate_metadata_args = locals()
generate_metadata_args.pop("json_input", None)
for key in ["mandatory_attributes_json", "json_input"]:
generate_metadata_args.pop(key, None)

from improver.synthetic_data.generate_metadata import generate_metadata
from improver.synthetic_data.utilities import (
@@ -128,5 +133,4 @@ def process(

# Update generate_metadata_args with the json_input data
generate_metadata_args.update(json_input)

return generate_metadata(**generate_metadata_args)
return generate_metadata(mandatory_attributes_json, **generate_metadata_args)
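
The CLI now takes the mandatory-attribute values as a leading JSON input. A minimal sketch of creating such a file and invoking the CLI follows; the file name, attribute values and command line are illustrative assumptions rather than part of this commit.

# Sketch only: writes an illustrative mandatory-attributes JSON file.
import json

mandatory_attributes = {
    "title": "Example metadata cube",      # illustrative value
    "source": "Example modelling system",  # illustrative value
    "institution": "Example institution",  # illustrative value
}
with open("mandatory_attributes.json", "w") as fh:
    json.dump(mandatory_attributes, fh, indent=2)

# Hypothetical invocation, mirroring the acceptance tests further down, where
# the JSON path is the first positional argument:
#   improver generate-metadata-cube mandatory_attributes.json \
#       --name air_temperature --units K --output metadata_cube.nc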
4 changes: 2 additions & 2 deletions improver/cli/standardise.py
@@ -48,8 +48,8 @@ def process(
"""
Standardise a source cube. Available options are renaming, converting units,
updating attributes and removing named scalar coordinates. Remaining scalar
coordinates are collapsed, and data are cast to IMPROVER standard datatypes
and units.
coordinates are collapsed, CellMethod("point": "time") is discarded, and data
are cast to IMPROVER standard datatypes and units.
Deprecated behaviour:
Translates metadata relating to the grid_id attribute from StaGE
2 changes: 1 addition & 1 deletion improver/metadata/constants/attributes.py
@@ -36,4 +36,4 @@
"institution": "unknown",
}

MANDATORY_ATTRIBUTES = ["title", "source", "institution"]
MANDATORY_ATTRIBUTES = [x for x in MANDATORY_ATTRIBUTE_DEFAULTS.keys()]
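
Deriving MANDATORY_ATTRIBUTES from the keys of MANDATORY_ATTRIBUTE_DEFAULTS keeps the two constants in sync. A small sketch of the resulting behaviour, with default values assumed for the two keys that fall outside the visible hunk:

# Sketch only: the "title" and "source" defaults are assumed, as they are not
# shown in the hunk above; only "institution" is visible.
MANDATORY_ATTRIBUTE_DEFAULTS = {
    "title": "unknown",        # assumed default
    "source": "unknown",       # assumed default
    "institution": "unknown",
}
MANDATORY_ATTRIBUTES = [x for x in MANDATORY_ATTRIBUTE_DEFAULTS.keys()]

# Adding a new default automatically makes that attribute mandatory; there is
# no second list to keep up to date.
assert set(MANDATORY_ATTRIBUTES) == set(MANDATORY_ATTRIBUTE_DEFAULTS)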
32 changes: 28 additions & 4 deletions improver/standardise.py
@@ -35,6 +35,8 @@

import iris
import numpy as np
from iris.analysis import Linear, Nearest
from iris.coords import CellMethod
from iris.cube import Cube
from iris.exceptions import CoordinateNotFoundError
from numpy import dtype, ndarray
@@ -124,6 +126,24 @@ def as_correct_dtype(obj: ndarray, required_dtype: dtype) -> ndarray:
coord.bounds = round_close(coord.bounds)
coord.bounds = as_correct_dtype(coord.bounds, req_dtype)

@staticmethod
def _discard_redundant_cell_methods(cube: Cube) -> None:
"""
Removes cell method "point": "time" from cube if present.
"""
if not cube.cell_methods:
return
removable_cms = [
CellMethod(method="point", coords="time"),
]
updated_cms = []
for cm in cube.cell_methods:
if cm in removable_cms:
continue
updated_cms.append(cm)

cube.cell_methods = updated_cms

def process(
self,
cube: Cube,
@@ -134,10 +154,13 @@
) -> Cube:
"""
Perform compulsory and user-configurable metadata adjustments. The
compulsory adjustments are to collapse any scalar dimensions apart from
realization (which is expected always to be a dimension); to cast the cube
data and coordinates into suitable datatypes; and to convert time-related
metadata into the required units.
compulsory adjustments are:
- to collapse any scalar dimensions apart from realization (which is expected
always to be a dimension);
- to cast the cube data and coordinates into suitable datatypes;
- to convert time-related metadata into the required units
- to remove cell method ("point": "time").
Args:
cube:
@@ -165,6 +188,7 @@ def process(
self._remove_scalar_coords(cube, coords_to_remove)
if attributes_dict:
amend_attributes(cube, attributes_dict)
self._discard_redundant_cell_methods(cube)

# this must be done after unit conversion as if the input is an integer
# field, unit conversion outputs the new data as float64
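A minimal sketch of the effect of the new _discard_redundant_cell_methods step, assuming the StandardiseMetadata plugin and the set_up_variable_cube test utility behave as in this repository; the data values and the retained cell method are illustrative.

import numpy as np
from iris.coords import CellMethod

from improver.standardise import StandardiseMetadata
from improver.synthetic_data.set_up_test_cubes import set_up_variable_cube

# Illustrative (realization, y, x) temperature cube.
cube = set_up_variable_cube(np.full((3, 5, 5), 280.0, dtype=np.float32))
cube.cell_methods = (
    CellMethod(method="point", coords="time"),  # redundant; should be dropped
    CellMethod(method="max", coords="time"),    # meaningful; should be kept
)

result = StandardiseMetadata().process(cube)
print(result.cell_methods)  # expected: only the "max" cell method remains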
21 changes: 21 additions & 0 deletions improver/synthetic_data/generate_metadata.py
@@ -39,6 +39,7 @@
from iris.util import squeeze
from numpy import ndarray

from improver.metadata.constants.attributes import MANDATORY_ATTRIBUTES
from improver.synthetic_data.set_up_test_cubes import (
set_up_percentile_cube,
set_up_probability_cube,
@@ -101,6 +102,7 @@ def _create_data_array(


def generate_metadata(
mandatory_attributes: dict,
name: str = "air_pressure_at_sea_level",
units: Optional[str] = None,
time_period: Optional[int] = None,
@@ -114,6 +116,9 @@
""" Generate a cube with metadata only.
Args:
mandatory_attributes:
Specifies the values of the mandatory attributes, title, institution and
source.
name:
Output variable name, or if creating a probability cube the name of the
underlying variable to which the probability field applies.
@@ -140,6 +145,12 @@
**kwargs:
Additional keyword arguments to pass to the required cube setup function.
Raises:
ValueError:
If any options are not supported
KeyError:
If mandatory_attributes does not contain all the required keys
Returns:
Output of set_up_variable_cube(), set_up_percentile_cube() or
set_up_probability_cube()
@@ -193,6 +204,16 @@ def generate_metadata(
data = _create_data_array(
ensemble_members, leading_dimension, npoints, kwargs["height_levels"]
)
missing_mandatory_attributes = MANDATORY_ATTRIBUTES - mandatory_attributes.keys()
if missing_mandatory_attributes:
raise KeyError(
f"No values for these mandatory attributes: {missing_mandatory_attributes}"
)
if "attributes" in kwargs:
kwargs["attributes"] = kwargs["attributes"].copy()
else:
kwargs["attributes"] = {}
kwargs["attributes"].update(mandatory_attributes)

# Set up requested cube
if cube_type == "percentile":
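Two behaviours added in this file can be sketched briefly: the KeyError raised when a mandatory attribute is missing, and the copy that prevents the caller's `attributes` keyword being modified. The attribute values and the extra `field_type` attribute are illustrative assumptions, not taken from this commit.

from improver.synthetic_data.generate_metadata import generate_metadata

# Missing "institution" should raise the new KeyError.
try:
    generate_metadata({"title": "Example", "source": "Example"})
except KeyError as err:
    print(err)  # expected to report the missing mandatory attribute(s)

# The caller's attributes dict is copied before the mandatory attributes are
# merged in, so it is left unchanged.
user_attributes = {"field_type": "example"}  # hypothetical extra attribute
cube = generate_metadata(
    {"title": "Example", "source": "Example", "institution": "Example"},
    attributes=user_attributes,
)
print(user_attributes)  # still only {"field_type": "example"}
print(cube.attributes["institution"])  # "Example"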
23 changes: 12 additions & 11 deletions improver_tests/acceptance/SHA256SUMS
@@ -145,15 +145,16 @@ b25a397ac5ecac536f43c553f17b0bd6b5d2587d4eda8559bf5709e25f2c237b ./fill-radar-h
67be15dad94608df8a6a6c362bf58263a522de6ce501e3e99443b782c083ff76 ./generate-landmask/basic/input.nc
58ab8e2683bd4c2f29a35d32ac5deb7ea2113491ab1513846e37950a74a3d4fd ./generate-landmask/basic/kgo.nc
90b32a6b1a38c81cb3dbc16474c8404b0a77bdbc6c0c3149af54a5823387d579 ./generate-metadata-cube/height_levels.json
aab96e616856e1eb174be5d4e0d46dd0ceaf7764733350b6ce4966a9b89aab41 ./generate-metadata-cube/kgo_default.nc
ef7e08f06e4f7ff9135b3f1bd555f12ab6a8e4ee4eeba14b06c97244ba4fd53d ./generate-metadata-cube/kgo_ensemble_members_all_options.nc
e841e4a2f971209f093b5d5543a9dd9a46dd7e88d5db4aae5c58c436003fb079 ./generate-metadata-cube/kgo_height_levels.nc
07fe8c16751e7854266e47375c5b63be8583bf6841e0428c934d6d5ad92f08e6 ./generate-metadata-cube/kgo_percentile.nc
62612a8cf863a080b8f9e5ca62d4182a3186e5debff629c31925849872244774 ./generate-metadata-cube/kgo_pressure_levels.nc
b65488638fdd95247f6ae04a4203c6c462c32e0bac87df882619d5125fd857b3 ./generate-metadata-cube/kgo_probability.nc
801bbcb93345faab177b94546f2b5fc5f4585e59b10e6e9a680fe1f600b0b906 ./generate-metadata-cube/kgo_realization.nc
9223d25967889c999583533e4994fba1c2b1dfc8030e91b506dc9b72778e5567 ./generate-metadata-cube/kgo_single_height_level.nc
ca37652252852f04bc3016f9c0172cafdebbacabfd8d1a906780b7c4126a8f62 ./generate-metadata-cube/kgo_variable_cube_json_inputs.nc
0926c654b760a63166340cfaca08912e95cd74a5e00714b36f36feb24639c7be ./generate-metadata-cube/kgo_default.nc
a309b380a51a2eefe9192445a882dbe7e67d9e6276fcca6b0ae20cdbf0e82ab0 ./generate-metadata-cube/kgo_ensemble_members_all_options.nc
b120c3eebefe785543955fb623c65fc4f6d857ec34df086a26d50c1c114214af ./generate-metadata-cube/kgo_height_levels.nc
2af4c17ead2eb5337a0689c70565d345f02758638fcfaca7971f49ef0fe4078e ./generate-metadata-cube/kgo_percentile.nc
c6f5cf0683b34a30b9493795837f96dfa9f46f21043460ee4692c4787eff4f99 ./generate-metadata-cube/kgo_pressure_levels.nc
43e6f742d90b72f07176946b82a3918c25fc95595dd688260a03852ecf534fb8 ./generate-metadata-cube/kgo_probability.nc
21e53d7c3fa1987107d39d71193a693499886c0e53b1bb70f3ddcd01e0b66113 ./generate-metadata-cube/kgo_realization.nc
0c26ad17a15dda0d47059365c5d85b2d33949fd805f79b1c3e28d775b9702e34 ./generate-metadata-cube/kgo_single_height_level.nc
66a6342f07f5c3a50819f32ea8945ba94aa2583c6a41f88ac09db4e46eef7e91 ./generate-metadata-cube/kgo_variable_cube_json_inputs.nc
11927ca3fe55f20b9f53240222d209b1f7519f159b889a295de6cf8b726521df ./generate-metadata-cube/mandatory_attributes.json
d712c2afdd378796381c548e6a5d02bb0e9a7c4c4943b54e15cbf0a700a08eeb ./generate-metadata-cube/percentiles.json
c20cf3fee216aaf92506d80a9342fb983372c9b71e0587eeb4f311f7b7697c28 ./generate-metadata-cube/pressure_levels.json
f04e1b0908405a54d83b2f05e8849a56e431282e539b139bc59b61889e91090b ./generate-metadata-cube/realizations.json
@@ -433,9 +434,9 @@ a8aaf5f83b712e1df02573833e81fe4c9278403c8ebdfaba815cd76b9bb7a714 ./spot-extract
b3080315914473c89b814adae8a6e7306afb42248212177702acc86b0f5878ae ./standardise/float64/float64_data.nc
b40913e8086865f3a0fdcc6793a67dfe695710169c40d3e23f120920eceed0ba ./standardise/float64/kgo.nc
9c834da03db5f25627c1be3ff666718ae5186c77895854e3d84db8ce56af16cf ./standardise/metadata/input.nc
ab8531ce118476b0977f5adddbcfcaa46ccbe4d8bbb648255f63bb785f51dc5c ./standardise/metadata/kgo.nc
ac3ded21e9d3f3aead4288101ea574e019772e9479534910b616d0d38d45f8d7 ./standardise/metadata/kgo.nc
d4404df3a8acdeed27f20ec621a0d3500bf4c849eaf7a7064c4e42f079b37a43 ./standardise/metadata/metadata.json
c302d5f2f1c05217340fcae2cab66bc4ead19d432d11d7360af2a512a443d3ba ./standardise/metadata/radar_metadata.json
326ae7b6d3cff0fd3da840b643816d897e7133a248e49eacad3ec37af669f49e ./standardise/metadata/radar_metadata.json
d89a8587bc28b574b8b1e624bb3bf339aaeb9c8c2355db7fe518af8c9bce527c ./standardise/radarnet/input_coverage.nimrod
6335cba81be74577fe10d0a4f5cb75724abcef16499d76c263e80c16cf092a05 ./standardise/radarnet/input_preciprate.nimrod
8a0cc7c53513fe11919da6ae39e431fde7f5b5831e255d8f30feda2a53eb3c71 ./standardise/radarnet/kgo_coverage.nc
41 changes: 29 additions & 12 deletions improver_tests/acceptance/test_generate_metadata_cube.py
@@ -37,14 +37,16 @@
pytestmark = [pytest.mark.acc, acc.skip_if_kgo_missing]
CLI = acc.cli_name_with_dashes(__file__)
run_cli = acc.run_cli(CLI)
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
mandatory_attributes_json = kgo_dir / "mandatory_attributes.json"


def test_default(tmp_path):
"""Test default metadata cube generation"""
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
kgo_path = kgo_dir / "kgo_default.nc"
output_path = tmp_path / "output.nc"
args = [
mandatory_attributes_json,
"--output",
output_path,
]
@@ -54,10 +56,10 @@ def test_default(tmp_path):

def test_ensemble_members(tmp_path):
"""Test creating variable cube with all options set"""
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
kgo_path = kgo_dir / "kgo_ensemble_members_all_options.nc"
output_path = tmp_path / "output.nc"
args = [
mandatory_attributes_json,
"--name",
"air_temperature",
"--units",
@@ -83,11 +85,11 @@

def test_json_all_inputs(tmp_path):
"""Test creating variable cube with all options set"""
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
kgo_path = kgo_dir / "kgo_variable_cube_json_inputs.nc"
json_input_path = kgo_dir / "variable_cube_all_inputs.json"
output_path = tmp_path / "output.nc"
args = [
mandatory_attributes_json,
"--json-input",
json_input_path,
"--output",
@@ -100,23 +102,28 @@ def test_realization_json(tmp_path):
def test_realization_json(tmp_path):
"""Test variable/realization metadata cube generated using realization
coordinate defined in the json input"""
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
kgo_path = kgo_dir / "kgo_realization.nc"
realizations_path = kgo_dir / "realizations.json"
output_path = tmp_path / "output.nc"
args = ["--json-input", realizations_path, "--output", output_path]
args = [
mandatory_attributes_json,
"--json-input",
realizations_path,
"--output",
output_path,
]
run_cli(args)
acc.compare(output_path, kgo_path)


def test_percentile_cube(tmp_path):
"""Test percentile metadata cube generated using using percentile
coordinate defined in the json input"""
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
kgo_path = kgo_dir / "kgo_percentile.nc"
percentiles_path = kgo_dir / "percentiles.json"
output_path = tmp_path / "output.nc"
args = [
mandatory_attributes_json,
"--json-input",
percentiles_path,
"--output",
@@ -129,11 +136,11 @@ def test_percentile_cube(tmp_path):
def test_probability_cube(tmp_path):
"""Test probability metadata cube generated using using threshold
coordinate defined in the json input"""
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
kgo_path = kgo_dir / "kgo_probability.nc"
thresholds_path = kgo_dir / "thresholds.json"
output_path = tmp_path / "output.nc"
args = [
mandatory_attributes_json,
"--json-input",
thresholds_path,
"--output",
@@ -145,34 +152,44 @@ def test_probability_cube(tmp_path):

def test_height_levels(tmp_path):
"""Test metadata cube generated with height levels from json"""
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
kgo_path = kgo_dir / "kgo_height_levels.nc"
height_levels_path = kgo_dir / "height_levels.json"
output_path = tmp_path / "output.nc"
args = ["--json-input", height_levels_path, "--output", output_path]
args = [
mandatory_attributes_json,
"--json-input",
height_levels_path,
"--output",
output_path,
]
run_cli(args)
acc.compare(output_path, kgo_path)


def test_single_height_level(tmp_path):
"""Test metadata cube generation giving single value (rather than comma separated
list) for height levels option demotes height to scalar coordinate"""
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
kgo_path = kgo_dir / "kgo_single_height_level.nc"
height_level_path = kgo_dir / "single_height_level.json"
output_path = tmp_path / "output.nc"
args = ["--json-input", height_level_path, "--output", output_path]
args = [
mandatory_attributes_json,
"--json-input",
height_level_path,
"--output",
output_path,
]
run_cli(args)
acc.compare(output_path, kgo_path)


def test_pressure_levels(tmp_path):
"""Test metadata cube generated with pressure in Pa instead of height in metres"""
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
kgo_path = kgo_dir / "kgo_pressure_levels.nc"
pressure_levels_path = kgo_dir / "pressure_levels.json"
output_path = tmp_path / "output.nc"
args = [
mandatory_attributes_json,
"--json-input",
pressure_levels_path,
"--output",
13 changes: 13 additions & 0 deletions improver_tests/standardise/test_StandardiseMetadata.py
@@ -141,6 +141,19 @@ def test_metadata_changes(self):
self.assertDictEqual(result.attributes, expected_attributes)
self.assertNotIn("forecast_period", [coord.name() for coord in result.coords()])

def test_discard_cellmethod(self):
"""Test changes to cell_methods"""
cube = self.cube.copy()
cube.cell_methods = [
iris.coords.CellMethod(method="point", coords="time"),
iris.coords.CellMethod(method="max", coords="realization"),
]
result = self.plugin.process(cube,)
self.assertEqual(
result.cell_methods,
(iris.coords.CellMethod(method="max", coords="realization"),),
)

def test_float_deescalation(self):
"""Test precision de-escalation from float64 to float32"""
cube = self.cube.copy()