
Commit

IMPRO-2090 metadata tweaks (metoppv#1454)
* Updates standardise

- acceptance test adds missing institution and title attributes to the metadata test
- plugin removes a specific meaningless cell method (point: time)
- CLI docstring updated

* Updates generate-metadata

- mandatory attributes are set to default values if not specified
- unit and acceptance tests updated

* Improves standardise testing coverage for cell methods

* Second review changes:

- rename method
- improve docstring

* Makes setting the mandatory attributes a requirement, rather than falling back to default values.

- New compulsory argument to CLI and plugin
- Raises an error if any of the mandatory attributes are not set
- Updates tests and adds test for raising this error

* Prevents change to mandatory attribute defaults

* Corrects test to pass for the right reason

* Prevents the keyword argument `attributes` from being modified by the plugin
MoseleyS authored May 19, 2021
1 parent 800a442 commit 27b8d27
Showing 9 changed files with 194 additions and 82 deletions.
10 changes: 7 additions & 3 deletions improver/cli/generate_metadata_cube.py
@@ -37,6 +37,7 @@
@cli.clizefy
@cli.with_output
def process(
mandatory_attributes_json: cli.inputjson,
*,
name="air_pressure_at_sea_level",
units=None,
@@ -51,6 +52,9 @@ def process(
""" Generate a cube with metadata only.
Args:
mandatory_attributes_json (Dict):
Specifies the values of the mandatory attributes, title, institution and
source.
name (Optional[str]):
Output variable name, or if creating a probability cube the name of the
underlying variable to which the probability field applies.
@@ -89,7 +93,8 @@ def process(
# Set arguments to pass to generate_metadata function and remove json_input for
# processing contents before adding
generate_metadata_args = locals()
generate_metadata_args.pop("json_input", None)
for key in ["mandatory_attributes_json", "json_input"]:
generate_metadata_args.pop(key, None)

from improver.synthetic_data.generate_metadata import generate_metadata
from improver.synthetic_data.utilities import (
@@ -128,5 +133,4 @@ def process(

# Update generate_metadata_args with the json_input data
generate_metadata_args.update(json_input)

return generate_metadata(**generate_metadata_args)
return generate_metadata(mandatory_attributes_json, **generate_metadata_args)
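
The CLI now takes the mandatory-attribute values as a leading JSON input. A minimal sketch of creating such a file and invoking the CLI follows; the file name, attribute values and command line are illustrative assumptions rather than part of this commit.

# Sketch only: writes an illustrative mandatory-attributes JSON file.
import json

mandatory_attributes = {
    "title": "Example metadata cube",      # illustrative value
    "source": "Example modelling system",  # illustrative value
    "institution": "Example institution",  # illustrative value
}
with open("mandatory_attributes.json", "w") as fh:
    json.dump(mandatory_attributes, fh, indent=2)

# Hypothetical invocation, mirroring the acceptance tests further down, where
# the JSON path is the first positional argument:
#   improver generate-metadata-cube mandatory_attributes.json \
#       --name air_temperature --units K --output metadata_cube.nc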
4 changes: 2 additions & 2 deletions improver/cli/standardise.py
@@ -48,8 +48,8 @@ def process(
"""
Standardise a source cube. Available options are renaming, converting units,
updating attributes and removing named scalar coordinates. Remaining scalar
coordinates are collapsed, and data are cast to IMPROVER standard datatypes
and units.
coordinates are collapsed, CellMethod("point": "time") is discarded, and data
are cast to IMPROVER standard datatypes and units.
Deprecated behaviour:
Translates metadata relating to the grid_id attribute from StaGE
2 changes: 1 addition & 1 deletion improver/metadata/constants/attributes.py
@@ -36,4 +36,4 @@
"institution": "unknown",
}

MANDATORY_ATTRIBUTES = ["title", "source", "institution"]
MANDATORY_ATTRIBUTES = [x for x in MANDATORY_ATTRIBUTE_DEFAULTS.keys()]
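
Deriving MANDATORY_ATTRIBUTES from the keys of MANDATORY_ATTRIBUTE_DEFAULTS keeps the two constants in sync. A small sketch of the resulting behaviour, with default values assumed for the two keys that fall outside the visible hunk:

# Sketch only: the "title" and "source" defaults are assumed, as they are not
# shown in the hunk above; only "institution" is visible.
MANDATORY_ATTRIBUTE_DEFAULTS = {
    "title": "unknown",        # assumed default
    "source": "unknown",       # assumed default
    "institution": "unknown",
}
MANDATORY_ATTRIBUTES = [x for x in MANDATORY_ATTRIBUTE_DEFAULTS.keys()]

# Adding a new default automatically makes that attribute mandatory; there is
# no second list to keep up to date.
assert set(MANDATORY_ATTRIBUTES) == set(MANDATORY_ATTRIBUTE_DEFAULTS)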
32 changes: 28 additions & 4 deletions improver/standardise.py
@@ -35,6 +35,8 @@

import iris
import numpy as np
from iris.analysis import Linear, Nearest
from iris.coords import CellMethod
from iris.cube import Cube
from iris.exceptions import CoordinateNotFoundError
from numpy import dtype, ndarray
@@ -124,6 +126,24 @@ def as_correct_dtype(obj: ndarray, required_dtype: dtype) -> ndarray:
coord.bounds = round_close(coord.bounds)
coord.bounds = as_correct_dtype(coord.bounds, req_dtype)

@staticmethod
def _discard_redundant_cell_methods(cube: Cube) -> None:
"""
Removes cell method "point": "time" from cube if present.
"""
if not cube.cell_methods:
return
removable_cms = [
CellMethod(method="point", coords="time"),
]
updated_cms = []
for cm in cube.cell_methods:
if cm in removable_cms:
continue
updated_cms.append(cm)

cube.cell_methods = updated_cms

def process(
self,
cube: Cube,
@@ -134,10 +154,13 @@
) -> Cube:
"""
Perform compulsory and user-configurable metadata adjustments. The
compulsory adjustments are to collapse any scalar dimensions apart from
realization (which is expected always to be a dimension); to cast the cube
data and coordinates into suitable datatypes; and to convert time-related
metadata into the required units.
compulsory adjustments are:
- to collapse any scalar dimensions apart from realization (which is expected
always to be a dimension);
- to cast the cube data and coordinates into suitable datatypes;
- to convert time-related metadata into the required units
- to remove cell method ("point": "time").
Args:
cube:
@@ -165,6 +188,7 @@ def process(
self._remove_scalar_coords(cube, coords_to_remove)
if attributes_dict:
amend_attributes(cube, attributes_dict)
self._discard_redundant_cell_methods(cube)

# this must be done after unit conversion as if the input is an integer
# field, unit conversion outputs the new data as float64
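A minimal sketch of the effect of the new _discard_redundant_cell_methods step, assuming the StandardiseMetadata plugin and the set_up_variable_cube test utility behave as in this repository; the data values and the retained cell method are illustrative.

import numpy as np
from iris.coords import CellMethod

from improver.standardise import StandardiseMetadata
from improver.synthetic_data.set_up_test_cubes import set_up_variable_cube

# Illustrative (realization, y, x) temperature cube.
cube = set_up_variable_cube(np.full((3, 5, 5), 280.0, dtype=np.float32))
cube.cell_methods = (
    CellMethod(method="point", coords="time"),  # redundant; should be dropped
    CellMethod(method="max", coords="time"),    # meaningful; should be kept
)

result = StandardiseMetadata().process(cube)
print(result.cell_methods)  # expected: only the "max" cell method remains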
21 changes: 21 additions & 0 deletions improver/synthetic_data/generate_metadata.py
@@ -39,6 +39,7 @@
from iris.util import squeeze
from numpy import ndarray

from improver.metadata.constants.attributes import MANDATORY_ATTRIBUTES
from improver.synthetic_data.set_up_test_cubes import (
set_up_percentile_cube,
set_up_probability_cube,
@@ -101,6 +102,7 @@ def _create_data_array(


def generate_metadata(
mandatory_attributes: dict,
name: str = "air_pressure_at_sea_level",
units: Optional[str] = None,
time_period: Optional[int] = None,
@@ -114,6 +116,9 @@
""" Generate a cube with metadata only.
Args:
mandatory_attributes:
Specifies the values of the mandatory attributes, title, institution and
source.
name:
Output variable name, or if creating a probability cube the name of the
underlying variable to which the probability field applies.
@@ -140,6 +145,12 @@
**kwargs:
Additional keyword arguments to pass to the required cube setup function.
Raises:
ValueError:
If any options are not supported
KeyError:
If mandatory_attributes does not contain all the required keys
Returns:
Output of set_up_variable_cube(), set_up_percentile_cube() or
set_up_probability_cube()
@@ -193,6 +204,16 @@ def generate_metadata(
data = _create_data_array(
ensemble_members, leading_dimension, npoints, kwargs["height_levels"]
)
missing_mandatory_attributes = MANDATORY_ATTRIBUTES - mandatory_attributes.keys()
if missing_mandatory_attributes:
raise KeyError(
f"No values for these mandatory attributes: {missing_mandatory_attributes}"
)
if "attributes" in kwargs:
kwargs["attributes"] = kwargs["attributes"].copy()
else:
kwargs["attributes"] = {}
kwargs["attributes"].update(mandatory_attributes)

# Set up requested cube
if cube_type == "percentile":
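Two behaviours added in this file can be sketched briefly: the KeyError raised when a mandatory attribute is missing, and the copy that prevents the caller's `attributes` keyword being modified. The attribute values and the extra `field_type` attribute are illustrative assumptions, not taken from this commit.

from improver.synthetic_data.generate_metadata import generate_metadata

# Missing "institution" should raise the new KeyError.
try:
    generate_metadata({"title": "Example", "source": "Example"})
except KeyError as err:
    print(err)  # expected to report the missing mandatory attribute(s)

# The caller's attributes dict is copied before the mandatory attributes are
# merged in, so it is left unchanged.
user_attributes = {"field_type": "example"}  # hypothetical extra attribute
cube = generate_metadata(
    {"title": "Example", "source": "Example", "institution": "Example"},
    attributes=user_attributes,
)
print(user_attributes)  # still only {"field_type": "example"}
print(cube.attributes["institution"])  # "Example"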
23 changes: 12 additions & 11 deletions improver_tests/acceptance/SHA256SUMS
@@ -145,15 +145,16 @@ b25a397ac5ecac536f43c553f17b0bd6b5d2587d4eda8559bf5709e25f2c237b ./fill-radar-h
67be15dad94608df8a6a6c362bf58263a522de6ce501e3e99443b782c083ff76 ./generate-landmask/basic/input.nc
58ab8e2683bd4c2f29a35d32ac5deb7ea2113491ab1513846e37950a74a3d4fd ./generate-landmask/basic/kgo.nc
90b32a6b1a38c81cb3dbc16474c8404b0a77bdbc6c0c3149af54a5823387d579 ./generate-metadata-cube/height_levels.json
aab96e616856e1eb174be5d4e0d46dd0ceaf7764733350b6ce4966a9b89aab41 ./generate-metadata-cube/kgo_default.nc
ef7e08f06e4f7ff9135b3f1bd555f12ab6a8e4ee4eeba14b06c97244ba4fd53d ./generate-metadata-cube/kgo_ensemble_members_all_options.nc
e841e4a2f971209f093b5d5543a9dd9a46dd7e88d5db4aae5c58c436003fb079 ./generate-metadata-cube/kgo_height_levels.nc
07fe8c16751e7854266e47375c5b63be8583bf6841e0428c934d6d5ad92f08e6 ./generate-metadata-cube/kgo_percentile.nc
62612a8cf863a080b8f9e5ca62d4182a3186e5debff629c31925849872244774 ./generate-metadata-cube/kgo_pressure_levels.nc
b65488638fdd95247f6ae04a4203c6c462c32e0bac87df882619d5125fd857b3 ./generate-metadata-cube/kgo_probability.nc
801bbcb93345faab177b94546f2b5fc5f4585e59b10e6e9a680fe1f600b0b906 ./generate-metadata-cube/kgo_realization.nc
9223d25967889c999583533e4994fba1c2b1dfc8030e91b506dc9b72778e5567 ./generate-metadata-cube/kgo_single_height_level.nc
ca37652252852f04bc3016f9c0172cafdebbacabfd8d1a906780b7c4126a8f62 ./generate-metadata-cube/kgo_variable_cube_json_inputs.nc
0926c654b760a63166340cfaca08912e95cd74a5e00714b36f36feb24639c7be ./generate-metadata-cube/kgo_default.nc
a309b380a51a2eefe9192445a882dbe7e67d9e6276fcca6b0ae20cdbf0e82ab0 ./generate-metadata-cube/kgo_ensemble_members_all_options.nc
b120c3eebefe785543955fb623c65fc4f6d857ec34df086a26d50c1c114214af ./generate-metadata-cube/kgo_height_levels.nc
2af4c17ead2eb5337a0689c70565d345f02758638fcfaca7971f49ef0fe4078e ./generate-metadata-cube/kgo_percentile.nc
c6f5cf0683b34a30b9493795837f96dfa9f46f21043460ee4692c4787eff4f99 ./generate-metadata-cube/kgo_pressure_levels.nc
43e6f742d90b72f07176946b82a3918c25fc95595dd688260a03852ecf534fb8 ./generate-metadata-cube/kgo_probability.nc
21e53d7c3fa1987107d39d71193a693499886c0e53b1bb70f3ddcd01e0b66113 ./generate-metadata-cube/kgo_realization.nc
0c26ad17a15dda0d47059365c5d85b2d33949fd805f79b1c3e28d775b9702e34 ./generate-metadata-cube/kgo_single_height_level.nc
66a6342f07f5c3a50819f32ea8945ba94aa2583c6a41f88ac09db4e46eef7e91 ./generate-metadata-cube/kgo_variable_cube_json_inputs.nc
11927ca3fe55f20b9f53240222d209b1f7519f159b889a295de6cf8b726521df ./generate-metadata-cube/mandatory_attributes.json
d712c2afdd378796381c548e6a5d02bb0e9a7c4c4943b54e15cbf0a700a08eeb ./generate-metadata-cube/percentiles.json
c20cf3fee216aaf92506d80a9342fb983372c9b71e0587eeb4f311f7b7697c28 ./generate-metadata-cube/pressure_levels.json
f04e1b0908405a54d83b2f05e8849a56e431282e539b139bc59b61889e91090b ./generate-metadata-cube/realizations.json
@@ -433,9 +434,9 @@ a8aaf5f83b712e1df02573833e81fe4c9278403c8ebdfaba815cd76b9bb7a714 ./spot-extract
b3080315914473c89b814adae8a6e7306afb42248212177702acc86b0f5878ae ./standardise/float64/float64_data.nc
b40913e8086865f3a0fdcc6793a67dfe695710169c40d3e23f120920eceed0ba ./standardise/float64/kgo.nc
9c834da03db5f25627c1be3ff666718ae5186c77895854e3d84db8ce56af16cf ./standardise/metadata/input.nc
ab8531ce118476b0977f5adddbcfcaa46ccbe4d8bbb648255f63bb785f51dc5c ./standardise/metadata/kgo.nc
ac3ded21e9d3f3aead4288101ea574e019772e9479534910b616d0d38d45f8d7 ./standardise/metadata/kgo.nc
d4404df3a8acdeed27f20ec621a0d3500bf4c849eaf7a7064c4e42f079b37a43 ./standardise/metadata/metadata.json
c302d5f2f1c05217340fcae2cab66bc4ead19d432d11d7360af2a512a443d3ba ./standardise/metadata/radar_metadata.json
326ae7b6d3cff0fd3da840b643816d897e7133a248e49eacad3ec37af669f49e ./standardise/metadata/radar_metadata.json
d89a8587bc28b574b8b1e624bb3bf339aaeb9c8c2355db7fe518af8c9bce527c ./standardise/radarnet/input_coverage.nimrod
6335cba81be74577fe10d0a4f5cb75724abcef16499d76c263e80c16cf092a05 ./standardise/radarnet/input_preciprate.nimrod
8a0cc7c53513fe11919da6ae39e431fde7f5b5831e255d8f30feda2a53eb3c71 ./standardise/radarnet/kgo_coverage.nc
41 changes: 29 additions & 12 deletions improver_tests/acceptance/test_generate_metadata_cube.py
@@ -37,14 +37,16 @@
pytestmark = [pytest.mark.acc, acc.skip_if_kgo_missing]
CLI = acc.cli_name_with_dashes(__file__)
run_cli = acc.run_cli(CLI)
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
mandatory_attributes_json = kgo_dir / "mandatory_attributes.json"


def test_default(tmp_path):
"""Test default metadata cube generation"""
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
kgo_path = kgo_dir / "kgo_default.nc"
output_path = tmp_path / "output.nc"
args = [
mandatory_attributes_json,
"--output",
output_path,
]
@@ -54,10 +56,10 @@ def test_default(tmp_path):

def test_ensemble_members(tmp_path):
"""Test creating variable cube with all options set"""
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
kgo_path = kgo_dir / "kgo_ensemble_members_all_options.nc"
output_path = tmp_path / "output.nc"
args = [
mandatory_attributes_json,
"--name",
"air_temperature",
"--units",
@@ -83,11 +85,11 @@

def test_json_all_inputs(tmp_path):
"""Test creating variable cube with all options set"""
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
kgo_path = kgo_dir / "kgo_variable_cube_json_inputs.nc"
json_input_path = kgo_dir / "variable_cube_all_inputs.json"
output_path = tmp_path / "output.nc"
args = [
mandatory_attributes_json,
"--json-input",
json_input_path,
"--output",
@@ -100,23 +102,28 @@ def test_realization_json(tmp_path):
def test_realization_json(tmp_path):
"""Test variable/realization metadata cube generated using realization
coordinate defined in the json input"""
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
kgo_path = kgo_dir / "kgo_realization.nc"
realizations_path = kgo_dir / "realizations.json"
output_path = tmp_path / "output.nc"
args = ["--json-input", realizations_path, "--output", output_path]
args = [
mandatory_attributes_json,
"--json-input",
realizations_path,
"--output",
output_path,
]
run_cli(args)
acc.compare(output_path, kgo_path)


def test_percentile_cube(tmp_path):
"""Test percentile metadata cube generated using using percentile
coordinate defined in the json input"""
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
kgo_path = kgo_dir / "kgo_percentile.nc"
percentiles_path = kgo_dir / "percentiles.json"
output_path = tmp_path / "output.nc"
args = [
mandatory_attributes_json,
"--json-input",
percentiles_path,
"--output",
@@ -129,11 +136,11 @@ def test_percentile_cube(tmp_path):
def test_probability_cube(tmp_path):
"""Test probability metadata cube generated using using threshold
coordinate defined in the json input"""
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
kgo_path = kgo_dir / "kgo_probability.nc"
thresholds_path = kgo_dir / "thresholds.json"
output_path = tmp_path / "output.nc"
args = [
mandatory_attributes_json,
"--json-input",
thresholds_path,
"--output",
@@ -145,34 +152,44 @@ def test_probability_cube(tmp_path):

def test_height_levels(tmp_path):
"""Test metadata cube generated with height levels from json"""
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
kgo_path = kgo_dir / "kgo_height_levels.nc"
height_levels_path = kgo_dir / "height_levels.json"
output_path = tmp_path / "output.nc"
args = ["--json-input", height_levels_path, "--output", output_path]
args = [
mandatory_attributes_json,
"--json-input",
height_levels_path,
"--output",
output_path,
]
run_cli(args)
acc.compare(output_path, kgo_path)


def test_single_height_level(tmp_path):
"""Test metadata cube generation giving single value (rather than comma separated
list) for height levels option demotes height to scalar coordinate"""
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
kgo_path = kgo_dir / "kgo_single_height_level.nc"
height_level_path = kgo_dir / "single_height_level.json"
output_path = tmp_path / "output.nc"
args = ["--json-input", height_level_path, "--output", output_path]
args = [
mandatory_attributes_json,
"--json-input",
height_level_path,
"--output",
output_path,
]
run_cli(args)
acc.compare(output_path, kgo_path)


def test_pressure_levels(tmp_path):
"""Test metadata cube generated with pressure in Pa instead of height in metres"""
kgo_dir = acc.kgo_root() / "generate-metadata-cube"
kgo_path = kgo_dir / "kgo_pressure_levels.nc"
pressure_levels_path = kgo_dir / "pressure_levels.json"
output_path = tmp_path / "output.nc"
args = [
mandatory_attributes_json,
"--json-input",
pressure_levels_path,
"--output",
13 changes: 13 additions & 0 deletions improver_tests/standardise/test_StandardiseMetadata.py
@@ -141,6 +141,19 @@ def test_metadata_changes(self):
self.assertDictEqual(result.attributes, expected_attributes)
self.assertNotIn("forecast_period", [coord.name() for coord in result.coords()])

def test_discard_cellmethod(self):
"""Test changes to cell_methods"""
cube = self.cube.copy()
cube.cell_methods = [
iris.coords.CellMethod(method="point", coords="time"),
iris.coords.CellMethod(method="max", coords="realization"),
]
result = self.plugin.process(cube,)
self.assertEqual(
result.cell_methods,
(iris.coords.CellMethod(method="max", coords="realization"),),
)

def test_float_deescalation(self):
"""Test precision de-escalation from float64 to float32"""
cube = self.cube.copy()