Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
e1b7112
- New array_summary utility to format numpy arrays in CML.
ukmo-ccbunney Oct 9, 2025
b937bc7
Add data stats to CML
ukmo-ccbunney Oct 9, 2025
d5f7431
Pass keyword options through xml call hierarchy
ukmo-ccbunney Oct 10, 2025
515ccb2
Replaced extra keywords in xml_element functions with settings in con…
ukmo-ccbunney Oct 10, 2025
48b7fae
Added context manager for controlling CML output and formatting
ukmo-ccbunney Oct 10, 2025
68c4ea8
Reinstated "no-masked-elements" crc output
ukmo-ccbunney Oct 10, 2025
f0791f1
Added docstring for `array_checksum`
ukmo-ccbunney Oct 14, 2025
050c065
Added `coord_order` option. Tidied up.
ukmo-ccbunney Oct 14, 2025
1eb276f
Updated CMLSettings.set keyword defaults to None. Now only updates
ukmo-ccbunney Oct 14, 2025
edc51da
Only strip trailing zeros for floats. Only output stats for length > 1
ukmo-ccbunney Oct 16, 2025
1c99b67
Turn off numpy-formatting for all CML output in tests
ukmo-ccbunney Oct 16, 2025
172e5a1
Added some CML formatting keywords to `_shared_utils.assert_CML`
ukmo-ccbunney Oct 16, 2025
95f3a42
Updated test results for new default CML formatting.
ukmo-ccbunney Oct 16, 2025
7791944
New cube XML tests to cover new formatting options
ukmo-ccbunney Oct 16, 2025
c66847f
Update docstring for Cube.xml and CubeList.xml. Also
ukmo-ccbunney Oct 16, 2025
1793214
Added whatsnew
ukmo-ccbunney Oct 16, 2025
9c380a6
Typo in doctest
ukmo-ccbunney Oct 16, 2025
e42c113
Fix doc tests (switched to using code-block)
ukmo-ccbunney Oct 16, 2025
50aff0c
Merge branch 'main' into cml_array_formatting
ukmo-ccbunney Oct 17, 2025
f89384c
Update docs/src/whatsnew/latest.rst
ukmo-ccbunney Oct 17, 2025
ba76a83
Update lib/iris/tests/_shared_utils.py
ukmo-ccbunney Oct 17, 2025
bdc9740
Update lib/iris/util.py
ukmo-ccbunney Oct 17, 2025
218b842
Update lib/iris/tests/_shared_utils.py
ukmo-ccbunney Oct 17, 2025
8783cf4
Added typing for CMLSettings data attributes
ukmo-ccbunney Oct 17, 2025
bb3b125
Update lib/iris/util.py
ukmo-ccbunney Oct 17, 2025
51d7cc7
Update lib/iris/util.py
ukmo-ccbunney Oct 17, 2025
bf67d39
Put `numpy.typing.ArrayLike` in `TYPE_CHECKING` block
ukmo-ccbunney Oct 17, 2025
2b75c14
Factored out `fixed_std` nested function in cube.py and coords.py
ukmo-ccbunney Oct 17, 2025
7dda361
Fix broken username link
ukmo-ccbunney Oct 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
11 changes: 9 additions & 2 deletions docs/src/whatsnew/latest.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ This document explains the changes made to Iris for this release
:func:`~iris.fileformats.netcdf.saver.save_mesh` also supports ``zlib``
compression. (:issue:`6565`, :pull:`6728`)

#. `@ukmo-ccbunney`_ added a new :class:`~iris.util.CMLSettings` class to control
the formatting of Cube CML output via a context manager.
(:issue:`6244`, :pull:`6743`)


🐛 Bugs Fixed
=============
Expand Down Expand Up @@ -109,9 +113,12 @@ This document explains the changes made to Iris for this release
#. `@melissaKG`_ upgraded Iris' tests to no longer use the deprecated
``git whatchanged`` command. (:pull:`6672`)

#. `@ukmo-ccbunney` merged functionality of ``assert_CML_approx_data`` into
#. `@ukmo-ccbunney`_ merged functionality of ``assert_CML_approx_data`` into
``assert_CML`` via the use of a new ``approx_data`` keyword. (:pull:`6713`)

#. `@ukmo-ccbunney`_ updated ``assert_CML`` to use stricter array formatting, to
avoid test result changes caused by NumPy version changes. (:pull:`6743`)


.. comment
Whatsnew author names (@github name) in alphabetical order. Note that,
Expand All @@ -124,4 +131,4 @@ This document explains the changes made to Iris for this release
.. comment
Whatsnew resources in alphabetical order:

.. _netcdf-c#3183: https://github.com/Unidata/netcdf-c/issues/3183
.. _netcdf-c#3183: https://github.com/Unidata/netcdf-c/issues/3183
73 changes: 70 additions & 3 deletions lib/iris/coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import iris.exceptions
import iris.time
import iris.util
from iris.util import CML_SETTINGS
import iris.warnings

#: The default value for ignore_axis which controls guess_coord_axis' behaviour
Expand Down Expand Up @@ -853,10 +854,45 @@ def xml_element(self, doc):
if self.coord_system:
element.appendChild(self.coord_system.xml_element(doc))

is_masked_array = np.ma.isMaskedArray(self._values)

# Add the values
element.setAttribute("value_type", str(self._value_type_name()))
element.setAttribute("shape", str(self.shape))

# data checksum
if CML_SETTINGS.coord_checksum:
crc = iris.util.array_checksum(self._values)
element.setAttribute("checksum", crc)

if is_masked_array:
# Add the number of masked elements
if np.ma.is_masked(self._values):
crc = iris.util.array_checksum(self._values.mask)
else:
crc = "no-masked-elements"
element.setAttribute("mask_checksum", crc)

# array ordering:
def _order(array):
order = ""
if array.flags["C_CONTIGUOUS"]:
order = "C"
elif array.flags["F_CONTIGUOUS"]:
order = "F"
return order

if CML_SETTINGS.coord_order:
element.setAttribute("order", _order(self._values))
if is_masked_array:
element.setAttribute("mask_order", _order(self._values.mask))

# masked element count:
if CML_SETTINGS.masked_value_count and is_masked_array:
element.setAttribute(
"masked_count", str(np.count_nonzero(self._values.mask))
)

# The values are referred to "points" of a coordinate and "data"
# otherwise.
if isinstance(self, Coord):
Expand All @@ -865,7 +901,31 @@ def xml_element(self, doc):
values_term = "indices"
else:
values_term = "data"
element.setAttribute(values_term, self._xml_array_repr(self._values))
element.setAttribute(
values_term,
self._xml_array_repr(self._values),
)

if iris.util.CML_SETTINGS.coord_data_array_stats and len(self._values) > 1:
data = self._values

if np.issubdtype(data.dtype.type, np.number):
data_min = data.min()
data_max = data.max()
if data_min == data_max:
# When data is constant, std() is too sensitive.
data_std = 0
else:
data_std = data.std()

stats_xml_element = doc.createElement("stats")
stats_xml_element.setAttribute("std", str(data_std))
stats_xml_element.setAttribute("min", str(data_min))
stats_xml_element.setAttribute("max", str(data_max))
stats_xml_element.setAttribute("masked", str(ma.is_masked(data)))
stats_xml_element.setAttribute("mean", str(data.mean()))

element.appendChild(stats_xml_element)

return element

Expand Down Expand Up @@ -896,7 +956,11 @@ def _xml_array_repr(data):
if hasattr(data, "to_xml_attr"):
result = data._values.to_xml_attr()
else:
result = iris.util.format_array(data)
edgeitems = CML_SETTINGS.array_edgeitems
if CML_SETTINGS.numpy_formatting:
result = iris.util.format_array(data, edgeitems=edgeitems)
else:
result = iris.util.array_summary(data, edgeitems=edgeitems)
return result

def _value_type_name(self):
Expand Down Expand Up @@ -2565,7 +2629,10 @@ def xml_element(self, doc):

# Add bounds, points are handled by the parent class.
if self.has_bounds():
element.setAttribute("bounds", self._xml_array_repr(self.bounds))
element.setAttribute(
"bounds",
self._xml_array_repr(self.bounds),
)

return element

Expand Down
99 changes: 69 additions & 30 deletions lib/iris/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
from typing import TYPE_CHECKING, Any, Optional, TypeGuard
import warnings
from xml.dom.minidom import Document
import zlib

from cf_units import Unit
import dask.array as da
Expand Down Expand Up @@ -56,6 +55,7 @@
from iris.mesh import MeshCoord
import iris.exceptions
import iris.util
from iris.util import CML_SETTINGS
import iris.warnings

__all__ = ["Cube", "CubeAttrsDict", "CubeList"]
Expand Down Expand Up @@ -171,7 +171,10 @@ def insert(self, index, cube):
super(CubeList, self).insert(index, cube)

def xml(self, checksum=False, order=True, byteorder=True):
"""Return a string of the XML that this list of cubes represents."""
"""Return a string of the XML that this list of cubes represents.

See :func:`iris.util.CML_SETTINGS.set` for controlling the XML output formatting.
"""
with np.printoptions(legacy=NP_PRINTOPTIONS_LEGACY):
doc = Document()
cubes_xml_element = doc.createElement("cubes")
Expand Down Expand Up @@ -3902,12 +3905,29 @@ def xml(
order: bool = True,
byteorder: bool = True,
) -> str:
"""Return a fully valid CubeML string representation of the Cube."""
"""Return a fully valid CubeML string representation of the Cube.

The format of the generated XML can be controlled using the
``iris.util.CML_SETTINGS.set`` method as a context manager.

For example, to include array statistics for the coordinate data:

.. code-block:: python

with CML_SETTINGS.set(coord_data_array_stats=True):
print(cube.xml())

See :func:`iris.util.CML_SETTINGS.set` for more details.

"""
with np.printoptions(legacy=NP_PRINTOPTIONS_LEGACY):
doc = Document()

cube_xml_element = self._xml_element(
doc, checksum=checksum, order=order, byteorder=byteorder
doc,
checksum=checksum,
order=order,
byteorder=byteorder,
)
cube_xml_element.setAttribute("xmlns", XML_NAMESPACE_URI)
doc.appendChild(cube_xml_element)
Expand All @@ -3916,7 +3936,13 @@ def xml(
doc = self._sort_xml_attrs(doc)
return iris.util._print_xml(doc)

def _xml_element(self, doc, checksum=False, order=True, byteorder=True):
def _xml_element(
self,
doc,
checksum=False,
order=True,
byteorder=True,
):
cube_xml_element = doc.createElement("cube")

if self.standard_name:
Expand Down Expand Up @@ -4006,39 +4032,46 @@ def dimmeta_xml_element(element, typename, dimscall):
data_xml_element = doc.createElement("data")
data_xml_element.setAttribute("shape", str(self.shape))

# NB. Getting a checksum triggers any deferred loading,
# NB. Getting a checksum or data stats triggers any deferred loading,
# in which case it also has the side-effect of forcing the
# byte order to be native.

if checksum:
data = self.data

# Ensure consistent memory layout for checksums.
def normalise(data):
data = np.ascontiguousarray(data)
if data.dtype.newbyteorder("<") != data.dtype:
data = data.byteswap(False)
data.dtype = data.dtype.newbyteorder("<")
return data

crc = iris.util.array_checksum(data)
data_xml_element.setAttribute("checksum", crc)
if ma.isMaskedArray(data):
# Fill in masked values to avoid the checksum being
# sensitive to unused numbers. Use a fixed value so
# a change in fill_value doesn't affect the
# checksum.
crc = "0x%08x" % (zlib.crc32(normalise(data.filled(0))) & 0xFFFFFFFF,)
data_xml_element.setAttribute("checksum", crc)
if ma.is_masked(data):
crc = "0x%08x" % (zlib.crc32(normalise(data.mask)) & 0xFFFFFFFF,)
crc = iris.util.array_checksum(data.mask)
else:
crc = "no-masked-elements"
data_xml_element.setAttribute("mask_checksum", crc)

if CML_SETTINGS.data_array_stats:
data = self.data
data_min = data.min()
data_max = data.max()
if data_min == data_max:
# When data is constant, std() is too sensitive.
data_std = 0
else:
crc = "0x%08x" % (zlib.crc32(normalise(data)) & 0xFFFFFFFF,)
data_xml_element.setAttribute("checksum", crc)
elif self.has_lazy_data():
data_xml_element.setAttribute("state", "deferred")
else:
data_xml_element.setAttribute("state", "loaded")
data_std = data.std()

stats_xml_element = doc.createElement("stats")
stats_xml_element.setAttribute("std", str(data_std))
stats_xml_element.setAttribute("min", str(data_min))
stats_xml_element.setAttribute("max", str(data_max))
stats_xml_element.setAttribute("masked", str(ma.is_masked(data)))
stats_xml_element.setAttribute("mean", str(data.mean()))

data_xml_element.appendChild(stats_xml_element)

# We only print the "state" if we have not output checksum or data stats:
if not (checksum or CML_SETTINGS.data_array_stats):
if self.has_lazy_data():
data_xml_element.setAttribute("state", "deferred")
else:
data_xml_element.setAttribute("state", "loaded")

# Add the dtype, and also the array and mask orders if the
# data is loaded.
Expand All @@ -4065,8 +4098,14 @@ def _order(array):
if array_byteorder is not None:
data_xml_element.setAttribute("byteorder", array_byteorder)

if order and ma.isMaskedArray(data):
data_xml_element.setAttribute("mask_order", _order(data.mask))
if ma.isMaskedArray(data):
if CML_SETTINGS.masked_value_count:
data_xml_element.setAttribute(
"masked_count", str(np.count_nonzero(data.mask))
)
if order:
data_xml_element.setAttribute("mask_order", _order(data.mask))

else:
dtype = self.lazy_data().dtype
data_xml_element.setAttribute("dtype", dtype.name)
Expand Down
44 changes: 34 additions & 10 deletions lib/iris/tests/_shared_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,8 +366,10 @@ def assert_CML(
request: pytest.FixtureRequest,
cubes,
reference_filename=None,
checksum=True,
approx_data=False,
checksum=True,
coord_checksum=None,
numpy_formatting=None,
**kwargs,
):
"""Test that the CML for the given cubes matches the contents of
Expand All @@ -379,6 +381,9 @@ def assert_CML(
The data payload of individual cubes is not compared unless ``checksum``
or ``approx_data`` are True.

Further control of the CML formatting can be made using the
:data:`iris.util.CML_SETTINGS` context manager.

Notes
-----
The ``approx_data`` keyword provides functionality equivalent to the
Expand All @@ -393,20 +398,28 @@ def assert_CML(
A pytest ``request`` fixture passed down from the calling test. Is
required by :func:`result_path`. See :func:`result_path` Examples
for how to access the ``request`` fixture.
cubes :
cubes : iris.cube.Cube or iris.cube.CubeList
Either a Cube or a sequence of Cubes.
reference_filename : optional, default=None
The relative path (relative to the test results directory).
If omitted, the result is generated from the calling
method's name, class, and module using
:meth:`iris.tests.IrisTest.result_path`.
checksum : bool, optional
When True, causes the CML to include a checksum for each
Cube's data. Defaults to True.
approx_data : bool, optional, default=False
When True, the cube's data will be compared with the reference
data and asserted to be within a specified tolerance. Implies
``checksum=False``.
checksum : bool, optional, default=True
When True, causes the CML to include a checksum for each
Cube's data. Defaults to True.
coord_checksum : bool, optional, default=True
When True, causes the CML to include a checksum for each
Cube's coordinate data. Defaults to True.
numpy_formatting : bool, optional, default=False
When True, causes the CML to use numpy-style formatting for
array data. When False, uses simplified array formatting
that doesn't rely on NumPy's ``array2string`` formatter.
Defaults to False.

"""
_check_for_request_fixture(request, "assert_CML")
Expand All @@ -417,20 +430,31 @@ def assert_CML(
reference_filename = result_path(request, None, "cml")
# Note: reference_path could be a tuple of path parts
reference_path = get_result_path(reference_filename)

# default CML output options for tests:
extra_format_options = {"numpy_formatting": False, "coord_checksum": True}
# update formatting opts with keywords passed into this function:
for k in extra_format_options.keys():
if (user_opt := locals()[k]) is not None:
extra_format_options[k] = user_opt

if approx_data:
# compare data payload stats against known good stats
checksum = False # ensure we are not comparing data checksums
# compare data payload stats against known good stats.
# Make sure options that compare exact data are disabled:
checksum = False
extra_format_options["data_array_stats"] = False

for i, cube in enumerate(cubes):
# Build the json stats filename based on CML file path:
fname = reference_path.removesuffix(".cml")
fname += f".data.{i}.json"
assert_data_almost_equal(cube.data, fname, **kwargs)
if isinstance(cubes, (list, tuple)):

with iris.util.CML_SETTINGS.set(**extra_format_options):
cml = iris.cube.CubeList(cubes).xml(
checksum=checksum, order=False, byteorder=False
)
else:
cml = cubes.xml(checksum=checksum, order=False, byteorder=False)

_check_same(cml, reference_path)


Expand Down
Loading
Loading