SciTools · bjlittle · Oct 17, 2025 · Oct 9, 2025 · Oct 9, 2025 · Oct 10, 2025
diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst
@@ -40,6 +40,10 @@ This document explains the changes made to Iris for this release
    :func:`~iris.fileformats.netcdf.saver.save_mesh` also supports ``zlib``
    compression. (:issue:`6565`, :pull:`6728`)
 
+#. `@ukmo-ccbunney`_ added a new :class:`~iris.util.CMLSettings` class to control
+   the formatting of Cube CML output via a context manager.
+   (:issue:`6244`, :pull:`6743`)
+
 
 🐛 Bugs Fixed
 =============
@@ -109,9 +113,12 @@ This document explains the changes made to Iris for this release
 #. `@melissaKG`_ upgraded Iris' tests to no longer use the deprecated
    ``git whatchanged`` command. (:pull:`6672`)
 
-#. `@ukmo-ccbunney` merged functionality of ``assert_CML_approx_data`` into
+#. `@ukmo-ccbunney`_ merged functionality of ``assert_CML_approx_data`` into
    ``assert_CML`` via the use of a new ``approx_data`` keyword. (:pull:`6713`)
 
+#. `@ukmo-ccbunney`_ updated ``assert_CML`` to use stricter array formatting to
+   avoid changes in tests due to NumPy version changes. (:pull:`6743`)
+
 
 .. comment
     Whatsnew author names (@github name) in alphabetical order. Note that,
@@ -124,4 +131,4 @@ This document explains the changes made to Iris for this release
 .. comment
     Whatsnew resources in alphabetical order:
 
-.. _netcdf-c#3183: https://github.com/Unidata/netcdf-c/issues/3183
+.. _netcdf-c#3183: https://github.com/Unidata/netcdf-c/issues/3183
diff --git a/lib/iris/coords.py b/lib/iris/coords.py
@@ -32,6 +32,7 @@
 import iris.exceptions
 import iris.time
 import iris.util
+from iris.util import CML_SETTINGS
 import iris.warnings
 
 #: The default value for ignore_axis which controls guess_coord_axis' behaviour
@@ -853,10 +854,45 @@ def xml_element(self, doc):
             if self.coord_system:
                 element.appendChild(self.coord_system.xml_element(doc))
 
+        is_masked_array = np.ma.isMaskedArray(self._values)
+
         # Add the values
         element.setAttribute("value_type", str(self._value_type_name()))
         element.setAttribute("shape", str(self.shape))
 
+        # data checksum
+        if CML_SETTINGS.coord_checksum:
+            crc = iris.util.array_checksum(self._values)
+            element.setAttribute("checksum", crc)
+
+            if is_masked_array:
+                # Add a checksum of the mask, or a marker if nothing is masked
+                if np.ma.is_masked(self._values):
+                    crc = iris.util.array_checksum(self._values.mask)
+                else:
+                    crc = "no-masked-elements"
+                element.setAttribute("mask_checksum", crc)
+
+        # array ordering:
+        def _order(array):
+            order = ""
+            if array.flags["C_CONTIGUOUS"]:
+                order = "C"
+            elif array.flags["F_CONTIGUOUS"]:
+                order = "F"
+            return order
+
+        if CML_SETTINGS.coord_order:
+            element.setAttribute("order", _order(self._values))
+            if is_masked_array:
+                element.setAttribute("mask_order", _order(self._values.mask))
+
+        # masked element count:
+        if CML_SETTINGS.masked_value_count and is_masked_array:
+            element.setAttribute(
+                "masked_count", str(np.count_nonzero(self._values.mask))
+            )
+
         # The values are referred to "points" of a coordinate and "data"
         # otherwise.
         if isinstance(self, Coord):
@@ -865,7 +901,31 @@ def xml_element(self, doc):
             values_term = "indices"
         else:
             values_term = "data"
-        element.setAttribute(values_term, self._xml_array_repr(self._values))
+        element.setAttribute(
+            values_term,
+            self._xml_array_repr(self._values),
+        )
+
+        if iris.util.CML_SETTINGS.coord_data_array_stats and len(self._values) > 1:
+            data = self._values
+
+            if np.issubdtype(data.dtype.type, np.number):
+                data_min = data.min()
+                data_max = data.max()
+                if data_min == data_max:
+                    # When data is constant, std() is too sensitive.
+                    data_std = 0
+                else:
+                    data_std = data.std()
+
+                stats_xml_element = doc.createElement("stats")
+                stats_xml_element.setAttribute("std", str(data_std))
+                stats_xml_element.setAttribute("min", str(data_min))
+                stats_xml_element.setAttribute("max", str(data_max))
+                stats_xml_element.setAttribute("masked", str(ma.is_masked(data)))
+                stats_xml_element.setAttribute("mean", str(data.mean()))
+
+                element.appendChild(stats_xml_element)
 
         return element
 
@@ -896,7 +956,11 @@ def _xml_array_repr(data):
         if hasattr(data, "to_xml_attr"):
             result = data._values.to_xml_attr()
         else:
-            result = iris.util.format_array(data)
+            edgeitems = CML_SETTINGS.array_edgeitems
+            if CML_SETTINGS.numpy_formatting:
+                result = iris.util.format_array(data, edgeitems=edgeitems)
+            else:
+                result = iris.util.array_summary(data, edgeitems=edgeitems)
         return result
 
     def _value_type_name(self):
@@ -2565,7 +2629,10 @@ def xml_element(self, doc):
 
         # Add bounds, points are handled by the parent class.
         if self.has_bounds():
-            element.setAttribute("bounds", self._xml_array_repr(self.bounds))
+            element.setAttribute(
+                "bounds",
+                self._xml_array_repr(self.bounds),
+            )
 
         return element
 

diff --git a/lib/iris/cube.py b/lib/iris/cube.py
@@ -22,7 +22,6 @@
 from typing import TYPE_CHECKING, Any, Optional, TypeGuard
 import warnings
 from xml.dom.minidom import Document
-import zlib
 
 from cf_units import Unit
 import dask.array as da
@@ -56,6 +55,7 @@
     from iris.mesh import MeshCoord
 import iris.exceptions
 import iris.util
+from iris.util import CML_SETTINGS
 import iris.warnings
 
 __all__ = ["Cube", "CubeAttrsDict", "CubeList"]
@@ -171,7 +171,10 @@ def insert(self, index, cube):
         super(CubeList, self).insert(index, cube)
 
     def xml(self, checksum=False, order=True, byteorder=True):
-        """Return a string of the XML that this list of cubes represents."""
+        """Return a string of the XML that this list of cubes represents.
+
+        See :func:`iris.util.CML_SETTINGS.set` for controlling the XML output formatting.
+        """
         with np.printoptions(legacy=NP_PRINTOPTIONS_LEGACY):
             doc = Document()
             cubes_xml_element = doc.createElement("cubes")
@@ -3902,12 +3905,29 @@ def xml(
         order: bool = True,
         byteorder: bool = True,
     ) -> str:
-        """Return a fully valid CubeML string representation of the Cube."""
+        """Return a fully valid CubeML string representation of the Cube.
+
+        The format of the generated XML can be controlled using the
+        ``iris.util.CML_SETTINGS.set`` method as a context manager.
+
+        For example, to include array statistics for the coordinate data:
+
+        .. code-block:: python
+
+            with CML_SETTINGS.set(coord_data_array_stats=True):
+                print(cube.xml())
+
+        See :func:`iris.util.CML_SETTINGS.set` for more details.
+
+        """
         with np.printoptions(legacy=NP_PRINTOPTIONS_LEGACY):
             doc = Document()
 
             cube_xml_element = self._xml_element(
-                doc, checksum=checksum, order=order, byteorder=byteorder
+                doc,
+                checksum=checksum,
+                order=order,
+                byteorder=byteorder,
             )
             cube_xml_element.setAttribute("xmlns", XML_NAMESPACE_URI)
             doc.appendChild(cube_xml_element)
@@ -3916,7 +3936,13 @@ def xml(
             doc = self._sort_xml_attrs(doc)
             return iris.util._print_xml(doc)
 
-    def _xml_element(self, doc, checksum=False, order=True, byteorder=True):
+    def _xml_element(
+        self,
+        doc,
+        checksum=False,
+        order=True,
+        byteorder=True,
+    ):
         cube_xml_element = doc.createElement("cube")
 
         if self.standard_name:
@@ -4006,39 +4032,46 @@ def dimmeta_xml_element(element, typename, dimscall):
         data_xml_element = doc.createElement("data")
         data_xml_element.setAttribute("shape", str(self.shape))
 
-        # NB. Getting a checksum triggers any deferred loading,
+        # NB. Getting a checksum or data stats triggers any deferred loading,
         # in which case it also has the side-effect of forcing the
         # byte order to be native.
+
         if checksum:
             data = self.data
-
-            # Ensure consistent memory layout for checksums.
-            def normalise(data):
-                data = np.ascontiguousarray(data)
-                if data.dtype.newbyteorder("<") != data.dtype:
-                    data = data.byteswap(False)
-                    data.dtype = data.dtype.newbyteorder("<")
-                return data
-
+            crc = iris.util.array_checksum(data)
+            data_xml_element.setAttribute("checksum", crc)
             if ma.isMaskedArray(data):
-                # Fill in masked values to avoid the checksum being
-                # sensitive to unused numbers. Use a fixed value so
-                # a change in fill_value doesn't affect the
-                # checksum.
-                crc = "0x%08x" % (zlib.crc32(normalise(data.filled(0))) & 0xFFFFFFFF,)
-                data_xml_element.setAttribute("checksum", crc)
                 if ma.is_masked(data):
-                    crc = "0x%08x" % (zlib.crc32(normalise(data.mask)) & 0xFFFFFFFF,)
+                    crc = iris.util.array_checksum(data.mask)
                 else:
                     crc = "no-masked-elements"
                 data_xml_element.setAttribute("mask_checksum", crc)
+
+        if CML_SETTINGS.data_array_stats:
+            data = self.data
+            data_min = data.min()
+            data_max = data.max()
+            if data_min == data_max:
+                # When data is constant, std() is too sensitive.
+                data_std = 0
             else:
-                crc = "0x%08x" % (zlib.crc32(normalise(data)) & 0xFFFFFFFF,)
-                data_xml_element.setAttribute("checksum", crc)
-        elif self.has_lazy_data():
-            data_xml_element.setAttribute("state", "deferred")
-        else:
-            data_xml_element.setAttribute("state", "loaded")
+                data_std = data.std()
+
+            stats_xml_element = doc.createElement("stats")
+            stats_xml_element.setAttribute("std", str(data_std))
+            stats_xml_element.setAttribute("min", str(data_min))
+            stats_xml_element.setAttribute("max", str(data_max))
+            stats_xml_element.setAttribute("masked", str(ma.is_masked(data)))
+            stats_xml_element.setAttribute("mean", str(data.mean()))
+
+            data_xml_element.appendChild(stats_xml_element)
+
+        # We only print the "state" if we have not output checksum or data stats:
+        if not (checksum or CML_SETTINGS.data_array_stats):
+            if self.has_lazy_data():
+                data_xml_element.setAttribute("state", "deferred")
+            else:
+                data_xml_element.setAttribute("state", "loaded")
 
         # Add the dtype, and also the array and mask orders if the
         # data is loaded.
@@ -4065,8 +4098,14 @@ def _order(array):
                 if array_byteorder is not None:
                     data_xml_element.setAttribute("byteorder", array_byteorder)
 
-            if order and ma.isMaskedArray(data):
-                data_xml_element.setAttribute("mask_order", _order(data.mask))
+            if ma.isMaskedArray(data):
+                if CML_SETTINGS.masked_value_count:
+                    data_xml_element.setAttribute(
+                        "masked_count", str(np.count_nonzero(data.mask))
+                    )
+                if order:
+                    data_xml_element.setAttribute("mask_order", _order(data.mask))
+
         else:
             dtype = self.lazy_data().dtype
         data_xml_element.setAttribute("dtype", dtype.name)

diff --git a/lib/iris/tests/_shared_utils.py b/lib/iris/tests/_shared_utils.py
@@ -366,8 +366,10 @@ def assert_CML(
     request: pytest.FixtureRequest,
     cubes,
     reference_filename=None,
-    checksum=True,
     approx_data=False,
+    checksum=True,
+    coord_checksum=None,
+    numpy_formatting=None,
     **kwargs,
 ):
     """Test that the CML for the given cubes matches the contents of
@@ -379,6 +381,9 @@ def assert_CML(
     The data payload of individual cubes is not compared unless ``checksum``
     or ``approx_data`` are True.
 
+    Further control of the CML formatting can be made using the
+    :data:`iris.util.CML_SETTINGS` context manager.
+
     Notes
     -----
     The ``approx_data`` keyword provides functionality equivalent to the
@@ -393,20 +398,28 @@ def assert_CML(
         A pytest ``request`` fixture passed down from the calling test. Is
         required by :func:`result_path`. See :func:`result_path` Examples
         for how to access the ``request`` fixture.
-    cubes :
+    cubes : iris.cube.Cube or iris.cube.CubeList
         Either a Cube or a sequence of Cubes.
     reference_filename : optional, default=None
         The relative path (relative to the test results directory).
         If omitted, the result is generated from the calling
         method's name, class, and module using
         :meth:`iris.tests.IrisTest.result_path`.
-    checksum : bool, optional
-        When True, causes the CML to include a checksum for each
-        Cube's data. Defaults to True.
     approx_data : bool, optional, default=False
         When True, the cube's data will be compared with the reference
         data and asserted to be within a specified tolerance. Implies
         ``checksum=False``.
+    checksum : bool, optional, default=True
+        When True, causes the CML to include a checksum for each
+        Cube's data. Defaults to True.
+    coord_checksum : bool, optional
+        When True, causes the CML to include a checksum for each
+        Cube's coordinate data. If None (the default), True is used.
+    numpy_formatting : bool, optional
+        When True, causes the CML to use numpy-style formatting for
+        array data. When False, uses simplified array formatting
+        that doesn't rely on NumPy's ``array2string`` formatter.
+        If None (the default), False is used.
 
     """
     _check_for_request_fixture(request, "assert_CML")
@@ -417,20 +430,31 @@ def assert_CML(
         reference_filename = result_path(request, None, "cml")
     # Note: reference_path could be a tuple of path parts
     reference_path = get_result_path(reference_filename)
+
+    # default CML output options for tests:
+    extra_format_options = {"numpy_formatting": False, "coord_checksum": True}
+    # update formatting opts with keywords passed into this function:
+    for k in extra_format_options.keys():
+        if (user_opt := locals()[k]) is not None:
+            extra_format_options[k] = user_opt
+
     if approx_data:
-        # compare data payload stats against known good stats
-        checksum = False  # ensure we are not comparing data checksums
+        # compare data payload stats against known good stats.
+        # Make sure options that compare exact data are disabled:
+        checksum = False
+        extra_format_options["data_array_stats"] = False
+
         for i, cube in enumerate(cubes):
             # Build the json stats filename based on CML file path:
             fname = reference_path.removesuffix(".cml")
             fname += f".data.{i}.json"
             assert_data_almost_equal(cube.data, fname, **kwargs)
-    if isinstance(cubes, (list, tuple)):
+
+    with iris.util.CML_SETTINGS.set(**extra_format_options):
         cml = iris.cube.CubeList(cubes).xml(
             checksum=checksum, order=False, byteorder=False
         )
-    else:
-        cml = cubes.xml(checksum=checksum, order=False, byteorder=False)
+
     _check_same(cml, reference_path)