Skip to content

Commit 1e34201

Browse files
Added cumulative sum preprocessor (#2642)
Co-authored-by: Bouwe Andela <b.andela@esciencecenter.nl>
1 parent 31fe0d4 commit 1e34201

File tree

12 files changed

+608
-177
lines changed

12 files changed

+608
-177
lines changed

doc/recipe/preprocessor.rst

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2870,8 +2870,46 @@ Other
28702870

28712871
Miscellaneous functions that do not belong to any of the other categories.
28722872

2873-
Clip
2874-
----
2873+
.. _cumulative_sum:
2874+
2875+
``cumulative_sum``
2876+
------------------
2877+
2878+
This function calculates cumulative sums along a given coordinate.
2879+
2880+
The ``cumulative_sum`` preprocessor supports the following arguments in the
2881+
recipe:
2882+
2883+
* ``coord`` (:obj:`str`): Coordinate over which the cumulative sum is
2884+
calculated.
2885+
Must be 0D or 1D.
2886+
* ``weights`` (array-like, :obj:`bool`, or ``None``, default: ``None``):
2887+
Weights for the calculation of the cumulative sum.
2888+
Each element in the data is multiplied by the corresponding weight before
2889+
summing.
2890+
Can be an array of the same shape as the input data, ``False`` or ``None``
2891+
(no weighting), or ``True`` (calculate the weights from the coordinate
2892+
bounds; only works if each coordinate point has exactly 2 bounds).
2893+
* ``method`` (:obj:`str`, default: ``"sequential"``): Method used to perform
2894+
the cumulative sum.
2895+
Only relevant if the cube has `lazy data
2896+
<https://scitools-iris.readthedocs.io/en/stable/userguide/real_and_lazy_data.html>`__.
2897+
See :func:`dask.array.cumsum` for details.
2898+
2899+
Example:
2900+
2901+
.. code-block:: yaml
2902+
2903+
preprocessors:
2904+
preproc_cumulative_sum:
2905+
cumulative_sum:
2906+
coord: time
2907+
weights: true
2908+
2909+
See also :func:`esmvalcore.preprocessor.cumulative_sum`.
2910+
2911+
``clip``
2912+
--------
28752913

28762914
This function clips data values to a certain minimum, maximum or range. The function takes two
28772915
arguments:
@@ -2892,7 +2930,7 @@ The example below shows how to set all values below zero to zero.
28922930
.. _histogram:
28932931

28942932
``histogram``
2895-
-------------------
2933+
-------------
28962934

28972935
This function calculates histograms.
28982936

esmvalcore/iris_helpers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def add_leading_dim_to_cube(cube, dim_coord):
3939
4040
Raises
4141
------
42-
CoordinateMultiDimError
42+
iris.exceptions.CoordinateMultiDimError
4343
``dim_coord`` is not 1D.
4444
4545
"""

esmvalcore/preprocessor/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
mask_outside_range,
5050
)
5151
from ._multimodel import ensemble_statistics, multi_model_statistics
52-
from ._other import clip, histogram
52+
from ._other import clip, cumulative_sum, histogram
5353
from ._regrid import (
5454
extract_coordinate_points,
5555
extract_levels,
@@ -146,7 +146,8 @@
146146
# Other
147147
"clip",
148148
"rolling_window_statistics",
149-
# Region selection
149+
"cumulative_sum",
150+
# Region operations
150151
"extract_region",
151152
"extract_shape",
152153
"extract_volume",

esmvalcore/preprocessor/_other.py

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,14 @@
1313
import numpy as np
1414
from iris.coords import Coord, DimCoord
1515
from iris.cube import Cube
16+
from iris.exceptions import CoordinateMultiDimError
1617

1718
from esmvalcore.iris_helpers import rechunk_cube
1819
from esmvalcore.preprocessor._shared import (
1920
get_all_coord_dims,
2021
get_all_coords,
2122
get_array_module,
23+
get_coord_weights,
2224
get_weights,
2325
preserve_float_dtype,
2426
)
@@ -58,6 +60,84 @@ def clip(cube, minimum=None, maximum=None):
5860
return cube
5961

6062

63+
@preserve_float_dtype
64+
def cumulative_sum(
65+
cube: Cube,
66+
coord: Coord | str,
67+
weights: np.ndarray | da.Array | bool | None = None,
68+
method: Literal["sequential", "blelloch"] = "sequential",
69+
) -> Cube:
70+
"""Calculate cumulative sum of the elements along a given coordinate.
71+
72+
Parameters
73+
----------
74+
cube:
75+
Input cube.
76+
coord:
77+
Coordinate over which the cumulative sum is calculated. Must be 0D or
78+
1D.
79+
weights:
80+
Weights for the calculation of the cumulative sum. Each element in the
81+
data is multiplied by the corresponding weight before summing. Can be
82+
an array of the same shape as the input data, ``False`` or ``None`` (no
83+
weighting), or ``True`` (calculate the weights from the coordinate
84+
bounds; only works if each coordinate point has exactly 2 bounds).
85+
method:
86+
Method used to perform the cumulative sum. Only relevant if the cube
87+
has `lazy data
88+
<https://scitools-iris.readthedocs.io/en/stable/userguide/
89+
real_and_lazy_data.html>`__. See :func:`dask.array.cumsum` for details.
90+
91+
Returns
92+
-------
93+
Cube
94+
Cube of cumulative sum. Has same dimensions and coordinates of the
95+
input cube.
96+
97+
Raises
98+
------
99+
iris.exceptions.CoordinateMultiDimError
100+
``coord`` is not 0D or 1D.
101+
iris.exceptions.CoordinateNotFoundError
102+
``coord`` is not found in ``cube``.
103+
104+
"""
105+
cube = cube.copy()
106+
107+
# Only 0D and 1D coordinates are supported
108+
coord = cube.coord(coord)
109+
if coord.ndim > 1:
110+
raise CoordinateMultiDimError(coord)
111+
112+
# Weighting, make sure to adapt cube standard name and units in this case
113+
if weights is True:
114+
weights = get_coord_weights(cube, coord, broadcast=True)
115+
if isinstance(weights, (np.ndarray, da.Array)):
116+
cube.data = cube.core_data() * weights
117+
cube.standard_name = None
118+
cube.units = cube.units * coord.units
119+
120+
axes = get_all_coord_dims(cube, [coord])
121+
122+
# For 0D coordinates, cumulative_sum is a no-op (this aligns with
123+
# numpy's/dask's behavior)
124+
if axes:
125+
if cube.has_lazy_data():
126+
cube.data = da.cumsum(
127+
cube.core_data(), axis=axes[0], method=method
128+
)
129+
else:
130+
cube.data = np.cumsum(cube.core_data(), axis=axes[0])
131+
132+
# Adapt cube metadata
133+
if cube.var_name is not None:
134+
cube.var_name = f"cumulative_{cube.var_name}"
135+
if cube.long_name is not None:
136+
cube.long_name = f"Cumulative {cube.long_name}"
137+
138+
return cube
139+
140+
61141
@preserve_float_dtype
62142
def histogram(
63143
cube: Cube,
@@ -133,8 +213,10 @@ def histogram(
133213
Invalid `normalization` or `bin_range` given or `bin_range` is ``None``
134214
and data is fully masked.
135215
iris.exceptions.CoordinateNotFoundError
136-
`longitude` is not found in cube if `weights=True`, `latitude` is in
137-
`coords`, and no `cell_area` is given as
216+
A given coordinate of ``coords`` is not found in ``cube``.
217+
iris.exceptions.CoordinateNotFoundError
218+
`longitude` is not found in cube if ``weights=True``, `latitude` is in
219+
``coords``, and no `cell_area` is given as
138220
:ref:`supplementary_variables`.
139221
140222
"""

esmvalcore/preprocessor/_shared.py

Lines changed: 57 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -301,15 +301,11 @@ def get_weights(
301301
"""Calculate suitable weights for given coordinates."""
302302
npx = get_array_module(cube.core_data())
303303
weights = npx.ones_like(cube.core_data())
304+
coords = [c.name() if hasattr(c, "name") else c for c in coords]
304305

305306
# Time weights: lengths of time interval
306307
if "time" in coords:
307-
weights = weights * broadcast_to_shape(
308-
npx.array(get_time_weights(cube)),
309-
cube.shape,
310-
cube.coord_dims("time"),
311-
chunks=cube.lazy_data().chunks if cube.has_lazy_data() else None,
312-
)
308+
weights = weights * get_coord_weights(cube, "time", broadcast=True)
313309

314310
# Latitude weights: cell areas
315311
if "latitude" in coords:
@@ -319,9 +315,8 @@ def get_weights(
319315
):
320316
raise CoordinateNotFoundError(
321317
f"Cube {cube.summary(shorten=True)} needs a `longitude` "
322-
f"coordinate to calculate cell area weights for weighted "
323-
f"distance metric over coordinates {coords} (alternatively, "
324-
f"a `cell_area` can be given to the cube as supplementary "
318+
f"coordinate to calculate cell area weights (alternatively, a "
319+
f"`cell_area` can be given to the cube as supplementary "
325320
f"variable)"
326321
)
327322
try_adding_calculated_cell_area(cube)
@@ -341,43 +336,74 @@ def get_weights(
341336
return weights
342337

343338

344-
def get_time_weights(cube: Cube) -> np.ndarray | da.core.Array:
345-
"""Compute the weighting of the time axis.
339+
def get_coord_weights(
340+
cube: Cube,
341+
coord: str | Coord,
342+
broadcast: bool = False,
343+
) -> np.ndarray | da.core.Array:
344+
"""Compute weighting for an arbitrary coordinate.
345+
346+
Weights are calculated as the difference between the upper and lower
347+
bounds.
346348
347349
Parameters
348350
----------
349351
cube:
350352
Input cube.
353+
coord:
354+
Coordinate which is used to calculate the weights. Must have bounds
355+
array with 2 bounds per point.
356+
broadcast:
357+
If ``False``, weights have the shape of ``coord``. If ``True``,
358+
broadcast weights to shape of cube.
351359
352360
Returns
353361
-------
354362
np.ndarray or da.Array
355-
Array of time weights for averaging. Returns a
356-
:class:`dask.array.Array` if the input cube has lazy data; a
357-
:class:`numpy.ndarray` otherwise.
363+
Array of axis weights. Returns a :class:`dask.array.Array` if the input
364+
cube has lazy data; a :class:`numpy.ndarray` otherwise.
358365
359366
"""
360-
time = cube.coord("time")
361-
coord_dims = cube.coord_dims("time")
367+
coord = cube.coord(coord)
368+
coord_dims = cube.coord_dims(coord)
362369

363-
# Multidimensional time coordinates are not supported: In this case,
364-
# weights cannot be simply calculated as difference between the bounds
365-
if len(coord_dims) > 1:
370+
# Coordinate needs bounds of size 2
371+
if not coord.has_bounds():
372+
raise ValueError(
373+
f"Cannot calculate weights for coordinate '{coord.name()}' "
374+
f"without bounds"
375+
)
376+
if coord.core_bounds().shape[-1] != 2:
366377
raise ValueError(
367-
f"Weighted statistical operations are not supported for "
368-
f"{len(coord_dims):d}D time coordinates, expected 0D or 1D"
378+
f"Cannot calculate weights for coordinate '{coord.name()}' "
379+
f"with {coord.core_bounds().shape[-1]} bounds per point, expected "
380+
f"2 bounds per point"
369381
)
370382

371-
# Extract 1D time weights (= lengths of time intervals)
372-
time_weights = time.lazy_bounds()[:, 1] - time.lazy_bounds()[:, 0]
373-
if cube.has_lazy_data():
374-
# Align the weight chunks with the data chunks to avoid excessively
375-
# large chunks as a result of broadcasting.
376-
time_chunks = cube.lazy_data().chunks[coord_dims[0]]
377-
time_weights = time_weights.rechunk(time_chunks)
378-
else:
379-
time_weights = time_weights.compute()
380-
return time_weights
383+
# Calculate weights of same shape as coordinate and make sure to use
384+
# identical chunks as parent cube for non-scalar lazy data
385+
weights = np.abs(coord.lazy_bounds()[:, 1] - coord.lazy_bounds()[:, 0])
386+
if cube.has_lazy_data() and coord_dims:
387+
coord_chunks = tuple(cube.lazy_data().chunks[d] for d in coord_dims)
388+
weights = weights.rechunk(coord_chunks)
389+
if not cube.has_lazy_data():
390+
weights = weights.compute()
391+
392+
# Broadcast to cube shape if desired; scalar arrays needs special treatment
393+
# since iris.broadcast_to_shape cannot handle this
394+
if broadcast:
395+
chunks = cube.lazy_data().chunks if cube.has_lazy_data() else None
396+
if coord_dims:
397+
weights = broadcast_to_shape(
398+
weights, cube.shape, coord_dims, chunks=chunks
399+
)
400+
else:
401+
if cube.has_lazy_data():
402+
weights = da.broadcast_to(weights, cube.shape, chunks=chunks)
403+
else:
404+
weights = np.broadcast_to(weights, cube.shape)
405+
406+
return weights
381407

382408

383409
def try_adding_calculated_cell_area(cube: Cube) -> None:

esmvalcore/preprocessor/_time.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@
3434
from esmvalcore.cmor.fixes import get_next_month, get_time_bounds
3535
from esmvalcore.iris_helpers import date2num, rechunk_cube
3636
from esmvalcore.preprocessor._shared import (
37+
get_coord_weights,
3738
get_iris_aggregator,
38-
get_time_weights,
3939
preserve_float_dtype,
4040
update_weights_kwargs,
4141
)
@@ -867,7 +867,7 @@ def climate_statistics(
867867
def _add_time_weights_coord(cube):
868868
"""Add time weight coordinate to cube (in-place)."""
869869
time_weights_coord = AuxCoord(
870-
get_time_weights(cube),
870+
get_coord_weights(cube, "time"),
871871
long_name="_time_weights_",
872872
units=cube.coord("time").units,
873873
)

esmvalcore/preprocessor/_volume.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,17 @@
1717
from iris.cube import Cube
1818
from iris.util import broadcast_to_shape
1919

20-
from ._shared import (
20+
from esmvalcore.preprocessor._shared import (
21+
get_coord_weights,
2122
get_iris_aggregator,
2223
get_normalized_cube,
2324
preserve_float_dtype,
2425
try_adding_calculated_cell_area,
2526
update_weights_kwargs,
2627
)
27-
from ._supplementary_vars import register_supplementaries
28+
from esmvalcore.preprocessor._supplementary_vars import (
29+
register_supplementaries,
30+
)
2831

2932
logger = logging.getLogger(__name__)
3033

@@ -379,7 +382,6 @@ def axis_statistics(
379382
cube,
380383
_add_axis_stats_weights_coord,
381384
coord=coord,
382-
coord_dims=coord_dims,
383385
)
384386

385387
with warnings.catch_warnings():
@@ -406,19 +408,15 @@ def axis_statistics(
406408
return result
407409

408410

409-
def _add_axis_stats_weights_coord(cube, coord, coord_dims):
411+
def _add_axis_stats_weights_coord(cube, coord):
410412
"""Add weights for axis_statistics to cube (in-place)."""
411-
weights = np.abs(coord.lazy_bounds()[:, 1] - coord.lazy_bounds()[:, 0])
412-
if cube.has_lazy_data():
413-
coord_chunks = tuple(cube.lazy_data().chunks[d] for d in coord_dims)
414-
weights = weights.rechunk(coord_chunks)
415-
else:
416-
weights = weights.compute()
413+
weights = get_coord_weights(cube, coord)
417414
weights_coord = AuxCoord(
418415
weights,
419416
long_name="_axis_statistics_weights_",
420417
units=coord.units,
421418
)
419+
coord_dims = cube.coord_dims(coord)
422420
cube.add_aux_coord(weights_coord, coord_dims)
423421

424422

0 commit comments

Comments
 (0)