Skip to content

Commit 2247a29

Browse files
Optimize functions mask_landsea(), mask_landseaice() and calculate_volume() for lazy input (#2515)
Co-authored-by: Bouwe Andela <b.andela@esciencecenter.nl>
1 parent a35d50d commit 2247a29

File tree

7 files changed

+324
-129
lines changed

7 files changed

+324
-129
lines changed

esmvalcore/preprocessor/_mask.py

Lines changed: 91 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,33 @@
88

99
import logging
1010
import os
11+
from collections.abc import Iterable
12+
from typing import Literal, Optional
1113

1214
import cartopy.io.shapereader as shpreader
1315
import dask.array as da
1416
import iris
17+
import iris.util
1518
import numpy as np
1619
import shapely.vectorized as shp_vect
1720
from iris.analysis import Aggregator
21+
from iris.cube import Cube
1822
from iris.util import rolling_window
1923

24+
from esmvalcore.preprocessor._shared import get_array_module
25+
2026
from ._supplementary_vars import register_supplementaries
2127

2228
logger = logging.getLogger(__name__)
2329

2430

25-
def _get_fx_mask(fx_data, fx_option, mask_type):
31+
def _get_fx_mask(
32+
fx_data: np.ndarray | da.Array,
33+
fx_option: Literal['land', 'sea', 'landsea', 'ice'],
34+
mask_type: Literal['sftlf', 'sftof', 'sftgif'],
35+
) -> np.ndarray | da.Array:
2636
"""Build a percentage-thresholded mask from an fx file."""
27-
inmask = da.zeros_like(fx_data, bool)
37+
inmask = np.zeros_like(fx_data, bool) # respects dask through dispatch
2838
if mask_type == 'sftlf':
2939
if fx_option == 'land':
3040
# Mask land out
@@ -50,22 +60,29 @@ def _get_fx_mask(fx_data, fx_option, mask_type):
5060
return inmask
5161

5262

53-
def _apply_fx_mask(fx_mask, var_data):
54-
"""Apply the fx data extracted mask on the actual processed data."""
55-
# Apply mask across
56-
old_mask = da.ma.getmaskarray(var_data)
57-
mask = old_mask | fx_mask
58-
var_data = da.ma.masked_array(var_data, mask=mask)
59-
# maybe fill_value=1e+20
60-
61-
return var_data
63+
def _apply_mask(
64+
mask: np.ndarray | da.Array,
65+
array: np.ndarray | da.Array,
66+
dim_map: Optional[Iterable[int]] = None,
67+
) -> np.ndarray | da.Array:
68+
"""Apply a (broadcasted) mask on an array."""
69+
npx = get_array_module(mask, array)
70+
if dim_map is not None:
71+
if isinstance(array, da.Array):
72+
chunks = array.chunks
73+
else:
74+
chunks = None
75+
mask = iris.util.broadcast_to_shape(
76+
mask, array.shape, dim_map, chunks=chunks
77+
)
78+
return npx.ma.masked_where(mask, array)
6279

6380

6481
@register_supplementaries(
6582
variables=['sftlf', 'sftof'],
6683
required='prefer_at_least_one',
6784
)
68-
def mask_landsea(cube, mask_out):
85+
def mask_landsea(cube: Cube, mask_out: Literal['land', 'sea']) -> Cube:
6986
"""Mask out either land mass or sea (oceans, seas and lakes).
7087
7188
It uses dedicated ancillary variables (sftlf or sftof) or,
@@ -78,16 +95,15 @@ def mask_landsea(cube, mask_out):
7895
7996
Parameters
8097
----------
81-
cube: iris.cube.Cube
82-
data cube to be masked. If the cube has an
98+
cube:
99+
Data cube to be masked. If the cube has an
83100
:class:`iris.coords.AncillaryVariable` with standard name
84101
``'land_area_fraction'`` or ``'sea_area_fraction'`` that will be used.
85102
If both are present, only the 'land_area_fraction' will be used. If the
86103
ancillary variable is not available, the mask will be calculated from
87104
Natural Earth shapefiles.
88-
89-
mask_out: str
90-
either "land" to mask out land mass or "sea" to mask out seas.
105+
mask_out:
106+
Either ``'land'`` to mask out land mass or ``'sea'`` to mask out seas.
91107
92108
Returns
93109
-------
@@ -112,35 +128,40 @@ def mask_landsea(cube, mask_out):
112128
}
113129

114130
# preserve importance order: try sftlf first then sftof
115-
fx_cube = None
131+
ancillary_var = None
116132
try:
117-
fx_cube = cube.ancillary_variable('land_area_fraction')
133+
ancillary_var = cube.ancillary_variable('land_area_fraction')
118134
except iris.exceptions.AncillaryVariableNotFoundError:
119135
try:
120-
fx_cube = cube.ancillary_variable('sea_area_fraction')
136+
ancillary_var = cube.ancillary_variable('sea_area_fraction')
121137
except iris.exceptions.AncillaryVariableNotFoundError:
122-
logger.debug('Ancillary variables land/sea area fraction not '
123-
'found in cube. Check fx_file availability.')
124-
125-
if fx_cube:
126-
fx_cube_data = da.broadcast_to(fx_cube.core_data(), cube.shape)
127-
landsea_mask = _get_fx_mask(fx_cube_data, mask_out,
128-
fx_cube.var_name)
129-
cube.data = _apply_fx_mask(landsea_mask, cube.core_data())
130-
logger.debug("Applying land-sea mask: %s", fx_cube.var_name)
138+
logger.debug(
139+
"Ancillary variables land/sea area fraction not found in "
140+
"cube. Check fx_file availability."
141+
)
142+
143+
if ancillary_var:
144+
landsea_mask = _get_fx_mask(
145+
ancillary_var.core_data(), mask_out, ancillary_var.var_name
146+
)
147+
cube.data = _apply_mask(
148+
landsea_mask,
149+
cube.core_data(),
150+
cube.ancillary_variable_dims(ancillary_var),
151+
)
152+
logger.debug("Applying land-sea mask: %s", ancillary_var.var_name)
131153
else:
132154
if cube.coord('longitude').points.ndim < 2:
133-
cube = _mask_with_shp(cube, shapefiles[mask_out], [
134-
0,
135-
])
155+
cube = _mask_with_shp(cube, shapefiles[mask_out], [0])
136156
logger.debug(
137157
"Applying land-sea mask from Natural Earth shapefile: \n%s",
138158
shapefiles[mask_out],
139159
)
140160
else:
141-
msg = ("Use of shapefiles with irregular grids not yet "
142-
"implemented, land-sea mask not applied.")
143-
raise ValueError(msg)
161+
raise ValueError(
162+
"Use of shapefiles with irregular grids not yet implemented, "
163+
"land-sea mask not applied."
164+
)
144165

145166
return cube
146167

@@ -149,7 +170,7 @@ def mask_landsea(cube, mask_out):
149170
variables=['sftgif'],
150171
required='require_at_least_one',
151172
)
152-
def mask_landseaice(cube, mask_out):
173+
def mask_landseaice(cube: Cube, mask_out: Literal['landsea', 'ice']) -> Cube:
153174
"""Mask out either landsea (combined) or ice.
154175
155176
Function that masks out either landsea (land and seas) or ice (Antarctica,
@@ -159,13 +180,13 @@ def mask_landseaice(cube, mask_out):
159180
160181
Parameters
161182
----------
162-
cube: iris.cube.Cube
163-
data cube to be masked. It should have an
183+
cube:
184+
Data cube to be masked. It should have an
164185
:class:`iris.coords.AncillaryVariable` with standard name
165186
``'land_ice_area_fraction'``.
166-
167187
mask_out:
168-
either "landsea" to mask out landsea or "ice" to mask out ice.
188+
Either ``'landsea'`` to mask out land and oceans or ``'ice'`` to mask
189+
out ice.
169190
170191
Returns
171192
-------
@@ -178,20 +199,26 @@ def mask_landseaice(cube, mask_out):
178199
Error raised if landsea-ice mask not found as an ancillary variable.
179200
"""
180201
# sftgif is the only one so far but users can set others
181-
fx_cube = None
202+
ancillary_var = None
182203
try:
183-
fx_cube = cube.ancillary_variable('land_ice_area_fraction')
204+
ancillary_var = cube.ancillary_variable('land_ice_area_fraction')
184205
except iris.exceptions.AncillaryVariableNotFoundError:
185-
logger.debug('Ancillary variable land ice area fraction '
186-
'not found in cube. Check fx_file availability.')
187-
if fx_cube:
188-
fx_cube_data = da.broadcast_to(fx_cube.core_data(), cube.shape)
189-
landice_mask = _get_fx_mask(fx_cube_data, mask_out, fx_cube.var_name)
190-
cube.data = _apply_fx_mask(landice_mask, cube.core_data())
206+
logger.debug(
207+
"Ancillary variable land ice area fraction not found in cube. "
208+
"Check fx_file availability."
209+
)
210+
if ancillary_var:
211+
landseaice_mask = _get_fx_mask(
212+
ancillary_var.core_data(), mask_out, ancillary_var.var_name
213+
)
214+
cube.data = _apply_mask(
215+
landseaice_mask,
216+
cube.core_data(),
217+
cube.ancillary_variable_dims(ancillary_var),
218+
)
191219
logger.debug("Applying landsea-ice mask: sftgif")
192220
else:
193-
msg = "Landsea-ice mask could not be found. Stopping. "
194-
raise ValueError(msg)
221+
raise ValueError("Landsea-ice mask could not be found. Stopping.")
195222

196223
return cube
197224

@@ -285,9 +312,10 @@ def _mask_with_shp(cube, shapefilename, region_indices=None):
285312
# Create a set of x,y points from the cube
286313
# 1D regular grids
287314
if cube.coord('longitude').points.ndim < 2:
288-
x_p, y_p = da.meshgrid(
315+
x_p, y_p = np.meshgrid(
289316
cube.coord(axis='X').points,
290-
cube.coord(axis='Y').points)
317+
cube.coord(axis='Y').points,
318+
)
291319
# 2D irregular grids; spit an error for now
292320
else:
293321
msg = ("No fx-files found (sftlf or sftof)!"
@@ -296,14 +324,14 @@ def _mask_with_shp(cube, shapefilename, region_indices=None):
296324
raise ValueError(msg)
297325

298326
# Wrap around longitude coordinate to match data
299-
x_p_180 = da.where(x_p >= 180., x_p - 360., x_p)
327+
x_p_180 = np.where(x_p >= 180., x_p - 360., x_p)
300328

301329
# the NE mask has no points at x = -180 and y = +/-90
302330
# so we will fool it and apply the mask at (-179, -89, 89) instead
303-
x_p_180 = da.where(x_p_180 == -180., x_p_180 + 1., x_p_180)
331+
x_p_180 = np.where(x_p_180 == -180., x_p_180 + 1., x_p_180)
304332

305-
y_p_0 = da.where(y_p == -90., y_p + 1., y_p)
306-
y_p_90 = da.where(y_p_0 == 90., y_p_0 - 1., y_p_0)
333+
y_p_0 = np.where(y_p == -90., y_p + 1., y_p)
334+
y_p_90 = np.where(y_p_0 == 90., y_p_0 - 1., y_p_0)
307335

308336
mask = None
309337
for region in regions:
@@ -313,13 +341,14 @@ def _mask_with_shp(cube, shapefilename, region_indices=None):
313341
else:
314342
mask |= shp_vect.contains(region, x_p_180, y_p_90)
315343

316-
mask = da.array(mask)
317-
iris.util.broadcast_to_shape(mask, cube.shape, cube.coord_dims('latitude')
318-
+ cube.coord_dims('longitude'))
344+
if cube.has_lazy_data():
345+
mask = da.array(mask)
319346

320-
old_mask = da.ma.getmaskarray(cube.core_data())
321-
mask = old_mask | mask
322-
cube.data = da.ma.masked_array(cube.core_data(), mask=mask)
347+
cube.data = _apply_mask(
348+
mask,
349+
cube.core_data(),
350+
cube.coord_dims('latitude') + cube.coord_dims('longitude'),
351+
)
323352

324353
return cube
325354

esmvalcore/preprocessor/_shared.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -329,10 +329,11 @@ def get_weights(
329329

330330
# Time weights: lengths of time interval
331331
if 'time' in coords:
332-
weights *= broadcast_to_shape(
332+
weights = weights * broadcast_to_shape(
333333
npx.array(get_time_weights(cube)),
334334
cube.shape,
335335
cube.coord_dims('time'),
336+
chunks=cube.lazy_data().chunks if cube.has_lazy_data() else None,
336337
)
337338

338339
# Latitude weights: cell areas
@@ -350,10 +351,17 @@ def get_weights(
350351
f"variable)"
351352
)
352353
try_adding_calculated_cell_area(cube)
353-
weights *= broadcast_to_shape(
354-
cube.cell_measure('cell_area').core_data(),
354+
area_weights = cube.cell_measure('cell_area').core_data()
355+
if cube.has_lazy_data():
356+
area_weights = da.array(area_weights)
357+
chunks = cube.lazy_data().chunks
358+
else:
359+
chunks = None
360+
weights = weights * broadcast_to_shape(
361+
area_weights,
355362
cube.shape,
356363
cube.cell_measure_dims('cell_area'),
364+
chunks=chunks,
357365
)
358366

359367
return weights

esmvalcore/preprocessor/_volume.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -161,19 +161,22 @@ def calculate_volume(cube: Cube) -> da.core.Array:
161161
try_adding_calculated_cell_area(cube)
162162
area = cube.cell_measure('cell_area').copy()
163163
area_dim = cube.cell_measure_dims(area)
164-
165-
# Ensure cell area is in square meters as the units
166164
area.convert_units('m2')
165+
area_array = area.core_data()
166+
if cube.has_lazy_data():
167+
area_array = da.array(area_array)
167168

168169
# Make sure input cube has not been modified
169170
if not has_cell_measure:
170171
cube.remove_cell_measure('cell_area')
171172

172173
chunks = cube.core_data().chunks if cube.has_lazy_data() else None
173174
area_arr = broadcast_to_shape(
174-
area.core_data(), cube.shape, area_dim, chunks=chunks)
175+
area_array, cube.shape, area_dim, chunks=chunks
176+
)
175177
thickness_arr = broadcast_to_shape(
176-
thickness, cube.shape, z_dim, chunks=chunks)
178+
thickness, cube.shape, z_dim, chunks=chunks
179+
)
177180
grid_volume = area_arr * thickness_arr
178181

179182
return grid_volume

0 commit comments

Comments
 (0)