Skip to content

Commit 56d24a9

Browse files
committed
Add test and improve documentation
1 parent 4c25b6e commit 56d24a9

File tree

3 files changed

+62
-43
lines changed

3 files changed

+62
-43
lines changed

doc/quickstart/find_data.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ unstructured grids, which is required by many software packages or tools to
401401
work correctly and specifically by Iris to interpret the grid as a
402402
:ref:`mesh <iris:ugrid>`.
403403
An example is the horizontal regridding of native ICON data to a regular grid.
404-
While the :ref:`built-in regridding schemes <built-in regridding schemes>`
404+
While the :ref:`built-in regridding schemes <default regridding schemes>`
405405
``linear`` and ``nearest`` can handle both unstructured grids (i.e., not UGRID-compliant) and meshes (i.e., UGRID-compliant),
406406
the ``area_weighted`` scheme requires the input data in UGRID format.
407407
This automatic UGRIDization is enabled by default, but can be switched off with

esmvalcore/dataset.py

Lines changed: 52 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,15 @@
66
import re
77
import textwrap
88
import uuid
9+
from collections.abc import Iterable
910
from copy import deepcopy
1011
from fnmatch import fnmatchcase
1112
from itertools import groupby
1213
from pathlib import Path
13-
from typing import Any, Iterator, Sequence, Union
14+
from typing import Any, Iterator, Sequence, TypeVar, Union
1415

1516
import dask
17+
from dask.delayed import Delayed
1618
from iris.cube import Cube
1719

1820
from esmvalcore import esgf, local
@@ -80,8 +82,12 @@ def _ismatch(facet_value: FacetValue, pattern: FacetValue) -> bool:
8082
and fnmatchcase(facet_value, pattern))
8183

8284

83-
def _first(elems):
84-
return elems[0]
85+
T = TypeVar('T')
86+
87+
88+
def _first(elems: Iterable[T]) -> T:
89+
"""Return the first element."""
90+
return next(iter(elems))
8591

8692

8793
class Dataset:
@@ -669,16 +675,16 @@ def files(self) -> Sequence[File]:
669675
def files(self, value):
670676
self._files = value
671677

672-
def load(self, compute=True) -> Cube:
678+
def load(self, compute=True) -> Cube | Delayed:
673679
"""Load dataset.
674680
675681
Parameters
676682
----------
677683
compute:
678-
If :obj:`True`, return the cube immediately. If :obj:`False`,
679-
return a :class:`~dask.delayed.Delayed` object that can be used
680-
to load the cube by calling its
681-
:func:`~dask.delayed.Delayed.compute` method. Multiple datasets
684+
If :obj:`True`, return the :class:`~iris.cube.Cube` immediately.
685+
If :obj:`False`, return a :class:`~dask.delayed.Delayed` object
686+
that can be used to load the cube by calling its
687+
:meth:`~dask.delayed.Delayed.compute` method. Multiple datasets
682688
can be loaded in parallel by passing a list of such delayeds
683689
to :func:`dask.compute`.
684690
@@ -731,7 +737,14 @@ def _load(self) -> Cube:
731737
msg = "\n".join(lines)
732738
raise InputFilesNotFound(msg)
733739

740+
input_files = [
741+
file.local_file(self.session['download_dir']) if isinstance(
742+
file, esgf.ESGFFile) else file for file in self.files
743+
]
734744
output_file = _get_output_file(self.facets, self.session.preproc_dir)
745+
debug = self.session['save_intermediary_cubes']
746+
747+
# Load all input files and concatenate them.
735748
fix_dir_prefix = Path(
736749
self.session._fixed_file_dir,
737750
self._get_joined_summary_facets('_', join_lists=True) + '_',
@@ -757,36 +770,6 @@ def _load(self) -> Cube:
757770
settings['concatenate'] = {
758771
'check_level': self.session['check_level']
759772
}
760-
settings['cmor_check_metadata'] = {
761-
'check_level': self.session['check_level'],
762-
'cmor_table': self.facets['project'],
763-
'mip': self.facets['mip'],
764-
'frequency': self.facets['frequency'],
765-
'short_name': self.facets['short_name'],
766-
}
767-
if 'timerange' in self.facets:
768-
settings['clip_timerange'] = {
769-
'timerange': self.facets['timerange'],
770-
}
771-
settings['fix_data'] = {
772-
'check_level': self.session['check_level'],
773-
'session': self.session,
774-
**self.facets,
775-
}
776-
settings['cmor_check_data'] = {
777-
'check_level': self.session['check_level'],
778-
'cmor_table': self.facets['project'],
779-
'mip': self.facets['mip'],
780-
'frequency': self.facets['frequency'],
781-
'short_name': self.facets['short_name'],
782-
}
783-
784-
input_files = [
785-
file.local_file(self.session['download_dir']) if isinstance(
786-
file, esgf.ESGFFile) else file for file in self.files
787-
]
788-
789-
debug = self.session['save_intermediary_cubes']
790773

791774
result = []
792775
for input_file in input_files:
@@ -798,6 +781,7 @@ def _load(self) -> Cube:
798781
debug=debug,
799782
**settings['fix_file'],
800783
)
784+
# Multiple cubes may be present in a file.
801785
cubes = dask.delayed(preprocess)(
802786
files,
803787
'load',
@@ -806,6 +790,7 @@ def _load(self) -> Cube:
806790
debug=debug,
807791
**settings['load'],
808792
)
793+
# Combine the cubes into a single cube per file.
809794
cubes = dask.delayed(preprocess)(
810795
cubes,
811796
'fix_metadata',
@@ -817,6 +802,7 @@ def _load(self) -> Cube:
817802
cube = dask.delayed(_first)(cubes)
818803
result.append(cube)
819804

805+
# Concatenate the cubes from all files.
820806
result = dask.delayed(preprocess)(
821807
result,
822808
'concatenate',
@@ -825,7 +811,34 @@ def _load(self) -> Cube:
825811
debug=debug,
826812
**settings['concatenate'],
827813
)
828-
for step, kwargs in dict(tuple(settings.items())[4:]).items():
814+
815+
# At this point `result` is a list containing a single cube. Apply the
816+
# remaining preprocessor functions to this cube.
817+
settings.clear()
818+
settings['cmor_check_metadata'] = {
819+
'check_level': self.session['check_level'],
820+
'cmor_table': self.facets['project'],
821+
'mip': self.facets['mip'],
822+
'frequency': self.facets['frequency'],
823+
'short_name': self.facets['short_name'],
824+
}
825+
if 'timerange' in self.facets:
826+
settings['clip_timerange'] = {
827+
'timerange': self.facets['timerange'],
828+
}
829+
settings['fix_data'] = {
830+
'check_level': self.session['check_level'],
831+
'session': self.session,
832+
**self.facets,
833+
}
834+
settings['cmor_check_data'] = {
835+
'check_level': self.session['check_level'],
836+
'cmor_table': self.facets['project'],
837+
'mip': self.facets['mip'],
838+
'frequency': self.facets['frequency'],
839+
'short_name': self.facets['short_name'],
840+
}
841+
for step, kwargs in settings.items():
829842
result = dask.delayed(preprocess)(
830843
result,
831844
step,

tests/integration/dataset/test_dataset.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import iris.coords
44
import iris.cube
55
import pytest
6+
from dask.delayed import Delayed
67

78
from esmvalcore.config import CFG
89
from esmvalcore.dataset import Dataset
@@ -34,7 +35,8 @@ def example_data(tmp_path, monkeypatch):
3435
monkeypatch.setitem(CFG, 'output_dir', tmp_path / 'output_dir')
3536

3637

37-
def test_load(example_data):
38+
@pytest.mark.parametrize('lazy', [True, False])
39+
def test_load(example_data, lazy):
3840
tas = Dataset(
3941
short_name='tas',
4042
mip='Amon',
@@ -51,7 +53,11 @@ def test_load(example_data):
5153
tas.find_files()
5254
print(tas.files)
5355

54-
cube = tas.load()
55-
56+
if lazy:
57+
result = tas.load(compute=False)
58+
assert isinstance(result, Delayed)
59+
cube = result.compute()
60+
else:
61+
cube = tas.load()
5662
assert isinstance(cube, iris.cube.Cube)
5763
assert cube.cell_measures()

0 commit comments

Comments
 (0)