12
12
from pathlib import Path
13
13
from typing import Any , Iterator , Sequence , Union
14
14
15
+ import dask
15
16
from iris .cube import Cube
16
17
17
18
from esmvalcore import esgf , local
@@ -79,6 +80,10 @@ def _ismatch(facet_value: FacetValue, pattern: FacetValue) -> bool:
79
80
and fnmatchcase (facet_value , pattern ))
80
81
81
82
83
+ def _first (elems ):
84
+ return elems [0 ]
85
+
86
+
82
87
class Dataset :
83
88
"""Define datasets, find the related files, and load them.
84
89
@@ -664,9 +669,19 @@ def files(self) -> Sequence[File]:
664
669
def files(self, value):
    """Set the list of files associated with this dataset.

    Overrides whatever files were previously found or assigned;
    presumably a sequence of local/ESGF file objects — confirm
    against the ``files`` property getter.
    """
    self._files = value
666
671
667
- def load (self ) -> Cube :
672
+ def load (self , compute = True ) -> Cube :
668
673
"""Load dataset.
669
674
675
+ Parameters
676
+ ----------
677
+ compute:
678
+ If :obj:`True`, return the cube immediately. If :obj:`False`,
679
+ return a :class:`~dask.delayed.Delayed` object that can be used
680
+ to load the cube by calling its
681
+ :func:`~dask.delayed.Delayed.compute` method. Multiple datasets
682
+ can be loaded in parallel by passing a list of such delayeds
683
+ to :func:`dask.compute`.
684
+
670
685
Raises
671
686
------
672
687
InputFilesNotFound
@@ -689,7 +704,7 @@ def load(self) -> Cube:
689
704
supplementary_cubes .append (supplementary_cube )
690
705
691
706
output_file = _get_output_file (self .facets , self .session .preproc_dir )
692
- cubes = preprocess (
707
+ cubes = dask . delayed ( preprocess ) (
693
708
[cube ],
694
709
'add_supplementary_variables' ,
695
710
input_files = input_files ,
@@ -698,7 +713,10 @@ def load(self) -> Cube:
698
713
supplementary_cubes = supplementary_cubes ,
699
714
)
700
715
701
- return cubes [0 ]
716
+ cube = dask .delayed (_first )(cubes )
717
+ if compute :
718
+ return cube .compute ()
719
+ return cube
702
720
703
721
def _load (self ) -> Cube :
704
722
"""Load self.files into an iris cube and return it."""
@@ -763,21 +781,61 @@ def _load(self) -> Cube:
763
781
'short_name' : self .facets ['short_name' ],
764
782
}
765
783
766
- result = [
784
+ input_files = [
767
785
file .local_file (self .session ['download_dir' ]) if isinstance (
768
786
file , esgf .ESGFFile ) else file for file in self .files
769
787
]
770
- for step , kwargs in settings .items ():
771
- result = preprocess (
788
+
789
+ debug = self .session ['save_intermediary_cubes' ]
790
+
791
+ result = []
792
+ for input_file in input_files :
793
+ files = dask .delayed (preprocess )(
794
+ [input_file ],
795
+ 'fix_file' ,
796
+ input_files = [input_file ],
797
+ output_file = output_file ,
798
+ debug = debug ,
799
+ ** settings ['fix_file' ],
800
+ )
801
+ cubes = dask .delayed (preprocess )(
802
+ files ,
803
+ 'load' ,
804
+ input_files = [input_file ],
805
+ output_file = output_file ,
806
+ debug = debug ,
807
+ ** settings ['load' ],
808
+ )
809
+ cubes = dask .delayed (preprocess )(
810
+ cubes ,
811
+ 'fix_metadata' ,
812
+ input_files = [input_file ],
813
+ output_file = output_file ,
814
+ debug = debug ,
815
+ ** settings ['fix_metadata' ],
816
+ )
817
+ cube = dask .delayed (_first )(cubes )
818
+ result .append (cube )
819
+
820
+ result = dask .delayed (preprocess )(
821
+ result ,
822
+ 'concatenate' ,
823
+ input_files = input_files ,
824
+ output_file = output_file ,
825
+ debug = debug ,
826
+ ** settings ['concatenate' ],
827
+ )
828
+ for step , kwargs in dict (tuple (settings .items ())[4 :]).items ():
829
+ result = dask .delayed (preprocess )(
772
830
result ,
773
831
step ,
774
- input_files = self . files ,
832
+ input_files = input_files ,
775
833
output_file = output_file ,
776
- debug = self . session [ 'save_intermediary_cubes' ] ,
834
+ debug = debug ,
777
835
** kwargs ,
778
836
)
779
837
780
- cube = result [ 0 ]
838
+ cube = dask . delayed ( _first )( result )
781
839
return cube
782
840
783
841
def from_ranges (self ) -> list ['Dataset' ]:
0 commit comments