From 03d14741e8fe9129aad210a28b1686861dc51cb5 Mon Sep 17 00:00:00 2001 From: David Orme Date: Thu, 27 Jun 2024 11:43:04 +0100 Subject: [PATCH] Turning on single-threaded handling in dask --- virtual_ecosystem/core/data.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/virtual_ecosystem/core/data.py b/virtual_ecosystem/core/data.py index fa9c06fbd..bb0409357 100644 --- a/virtual_ecosystem/core/data.py +++ b/virtual_ecosystem/core/data.py @@ -123,6 +123,7 @@ from pathlib import Path from typing import Any +import dask import numpy as np from xarray import DataArray, Dataset, open_mfdataset @@ -133,6 +134,16 @@ from virtual_ecosystem.core.readers import load_to_dataarray from virtual_ecosystem.core.utils import check_outfile +# There are ongoing xarray issues with NetCDF not being thread-safe, which causes +# segfaults on different architectures in testing using `xarray.open_mfdataset`. +# See: +# - https://github.com/pydata/xarray/issues/7079 +# - https://github.com/pydata/xarray/issues/3961 +# +# Following advice on both of those issues, we currently explicitly stop dask from +# using parallel file processing, and we call open_mfdataset(..., lock=False). +dask.config.set(scheduler="single-threaded") + class Data: """The Virtual Ecosystem data object.