-
Notifications
You must be signed in to change notification settings - Fork 7
Open
Description
When opening datasets via xr.open_dataset(..., engine="hidefix") concurrently from asyncio using loop.run_in_executor(), the process segfaults. The issue is concurrency-sensitive (fails when C>1) and does not reproduce with other engines (e.g., h5netcdf).
Environment
- OS: macOS 24.6.0
- Architecture: ARM64 (Apple M4)
- Python: 3.11.14 (cpython-3.11.14-macos-aarch64)
- hidefix: 0.12.0
- xarray: 2025.10.1
- h5netcdf: 1.7.3
Minimal Repro:
mre_hidefix_asyncio_engine.py
"""
MRE: hidefix + xarray + asyncio WITHOUT Dash/Flask
Usage:
uv run mre_hidefix_asyncio_engine.py --n 10 --c 4
This script:
1) Creates a tiny NetCDF file
2) Runs N batches; each batch does C concurrent opens using xarray with
the hidefix backend, loading then closing the dataset
3) Exits (observe whether a segfault or resource_tracker warnings occur)
"""
from __future__ import annotations
import argparse
import asyncio
import os
import faulthandler
from typing import Optional
import numpy as np
import xarray as xr
HERE = os.path.dirname(os.path.abspath(__file__))
NC_PATH = os.path.join(HERE, "mre_hidefix_asyncio.nc")
def create_nc_file(path: str) -> str:
    """Write a tiny one-variable dataset to *path* and return *path*.

    The dataset holds a float32 variable ``a`` of ten elements along the
    dimension ``x``, with an integer coordinate ``x``. Writing is attempted
    with the ``h5netcdf`` engine first; if that raises for any reason, the
    write is retried with the ``scipy`` engine. The in-memory dataset is
    closed whether or not writing succeeded.
    """
    index = np.arange(10)
    values = np.arange(10, dtype=np.float32)
    ds = xr.Dataset({"a": ("x", values)}, coords={"x": index})

    engine = "h5netcdf"
    try:
        try:
            ds.to_netcdf(path, engine=engine)
        except Exception:
            # NOTE(review): the scipy engine presumably writes classic
            # NetCDF3 rather than HDF5 -- confirm the backend under test can
            # still read the file when this fallback is taken.
            engine = "scipy"
            ds.to_netcdf(path, engine=engine)
    finally:
        ds.close()

    print(f"Created NetCDF at {path} using engine={engine}")
    return path
def open_with_hidefix(engine: str = "hidefix") -> None:
    """Open the shared NetCDF file, load it into memory, then close it.

    Args:
        engine: xarray backend name passed to ``xr.open_dataset``. Defaults
            to ``"hidefix"``, the backend this reproduction targets; pass
            another engine (e.g. ``"h5netcdf"``) to run a control.
    """
    # The context manager closes the dataset even if load() raises.
    with xr.open_dataset(NC_PATH, engine=engine) as ds:
        ds.load()


async def async_open_with_hidefix(engine: str = "hidefix") -> None:
    """Run :func:`open_with_hidefix` in the event loop's default executor.

    Args:
        engine: xarray backend name forwarded to :func:`open_with_hidefix`.
    """
    loop = asyncio.get_running_loop()
    # Extra positional arguments to run_in_executor are passed to the callable.
    await loop.run_in_executor(None, open_with_hidefix, engine)


async def run_batches(n: int, c: int, engine: str = "hidefix") -> None:
    """Run *n* sequential batches of *c* concurrent open/load/close cycles.

    Args:
        n: number of batches; each batch is awaited before the next starts.
        c: number of opens gathered concurrently within one batch.
        engine: xarray backend name forwarded to every open.
    """
    for i in range(n):
        await asyncio.gather(*(async_open_with_hidefix(engine) for _ in range(c)))
        # Progress is reported only on every fifth batch.
        if (i + 1) % 5 == 0:
            print(f"Completed batch {i+1}/{n}")


def main() -> None:
    """Parse command-line options, create the test file, and run the batches.

    Options:
        --n       number of batches (default 10)
        --c       concurrency per batch (default 2)
        --engine  xarray backend used to open the file (default "hidefix")
    """
    # Dump a Python traceback if the process receives a fatal signal.
    faulthandler.enable()
    parser = argparse.ArgumentParser()
    parser.add_argument("--n", type=int, default=10, help="number of batches")
    parser.add_argument("--c", type=int, default=2, help="concurrency per batch")
    # Lets the same script run both the crashing backend and a control engine.
    parser.add_argument(
        "--engine",
        default="hidefix",
        help="xarray backend engine used to open the file",
    )
    args = parser.parse_args()
    create_nc_file(NC_PATH)
    print(f"PID: {os.getpid()}")
    print(f"Running N={args.n}, C={args.c} ...")
    asyncio.run(run_batches(args.n, args.c, args.engine))
    print("Done; exiting now.")
if __name__ == "__main__":
    main()
Crash:
uv run mre_hidefix_asyncio_engine.py --engine hidefix --n 1 --c 2
Exit: 139 (SIGSEGV)
Stable controls:
uv run mre_hidefix_asyncio_engine.py --engine h5netcdf --n 1 --c 2
uv run mre_hidefix_asyncio_engine.py --engine hidefix --n 1 --c 1
Exit: 0
Stack trace excerpt:
Fatal Python error: Segmentation fault
Current thread 0x... (most recent call first):
File ".../site-packages/hidefix/xarray.py", line 98 in __init__
File ".../site-packages/hidefix/xarray.py", line 115 in open
File ".../site-packages/hidefix/xarray.py", line 64 in open_dataset
File ".../xarray/backends/api.py", line 606 in open_dataset
File "mre_hidefix_asyncio_engine.py", line 40 in open_with_engine
...
Command exited with code 139
Metadata
Metadata
Assignees
Labels
No labels