Skip to content

Commit 94d487c

Browse files
committed
(#13) Threadsafe I/O support
1 parent a5d1cb0 commit 94d487c

File tree

1 file changed

+29
-8
lines changed

1 file changed

+29
-8
lines changed

runs/icon_etopo_global.py

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -543,20 +543,23 @@ def parallel_wrapper(grid, params, reader, writer, chunk_output_dir, clat_rad, c
543543
'total_cores': 12, # Conservative: use 12 of 16 threads
544544
'total_memory_gb': 12.0,
545545
'netcdf_chunk_size': 100,
546+
'threads_per_worker': 1, # Set to None for auto-compute
546547
'memory_per_cpu_mb': None, # Will calculate dynamically
547548
'description': 'Generic laptop (16 threads, 16GB RAM)'
548549
},
549550
'dkrz_hpc': {
550551
'total_cores': 250,
551552
'total_memory_gb': 240.0,
552553
'netcdf_chunk_size': 100,
554+
'threads_per_worker': None, # Auto-compute based on worker memory
553555
'memory_per_cpu_mb': None, # SLURM quota on interactive partition
554556
'description': 'DKRZ HPC interactive partition (standard memory node)'
555557
},
556558
'laptop_performance': {
557559
'total_cores': 20, # Use 20 of 24 threads (leave 4 for background)
558560
'total_memory_gb': 80.0,
559561
'netcdf_chunk_size': 100,
562+
'threads_per_worker': None, # Auto-compute based on worker memory
560563
'memory_per_cpu_mb': None, # Will calculate dynamically
561564
'description': 'AMD Ryzen AI 9 HX 370 (24 threads, 94GB RAM)'
562565
}
@@ -654,11 +657,15 @@ def parallel_wrapper(grid, params, reader, writer, chunk_output_dir, clat_rad, c
654657
logger.info(f" Available cores: {total_cores}")
655658
logger.info(f" Available memory: {total_memory_gb} GB")
656659
logger.info(f" NetCDF chunk size: {netcdf_chunk_size} cells")
660+
661+
# Threading configuration display
662+
if config['threads_per_worker'] is not None:
663+
logger.info(f" Threading mode: MANUAL (threads_per_worker = {config['threads_per_worker']})")
664+
else:
665+
logger.info(f" Threading mode: AUTO (will compute based on worker count)")
666+
657667
if config['memory_per_cpu_mb'] is not None:
658668
logger.info(f" SLURM quota: {config['memory_per_cpu_mb']} MB per CPU")
659-
logger.info(f" Mode: HPC (threads scale with worker memory)")
660-
else:
661-
logger.info(f" Mode: Laptop (threads distributed evenly)")
662669
logger.info("=" * 80)
663670

664671
# Group cells by memory requirements for dynamic worker allocation
@@ -744,15 +751,29 @@ def parallel_wrapper(grid, params, reader, writer, chunk_output_dir, clat_rad, c
744751
n_workers = batch_config['n_workers']
745752
memory_per_worker = f"{int(batch_config['memory_per_worker_gb'])}GB"
746753

747-
# CRITICAL: threads_per_worker MUST be 1 because HDF5 is not thread-safe
748-
# HDF5 was not compiled with --enable-threadsafe on this system.
749-
# Even opening different NetCDF files from different threads causes crashes.
750-
# Use more workers instead of threads for parallelism.
751-
threads_per_worker = 1
754+
# ============================================================
755+
# THREADS PER WORKER CONFIGURATION
756+
# ============================================================
757+
# If threads_per_worker is explicitly set in config, use that value
758+
# Otherwise, auto-compute based on available cores and workers
759+
if config['threads_per_worker'] is not None:
760+
threads_per_worker = config['threads_per_worker']
761+
logger.info(f"\n Using manual threads_per_worker: {threads_per_worker}")
762+
else:
763+
# Auto-compute: distribute available cores among workers
764+
# Reserve at least 1 thread per worker, and cap at reasonable maximum
765+
threads_per_worker = max(1, min(4, total_cores // n_workers))
766+
logger.info(f"\n Auto-computed threads_per_worker: {threads_per_worker}")
767+
logger.info(f" (Based on {total_cores} cores / {n_workers} workers)")
768+
769+
# Note: Thread-safe HDF5 is required for threads_per_worker > 1
770+
# Verify with: python3 -c "import netCDF4; print(netCDF4.__hdf5libversion__)"
771+
# ============================================================
752772

753773
logger.info(f"\n Starting Dask client for memory batch {mem_batch_idx}:")
754774
logger.info(f" Workers: {n_workers} × {memory_per_worker}")
755775
logger.info(f" Threads per worker: {threads_per_worker}")
776+
logger.info(f" Total parallel threads: {n_workers * threads_per_worker}")
756777
logger.info(f" Expected memory per cell: {batch_config['memory_per_cell_gb']:.1f} GB")
757778

758779
client = Client(

0 commit comments

Comments
 (0)