Skip to content

Commit ffb4ae7

Browse files
committed
fixup
1 parent 81c5713 commit ffb4ae7

File tree

1 file changed

+26
-1
lines changed

1 file changed

+26
-1
lines changed

pycsa/core/io.py

Lines changed: 26 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -647,11 +647,36 @@ def _get_cached_file(self, filepath):
647647
"""
648648
Get a cached NetCDF file handle, or open and cache if not already open.
649649
This dramatically speeds up parallel processing by avoiding repeated file opens.
650+
651+
Uses diskless=True to avoid HDF5 file locking issues in parallel/distributed environments.
650652
"""
651653
if filepath not in self.file_cache:
652654
if self.verbose:
653655
print(f"Opening and caching: {filepath}")
654-
self.file_cache[filepath] = nc.Dataset(filepath, "r")
656+
657+
import time
658+
max_retries = 3
659+
retry_delay = 0.5
660+
661+
for attempt in range(max_retries):
662+
try:
663+
# diskless=True loads file into memory, avoiding HDF5 multiprocess locking issues
664+
self.file_cache[filepath] = nc.Dataset(filepath, "r", diskless=True, persist=False)
665+
break
666+
except (OSError, RuntimeError, TypeError) as e:
667+
if attempt < max_retries - 1:
668+
# Retry with exponential backoff
669+
if self.verbose:
670+
print(f"Warning: Attempt {attempt+1} failed for {filepath}, retrying: {e}")
671+
time.sleep(retry_delay * (2 ** attempt))
672+
else:
673+
# Final attempt: try without diskless
674+
if self.verbose:
675+
print(f"Warning: diskless mode failed after {max_retries} attempts, trying normal mode: {e}")
676+
try:
677+
self.file_cache[filepath] = nc.Dataset(filepath, "r")
678+
except Exception as e2:
679+
raise RuntimeError(f"Failed to open {filepath} with both diskless and normal modes: {e2}")
655680
return self.file_cache[filepath]
656681

657682
def close_cached_files(self):

0 commit comments

Comments
 (0)