import os
import threading
import time
from datetime import datetime

import h5py
import numpy as np
from scipy import interpolate
@@ -149,6 +150,7 @@ def __init__(self, cell, params, verbose=False, is_parallel=False):
149150 self .verbose = verbose
150151 self .opened_dfs = []
151152 self .file_cache = {} # Cache for opened NetCDF files: {filepath: Dataset}
153+ self ._cache_lock = threading .Lock () # Thread-safe cache access
152154
153155 self .fn_lon = np .array (
154156 [
@@ -186,12 +188,14 @@ def _get_cached_file(self, filepath):
186188 """
187189 Get a cached NetCDF file handle, or open and cache it if not already open.
188190 This dramatically speeds up parallel processing by avoiding repeated file opens.
191+ Thread-safe with locking to prevent race conditions.
189192 """
190- if filepath not in self .file_cache :
191- if self .verbose :
192- print (f"Opening and caching: { filepath } " )
193- self .file_cache [filepath ] = nc .Dataset (filepath , "r" )
194- return self .file_cache [filepath ]
193+ with self ._cache_lock :
194+ if filepath not in self .file_cache :
195+ if self .verbose :
196+ print (f"Opening and caching: { filepath } " )
197+ self .file_cache [filepath ] = nc .Dataset (filepath , "r" )
198+ return self .file_cache [filepath ]
195199
196200 def close_cached_files (self ):
197201 """Close all cached NetCDF files."""
@@ -624,6 +628,7 @@ def __init__(self, cell, params, verbose=False, is_parallel=False):
624628 self .verbose = verbose
625629 self .opened_dfs = []
626630 self .file_cache = {} # Cache for opened NetCDF files: {filepath: Dataset}
631+ self ._cache_lock = threading .Lock () # Thread-safe cache access
627632
628633 # ETOPO 2022 tiles are at 15 degree intervals
629634 self .fn_lon = np .array ([
@@ -648,36 +653,34 @@ def _get_cached_file(self, filepath):
648653 Get a cached NetCDF file handle, or open and cache if not already open.
649654 This dramatically speeds up parallel processing by avoiding repeated file opens.
650655
651- Uses diskless=True to avoid HDF5 file locking issues in parallel/distributed environments.
656+ Thread-safe with locking. Uses standard file mode (not diskless) to avoid
657+ memory explosion when multiple threads load large files simultaneously.
652658 """
653- if filepath not in self .file_cache :
654- if self .verbose :
655- print (f"Opening and caching: { filepath } " )
656-
657- import time
658- max_retries = 3
659- retry_delay = 0.5
660-
661- for attempt in range (max_retries ):
662- try :
663- # diskless=True loads file into memory, avoiding HDF5 multiprocess locking issues
664- self .file_cache [filepath ] = nc .Dataset (filepath , "r" , diskless = True , persist = False )
665- break
666- except (OSError , RuntimeError , TypeError ) as e :
667- if attempt < max_retries - 1 :
668- # Retry with exponential backoff
669- if self .verbose :
670- print (f"Warning: Attempt { attempt + 1 } failed for { filepath } , retrying: { e } " )
671- time .sleep (retry_delay * (2 ** attempt ))
672- else :
673- # Final attempt: try without diskless
674- if self .verbose :
675- print (f"Warning: diskless mode failed after { max_retries } attempts, trying normal mode: { e } " )
676- try :
677- self .file_cache [filepath ] = nc .Dataset (filepath , "r" )
678- except Exception as e2 :
679- raise RuntimeError (f"Failed to open { filepath } with both diskless and normal modes: { e2 } " )
680- return self .file_cache [filepath ]
659+ with self ._cache_lock :
660+ if filepath not in self .file_cache :
661+ if self .verbose :
662+ print (f"Opening and caching: { filepath } " )
663+
664+ import time
665+ max_retries = 3
666+ retry_delay = 0.5
667+
668+ for attempt in range (max_retries ):
669+ try :
670+ # Use standard file access (not diskless) to avoid memory issues
671+ # with multiple threads loading 60GB files into RAM simultaneously.
672+ # The OS file cache handles caching efficiently across threads.
673+ self .file_cache [filepath ] = nc .Dataset (filepath , "r" )
674+ break
675+ except (OSError , RuntimeError , TypeError ) as e :
676+ if attempt < max_retries - 1 :
677+ # Retry with exponential backoff
678+ if self .verbose :
679+ print (f"Warning: Attempt { attempt + 1 } failed for { filepath } , retrying: { e } " )
680+ time .sleep (retry_delay * (2 ** attempt ))
681+ else :
682+ raise RuntimeError (f"Failed to open { filepath } after { max_retries } attempts: { e } " )
683+ return self .file_cache [filepath ]
681684
682685 def close_cached_files (self ):
683686 """Close all cached NetCDF files."""