@@ -569,21 +569,30 @@ def calc_prediction(self, input_state=None, save_prediction=None):
569569 en_pred = [self .sim .run_fwd_sim (state , member_index ) for state , member_index in
570570 tqdm (zip (list_state , list_member_index ), total = len (list_state ))]
571571 elif self .sim .input_dict .get ('hpc' , False ): # Run prediction in parallel on hpc
572- _ = [self .sim .run_fwd_sim (state , member_index , nosim = True ) for state , member_index in
573- zip (list_state , list_member_index )]
574- # Run call_sim on the hpc
575- job_id = self .sim .SLURM_HPC_run (self .ne , filename = self .sim .input_dict ['runfile' ])
576- # Wait for the simulations to finish
577- if job_id :
578- self .sim .wait_for_jobs (job_id )
579- else :
580- print ("Job submission failed. Exiting." )
581- # Extract the results
572+ batch_size = 500 # If more than 500 ensemble members, we limit the runs to batches of 500
573+ # Split the ensemble into batches of 500
582574 en_pred = []
583- for member_i in list_member_index :
584- self .sim .extract_data (member_i )
585- en_pred .append (deepcopy (self .sim .pred_data ))
586- self .sim .remove_folder (member_i )
575+ batch_en = [np .arange (start , start + batch_size ) for start in
576+ np .arange (0 , self .ne - batch_size , batch_size )]
577+ if len (batch_en ): # if self.ne is less than batch_size
578+ batch_en .append (np .arange (batch_en [- 1 ][- 1 ]+ 1 , self .ne ))
579+ else :
580+ batch_en .append (np .arange (0 , self .ne ))
581+ for n_e in batch_en :
582+ _ = [self .sim .run_fwd_sim (state , member_index , nosim = True ) for state , member_index in
583+ zip (list_state [n_e ], list_member_index [n_e ])]
584+ # Run call_sim on the hpc
585+ job_id = self .sim .SLURM_HPC_run (n_e , venv = sys .executable , filename = self .sim .input_dict ['runfile' ])
586+ # Wait for the simulations to finish
587+ if job_id :
588+ self .sim .wait_for_jobs (job_id )
589+ else :
590+ print ("Job submission failed. Exiting." )
591+ # Extract the results
592+ for member_i in list_member_index [n_e ]:
593+ self .sim .extract_data (member_i )
594+ en_pred .append (deepcopy (self .sim .pred_data ))
595+ self .sim .remove_folder (member_i )
587596
588597 else : # Run prediction in parallel using p_map
589598 en_pred = p_map (self .sim .run_fwd_sim , list_state ,
0 commit comments