Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions pufferlib/ocean/drive/binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) {
float min_goal_distance = unpack(kwargs, "min_goal_distance");
float max_goal_distance = unpack(kwargs, "max_goal_distance");

int eval_batch_size = unpack(kwargs, "eval_batch_size");
int eval_map_counter = unpack(kwargs, "eval_map_counter");
bool eval_mode = eval_batch_size > 0;

float reward_bound_goal_radius_min = unpack(kwargs, "reward_bound_goal_radius_min");
float reward_bound_goal_radius_max = unpack(kwargs, "reward_bound_goal_radius_max");
float reward_bound_collision_min = unpack(kwargs, "reward_bound_collision_min");
Expand Down Expand Up @@ -124,20 +128,18 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) {
float reward_bound_acc_min = unpack(kwargs, "reward_bound_acc_min");
float reward_bound_acc_max = unpack(kwargs, "reward_bound_acc_max");

int use_all_maps = unpack(kwargs, "use_all_maps");

clock_gettime(CLOCK_REALTIME, &ts);
srand(ts.tv_nsec);
int total_agent_count = 0;
int env_count = 0;
int max_envs = use_all_maps ? num_maps : num_agents;
int map_idx = 0;
int max_envs = eval_mode ? eval_batch_size : num_agents;
int map_idx = eval_map_counter;
int maps_checked = 0;
PyObject *agent_offsets = PyList_New(max_envs + 1);
PyObject *map_ids = PyList_New(max_envs);
// getting env count
while (use_all_maps ? map_idx < max_envs : total_agent_count < num_agents && env_count < max_envs) {
int map_id = use_all_maps ? map_idx++ : rand() % num_maps;
while (total_agent_count < num_agents && env_count < max_envs) {
int map_id = eval_mode ? map_idx++ : rand() % num_maps;
Drive *env = calloc(1, sizeof(Drive));
env->init_mode = init_mode;
env->control_mode = control_mode;
Expand Down Expand Up @@ -181,7 +183,7 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) {

// Skip map if it doesn't contain any controllable agents
if (env->active_agent_count == 0) {
if (!use_all_maps) {
if (!eval_mode) {
maps_checked++;

// Safeguard: if we've checked all available maps and found no active agents, raise an error
Expand Down Expand Up @@ -248,7 +250,9 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) {
}
// printf("Generated %d environments to cover %d agents (requested %d agents)\n", env_count, total_agent_count,
// num_agents);
if (!use_all_maps && total_agent_count >= num_agents) {
// NOTE: even in eval mode we want a fixed num_agents, otherwise batching is impossible.
// TODO: decide how to handle the cropped (partially filled) final scenario.
if (total_agent_count >= num_agents) {
total_agent_count = num_agents;
}
PyObject *final_total_agent_count = PyLong_FromLong(total_agent_count);
Expand Down
15 changes: 10 additions & 5 deletions pufferlib/ocean/drive/drive.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ def __init__(
init_mode="create_all_valid",
control_mode="control_vehicles",
map_dir="resources/drive/binaries/training",
use_all_maps=False,
allow_fewer_maps=True,
eval_batch_size=-1,
# reward randomization bounds
reward_bound_goal_radius_min=2.0,
reward_bound_goal_radius_max=12.0,
Expand Down Expand Up @@ -109,6 +109,7 @@ def __init__(
self.termination_mode = termination_mode
self.resample_frequency = resample_frequency
self.dynamics_model = dynamics_model
self.eval_batch_size = eval_batch_size
# reward randomization bounds
self.reward_bound_goal_radius_min = reward_bound_goal_radius_min
self.reward_bound_collision_min = reward_bound_collision_min
Expand Down Expand Up @@ -242,6 +243,8 @@ def __init__(
f"Please reduce num_maps, add more maps to {map_dir}, or set allow_fewer_maps=True."
)

# This is to track on which maps the evaluation is running
self.eval_map_counter = 0
# Iterate through all maps to count total agents that can be initialized for each map
agent_offsets, map_ids, num_envs = binding.shared(
map_files=self.map_files,
Expand All @@ -255,6 +258,7 @@ def __init__(
reward_conditioning=self.reward_conditioning,
min_goal_distance=self.min_goal_distance,
max_goal_distance=self.max_goal_distance,
eval_batch_size=self.eval_batch_size,
reward_bound_goal_radius_min=self.reward_bound_goal_radius_min,
reward_bound_goal_radius_max=self.reward_bound_goal_radius_max,
reward_bound_collision_min=self.reward_bound_collision_min,
Expand Down Expand Up @@ -287,14 +291,14 @@ def __init__(
reward_bound_steer_max=self.reward_bound_steer_max,
reward_bound_acc_min=self.reward_bound_acc_min,
reward_bound_acc_max=self.reward_bound_acc_max,
use_all_maps=use_all_maps,
)

# agent_offsets[-1] works in both cases, just making it explicit that num_agents is ignored if use_all_maps
self.num_agents = num_agents if not use_all_maps else agent_offsets[-1]
self.num_agents = num_agents
self.agent_offsets = agent_offsets
self.map_ids = map_ids
self.num_envs = num_envs
# NOTE(review): what happens when eval_map_counter exceeds num_maps? Needs wrap-around or a bounds check.
self.eval_map_counter += num_envs
super().__init__(buf=buf)
env_ids = []
for i in range(num_envs):
Expand Down Expand Up @@ -403,6 +407,7 @@ def step(self, actions):
reward_conditioning=self.reward_conditioning,
min_goal_distance=self.min_goal_distance,
max_goal_distance=self.max_goal_distance,
eval_batch_size=self.eval_batch_size,
# reward randomization bounds
reward_bound_collision_min=self.reward_bound_collision_min,
reward_bound_goal_radius_min=self.reward_bound_goal_radius_min,
Expand Down Expand Up @@ -436,11 +441,11 @@ def step(self, actions):
reward_bound_steer_max=self.reward_bound_steer_max,
reward_bound_acc_min=self.reward_bound_acc_min,
reward_bound_acc_max=self.reward_bound_acc_max,
use_all_maps=False,
)
self.agent_offsets = agent_offsets
self.map_ids = map_ids
self.num_envs = num_envs
self.eval_map_counter += num_envs
env_ids = []
seed = np.random.randint(0, 2**32 - 1)
for i in range(num_envs):
Expand Down
6 changes: 6 additions & 0 deletions pufferlib/pufferl.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,12 @@ def train(self):
):
pufferlib.utils.run_human_replay_eval_in_subprocess(self.config, self.logger, self.global_step)

# For now this adds a third eval path; the goal is to unify all eval functions later.
if self.config["eval"]["eval_batch"] and (
self.epoch % self.config["eval"]["eval_interval"] == 0 or done_training
):
pufferlib.utils.run_human_replay_eval_in_subprocess(self.config, self.logger, self.global_step)

def check_render_queue(self):
"""Check if any async render jobs finished and log them."""
if not self.render_async or not hasattr(self, "render_queue"):
Expand Down