Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions pufferlib/ocean/drive/binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) {
float min_goal_distance = unpack(kwargs, "min_goal_distance");
float max_goal_distance = unpack(kwargs, "max_goal_distance");

int eval_batch_size = unpack(kwargs, "eval_batch_size");
int eval_map_counter = unpack(kwargs, "eval_map_counter");
bool eval_mode = eval_batch_size > 0;

float reward_bound_goal_radius_min = unpack(kwargs, "reward_bound_goal_radius_min");
float reward_bound_goal_radius_max = unpack(kwargs, "reward_bound_goal_radius_max");
float reward_bound_collision_min = unpack(kwargs, "reward_bound_collision_min");
Expand Down Expand Up @@ -124,20 +128,18 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) {
float reward_bound_acc_min = unpack(kwargs, "reward_bound_acc_min");
float reward_bound_acc_max = unpack(kwargs, "reward_bound_acc_max");

int use_all_maps = unpack(kwargs, "use_all_maps");

clock_gettime(CLOCK_REALTIME, &ts);
srand(ts.tv_nsec);
int total_agent_count = 0;
int env_count = 0;
int max_envs = use_all_maps ? num_maps : num_agents;
int map_idx = 0;
int max_envs = eval_mode ? eval_batch_size : num_agents;
int map_idx = eval_map_counter;
int maps_checked = 0;
PyObject *agent_offsets = PyList_New(max_envs + 1);
PyObject *map_ids = PyList_New(max_envs);
// getting env count
while (use_all_maps ? map_idx < max_envs : total_agent_count < num_agents && env_count < max_envs) {
int map_id = use_all_maps ? map_idx++ : rand() % num_maps;
while (total_agent_count < num_agents && env_count < max_envs) {
int map_id = eval_mode ? map_idx++ : rand() % num_maps;
Drive *env = calloc(1, sizeof(Drive));
env->init_mode = init_mode;
env->control_mode = control_mode;
Expand Down Expand Up @@ -181,7 +183,7 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) {

// Skip map if it doesn't contain any controllable agents
if (env->active_agent_count == 0) {
if (!use_all_maps) {
if (!eval_mode) {
maps_checked++;

// Safeguard: if we've checked all available maps and found no active agents, raise an error
Expand Down Expand Up @@ -248,7 +250,9 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) {
}
// printf("Generated %d environments to cover %d agents (requested %d agents)\n", env_count, total_agent_count,
// num_agents);
if (!use_all_maps && total_agent_count >= num_agents) {
// NOTE: even in eval mode we want a fixed num_agents, otherwise batching is impossible.
// TODO: decide how to handle the cropped (partially filled) final scenario.
if (total_agent_count >= num_agents) {
total_agent_count = num_agents;
}
PyObject *final_total_agent_count = PyLong_FromLong(total_agent_count);
Expand Down
15 changes: 10 additions & 5 deletions pufferlib/ocean/drive/drive.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ def __init__(
init_mode="create_all_valid",
control_mode="control_vehicles",
map_dir="resources/drive/binaries/training",
use_all_maps=False,
allow_fewer_maps=True,
eval_batch_size=-1,
# reward randomization bounds
reward_bound_goal_radius_min=2.0,
reward_bound_goal_radius_max=12.0,
Expand Down Expand Up @@ -109,6 +109,7 @@ def __init__(
self.termination_mode = termination_mode
self.resample_frequency = resample_frequency
self.dynamics_model = dynamics_model
self.eval_batch_size = eval_batch_size
# reward randomization bounds
self.reward_bound_goal_radius_min = reward_bound_goal_radius_min
self.reward_bound_collision_min = reward_bound_collision_min
Expand Down Expand Up @@ -242,6 +243,8 @@ def __init__(
f"Please reduce num_maps, add more maps to {map_dir}, or set allow_fewer_maps=True."
)

# This is to track on which maps the evaluation is running
self.eval_map_counter = 0
# Iterate through all maps to count total agents that can be initialized for each map
agent_offsets, map_ids, num_envs = binding.shared(
map_files=self.map_files,
Expand All @@ -255,6 +258,7 @@ def __init__(
reward_conditioning=self.reward_conditioning,
min_goal_distance=self.min_goal_distance,
max_goal_distance=self.max_goal_distance,
eval_batch_size=self.eval_batch_size,
reward_bound_goal_radius_min=self.reward_bound_goal_radius_min,
reward_bound_goal_radius_max=self.reward_bound_goal_radius_max,
reward_bound_collision_min=self.reward_bound_collision_min,
Expand Down Expand Up @@ -287,14 +291,14 @@ def __init__(
reward_bound_steer_max=self.reward_bound_steer_max,
reward_bound_acc_min=self.reward_bound_acc_min,
reward_bound_acc_max=self.reward_bound_acc_max,
use_all_maps=use_all_maps,
)

# agent_offsets[-1] works in both cases, just making it explicit that num_agents is ignored if use_all_maps
self.num_agents = num_agents if not use_all_maps else agent_offsets[-1]
self.num_agents = num_agents
self.agent_offsets = agent_offsets
self.map_ids = map_ids
self.num_envs = num_envs
# NOTE(review): what happens when eval_map_counter exceeds num_maps? Needs wrap-around or a bounds check.
self.eval_map_counter += num_envs
super().__init__(buf=buf)
env_ids = []
for i in range(num_envs):
Expand Down Expand Up @@ -403,6 +407,7 @@ def step(self, actions):
reward_conditioning=self.reward_conditioning,
min_goal_distance=self.min_goal_distance,
max_goal_distance=self.max_goal_distance,
eval_batch_size=self.eval_batch_size,
# reward randomization bounds
reward_bound_collision_min=self.reward_bound_collision_min,
reward_bound_goal_radius_min=self.reward_bound_goal_radius_min,
Expand Down Expand Up @@ -436,11 +441,11 @@ def step(self, actions):
reward_bound_steer_max=self.reward_bound_steer_max,
reward_bound_acc_min=self.reward_bound_acc_min,
reward_bound_acc_max=self.reward_bound_acc_max,
use_all_maps=False,
)
self.agent_offsets = agent_offsets
self.map_ids = map_ids
self.num_envs = num_envs
self.eval_map_counter += num_envs
env_ids = []
seed = np.random.randint(0, 2**32 - 1)
for i in range(num_envs):
Expand Down
6 changes: 6 additions & 0 deletions pufferlib/pufferl.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,12 @@ def train(self):
):
pufferlib.utils.run_human_replay_eval_in_subprocess(self.config, self.logger, self.global_step)

# For now this adds a third eval path; the goal is to unify all eval functions later.
if self.config["eval"]["eval_batch"] and (
self.epoch % self.config["eval"]["eval_interval"] == 0 or done_training
):
pufferlib.utils.run_human_replay_eval_in_subprocess(self.config, self.logger, self.global_step)

def check_render_queue(self):
"""Check if any async render jobs finished and log them."""
if not self.render_async or not hasattr(self, "render_queue"):
Expand Down