Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
42b8955
Changed counts to max supported by sim, max_controlled, num_created a…
mpragnay Feb 10, 2026
c49b15e
Spawning logic with collision and offroad free spawns, uses 2.0 goal …
mpragnay Feb 11, 2026
8c9376c
Fix mem leaks
mpragnay Feb 11, 2026
87bd891
Apply suggestion from @Copilot
mpragnay Feb 12, 2026
5466e9e
Fixed Agent Counts to align with gigaflow feature set (#288)
mpragnay Feb 12, 2026
7b376fd
Bug fixing demo and renderer, fixed mem leaks in bindings code, chang…
mpragnay Feb 12, 2026
bc5b942
Merge branch '3.0_beta' into pragnay/randomagents
mpragnay Feb 12, 2026
5cbf4c4
Code cleanup, error handling
mpragnay Feb 12, 2026
7237135
merged changes from 3.0_beta
mpragnay Feb 16, 2026
d62e586
Minor fixes
mpragnay Feb 16, 2026
932d727
minor bugs
mpragnay Feb 16, 2026
ef50fd4
Pre-Compute lanes for spawning
mpragnay Feb 17, 2026
2ade546
Previously Working Settings
mpragnay Feb 17, 2026
257d721
Fixed reset to use the same goal
mpragnay Feb 18, 2026
ff6a94f
Separate out goal resets for bug avoidance with other modes
mpragnay Feb 18, 2026
7ceeeb1
Fixed agent collisions for variable dimensions
mpragnay Feb 18, 2026
0aa0a87
Fixed agent collisions for variable dimensions
mpragnay Feb 18, 2026
0c8d21b
working jerk configs
mpragnay Feb 19, 2026
b6c64c9
Merge branch '3.0_beta' into pragnay/randomagents
mpragnay Feb 19, 2026
dc972e5
pre-commit fixes
mpragnay Feb 19, 2026
974b93e
Relative Speed Observation fix
mpragnay Feb 22, 2026
2f5df66
Visualizer Ego POV road lanes added
mpragnay Feb 22, 2026
e94420b
Merge branch '3.0_beta' into pragnay/randomagents
mpragnay Feb 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 38 additions & 20 deletions pufferlib/config/ocean/drive.ini
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ rnn_name = Recurrent
[vec]
num_workers = 16
num_envs = 16
batch_size = 4
batch_size = 2
; backend = Serial

[policy]
Expand All @@ -23,11 +23,11 @@ num_agents = 1024
; Options: discrete, continuous
action_type = discrete
; Options: classic, jerk
dynamics_model = classic
dynamics_model = jerk
reward_vehicle_collision = -0.5
reward_offroad_collision = -0.5 # Use -0.05 for carla maps
reward_lane_align = 1
reward_lane_center = 1
reward_lane_align = 0
reward_lane_center = 0
dt = 0.1
reward_goal = 0.4
reward_goal_post_respawn = 0.25
Expand All @@ -38,6 +38,7 @@ min_goal_speed = -0.01
max_goal_speed = 10.0
; What to do when the goal is reached. Options: 0:"respawn", 1:"generate_new_goals", 2:"stop"
goal_behavior = 1
; Determines the target distance to the new goal in the case of goal_behavior = generate_new_goals.
; Large numbers will select a goal point further away from the agent's current position.
min_goal_distance = 0.5
Expand All @@ -48,21 +49,31 @@ collision_behavior = 0
offroad_behavior = 0
; Number of steps before the episode ends
episode_length = 300
resample_frequency = 300
resample_frequency = 10000
termination_mode = 1 # 0 - terminate at episode_length, 1 - terminate after all agents have been reset
map_dir = "resources/drive/binaries/carla_3D"
num_maps = 10000
map_dir = "resources/drive/binaries/carla_data"
num_maps = 8
; If True, allows training with fewer maps than requested (warns instead of erroring)
allow_fewer_maps = True
; Determines which step of the trajectory to initialize the agents at upon reset
init_steps = 0
; Options: "control_vehicles", "control_agents", "control_wosac", "control_sdc_only"
control_mode = "control_vehicles"
; Options: "created_all_valid", "create_only_controlled"
init_mode = "create_all_valid"
reward_randomization = 1
; Options: "create_all_valid", "create_only_controlled", "random_agents" (creates a random number of controlled agents per env)
init_mode = "random_agents"
; Below options only valid for "random_agents" init_mode
min_agents_per_env = 16
max_agents_per_env = 32
spawn_width_min = 1.5
spawn_width_max = 2.5
spawn_length_min = 2.0
spawn_length_max = 5.5
spawn_height = 1.5

; Reward settings
reward_randomization = 0
; Options: 0 - Fixed reward values, 1 - Random reward values
reward_conditioning = 1
reward_conditioning = 0
; Options: 1 - Add reward coefs to obs array, 0 - Dont

# Reward randomization bounds (min, max)
Expand Down Expand Up @@ -116,10 +127,10 @@ reward_bound_acc_max = 1.5

[train]
seed=42
total_timesteps = 2_000_000_000
total_timesteps = 1_000_000_000
; learning_rate = 0.02
; gamma = 0.985
anneal_lr = True
anneal_lr = False
; Needs to be: num_agents * num_workers * BPTT horizon
batch_size = 524288
minibatch_size = 32768
Expand All @@ -129,6 +140,7 @@ adam_beta1 = 0.9
adam_beta2 = 0.999
adam_eps = 1e-8
clip_coef = 0.2
; Ent coef needs to be tuned for RANDOM_AGENTS mode (found 0.02 to work best)
ent_coef = 0.005
gae_lambda = 0.95
gamma = 0.98
Expand All @@ -144,7 +156,7 @@ vtrace_rho_clip = 1
checkpoint_interval = 250
; Rendering options
render = True
render_async = False # Render interval of below 50 might cause process starvation and slowness in training
render_async = True # Render interval of below 50 might cause process starvation and slowness in training
render_interval = 250
; If True, show exactly what the agent sees in agent observation
obs_only = True
Expand All @@ -157,7 +169,7 @@ show_human_logs = False
; If True, zoom in on a part of the map. Otherwise, show full map
zoom_in = True
; Options: List[str to path], str to path (e.g., "resources/drive/training/binaries/map_001.bin"), None
render_map = none
render_map = None

[eval]
eval_interval = 1000
Expand Down Expand Up @@ -242,11 +254,17 @@ mean = 0.98
max = 0.999
scale = auto

[controlled_exp.train.goal_speed]
values = [10, 20, 30, 3]
; [controlled_exp.train.goal_speed]
; values = [10, 20, 30, 3]

; [controlled_exp.train.ent_coef]
; values = [0.001, 0.005, 0.01]

; [controlled_exp.train.seed]
; values = [42, 55, 1]

[controlled_exp.train.ent_coef]
values = [0.001, 0.005, 0.01]
values = [0.025, 0.015]

[controlled_exp.train.seed]
values = [42, 55, 1]
[controlled_exp.env.goal_target_distance]
values = [7.0, 10.0]
59 changes: 58 additions & 1 deletion pufferlib/ocean/drive/binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,16 +125,60 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) {
float reward_bound_acc_max = unpack(kwargs, "reward_bound_acc_max");

int use_all_maps = unpack(kwargs, "use_all_maps");
int min_agents_per_env = unpack(kwargs, "min_agents_per_env");
int max_agents_per_env = unpack(kwargs, "max_agents_per_env");

clock_gettime(CLOCK_REALTIME, &ts);
srand(ts.tv_nsec);

int max_envs = use_all_maps ? num_maps : num_agents;

if (init_mode == RANDOM_AGENTS) {
// Training mode: random agent counts per env
int agent_counts[max_envs];
int remaining = num_agents;
int env_count = 0;

while (remaining > 0) {
int count;
if (remaining <= max_agents_per_env) {
count = remaining;
} else {
// Ensure last env can still meet min_agents_per_env requirement
int upper = (remaining - max_agents_per_env < min_agents_per_env) ? remaining - min_agents_per_env
: max_agents_per_env;
count = min_agents_per_env + rand() % (upper - min_agents_per_env + 1);
}
agent_counts[env_count++] = count;
remaining -= count;
}

PyObject *agent_offsets = PyList_New(env_count + 1);
PyObject *map_ids_list = PyList_New(env_count);

int offset = 0;
for (int i = 0; i < env_count; i++) {
PyList_SetItem(agent_offsets, i, PyLong_FromLong(offset));
PyList_SetItem(map_ids_list, i, PyLong_FromLong(rand() % num_maps));
offset += agent_counts[i];
}
PyList_SetItem(agent_offsets, env_count,
PyLong_FromLong(num_agents)); // In random mode, we guarantee num_agents across all envs
PyObject *tuple = PyTuple_New(3);
PyTuple_SetItem(tuple, 0, agent_offsets);
PyTuple_SetItem(tuple, 1, map_ids_list);
PyTuple_SetItem(tuple, 2, PyLong_FromLong(env_count));
return tuple;
}

// For all other modes
int total_agent_count = 0;
int env_count = 0;
int max_envs = use_all_maps ? num_maps : num_agents;
int map_idx = 0;
int maps_checked = 0;
PyObject *agent_offsets = PyList_New(max_envs + 1);
PyObject *map_ids = PyList_New(max_envs);

// getting env count
while (use_all_maps ? map_idx < max_envs : total_agent_count < num_agents && env_count < max_envs) {
int map_id = use_all_maps ? map_idx++ : rand() % num_maps;
Expand Down Expand Up @@ -340,8 +384,21 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) {
char *map_path = unpack_str(kwargs, "map_path");
int max_agents = unpack(kwargs, "max_agents");
int init_steps = unpack(kwargs, "init_steps");
int max_agents_per_env = unpack(kwargs, "max_agents_per_env");

AgentSpawnSettings spawn_settings = {
.min_w = unpack(kwargs, "spawn_width_min"),
.max_w = unpack(kwargs, "spawn_width_max"),
.min_l = unpack(kwargs, "spawn_length_min"),
.max_l = unpack(kwargs, "spawn_length_max"),
.h = unpack(kwargs, "spawn_height"),
};
env->spawn_settings = spawn_settings;

env->num_agents = max_agents;
if (env->init_mode == RANDOM_AGENTS) {
env->spawn_settings.max_agents_in_sim = max_agents_per_env; // Random Agents only supports controlled agents
}
env->map_name = map_path;
env->init_steps = init_steps;
env->timestep = init_steps;
Expand Down
7 changes: 7 additions & 0 deletions pufferlib/ocean/drive/datatypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,13 @@ void free_agent(struct Agent *agent) {
free(agent->path);
}

/* Release an array of agents: free each agent's owned resources via
 * free_agent(), then release the array allocation itself.
 * Ownership of `agents` transfers to this function; the pointer is
 * invalid after the call. */
void free_agents(struct Agent *agents, int num_agents) {
    for (int idx = 0; idx < num_agents; idx++) {
        free_agent(agents + idx);
    }
    free(agents);
}

void free_road_element(struct RoadMapElement *element) {
free(element->x);
free(element->y);
Expand Down
24 changes: 21 additions & 3 deletions pufferlib/ocean/drive/drive.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ void demo() {
exit(1);
}

// Set different seed each time
srand(time(NULL));

// Note: Use below hardcoded settings for 2.0 demo purposes. Since the policy was
// trained with these exact settings, changing them may lead to
// weird behavior.
Expand All @@ -68,6 +71,15 @@ void demo() {
// .map_name = "resources/drive/map_town_02_carla.bin",
// };

AgentSpawnSettings spawn_settings = {
.max_agents_in_sim = conf.max_agents_per_env,
.min_w = conf.spawn_width_min,
.max_w = conf.spawn_width_max,
.min_l = conf.spawn_length_min,
.max_l = conf.spawn_length_max,
.h = conf.spawn_height,
};

Drive env = {
.human_agent_idx = 0,
.action_type = 0, // Demo doesn't support continuous action space
Expand All @@ -90,13 +102,19 @@ void demo() {
.init_steps = conf.init_steps,
.init_mode = conf.init_mode,
.control_mode = conf.control_mode,
.map_name = "resources/drive/binaries/carla/carla_3D/map_001.bin",
.reward_conditioning = 1,
.spawn_settings = spawn_settings,
.map_name = "resources/drive/binaries/carla/carla_3D/map_000.bin",
.reward_conditioning = conf.reward_conditioning,
};

if (conf.init_mode == RANDOM_AGENTS) {
env.num_agents = conf.min_agents_per_env + rand() % (conf.max_agents_per_env - conf.min_agents_per_env + 1);
}

allocate(&env);
c_reset(&env);
c_render(&env);
Weights *weights = load_weights("resources/drive/puffer_drive_resampling_speed_lane.bin");
Weights *weights = load_weights("resources/drive/puffer_drive_zh9lo9pr.bin");
DriveNet *net = init_drivenet(weights, env.active_agent_count, env.dynamics_model, env.reward_conditioning);

int accel_delta = 1;
Expand Down
Loading
Loading