Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 24 additions & 12 deletions pufferlib/config/ocean/adaptive.ini
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ input_size = 256
hidden_size = 256

[env]
num_agents = 1024
num_agents = 8192
; Options: discrete, continuous
action_type = discrete
; Options: classic, jerk
Expand Down Expand Up @@ -49,10 +49,22 @@ num_maps = 1000
init_steps = 0
; Options: "control_vehicles", "control_agents", "control_tracks_to_predict"
control_mode = "control_vehicles"
;
max_controlled_agents = 16
; Options: "create_all_valid", "create_only_controlled"
init_mode = "create_all_valid"
; NOTE(review): presumably controls whether overflow vehicles beyond max_controlled_agents are created as expert-replay agents — confirm against the drive env
create_expert_overflow = False
; Train with co-players; the co-player policy below must be defined if this is true
co_player_enabled = False
; Train with only one ego agent per world. If true, then num_agents must equal max_controlled_agents * num_ego_agents
one_ego_per_scene = False
; Total number of agents training across all worlds
num_ego_agents = 512
;


[policy.conditioning]
[env.conditioning]
; Options: "none", "reward", "entropy", "discount", "all"
type = "none"
collision_weight_lb = -1.0
Expand All @@ -66,16 +78,19 @@ entropy_weight_ub = 0.001
discount_weight_lb = 0.98
discount_weight_ub = 0.80

[co_player_policy]
[env.co_player_policy]
enabled = True
num_ego_agents = 512
policy_name = Drive
rnn_name = Recurrent
policy_path = "resources/drive/policies/varied_discount.pt"
input_size = 64
hidden_size = 256

[co_player_policy.conditioning]
[env.co_player_policy.rnn]
input_size = 256
hidden_size = 256

[env.co_player_policy.conditioning]
; Options: "none", "reward", "entropy", "discount", "all"
type = "all"
collision_weight_lb = -1.0
Expand All @@ -86,22 +101,19 @@ goal_weight_lb = 0.0
goal_weight_ub = 1.0
entropy_weight_lb = 0.0
entropy_weight_ub = 0.001
discount_weight_lb = 0.98
discount_weight_ub = 0.80
discount_weight_lb = 0.80
discount_weight_ub = 0.98

[co_player_rnn]
input_size = 256
hidden_size = 256

[train]
total_timesteps = 2_000_000_000
total_timesteps = 3_000_000_000
; learning_rate = 0.02
; gamma = 0.985
anneal_lr = True
; Needs to be: num_agents * num_workers * BPTT horizon
batch_size = auto
minibatch_size = 372736
minibatch_multiplier = 512
minibatch_multiplier = 256
max_minibatch_size = 372736
; BPTT horizon (overridden by pufferl.py for adaptive agents to k_scenarios * scenario_length)
bptt_horizon = 32
Expand Down
2 changes: 1 addition & 1 deletion pufferlib/config/ocean/drive.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ control_mode = "control_vehicles"
; Options: "create_all_valid", "create_only_controlled"
init_mode = "create_all_valid"

[policy.conditioning]
[env.conditioning]
; Options: "none", "reward", "entropy", "discount", "all"
type = "none"
collision_weight_lb = -1.0
Expand Down
4 changes: 3 additions & 1 deletion pufferlib/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,9 @@ def forward_eval(self, observations, state):

# TODO: Don't break compile
if h is not None:
assert h.shape[0] == c.shape[0] == observations.shape[0], "LSTM state must be (h, c)"
assert h.shape[0] == c.shape[0] == observations.shape[0], (
f"LSTM state must be (h, c), h shape {h.shape[0]}, observations shape: {observations.shape[0]}"
)
lstm_state = (h, c)
else:
lstm_state = None
Expand Down
29 changes: 28 additions & 1 deletion pufferlib/ocean/drive/binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -167,15 +167,42 @@ static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
env->ego_agent_ids = NULL;
env->num_ego_agents = 0;
}

// Handle placeholder agents
env->num_place_holders = unpack(kwargs, "num_place_holders");
if (env->num_place_holders > 0) {
double* place_holder_ids_d = unpack_float_array(kwargs, "place_holder_ids", &env->num_place_holders);
if (place_holder_ids_d != NULL) {
env->place_holder_ids = (int*)malloc(env->num_place_holders * sizeof(int));
if (env->place_holder_ids == NULL) {
fprintf(stderr, "Error: Failed to allocate memory for place_holder_ids\n");
free(place_holder_ids_d);
env->num_place_holders = 0;
} else {
for (int i = 0; i < env->num_place_holders; i++) {
env->place_holder_ids[i] = (int)place_holder_ids_d[i];
}
free(place_holder_ids_d);
}
} else {
env->place_holder_ids = NULL;
env->num_place_holders = 0;
}
} else {
env->place_holder_ids = NULL;
env->num_place_holders = 0;
}
} else {
// Non-population play mode - set defaults
env->num_ego_agents = 0;
env->ego_agent_ids = NULL;
env->num_place_holders = 0;
env->place_holder_ids = NULL;
}


env->init_mode = (int)unpack(kwargs, "init_mode");
env->control_mode = (int)unpack(kwargs, "control_mode");
env->create_expert_overflow = (int)unpack(kwargs, "create_expert_overflow");
env->goal_behavior = (int)unpack(kwargs, "goal_behavior");
env->goal_radius = (float)unpack(kwargs, "goal_radius");
int map_id = unpack(kwargs, "map_id");
Expand Down
Loading
Loading