WIP: norm layer with Impala for pick cube vision training.
ViktorM committed Sep 4, 2024
1 parent dc4e279 commit a78caac
Showing 11 changed files with 480 additions and 28 deletions.
2 changes: 1 addition & 1 deletion rl_games/algos_torch/running_mean_std.py
@@ -81,7 +81,7 @@ def forward(self, input, denorm:bool=False, mask:Optional[torch.Tensor]=None):
                y = torch.sqrt(current_var.float() + self.epsilon)*y + current_mean.float()
            else:
                if self.norm_only:
-                   y = input/ torch.sqrt(current_var.float() + self.epsilon)
+                   y = input / torch.sqrt(current_var.float() + self.epsilon)
                else:
                    y = (input - current_mean.float()) / torch.sqrt(current_var.float() + self.epsilon)
                    y = torch.clamp(y, min=-5.0, max=5.0)
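For context, the line touched above belongs to rl_games' running mean/std observation normalization. A minimal standalone sketch of that computation (illustrative only, not the RunningMeanStd class itself; the ±5 clamp mirrors the snippet above):

import torch

def normalize_obs(obs, running_mean, running_var, epsilon=1e-5, norm_only=False):
    # Minimal sketch of the normalization in the diff above: either scale by the
    # running std only, or subtract the mean, divide by the std, and clamp to [-5, 5].
    if norm_only:
        return obs / torch.sqrt(running_var.float() + epsilon)
    y = (obs - running_mean.float()) / torch.sqrt(running_var.float() + epsilon)
    return torch.clamp(y, min=-5.0, max=5.0)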
6 changes: 3 additions & 3 deletions rl_games/configs/atari/ppo_breakout.yaml
@@ -17,7 +17,7 @@ params:
      activation: relu
      initializer:
        name: orthogonal_initializer
        gain: 1.41421356237
      convs:
        - filters: 32
          kernel_size: 8
@@ -31,7 +31,7 @@ params:
          kernel_size: 3
          strides: 1
          padding: 0

    mlp:
      units: [512]
      activation: relu
@@ -55,7 +55,7 @@ params:
    learning_rate: 8e-4
    lr_schedule: None
    kl_threshold: 0.01

    grad_norm: 1.0
    entropy_coef: 0.01
    truncate_grads: True
79 changes: 79 additions & 0 deletions rl_games/configs/atari/ppo_pacman_envpool_impala.yaml
@@ -0,0 +1,79 @@
params:
  algo:
    name: a2c_discrete

  model:
    name: discrete_a2c

  network:
    name: resnet_actor_critic
    require_rewards: True
    require_last_actions: True
    separate: False
    value_shape: 1
    space:
      discrete:

    cnn:
      permute_input: False
      conv_depths: [16, 32, 32]
      activation: relu
      initializer:
        name: default
      regularizer:
        name: 'None'

    mlp:
      units: [512]
      activation: relu
      regularizer:
        name: None
      initializer:
        name: default
    rnn:
      name: lstm
      units: 256
      layers: 1

  config:
    name: pacman_impala
    env_name: envpool
    normalize_advantage: True
    normalize_input: False
    normalize_value: False
    reward_shaper:
      min_val: -100
      max_val: 100
      #scale_value: 0.01
    gamma: 0.995
    tau: 0.95
    learning_rate: 3e-4
    score_to_win: 100000
    grad_norm: 1.5
    entropy_coef: 0.01
    truncate_grads: True
    e_clip: 0.2
    clip_value: True
    num_actors: 64
    horizon_length: 128
    minibatch_size: 2048
    mini_epochs: 2
    critic_coef: 1
    lr_schedule: None
    kl_threshold: 0.01
    use_diagnostics: True
    seq_length: 32
    max_epochs: 200000

    env_config:
      env_name: MsPacman-v5
      episodic_life: True
      has_lives: True
      use_dict_obs_space: True

    player:
      render: False
      games_num: 20
      n_game_life: 3
      deterministic: True
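The conv_depths: [16, 32, 32] entry above selects rl_games' IMPALA-style residual CNN. As a rough sketch of that architecture (an approximation of the usual IMPALA block layout, not the exact rl_games resnet_actor_critic code), each depth becomes a conv + max-pool stage followed by two residual blocks:

import torch.nn as nn

class ImpalaBlock(nn.Module):
    # One stage of an IMPALA-style CNN: conv -> max-pool -> two residual blocks.
    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.conv = nn.Conv2d(in_ch, out_ch, 3, padding=1)
        self.pool = nn.MaxPool2d(3, stride=2, padding=1)
        self.res1 = nn.Sequential(nn.ReLU(), nn.Conv2d(out_ch, out_ch, 3, padding=1),
                                  nn.ReLU(), nn.Conv2d(out_ch, out_ch, 3, padding=1))
        self.res2 = nn.Sequential(nn.ReLU(), nn.Conv2d(out_ch, out_ch, 3, padding=1),
                                  nn.ReLU(), nn.Conv2d(out_ch, out_ch, 3, padding=1))

    def forward(self, x):
        x = self.pool(self.conv(x))
        x = x + self.res1(x)
        return x + self.res2(x)

def impala_cnn(in_channels, conv_depths=(16, 32, 32)):
    # Stack one ImpalaBlock per entry in conv_depths, e.g. [16, 32, 32].
    blocks, ch = [], in_channels
    for depth in conv_depths:
        blocks.append(ImpalaBlock(ch, depth))
        ch = depth
    return nn.Sequential(*blocks, nn.ReLU(), nn.Flatten())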

9 changes: 4 additions & 5 deletions rl_games/configs/atari/ppo_pacman_torch_rnn.yaml
@@ -5,8 +5,6 @@ params:
  model:
    name: discrete_a2c

  network:
    name: actor_critic
    separate: False
@@ -18,7 +16,7 @@ params:
      activation: relu
      initializer:
        name: glorot_normal_initializer
        gain: 1.4142
      regularizer:
        name: 'None'
      convs:
@@ -34,7 +32,6 @@ params:
          kernel_size: 3
          strides: 1
          padding: 0
    mlp:
      units: [512]
      activation: relu
@@ -54,7 +51,7 @@ params:
      #min_val: -1
      #max_val: 1
      scale_value: 1

    normalize_advantage: True
    gamma: 0.99
    tau: 0.95
@@ -78,10 +75,12 @@ params:
    normalize_input: False
    normalize_value: True
    max_epochs: 50000

    env_config:
      skip: 4
      name: 'MsPacmanNoFrameskip-v4'
      episode_life: True

    player:
      render: True
      games_num: 10
83 changes: 83 additions & 0 deletions rl_games/configs/atari/ppo_pong_envpool_resnet.yaml
@@ -0,0 +1,83 @@
params:
  algo:
    name: a2c_discrete

  model:
    name: discrete_a2c

  network:
    name: e2e_vision_actor_critic
    separate: False
    value_shape: 1
    space:
      discrete:

    backbone:
      type: resnet18 # can be efficientnet_v2_s #convnext_tiny #vit_b_16 #resnet18 #resnet34
      pretrained: True
      permute_input: False
      freeze: False
      preprocess_image: True
      args:
        zero_init_residual: True
        norm_layer: None

    mlp:
      units: [512]
      activation: relu
      regularizer:
        name: None
      initializer:
        name: default
    rnn:
      name: lstm
      units: 512
      layers: 1
      before_mlp: True
      concat_output: True

  config:
    name: Pong_resnet18_LSTM_MLP_512_concat_output_2e-4_linear_LR_norm
    env_name: envpool
    score_to_win: 20.0
    mixed_precision: True
    normalize_input: True
    normalize_value: True
    normalize_advantage: True
    reward_shaper:
      min_val: -1
      max_val: 1
    gamma: 0.99
    tau: 0.95
    grad_norm: 1.0
    entropy_coef: 0.01
    truncate_grads: True
    e_clip: 0.2
    clip_value: True
    save_best_after: 25
    save_frequency: 200
    num_actors: 64
    horizon_length: 128
    minibatch_size: 2048
    mini_epochs: 2
    critic_coef: 1
    learning_rate: 2e-4
    lr_schedule: linear
    kl_threshold: 0.01
    use_diagnostics: True
    seq_length: 8
    max_epochs: 500
    #weight_decay: 0.001

    env_config:
      env_name: Pong-v5
      has_lives: False
      use_dict_obs_space: False #True
      stack_num: 1
      gray_scale: False
    player:
      render: True
      games_num: 10
      n_game_life: 1
      deterministic: True
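The backbone section above (resnet18, pretrained, args with zero_init_residual and norm_layer) maps naturally onto torchvision's ResNet constructor. A hedged sketch of how such a backbone could be built — build_resnet18_backbone is a made-up helper, the weights enum needs torchvision >= 0.13 (older versions use pretrained=True), and norm_layer=None simply keeps the default BatchNorm2d:

import torch.nn as nn
import torchvision

def build_resnet18_backbone(pretrained=True, norm_layer=None, zero_init_residual=True):
    # Sketch of a resnet18 feature backbone as the config describes: optionally
    # pretrained, with an overridable normalization layer.
    weights = torchvision.models.ResNet18_Weights.DEFAULT if pretrained else None
    model = torchvision.models.resnet18(weights=weights,
                                        zero_init_residual=zero_init_residual,
                                        norm_layer=norm_layer)
    model.fc = nn.Identity()  # drop the ImageNet classifier; keep the 512-d features
    return model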

65 changes: 65 additions & 0 deletions rl_games/configs/maniskill/maniskill_ant.yaml
@@ -0,0 +1,65 @@
params:
  seed: 5
  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: False
    space:
      continuous:
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True
    mlp:
      units: [256, 128, 64]
      activation: elu
      initializer:
        name: default

  config:
    name: AntRun
    env_name: maniskill
    normalize_input: True
    normalize_value: True
    value_bootstrap: True
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: True
    gamma: 0.99
    tau: 0.95

    learning_rate: 3e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    grad_norm: 1.0
    entropy_coef: 0.0
    truncate_grads: True
    e_clip: 0.2
    clip_value: True
    use_smooth_clamp: True
    bound_loss_type: regularisation
    bounds_loss_coef: 0.0005
    max_epochs: 1000
    save_best_after: 25
    save_frequency: 100
    num_actors: 4096
    horizon_length: 16
    minibatch_size: 32768
    mini_epochs: 4
    critic_coef: 2

    env_config:
      env_name: MS-HumanoidRun-v1

    player:
      render: True
      render_sleep: 0.0
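The lr_schedule: adaptive / kl_threshold: 0.008 pair above adjusts the learning rate from the measured KL divergence between the old and new policy after each update. A minimal sketch of that rule (the exact factors and bounds here are illustrative assumptions, not copied from rl_games):

def adaptive_lr(current_lr, kl, kl_threshold=0.008,
                min_lr=1e-6, max_lr=1e-2, factor=1.5):
    # Shrink the learning rate when the policy moved too far (KL above threshold),
    # grow it when the update was overly conservative (KL well below threshold).
    if kl > 2.0 * kl_threshold:
        current_lr = max(current_lr / factor, min_lr)
    elif kl < 0.5 * kl_threshold:
        current_lr = min(current_lr * factor, max_lr)
    return current_lr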
30 changes: 16 additions & 14 deletions rl_games/configs/maniskill/maniskill_pickcube_impala.yaml
@@ -23,9 +23,9 @@ params:
      continuous:
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
          scale: 0.02
        sigma_init:
          name: const_initializer
          val: 0
@@ -35,25 +35,27 @@ params:
      conv_depths: [16, 32, 32]
      activation: relu
      initializer:
-       name: default
+       name: orthogonal_initializer
+       gain: 1.41421356237
      regularizer:
        name: None
    mlp:
-     units: [512, 256]
+     units: [256]
      activation: elu
      regularizer:
        name: None
      initializer:
        name: default
-   # rnn:
-   #   name: lstm
-   #   units: 512
-   #   layers: 1
-   #   before_mlp: True
-   #   concat_output: True
+   rnn:
+     name: lstm
+     layer_norm: True
+     units: 512
+     layers: 1
+     before_mlp: True
+     concat_output: True

  config:
-   name: PickCube_RGB_impala
+   name: PickCube_RGB_impala_lstm_init_2e-4_linear_lr
    env_name: maniskill
    reward_shaper:
      scale_value: 1.0
@@ -70,17 +72,17 @@ params:
      scale_value: 1.0
    gamma: 0.99
    tau : 0.95
-   learning_rate: 1e-4
-   lr_schedule: adaptive
+   learning_rate: 2e-4
+   lr_schedule: linear
    kl_threshold: 0.008
-   max_epochs: 10000
+   max_epochs: 20000
    save_best_after: 25
    save_frequency: 500
    grad_norm: 1.0
    entropy_coef: 0.0
    truncate_grads: True
    e_clip: 0.2
-   horizon_length: 16
+   horizon_length: 32
    minibatch_size: 2048
    mini_epochs: 2
    critic_coef: 1
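The layer_norm: True flag added to the rnn block above is the "norm layer" from the commit title: the LSTM features get normalized before they reach the policy and value heads. A rough sketch of the idea (the exact placement inside rl_games' network builder is an assumption, not a copy of it):

import torch.nn as nn

class LayerNormLSTM(nn.Module):
    # LSTM followed by LayerNorm on its outputs, roughly matching
    # rnn: {name: lstm, layer_norm: True, units: 512, before_mlp: True}.
    def __init__(self, input_size, units=512, layers=1):
        super().__init__()
        self.lstm = nn.LSTM(input_size, units, num_layers=layers, batch_first=True)
        self.norm = nn.LayerNorm(units)

    def forward(self, x, hidden=None):
        out, hidden = self.lstm(x, hidden)
        return self.norm(out), hidden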
