Added ppo hyperparams for 10 mujoco envs (#155)

SonSang and araffin authored Sep 1, 2021
Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
1 parent 6cac948 commit 0955752

Showing 1 changed file with 182 additions and 0 deletions.
hyperparams/ppo.yml

@@ -343,3 +343,185 @@ CarRacing-v0:
  policy_kwargs: "dict(log_std_init=-2,
                       ortho_init=False,
                       )"

# Tuned
# 10 mujoco envs
Ant-v2:
  n_envs: 1
  policy: 'MlpPolicy'
  n_timesteps: !!float 1e7
  batch_size: 32
  n_steps: 512
  gamma: 0.98
  learning_rate: 1.90609e-05
  ent_coef: 4.9646e-07
  clip_range: 0.1
  n_epochs: 10
  gae_lambda: 0.8
  max_grad_norm: 0.6
  vf_coef: 0.677239

HalfCheetah-v2:
  n_envs: 1
  policy: 'MlpPolicy'
  n_timesteps: !!float 1e6
  batch_size: 64
  n_steps: 512
  gamma: 0.98
  learning_rate: 2.0633e-05
  ent_coef: 0.000401762
  clip_range: 0.1
  n_epochs: 20
  gae_lambda: 0.92
  max_grad_norm: 0.8
  vf_coef: 0.58096
  policy_kwargs: "dict(
                    log_std_init=-2,
                    ortho_init=False,
                    activation_fn=nn.ReLU,
                    net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                  )"

Hopper-v2:
  n_envs: 1
  policy: 'MlpPolicy'
  n_timesteps: !!float 1e6
  batch_size: 32
  n_steps: 512
  gamma: 0.999
  learning_rate: 9.80828e-05
  ent_coef: 0.00229519
  clip_range: 0.2
  n_epochs: 5
  gae_lambda: 0.99
  max_grad_norm: 0.7
  vf_coef: 0.835671
  policy_kwargs: "dict(
                    log_std_init=-2,
                    ortho_init=False,
                    activation_fn=nn.ReLU,
                    net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                  )"

HumanoidStandup-v2:
  n_envs: 1
  policy: 'MlpPolicy'
  n_timesteps: !!float 1e7
  batch_size: 32
  n_steps: 512
  gamma: 0.99
  learning_rate: 2.55673e-05
  ent_coef: 3.62109e-06
  clip_range: 0.3
  n_epochs: 20
  gae_lambda: 0.9
  max_grad_norm: 0.7
  vf_coef: 0.430793
  policy_kwargs: "dict(
                    log_std_init=-2,
                    ortho_init=False,
                    activation_fn=nn.ReLU,
                    net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                  )"

Humanoid-v2:
  n_envs: 1
  policy: 'MlpPolicy'
  n_timesteps: !!float 1e7
  batch_size: 256
  n_steps: 512
  gamma: 0.95
  learning_rate: 3.56987e-05
  ent_coef: 0.00238306
  clip_range: 0.3
  n_epochs: 5
  gae_lambda: 0.9
  max_grad_norm: 2
  vf_coef: 0.431892
  policy_kwargs: "dict(
                    log_std_init=-2,
                    ortho_init=False,
                    activation_fn=nn.ReLU,
                    net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                  )"

InvertedDoublePendulum-v2:
  n_envs: 1
  policy: 'MlpPolicy'
  n_timesteps: !!float 1e6
  batch_size: 512
  n_steps: 128
  gamma: 0.98
  learning_rate: 0.000155454
  ent_coef: 1.05057e-06
  clip_range: 0.4
  n_epochs: 10
  gae_lambda: 0.8
  max_grad_norm: 0.5
  vf_coef: 0.695929

InvertedPendulum-v2:
  n_envs: 1
  policy: 'MlpPolicy'
  n_timesteps: !!float 1e6
  batch_size: 64
  n_steps: 32
  gamma: 0.999
  learning_rate: 0.000222425
  ent_coef: 1.37976e-07
  clip_range: 0.4
  n_epochs: 5
  gae_lambda: 0.9
  max_grad_norm: 0.3
  vf_coef: 0.19816

Reacher-v2:
  n_envs: 1
  policy: 'MlpPolicy'
  n_timesteps: !!float 1e6
  batch_size: 32
  n_steps: 512
  gamma: 0.9
  learning_rate: 0.000104019
  ent_coef: 7.52585e-08
  clip_range: 0.3
  n_epochs: 5
  gae_lambda: 1.0
  max_grad_norm: 0.9
  vf_coef: 0.950368

Swimmer-v2:
  n_envs: 1
  policy: 'MlpPolicy'
  n_timesteps: !!float 1e6
  batch_size: 32
  n_steps: 512
  gamma: 0.9999
  learning_rate: 5.49717e-05
  ent_coef: 0.0554757
  clip_range: 0.3
  n_epochs: 10
  gae_lambda: 0.95
  max_grad_norm: 0.6
  vf_coef: 0.38782
  policy_kwargs: "dict(
                    log_std_init=-2,
                    ortho_init=False,
                    activation_fn=nn.ReLU,
                    net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                  )"

Walker2d-v2:
  n_envs: 1
  policy: 'MlpPolicy'
  n_timesteps: !!float 1e6
  batch_size: 32
  n_steps: 512
  gamma: 0.99
  learning_rate: 5.05041e-05
  ent_coef: 0.000585045
  clip_range: 0.1
  n_epochs: 20
  gae_lambda: 0.95
  max_grad_norm: 1
  vf_coef: 0.871923
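
For context (not part of this commit): a minimal sketch of how one of these YAML entries maps onto a stable-baselines3 `PPO` call, using the HalfCheetah-v2 values above. The environment construction and training call here are illustrative; the zoo's training script normally reads this file and builds the model for you.

```python
# Hand-translated from the HalfCheetah-v2 entry above; assumes gym with
# mujoco-py and stable-baselines3 (circa v1.x, matching this commit's era).
import gym
import torch.nn as nn
from stable_baselines3 import PPO

env = gym.make("HalfCheetah-v2")  # n_envs: 1

model = PPO(
    "MlpPolicy",                  # policy: 'MlpPolicy'
    env,
    batch_size=64,
    n_steps=512,
    gamma=0.98,
    learning_rate=2.0633e-05,
    ent_coef=0.000401762,
    clip_range=0.1,
    n_epochs=20,
    gae_lambda=0.92,
    max_grad_norm=0.8,
    vf_coef=0.58096,
    # policy_kwargs string from the YAML, evaluated into a real dict:
    policy_kwargs=dict(
        log_std_init=-2,
        ortho_init=False,
        activation_fn=nn.ReLU,
        net_arch=[dict(pi=[256, 256], vf=[256, 256])],
    ),
)
model.learn(total_timesteps=int(1e6))  # n_timesteps: !!float 1e6
```

In normal use these entries are consumed by the zoo rather than copied by hand, e.g. with something like `python train.py --algo ppo --env HalfCheetah-v2`.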
