File tree Expand file tree Collapse file tree 3 files changed +15
-15
lines changed Expand file tree Collapse file tree 3 files changed +15
-15
lines changed Original file line number Diff line number Diff line change @@ -2,10 +2,10 @@ behaviors:
2
2
Reacher :
3
3
trainer_type : ppo
4
4
hyperparameters :
5
- batch_size : 2024
6
- buffer_size : 20240
5
+ batch_size : 512
6
+ buffer_size : 20480
7
7
learning_rate : 0.0003
8
- beta : 0.005
8
+ beta : 0.001
9
9
epsilon : 0.2
10
10
lambd : 0.95
11
11
num_epoch : 3
Original file line number Diff line number Diff line change @@ -5,32 +5,32 @@ behaviors:
5
5
learning_rate : 0.0003
6
6
learning_rate_schedule : constant
7
7
batch_size : 128
8
- buffer_size : 500000
9
- buffer_init_steps : 10000
8
+ buffer_size : 2000000
9
+ buffer_init_steps : 1000
10
10
tau : 0.01
11
11
steps_per_update : 10.0
12
12
save_replay_buffer : false
13
13
init_entcoef : 0.01
14
14
reward_signal_steps_per_update : 10.0
15
15
network_settings :
16
16
normalize : false
17
- hidden_units : 256
18
- num_layers : 2
17
+ hidden_units : 512
18
+ num_layers : 3
19
19
vis_encode_type : simple
20
20
reward_signals :
21
21
extrinsic :
22
- gamma : 0.99
22
+ gamma : 0.995
23
23
strength : 2.0
24
24
gail :
25
25
gamma : 0.99
26
- strength : 0.02
26
+ strength : 0.01
27
27
encoding_size : 128
28
28
learning_rate : 0.0003
29
29
use_actions : true
30
30
use_vail : false
31
31
demo_path : Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
32
32
keep_checkpoints : 5
33
- max_steps : 10000000
33
+ max_steps : 3000000
34
34
time_horizon : 128
35
35
summary_freq : 30000
36
36
threaded : true
Original file line number Diff line number Diff line change @@ -4,8 +4,8 @@ behaviors:
4
4
hyperparameters :
5
5
learning_rate : 0.0003
6
6
learning_rate_schedule : constant
7
- batch_size : 256
8
- buffer_size : 500000
7
+ batch_size : 1024
8
+ buffer_size : 2000000
9
9
buffer_init_steps : 0
10
10
tau : 0.005
11
11
steps_per_update : 30.0
@@ -14,15 +14,15 @@ behaviors:
14
14
reward_signal_steps_per_update : 30.0
15
15
network_settings :
16
16
normalize : true
17
- hidden_units : 512
18
- num_layers : 4
17
+ hidden_units : 256
18
+ num_layers : 3
19
19
vis_encode_type : simple
20
20
reward_signals :
21
21
extrinsic :
22
22
gamma : 0.995
23
23
strength : 1.0
24
24
keep_checkpoints : 5
25
- max_steps : 20000000
25
+ max_steps : 15000000
26
26
time_horizon : 1000
27
27
summary_freq : 30000
28
28
threaded : true
You can’t perform that action at this time.
0 commit comments