"""Config for ApeX-DQN on Pong-No_FrameSkip-v4.

- Author: Chris Yoon
- Contact: chris.yoon@medipixel.io

This module defines a single module-level ``agent`` dict with six entries:
``type``, ``hyper_params``, ``learner_cfg``, ``worker_cfg``, ``logger_cfg``
and ``comm_cfg``.
"""

from rl_algorithms.common.helper_functions import identity

agent = dict(
    type="ApeX",
    hyper_params=dict(
        gamma=0.99,
        tau=5e-3,
        buffer_size=int(2.5e5),  # openai baselines: int(1e4)
        batch_size=512,  # openai baselines: 32
        update_starts_from=int(1e5),  # openai baselines: int(1e4)
        multiple_update=1,  # multiple learning updates
        train_freq=1,  # in openai baselines, train_freq = 4
        gradient_clip=10.0,  # dueling: 10.0
        n_step=5,
        w_n_step=1.0,
        w_q_reg=0.0,
        per_alpha=0.6,  # openai baselines: 0.6
        per_beta=0.4,
        per_eps=1e-6,
        loss_type=dict(type="DQNLoss"),
        # Epsilon Greedy
        max_epsilon=1.0,
        min_epsilon=0.1,  # openai baselines: 0.01
        epsilon_decay=1e-6,  # openai baselines: 1e-7 / 1e-1
        # grad_cam
        grad_cam_layer_list=[
            "backbone.cnn.cnn_0.cnn",
            "backbone.cnn.cnn_1.cnn",
            "backbone.cnn.cnn_2.cnn",
        ],
        # NOTE(review): the keys below look like the distributed (worker /
        # local buffer / logging) settings -- confirm against the ApeX agent.
        num_workers=4,
        local_buffer_max_size=1000,
        worker_update_interval=50,
        logger_interval=2000,
    ),
    learner_cfg=dict(
        type="DQNLearner",
        device="cuda",
        backbone=dict(
            type="CNN",
            configs=dict(
                # Three layers; each output size equals the next input size.
                input_sizes=[4, 32, 64],
                output_sizes=[32, 64, 64],
                kernel_sizes=[8, 4, 3],
                strides=[4, 2, 1],
                paddings=[1, 0, 0],
            ),
        ),
        head=dict(
            type="DuelingMLP",
            configs=dict(
                use_noisy_net=False,
                hidden_sizes=[512],
                output_activation=identity,
            ),
        ),
        optim_cfg=dict(
            lr_dqn=0.0003,  # dueling: 6.25e-5, openai baselines: 1e-4
            weight_decay=0.0,  # this makes saturation in cnn weights
            adam_eps=1e-8,  # rainbow: 1.5e-4, openai baselines: 1e-8
        ),
    ),
    worker_cfg=dict(type="DQNWorker", device="cpu"),
    logger_cfg=dict(type="DQNLogger"),
    # One port per named channel; the values are consecutive (6554-6559).
    comm_cfg=dict(
        learner_buffer_port=6554,
        learner_worker_port=6555,
        worker_buffer_port=6556,
        learner_logger_port=6557,
        send_batch_port=6558,
        priorities_port=6559,
    ),
)