diff --git a/rlzoo/distributed/start_dis_role.py b/rlzoo/distributed/start_dis_role.py index 22c03bb..e24cee2 100755 --- a/rlzoo/distributed/start_dis_role.py +++ b/rlzoo/distributed/start_dis_role.py @@ -183,7 +183,7 @@ def run_server(agent, args, training_conf, env_conf, agent_conf): # save the final model rl_agent.save_ckpt(env_name) - print('server finished') + print('Server Finished.') def main(training_conf, env_conf, agent_conf): @@ -201,6 +201,6 @@ def main(training_conf, env_conf, agent_conf): elif agent.role() == Role.Server: run_server(agent, args, training_conf, env_conf, agent_conf) else: - raise RuntimeError('invalid role') + raise RuntimeError('Invalid Role.') agent.barrier() diff --git a/rlzoo/distributed/training_components.py b/rlzoo/distributed/training_components.py index 3b5c447..a975245 100755 --- a/rlzoo/distributed/training_components.py +++ b/rlzoo/distributed/training_components.py @@ -4,15 +4,15 @@ from rlzoo.algorithms.dppo_clip_distributed.dppo_clip import DPPO_CLIP from functools import partial -# constants +# Specify the training configurations training_conf = { - 'total_step': int(1e7), - 'traj_len': 200, - 'train_n_traj': 2, - 'save_interval': 10, + 'total_step': int(1e7), # overall training timesteps + 'traj_len': 200, # length of the rollout trajectory + 'train_n_traj': 2, # update the models after every certain number of trajectories for each learner + 'save_interval': 10, # saving the models after every certain number of updates } -# launch env settings +# Specify the environment and launch it env_name, env_type = 'CartPole-v0', 'classic_control' env_maker = partial(build_env, env_name, env_type) temp_env = env_maker() @@ -41,6 +41,7 @@ def build_network(observation_space, action_space, name='DPPO_CLIP'): def build_opt(actor_lr=1e-4, critic_lr=2e-4): + """ choose the optimizer for learning """ import tensorflow as tf return [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] @@ -57,6 +58,6 @@ def build_opt(actor_lr=1e-4, critic_lr=2e-4): from rlzoo.distributed.start_dis_role import main -print('start training') +print('Start Training.') main(training_conf, env_conf, agent_conf) -print('finished') +print('Training Finished.')