Commit 53ca0fd: "add comment"
quantumiracle committed Jun 26, 2021 (1 parent: 79b60e4)
Showing 2 changed files with 11 additions and 10 deletions.
rlzoo/distributed/start_dis_role.py (4 changes: 2 additions & 2 deletions)
@@ -183,7 +183,7 @@ def run_server(agent, args, training_conf, env_conf, agent_conf):
 
     # save the final model
     rl_agent.save_ckpt(env_name)
-    print('server finished')
+    print('Server Finished.')
 
 
 def main(training_conf, env_conf, agent_conf):
@@ -201,6 +201,6 @@ def main(training_conf, env_conf, agent_conf):
     elif agent.role() == Role.Server:
         run_server(agent, args, training_conf, env_conf, agent_conf)
     else:
-        raise RuntimeError('invalid role')
+        raise RuntimeError('Invalid Role.')
 
     agent.barrier()
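For context, the tail of main shown above is the end of a role dispatch: each process in the distributed job asks the agent for its role, runs the matching entry point, and then all processes meet at agent.barrier(). Below is a minimal, self-contained sketch of that pattern; only Role.Server is confirmed by this hunk, so the Actor and Learner role names, the run_* stubs, and the toy agent are illustrative assumptions, not the rlzoo API.

# Sketch of the role-dispatch shape implied by main() above.
# Only Role.Server appears in the diff; Actor and Learner are assumed names.
from enum import Enum, auto


class Role(Enum):
    Actor = auto()    # assumed: collects rollout trajectories
    Learner = auto()  # assumed: performs gradient updates
    Server = auto()   # shown above: saves checkpoints when training ends


def run_actor(agent):
    print('running actor')    # stand-in for the real rollout loop


def run_learner(agent):
    print('running learner')  # stand-in for the real update loop


def run_server(agent):
    print('running server')   # stand-in for the real checkpointing loop


def dispatch(agent):
    if agent.role() == Role.Actor:
        run_actor(agent)
    elif agent.role() == Role.Learner:
        run_learner(agent)
    elif agent.role() == Role.Server:
        run_server(agent)
    else:
        raise RuntimeError('Invalid Role.')  # mirrors the updated message
    agent.barrier()  # all roles synchronize before exiting


class _FakeAgent:
    """Toy single-process agent, for demonstration only."""
    def __init__(self, role):
        self._role = role

    def role(self):
        return self._role

    def barrier(self):
        pass  # no-op outside a real distributed job


dispatch(_FakeAgent(Role.Server))  # prints 'running server'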
rlzoo/distributed/training_components.py (17 changes: 9 additions & 8 deletions)
@@ -4,15 +4,15 @@
 from rlzoo.algorithms.dppo_clip_distributed.dppo_clip import DPPO_CLIP
 from functools import partial
 
-# constants
+# Specify the training configurations
 training_conf = {
-    'total_step': int(1e7),
-    'traj_len': 200,
-    'train_n_traj': 2,
-    'save_interval': 10,
+    'total_step': int(1e7),  # overall training timesteps
+    'traj_len': 200,  # length of each rollout trajectory
+    'train_n_traj': 2,  # number of trajectories each learner collects per model update
+    'save_interval': 10,  # save the models every this many updates
 }
 
-# launch env settings
+# Specify the environment and launch it
 env_name, env_type = 'CartPole-v0', 'classic_control'
 env_maker = partial(build_env, env_name, env_type)
 temp_env = env_maker()
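The env_maker line above uses functools.partial to freeze the environment name and type into a zero-argument factory, so any worker can construct an identical environment on demand. A self-contained sketch of that pattern follows; the toy build_env here only stands in for rlzoo's real builder, which is imported earlier in the file (outside this hunk).

# Toy illustration of the partial-based env factory above.
from functools import partial


def build_env(env_name, env_type):
    # stand-in: the real rlzoo builder returns a gym-style environment
    return {'name': env_name, 'type': env_type}


env_maker = partial(build_env, 'CartPole-v0', 'classic_control')
temp_env = env_maker()  # each call builds a fresh, identically configured env
print(temp_env)         # {'name': 'CartPole-v0', 'type': 'classic_control'}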
@@ -41,6 +41,7 @@ def build_network(observation_space, action_space, name='DPPO_CLIP'):
 
 
 def build_opt(actor_lr=1e-4, critic_lr=2e-4):
+    """Choose the optimizers for learning."""
     import tensorflow as tf
     return [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)]
 
@@ -57,6 +58,6 @@ def build_opt(actor_lr=1e-4, critic_lr=2e-4):
 
 from rlzoo.distributed.start_dis_role import main
 
-print('start training')
+print('Start Training.')
 main(training_conf, env_conf, agent_conf)
-print('finished')
+print('Training Finished.')
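Putting the two files together: training_components.py assembles the config, env factory, networks, and optimizers, then hands them to main, which splits the work by role. One concrete consequence of training_conf worth spelling out is the update and checkpoint cadence; the arithmetic below follows directly from the values in the diff, under the assumption that the counters are per learner (the real scheduling lives inside rlzoo's distributed runner).

# Cadence implied by training_conf, assuming per-learner accounting.
total_step = int(1e7)  # overall training timesteps
traj_len = 200         # steps per rollout trajectory
train_n_traj = 2       # trajectories per model update
save_interval = 10     # updates per checkpoint

steps_per_update = traj_len * train_n_traj               # 400 steps
steps_per_checkpoint = steps_per_update * save_interval  # 4,000 steps
total_updates = total_step // steps_per_update           # 25,000 updates
print(steps_per_update, steps_per_checkpoint, total_updates)

Note also that build_opt returns the critic optimizer first, then the actor optimizer; any consumer of that list has to unpack it in the same order.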
