From ef4bb926859e66e8ca6ec937930573edbabef591 Mon Sep 17 00:00:00 2001 From: Andrew Zhao Date: Thu, 17 Dec 2020 11:03:15 +0800 Subject: [PATCH] added rendering trained agent --- networks/utils.py | 2 +- td3/main.py | 9 +++++---- td3/render.py | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 5 deletions(-) create mode 100644 td3/render.py diff --git a/networks/utils.py b/networks/utils.py index e64e462..73c11c1 100644 --- a/networks/utils.py +++ b/networks/utils.py @@ -27,7 +27,7 @@ def initialize_modules(model, nonlinearity='leaky_relu', init_type='kaiming'): def load_weights(state_dict_path, models, model_names, optimizers=[], optimizer_names=[], return_val=None, return_vals=None): def put_in_list(item): - if not isinstance(item, list, tuple) and item is not None: + if not isinstance(item, (list, tuple)) and item is not None: item = [item] return item diff --git a/td3/main.py b/td3/main.py index 59817c9..80f4ea9 100644 --- a/td3/main.py +++ b/td3/main.py @@ -79,12 +79,13 @@ score_history.append(score) moving_avg = sum(score_history) / len(score_history) agent.add_scalar('Average Score', moving_avg, global_step=e) - tqdm.write(f'Episode: {e + 1}/{args.n_episodes}, \ - Episode Score: {score}, \ - Average Score: {moving_avg}, \ - Best Score: {best_score}') # save weights @ best score if moving_avg > best_score: best_score = moving_avg agent.save_networks() + + tqdm.write(f'Episode: {e + 1}/{args.n_episodes}, \ + Episode Score: {score}, \ + Average Score: {moving_avg}, \ + Best Score: {best_score}') diff --git a/td3/render.py b/td3/render.py new file mode 100644 index 0000000..f15d8f4 --- /dev/null +++ b/td3/render.py @@ -0,0 +1,39 @@ +import gym +from tqdm import tqdm + +from td3.agent import Agent +from td3.main import args +from networks.utils import load_weights + + +if __name__ == '__main__': + args.checkpoint_dir += f'/{args.env_name}_td3.pth' + # env & agent + env = gym.make(args.env_name) + agent = Agent(env, args.alpha, args.beta, args.hidden_dims, args.tau, args.batch_size, + args.gamma, args.d, 0, args.max_size, args.c, args.sigma, + args.one_device, args.log_dir, args.checkpoint_dir) + best_score = env.reward_range[0] + load_weights(args.checkpoint_dir, + [agent.actor] , ['actor']) + episodes = tqdm(range(args.n_episodes)) + for e in episodes: + # resetting + state = env.reset() + done = False + score = 0 + + while not done: + action = agent.choose_action(state) + state_, reward, done, _ = env.step(action) + + # reset, log & render + score += reward + state = state_ + episodes.set_postfix({'Reward': reward}) + env.render() + if score > best_score: + best_score = score + tqdm.write(f'Episode: {e + 1}/{args.n_episodes}, \ + Episode Score: {score}, \ + Best Score: {best_score}')