diff --git a/cfg/hidden_size.cfg b/cfg/hidden_size.cfg index a0b988f..243fbd2 100644 --- a/cfg/hidden_size.cfg +++ b/cfg/hidden_size.cfg @@ -9,7 +9,7 @@ updates_per_step = 200 seed = 11 actor_lr = 5e-5 -n_train_episodes = 800 +n_train_episodes = 3200 beta_coeff = 0.993 test_interval = 40 n_test_episodes = 20 diff --git a/learner/gnn_baseline.py b/learner/gnn_baseline.py index 2f70f78..a20fb24 100644 --- a/learner/gnn_baseline.py +++ b/learner/gnn_baseline.py @@ -2,48 +2,10 @@ def train_baseline(env, args): - debug = args.getboolean('debug') - n_train_episodes = args.getint('n_train_episodes') - test_interval = args.getint('test_interval') n_test_episodes = args.getint('n_test_episodes') centralized = args.getboolean('centralized') - total_numsteps = 0 stats = {'mean': -1.0 * np.Inf, 'std': 0} - # for i in range(n_train_episodes): - # # env.reset() - # # # episode_reward = 0 - # # done = False - # # while not done: - # # - # # action = env.env.controller(centralized) - # # next_state, reward, done, _ = env.step(action) - # # total_numsteps += 1 - # - # if i % test_interval == 0: - # test_rewards = [] - # for _ in range(n_test_episodes): - # ep_reward = 0 - # env.reset() - # done = False - # while not done: - # action = env.env.controller(centralized) - # next_state, reward, done, _ = env.step(action) - # ep_reward += reward - # # env.render() - # test_rewards.append(ep_reward) - # - # mean_reward = np.mean(test_rewards) - # if stats['mean'] < mean_reward: - # stats['mean'] = mean_reward - # stats['std'] = np.std(test_rewards) - # - # if debug: - # print( - # "Episode: {}, total numsteps: {}, reward: {}".format( - # i, - # total_numsteps, - # mean_reward)) test_rewards = [] for _ in range(n_test_episodes): diff --git a/results/plot_bargraph.py b/results/plot_bargraph.py index 9ca91cc..8376714 100644 --- a/results/plot_bargraph.py +++ b/results/plot_bargraph.py @@ -23,11 +23,12 @@ def main(): k_ind = 0 - mean_cost_cent = [1.8930292856893] - std_cost_cent = [0.14931209808414267] + mean_cost_cent = [2.041724997601464] + std_cost_cent = [0.07619242768446846] + + mean_cost_decent = [9.335155311869324] + std_cost_decent = [2.6593089180772074] - mean_cost_decent = [6.840906470373157] - std_cost_decent = [1.6711923712693055] mean_costs_airsim, std_costs_airsim = get_dict(['airsim_trained2.csv'], k_ind) mean_costs_stoch, std_costs_stoch = get_dict(['stoch_transfer_to_airsim2.csv'], k_ind) @@ -53,6 +54,8 @@ def main(): plt.xlabel('K') plt.ylabel(ylabel) + ax.set_xticklabels(('', '', '', '1', '2', '3', '4')) + plt.savefig(save_dir + fig_fname + '.eps', format='eps') plt.show() diff --git a/results/plot_nn.py b/results/plot_nn.py new file mode 100644 index 0000000..bb43f91 --- /dev/null +++ b/results/plot_nn.py @@ -0,0 +1,107 @@ +import matplotlib +import matplotlib.pyplot as plt +import csv +import numpy as np +from collections import OrderedDict + +font = {'family': 'serif', + 'weight': 'bold', + 'size': 14} +matplotlib.rc('font', **font) + +_CENTRALIZED = 'Global' +_DECENTRALIZED = 'Local' + +def main(): + + fig_fname = 'hidden_size' + + fnames = ['hidden_size.csv'] + xlabel = 'Num. Neurons per Layer' + k_ind = 0 + v_ind = 1 + arrow_params = None + + colors = {_CENTRALIZED: 'green', _DECENTRALIZED: 'red', '4': 'blue', '3': 'darkviolet', '2': 'orange', '1': 'gold'} + save_dir = 'fig/' + + mean_costs, std_costs = get_dict(fnames, k_ind, v_ind) + + max_val, min_dec = get_max(mean_costs) + max_val = max_val + 10.0 + ylabel = 'Cost' + title = 'Cost vs. GNN Architecture' + + # plot + fig, ax = plt.subplots() + for k in mean_costs.keys(): + # if k != '4': + if not (k == _CENTRALIZED or k == _DECENTRALIZED): + label = k + ' Hidden Layers' + else: + label = k + ax.errorbar(mean_costs[k].keys(), mean_costs[k].values(), yerr=std_costs[k].values(), marker='o', color=colors[k], + label=label) + + ax.legend() + plt.title(title) + plt.ylim(top=300, bottom=0) + plt.xlabel(xlabel) + plt.ylabel(ylabel) + + if max_val < min_dec < np.Inf and arrow_params: + min_dec = int(np.floor(min_dec / 100.0)*100) + # plt.arrow(x=3.3, y=400.0, dx=0.0, dy=30.0, color='r', width=0.03, head_length=30) + plt.arrow(**arrow_params) + plt.text(x=text_params['x'], y=text_params['y'], s='Cost > '+str(min_dec), color='r') + + plt.savefig(save_dir + fig_fname + '.eps', format='eps') + plt.show() + + +def get_dict(fnames, k_ind, v_ind): + mean_costs = OrderedDict() + std_costs = OrderedDict() + + for fname in fnames: + with open(fname, 'r') as csvfile: + plots = csv.reader(csvfile, delimiter=',') + next(plots, None) + for row in plots: + + if True: # len(row) == 4: + k = row[k_ind].strip() + + if k == 'True': + k = _CENTRALIZED + elif k == 'False': + k = _DECENTRALIZED + + v = float(row[v_ind]) + + mean = float(row[2]) * -1.0 + std = float(row[3]) + if k not in mean_costs: + mean_costs[k] = OrderedDict() + std_costs[k] = OrderedDict() + mean_costs[k][v] = mean + std_costs[k][v] = std + + return mean_costs, std_costs + + +def get_max(list_costs): + # compute average over diff seeds for each parameter combo + max_val = -1.0 * np.Inf + min_decentralized = 1.0 * np.Inf + + for k in list_costs.keys(): + for v in list_costs[k].keys(): + if k != _DECENTRALIZED: + max_val = np.maximum(max_val, list_costs[k][v]) + else: + min_decentralized = np.minimum(min_decentralized, list_costs[k][v]) + return max_val, min_decentralized + +if __name__ == "__main__": + main()