tuning params

katetolstaya · Jun 30, 2019 · d93ed84 · d93ed84
1 parent c464c6a
commit d93ed84
Show file tree

Hide file tree

Showing 4 changed files with 115 additions and 43 deletions.
diff --git a/cfg/hidden_size.cfg b/cfg/hidden_size.cfg
@@ -9,7 +9,7 @@ updates_per_step = 200
 seed = 11
 actor_lr = 5e-5
 
-n_train_episodes = 800
+n_train_episodes = 3200
 beta_coeff = 0.993
 test_interval = 40
 n_test_episodes = 20

diff --git a/learner/gnn_baseline.py b/learner/gnn_baseline.py
@@ -2,48 +2,10 @@
 
 
 def train_baseline(env, args):
-    debug = args.getboolean('debug')
-    n_train_episodes = args.getint('n_train_episodes')
-    test_interval = args.getint('test_interval')
     n_test_episodes = args.getint('n_test_episodes')
     centralized = args.getboolean('centralized')
-    total_numsteps = 0
 
     stats = {'mean': -1.0 * np.Inf, 'std': 0}
-    # for i in range(n_train_episodes):
-    #     # env.reset()
-    #     # # episode_reward = 0
-    #     # done = False
-    #     # while not done:
-    #     #
-    #     #     action = env.env.controller(centralized)
-    #     #     next_state, reward, done, _ = env.step(action)
-    #     #     total_numsteps += 1
-    #
-    #     if i % test_interval == 0:
-    #         test_rewards = []
-    #         for _ in range(n_test_episodes):
-    #             ep_reward = 0
-    #             env.reset()
-    #             done = False
-    #             while not done:
-    #                 action = env.env.controller(centralized)
-    #                 next_state, reward, done, _ = env.step(action)
-    #                 ep_reward += reward
-    #                 # env.render()
-    #             test_rewards.append(ep_reward)
-    #
-    #         mean_reward = np.mean(test_rewards)
-    #         if stats['mean'] < mean_reward:
-    #             stats['mean'] = mean_reward
-    #             stats['std'] = np.std(test_rewards)
-    #
-    #         if debug:
-    #             print(
-    #                 "Episode: {}, total numsteps: {}, reward: {}".format(
-    #                     i,
-    #                     total_numsteps,
-    #                     mean_reward))
 
     test_rewards = []
     for _ in range(n_test_episodes):

diff --git a/results/plot_bargraph.py b/results/plot_bargraph.py
@@ -23,11 +23,12 @@ def main():
 
     k_ind = 0
 
-    mean_cost_cent = [1.8930292856893]
-    std_cost_cent = [0.14931209808414267]
+    mean_cost_cent = [2.041724997601464]
+    std_cost_cent = [0.07619242768446846]
+
+    mean_cost_decent = [9.335155311869324]
+    std_cost_decent = [2.6593089180772074]
 
-    mean_cost_decent = [6.840906470373157]
-    std_cost_decent =  [1.6711923712693055]
 
     mean_costs_airsim, std_costs_airsim = get_dict(['airsim_trained2.csv'], k_ind)
     mean_costs_stoch, std_costs_stoch = get_dict(['stoch_transfer_to_airsim2.csv'], k_ind)
@@ -53,6 +54,8 @@ def main():
     plt.xlabel('K')
     plt.ylabel(ylabel)
 
+    ax.set_xticklabels(('', '', '', '1', '2', '3', '4'))
+
     plt.savefig(save_dir + fig_fname + '.eps', format='eps')
     plt.show()
 

diff --git a/results/plot_nn.py b/results/plot_nn.py
@@ -0,0 +1,107 @@
+import matplotlib
+import matplotlib.pyplot as plt
+import csv
+import numpy as np
+from collections import OrderedDict
+
+# Font settings applied to matplotlib's 'font' rc group for this script.
+font = {'family': 'serif',
+        'weight': 'bold',
+        'size': 14}
+matplotlib.rc('font', **font)
+
+# Series names that get_dict substitutes for the CSV keys 'True' and 'False';
+# get_max tracks the _DECENTRALIZED series separately from all the others.
+_CENTRALIZED = 'Global'
+_DECENTRALIZED = 'Local'
+
+def main():
+    """Plot cost against hidden-layer width, one series per CSV key.
+
+    Loads 'hidden_size.csv' through get_dict, draws one error-bar series per
+    key (mean cost on the y axis, standard deviation as the error bar), saves
+    the figure to 'fig/hidden_size.eps' and then displays it.
+    """
+    fig_fname = 'hidden_size'
+
+    fnames = ['hidden_size.csv']
+    xlabel = 'Num. Neurons per Layer'
+    k_ind = 0
+    v_ind = 1
+    # Optional annotation, drawn only when BOTH are set: keyword arguments for
+    # plt.arrow, and an {'x': ..., 'y': ...} position for the 'Cost > N' text.
+    arrow_params = None
+    text_params = None
+
+    colors = {_CENTRALIZED: 'green', _DECENTRALIZED: 'red', '4': 'blue', '3': 'darkviolet', '2': 'orange', '1': 'gold'}
+    save_dir = 'fig/'
+
+    mean_costs, std_costs = get_dict(fnames, k_ind, v_ind)
+
+    max_val, min_dec = get_max(mean_costs)
+    max_val = max_val + 10.0
+    ylabel = 'Cost'
+    title = 'Cost vs. GNN Architecture'
+
+    # plot
+    fig, ax = plt.subplots()
+    for k, costs in mean_costs.items():
+        # Numeric keys are hidden-layer counts; the two named series keep
+        # their name as the legend label.
+        if k == _CENTRALIZED or k == _DECENTRALIZED:
+            label = k
+        else:
+            label = k + ' Hidden Layers'
+        # Materialize the dict views so matplotlib receives real sequences.
+        ax.errorbar(list(costs.keys()), list(costs.values()),
+                    yerr=list(std_costs[k].values()), marker='o',
+                    color=colors[k], label=label)
+
+    ax.legend()
+    plt.title(title)
+    plt.ylim(top=300, bottom=0)
+    plt.xlabel(xlabel)
+    plt.ylabel(ylabel)
+
+    # Annotate only when the smallest decentralized cost is finite and lies
+    # above every other series (plus the 10.0 margin added to max_val).
+    if arrow_params and text_params and max_val < min_dec < np.inf:
+        min_dec = int(np.floor(min_dec / 100.0)*100)
+        # plt.arrow(x=3.3, y=400.0, dx=0.0, dy=30.0, color='r', width=0.03, head_length=30)
+        plt.arrow(**arrow_params)
+        plt.text(x=text_params['x'], y=text_params['y'], s='Cost > '+str(min_dec), color='r')
+
+    plt.savefig(save_dir + fig_fname + '.eps', format='eps')
+    plt.show()
+
+
+def get_dict(fnames, k_ind, v_ind):
+    """Load mean and std costs from CSV files into nested ordered dicts.
+
+    The first row of each file is skipped as a header, and blank rows are
+    ignored. For every other row, column ``k_ind`` is the series key (the
+    strings 'True' / 'False' are replaced by _CENTRALIZED / _DECENTRALIZED),
+    column ``v_ind`` is parsed as the float x-value, column 2 is the mean and
+    column 3 the standard deviation.
+
+    Args:
+        fnames: iterable of CSV file paths to read, in order.
+        k_ind: index of the column holding the series key.
+        v_ind: index of the column holding the x-value.
+
+    Returns:
+        (mean_costs, std_costs): two OrderedDicts mapping
+        key -> OrderedDict(x-value -> mean cost / standard deviation).
+        A later row with the same key and x-value overwrites an earlier one.
+
+    Raises:
+        ValueError: if a numeric column cannot be parsed as a float.
+        IndexError: if a non-blank row has fewer columns than required.
+    """
+    mean_costs = OrderedDict()
+    std_costs = OrderedDict()
+
+    for fname in fnames:
+        with open(fname, 'r') as csvfile:
+            plots = csv.reader(csvfile, delimiter=',')
+            next(plots, None)  # skip the header row
+            for row in plots:
+                # csv.reader yields [] for blank lines; skip them instead of
+                # failing on the column lookups below.
+                if not row:
+                    continue
+
+                k = row[k_ind].strip()
+                if k == 'True':
+                    k = _CENTRALIZED
+                elif k == 'False':
+                    k = _DECENTRALIZED
+
+                v = float(row[v_ind])
+
+                # The stored mean is negated to obtain a cost (presumably the
+                # CSV holds rewards -- confirm against the code that writes it).
+                mean = float(row[2]) * -1.0
+                std = float(row[3])
+
+                if k not in mean_costs:
+                    mean_costs[k] = OrderedDict()
+                    std_costs[k] = OrderedDict()
+                mean_costs[k][v] = mean
+                std_costs[k][v] = std
+
+    return mean_costs, std_costs
+
+
+def get_max(list_costs):
+    """Return the extreme costs used to decide whether to annotate the plot.
+
+    Args:
+        list_costs: mapping of series key -> mapping of x-value -> cost.
+
+    Returns:
+        (max_val, min_decentralized): the largest cost over every series
+        except _DECENTRALIZED, and the smallest cost within the
+        _DECENTRALIZED series. Each stays at -inf / +inf respectively when
+        there are no values to compare.
+    """
+    # np.inf rather than np.Inf: the capitalized alias was removed in NumPy 2.0.
+    max_val = -np.inf
+    min_decentralized = np.inf
+
+    for k, costs in list_costs.items():
+        for cost in costs.values():
+            if k != _DECENTRALIZED:
+                max_val = np.maximum(max_val, cost)
+            else:
+                min_decentralized = np.minimum(min_decentralized, cost)
+    return max_val, min_decentralized
+
+# Generate the figure only when this file is executed as a script.
+if __name__ == "__main__":
+    main()