Commit: updated stuff to do plotting

parimarjan committed Jun 6, 2018
1 parent 4596b24 commit 77e29c0
Showing 4 changed files with 91 additions and 42 deletions.
64 changes: 49 additions & 15 deletions 338-project/river_swim_experiment.py
@@ -16,6 +2,8 @@
from experiment import run_finite_tabular_experiment, run_random_search_experiment

import random_search_agents
from collections import defaultdict
from matplotlib import pyplot as plt


if __name__ == '__main__':
@@ -34,15 +36,15 @@
args = parser.parse_args()

# Make a filename to identify flags
-fileName = ('chainLen'
+fileName = ('riverSwim'
+ '_len=' + '%03.f' % args.ep_len
+ '_num_states' + '%03.f' % args.num_states
+ '_alg=' + str(args.alg)
+ '_scal=' + '%03.2f' % args.scaling
+ '_seed=' + str(args.seed)
+ '.csv')

-folderName = './'
+folderName = './data/'
targetPath = folderName + fileName
print '******************************************************************'
print fileName
@@ -71,19 +73,51 @@
'EpsilonGreedy': finite_tabular_agents.EpsilonGreedy,
'BRS': random_search_agents.BasicRandomSearch}

-agent_constructor = alg_dict[args.alg]
-
-agent = agent_constructor(env.nState, env.nAction, env.epLen,
-                          scaling=args.scaling)
+# agent_constructor = alg_dict[args.alg]
+# agent = agent_constructor(env.nState, env.nAction, env.epLen,
+#                           scaling=args.scaling)

rs_agent = random_search_agents.BasicRandomSearch(env.nState, env.nAction,
env.epLen, scaling=args.scaling)
# agent_constructors = [finite_tabular_agents.PSRL]
agent_constructors = []

agents = []

for constructor in agent_constructors:
agents.append(constructor(env.nState, env.nAction, env.epLen,
scaling=args.scaling))

seeds = [1, 2]
data = defaultdict(list)
for s in seeds:
    # run the random search agent
    env.reset()
    cumRegrets = run_random_search_experiment(rs_agent, env, f_ext,
                                              args.nEps, s)
    data['BRS'].append(cumRegrets)

    # run each tabular agent with the same seed for this run
    for agent in agents:
        cumRegrets = run_finite_tabular_experiment(agent, env, f_ext,
                         args.nEps, s, recFreq=100, fileFreq=1000,
                         targetPath=targetPath)
        data[str(agent)].append(cumRegrets)


# plotting time!
for agent in data:
    print(agent)
    y = np.mean(data[agent], axis=0)       # mean regret curve across seeds
    stdev = np.std(data[agent], axis=0)    # spread across seeds
    x = [i*100 for i in range(len(y))]     # assumes regret was recorded every 100 episodes
    plt.plot(x, y, 'k-')
    plt.fill_between(x, y - stdev, y + stdev)
    plt.show()
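Calling plt.show() inside the loop opens a separate figure per agent. If a single comparison figure is the goal, a variant along these lines would overlay the curves (a sketch; the label, alpha, and axis-name choices are assumptions, not part of this commit):

    for name, runs in data.items():
        y = np.mean(runs, axis=0)
        stdev = np.std(runs, axis=0)
        x = [i*100 for i in range(len(y))]
        plt.plot(x, y, label=name)
        plt.fill_between(x, y - stdev, y + stdev, alpha=0.3)
    plt.legend()
    plt.xlabel('episode')
    plt.ylabel('cumulative regret')
    plt.show()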

-# Run the experiment
-if args.alg == "BRS":
-    run_random_search_experiment(agent, env, f_ext, args.nEps, args.seed,
-                                 recFreq=100, fileFreq=1000, targetPath=targetPath)
-else:
-    run_finite_tabular_experiment(agent, env, f_ext, args.nEps, args.seed,
-                                  recFreq=100, fileFreq=1000, targetPath=targetPath)

+# if args.alg == "BRS":
+#     cumRegrets = run_random_search_experiment(agent, env, f_ext, args.nEps, args.seed,
+#                                               recFreq=100, fileFreq=1000, targetPath=targetPath)
+# else:
+#     cumRegrets = run_finite_tabular_experiment(agent, env, f_ext, args.nEps, args.seed,
+#                                                recFreq=100, fileFreq=1000, targetPath=targetPath)
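For reference, a plausible invocation of the updated script, assuming the argparse flags mirror the attribute names used above (ep_len, num_states, alg, scaling, seed, nEps) and that the ./data/ output directory already exists:

    python river_swim_experiment.py --alg BRS --ep_len 20 --num_states 6 \
        --scaling 1.0 --seed 1 --nEps 10000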
2 changes: 1 addition & 1 deletion psrl_experiments_2016/bandit_confidence.py
@@ -106,7 +106,7 @@ def outputConfidenceKnownP(alg, nextStateMul, nObs):

# Letting the agent know the transitions, but not the rewards
agent.R_prior[0, 0] = (0, 1e9)
-agent.P_prior[0, 0][0] = 1e9
+agent.P_prior[0, 0][0] = 0
for s in range(1, env.nState):
agent.P_prior[0, 0][s] += 1e9
agent.P_prior[s, 0][s] += 1e9
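Assuming P_prior holds Dirichlet pseudo-counts per (state, action) pair, this change (together with the += 1e9 lines around it) puts zero prior mass on the self-transition and huge counts on every other successor, so the sampled transition model is effectively known. A minimal sketch of the resulting posterior mean:

    import numpy as np

    # pseudo-counts out of (state 0, action 0) after this change:
    # no mass on the self-transition, ~1e9 on each other successor
    counts = np.array([0.0, 1e9, 1e9])
    posterior_mean = counts / counts.sum()
    print(posterior_mean)   # -> [0. 0.5 0.5]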
60 changes: 34 additions & 26 deletions src/experiment.py
@@ -6,7 +6,6 @@

import numpy as np
import pandas as pd
-
from shutil import copyfile

def run_finite_tabular_experiment(agent, env, f_ext, nEps, seed=1,
@@ -27,13 +26,15 @@ def run_finite_tabular_experiment(agent, env, f_ext, nEps, seed=1,
Returns:
cumRegrets - list of cumulative regrets, one entry per recFreq episodes
'''
cumRegrets = []
data = []
qVals, qMax = env.compute_qVals()
np.random.seed(seed)

cumRegret = 0
cumReward = 0
empRegret = 0
env.reset()

for ep in xrange(1, nEps + 2):
# Reset the environment
@@ -81,18 +82,20 @@

# Logging to dataframe
if ep % recFreq == 0:
-    data.append([ep, epReward, cumReward, cumRegret, empRegret])
-    print 'episode:', ep, 'epReward:', epReward, 'cumRegret:', cumRegret
-
-    if ep % max(fileFreq, recFreq) == 0:
-        dt = pd.DataFrame(data,
-                          columns=['episode', 'epReward', 'cumReward',
-                                   'cumRegret', 'empRegret'])
-        print 'Writing to file ' + targetPath
-        dt.to_csv('tmp.csv', index=False, float_format='%.2f')
-        copyfile('tmp.csv', targetPath)
-        print '****************************'
+    cumRegrets.append(cumRegret)
+    # data.append([ep, epReward, cumReward, cumRegret, empRegret])
+    # print 'episode:', ep, 'epReward:', epReward, 'cumRegret:', cumRegret

+    # if ep % max(fileFreq, recFreq) == 0:
+    #     dt = pd.DataFrame(data,
+    #                       columns=['episode', 'epReward', 'cumReward',
+    #                                'cumRegret', 'empRegret'])
+    #     print 'Writing to file ' + targetPath
+    #     dt.to_csv('tmp.csv', index=False, float_format='%.2f')
+    #     copyfile('tmp.csv', targetPath)
+    #     print '****************************'

print '**************************************************'
print 'Experiment complete'
print '**************************************************'

return cumRegrets
@@ -121,7 +124,8 @@ def run_episode():

return epReward, epRegret


env.reset()
cumRegrets = []
data = []
qVals, qMax = env.compute_qVals()
np.random.seed(seed)
@@ -169,22 +173,26 @@
recFreq = 10000

# Logging to dataframe

# FIXME: how often do we want it to record?
if cur_ep % recFreq == 0:
    print(cur_ep)
-    data.append([cur_ep, epRewardPos, cumReward, cumRegret, empRegret])
-    print 'episode:', cur_ep, 'epRewardPos:', epRewardPos, 'cumRegret:', cumRegret
-
-    if cur_ep % max(fileFreq, recFreq) == 0:
-        dt = pd.DataFrame(data,
-                          columns=['episode', 'epReward', 'cumReward',
-                                   'cumRegret', 'empRegret'])
-        print 'Writing to file ' + targetPath
-        dt.to_csv('tmp.csv', index=False, float_format='%.2f')
-        copyfile('tmp.csv', targetPath)
-        print '****************************'
+    # data.append([cur_ep, epRewardPos, cumReward, cumRegret, empRegret])
+    # print 'episode:', cur_ep, 'epRewardPos:', epRewardPos, 'cumRegret:', cumRegret
+    cumRegrets.append(cumRegret)
+
+    # if cur_ep % max(fileFreq, recFreq) == 0:
+    #     dt = pd.DataFrame(data,
+    #                       columns=['episode', 'epReward', 'cumReward',
+    #                                'cumRegret', 'empRegret'])
+    #     print 'Writing to file ' + targetPath
+    #     dt.to_csv('tmp.csv', index=False, float_format='%.2f')
+    #     copyfile('tmp.csv', targetPath)
+    #     print '****************************'

agent.theta = agent.theta + (agent.alpha / agent.batch_size)*reward_differences

print '**************************************************'
print 'Experiment complete'
print '**************************************************'

return cumRegrets
7 changes: 7 additions & 0 deletions src/finite_tabular_agents.py
@@ -323,6 +323,8 @@ class PSRL(FiniteHorizonTabularAgent):
'''
Posterior Sampling for Reinforcement Learning
'''
def __str__(self):
return "PSRL"

def update_policy(self, h=False):
'''
@@ -586,6 +588,8 @@ def __init__(self, nState, nAction, epLen,
self.delta = delta
self.scaling = scaling

def __str__(self):
return "UCRL2"

def get_slack(self, time):
'''
@@ -776,6 +780,9 @@ def __init__(self, nState, nAction, epLen, epsilon=0.1, **kwargs):
alpha0=0.0001, tau0=0.0001)
self.epsilon = epsilon

def __str__(self):
return "EpsilonGreedy"

def update_policy(self, time=False):
'''
Compute UCRL Q-values via extended value iteration.
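These __str__ overrides give each agent a stable display name, which river_swim_experiment.py uses as the key of its results dict. A minimal illustration (the constructor arguments here are assumptions for the sketch):

    import finite_tabular_agents

    agent = finite_tabular_agents.PSRL(nState=6, nAction=2, epLen=20)
    print(str(agent))   # -> 'PSRL', the key used for data[str(agent)]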
