Commit: updated stuff to do plotting

parimarjan committed Jun 6, 2018
1 parent 4596b24 commit 77e29c0
Showing 4 changed files with 91 additions and 42 deletions.
64 changes: 49 additions & 15 deletions 338-project/river_swim_experiment.py
@@ -16,6 +2,8 @@
from experiment import run_finite_tabular_experiment, run_random_search_experiment

import random_search_agents
from collections import defaultdict
from matplotlib import pyplot as plt


if __name__ == '__main__':
@@ -34,15 +36,15 @@
args = parser.parse_args()

# Make a filename to identify flags
-fileName = ('chainLen'
+fileName = ('riverSwim'
+ '_len=' + '%03.f' % args.ep_len
+ '_num_states' + '%03.f' % args.num_states
+ '_alg=' + str(args.alg)
+ '_scal=' + '%03.2f' % args.scaling
+ '_seed=' + str(args.seed)
+ '.csv')

-folderName = './'
+folderName = './data/'
targetPath = folderName + fileName
print '******************************************************************'
print fileName
@@ -71,19 +73,51 @@
'EpsilonGreedy': finite_tabular_agents.EpsilonGreedy,
'BRS': random_search_agents.BasicRandomSearch}

-agent_constructor = alg_dict[args.alg]
-
-agent = agent_constructor(env.nState, env.nAction, env.epLen,
-                          scaling=args.scaling)
+# agent_constructor = alg_dict[args.alg]
+# agent = agent_constructor(env.nState, env.nAction, env.epLen,
+#                           scaling=args.scaling)

rs_agent = random_search_agents.BasicRandomSearch(env.nState, env.nAction,
env.epLen, scaling=args.scaling)
# agent_constructors = [finite_tabular_agents.PSRL]
agent_constructors = []

agents = []

for constructor in agent_constructors:
agents.append(constructor(env.nState, env.nAction, env.epLen,
scaling=args.scaling))

seeds = [1, 2]
data = defaultdict(list)
for s in seeds:
    # run the random search agent
    env.reset()
    cumRegrets = run_random_search_experiment(rs_agent, env, f_ext,
                                              args.nEps, s)
    data['BRS'].append(cumRegrets)

    # run each tabular agent with the same seed for this run
    for agent in agents:
        cumRegrets = run_finite_tabular_experiment(agent, env, f_ext,
                         args.nEps, s, recFreq=100, fileFreq=1000,
                         targetPath=targetPath)
        data[str(agent)].append(cumRegrets)


# plotting time!
for agent in data:
    print(agent)
    y = np.mean(data[agent], axis=0)       # mean regret curve across seeds
    stdev = np.std(data[agent], axis=0)    # spread across seeds
    x = [i*100 for i in range(len(y))]     # assumes regret was recorded every 100 episodes
    plt.plot(x, y, 'k-')
    plt.fill_between(x, y - stdev, y + stdev)
    plt.show()
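Calling plt.show() inside the loop opens a separate figure per agent. If a single comparison figure is the goal, a variant along these lines would overlay the curves (a sketch; the label, alpha, and axis-name choices are assumptions, not part of this commit):

    for name, runs in data.items():
        y = np.mean(runs, axis=0)
        stdev = np.std(runs, axis=0)
        x = [i*100 for i in range(len(y))]
        plt.plot(x, y, label=name)
        plt.fill_between(x, y - stdev, y + stdev, alpha=0.3)
    plt.legend()
    plt.xlabel('episode')
    plt.ylabel('cumulative regret')
    plt.show()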

-# Run the experiment
-if args.alg == "BRS":
-    run_random_search_experiment(agent, env, f_ext, args.nEps, args.seed,
-                                 recFreq=100, fileFreq=1000, targetPath=targetPath)
-else:
-    run_finite_tabular_experiment(agent, env, f_ext, args.nEps, args.seed,
-                                  recFreq=100, fileFreq=1000, targetPath=targetPath)

+# if args.alg == "BRS":
+#     cumRegrets = run_random_search_experiment(agent, env, f_ext, args.nEps, args.seed,
+#                                               recFreq=100, fileFreq=1000, targetPath=targetPath)
+# else:
+#     cumRegrets = run_finite_tabular_experiment(agent, env, f_ext, args.nEps, args.seed,
+#                                                recFreq=100, fileFreq=1000, targetPath=targetPath)
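For reference, a plausible invocation of the updated script, assuming the argparse flags mirror the attribute names used above (ep_len, num_states, alg, scaling, seed, nEps) and that the ./data/ output directory already exists:

    python river_swim_experiment.py --alg BRS --ep_len 20 --num_states 6 \
        --scaling 1.0 --seed 1 --nEps 10000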
2 changes: 1 addition & 1 deletion psrl_experiments_2016/bandit_confidence.py
@@ -106,7 +106,7 @@ def outputConfidenceKnownP(alg, nextStateMul, nObs):

# Letting the agent know the transitions, but not the rewards
agent.R_prior[0, 0] = (0, 1e9)
-agent.P_prior[0, 0][0] = 1e9
+agent.P_prior[0, 0][0] = 0
for s in range(1, env.nState):
agent.P_prior[0, 0][s] += 1e9
agent.P_prior[s, 0][s] += 1e9
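Assuming P_prior holds Dirichlet pseudo-counts per (state, action) pair, this change (together with the += 1e9 lines around it) puts zero prior mass on the self-transition and huge counts on every other successor, so the sampled transition model is effectively known. A minimal sketch of the resulting posterior mean:

    import numpy as np

    # pseudo-counts out of (state 0, action 0) after this change:
    # no mass on the self-transition, ~1e9 on each other successor
    counts = np.array([0.0, 1e9, 1e9])
    posterior_mean = counts / counts.sum()
    print(posterior_mean)   # -> [0. 0.5 0.5]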
60 changes: 34 additions & 26 deletions src/experiment.py
@@ -6,7 +6,6 @@

import numpy as np
import pandas as pd
-
from shutil import copyfile

def run_finite_tabular_experiment(agent, env, f_ext, nEps, seed=1,
@@ -27,13 +26,15 @@ def run_finite_tabular_experiment(agent, env, f_ext, nEps, seed=1,
Returns:
cumRegrets - list of cumulative regrets, one entry per recFreq episodes
'''
cumRegrets = []
data = []
qVals, qMax = env.compute_qVals()
np.random.seed(seed)

cumRegret = 0
cumReward = 0
empRegret = 0
env.reset()

for ep in xrange(1, nEps + 2):
# Reset the environment
@@ -81,18 +82,20 @@

# Logging to dataframe
if ep % recFreq == 0:
-    data.append([ep, epReward, cumReward, cumRegret, empRegret])
-    print 'episode:', ep, 'epReward:', epReward, 'cumRegret:', cumRegret
-
-    if ep % max(fileFreq, recFreq) == 0:
-        dt = pd.DataFrame(data,
-                          columns=['episode', 'epReward', 'cumReward',
-                                   'cumRegret', 'empRegret'])
-        print 'Writing to file ' + targetPath
-        dt.to_csv('tmp.csv', index=False, float_format='%.2f')
-        copyfile('tmp.csv', targetPath)
-        print '****************************'
+    cumRegrets.append(cumRegret)
+    # data.append([ep, epReward, cumReward, cumRegret, empRegret])
+    # print 'episode:', ep, 'epReward:', epReward, 'cumRegret:', cumRegret

+    # if ep % max(fileFreq, recFreq) == 0:
+    #     dt = pd.DataFrame(data,
+    #                       columns=['episode', 'epReward', 'cumReward',
+    #                                'cumRegret', 'empRegret'])
+    #     print 'Writing to file ' + targetPath
+    #     dt.to_csv('tmp.csv', index=False, float_format='%.2f')
+    #     copyfile('tmp.csv', targetPath)
+    #     print '****************************'

print '**************************************************'
print 'Experiment complete'
print '**************************************************'

return cumRegrets
@@ -121,7 +124,8 @@ def run_episode():

return epReward, epRegret


env.reset()
cumRegrets = []
data = []
qVals, qMax = env.compute_qVals()
np.random.seed(seed)
@@ -169,22 +173,26 @@
recFreq = 10000

# Logging to dataframe

# FIXME: how often do we want it to record?
if cur_ep % recFreq == 0:
    print(cur_ep)
-    data.append([cur_ep, epRewardPos, cumReward, cumRegret, empRegret])
-    print 'episode:', cur_ep, 'epRewardPos:', epRewardPos, 'cumRegret:', cumRegret
-
-    if cur_ep % max(fileFreq, recFreq) == 0:
-        dt = pd.DataFrame(data,
-                          columns=['episode', 'epReward', 'cumReward',
-                                   'cumRegret', 'empRegret'])
-        print 'Writing to file ' + targetPath
-        dt.to_csv('tmp.csv', index=False, float_format='%.2f')
-        copyfile('tmp.csv', targetPath)
-        print '****************************'
+    # data.append([cur_ep, epRewardPos, cumReward, cumRegret, empRegret])
+    # print 'episode:', cur_ep, 'epRewardPos:', epRewardPos, 'cumRegret:', cumRegret
+    cumRegrets.append(cumRegret)
+
+    # if cur_ep % max(fileFreq, recFreq) == 0:
+    #     dt = pd.DataFrame(data,
+    #                       columns=['episode', 'epReward', 'cumReward',
+    #                                'cumRegret', 'empRegret'])
+    #     print 'Writing to file ' + targetPath
+    #     dt.to_csv('tmp.csv', index=False, float_format='%.2f')
+    #     copyfile('tmp.csv', targetPath)
+    #     print '****************************'

agent.theta = agent.theta + (agent.alpha / agent.batch_size)*reward_differences

print '**************************************************'
print 'Experiment complete'
print '**************************************************'

return cumRegrets
7 changes: 7 additions & 0 deletions src/finite_tabular_agents.py
@@ -323,6 +323,8 @@ class PSRL(FiniteHorizonTabularAgent):
'''
Posterior Sampling for Reinforcement Learning
'''
def __str__(self):
return "PSRL"

def update_policy(self, h=False):
'''
@@ -586,6 +588,8 @@ def __init__(self, nState, nAction, epLen,
self.delta = delta
self.scaling = scaling

def __str__(self):
return "UCRL2"

def get_slack(self, time):
'''
@@ -776,6 +780,9 @@ def __init__(self, nState, nAction, epLen, epsilon=0.1, **kwargs):
alpha0=0.0001, tau0=0.0001)
self.epsilon = epsilon

def __str__(self):
return "EpsilonGreedy"

def update_policy(self, time=False):
'''
Compute UCRL Q-values via extended value iteration.
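These __str__ overrides give each agent a stable display name, which river_swim_experiment.py uses as the key of its results dict. A minimal illustration (the constructor arguments here are assumptions for the sketch):

    import finite_tabular_agents

    agent = finite_tabular_agents.PSRL(nState=6, nAction=2, epLen=20)
    print(str(agent))   # -> 'PSRL', the key used for data[str(agent)]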
