'''
07_GamblersProblem_MCP.py : Application of a Monte Carlo solution to Gambler's Problem (prediction)
Cem Karaoguz, 2020
MIT License
'''
import numpy as np
import pylab as pl
from IRL.environments.Gambler import CoinFlipGame
from IRL.agents.MonteCarlo import MonteCarloPrediction
from IRL.utils.Policies import StochasticPolicy
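
# For reference, a minimal sketch of a first-visit Monte Carlo prediction
# update over one episode. This is a hypothetical stand-in for illustration
# only: the actual update is performed by MonteCarloPrediction.evaluate()
# from the IRL package (not shown here). It assumes the same experience
# format built in the main loop below (a list of dicts carrying 'state',
# 'reward' and 'done'), and value/visit tables indexed by state.
def first_visit_mc_update(experiences, valueTable, visitCounts, gamma=1.0):
  G = 0.0
  # Walk the episode backwards, accumulating the discounted return.
  for t in range(len(experiences) - 2, -1, -1):
    G = gamma * G + experiences[t + 1]['reward']
    state = experiences[t]['state']
    # First-visit check: update only at the earliest occurrence of the state.
    if state not in [xp['state'] for xp in experiences[:t]]:
      visitCounts[state] += 1
      # Incremental sample-mean update of the value estimate.
      valueTable[state] += (G - valueTable[state]) / visitCounts[state]
  return valueTable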
if __name__=="__main__":

  nEpisodes = 100000

  # Environment
  maxCapital = 100
  prob_heads = 0.4

  # Agent
  gamma = 1.0

  env = CoinFlipGame(maxCapital, prob_heads)
  policy = StochasticPolicy(env.nStates, env.nActions)
  agent = MonteCarloPrediction(env.nStates, gamma, doUseAllVisits=False)

  #env.printEnv()

  for e in range(nEpisodes):

    if(e%1000==0):
      print("Episode : ", e)

    # Each episode is stored as a list of experience dicts; the first entry
    # is filled in as the episode unfolds.
    experiences = [{}]
    state = env.reset()
    done = False
    while not done:

      # Sample an action from the stochastic policy over the actions
      # available in the current state.
      action = policy.sampleAction(state, env.getAvailableActions())

      experiences[-1]['state'] = state
      experiences[-1]['action'] = action
      experiences[-1]['done'] = done

      new_state, reward, done = env.step(action)

      #print("Episode : ", e, " State : ", state, " Action : ", action, " Reward : ", reward, " Next state : ", new_state)

      # Start the next experience entry with the outcome of this transition.
      xp = {}
      xp['reward'] = reward
      xp['state'] = new_state
      xp['done'] = done
      experiences.append(xp)

      state = new_state

    # Update the value estimates from the completed episode.
    agent.evaluate(experiences)

  # Plot the estimated values for the non-terminal capital levels
  # (the terminal states 0 and maxCapital are excluded).
  pl.figure()
  pl.plot(agent.valueTable[1:-1])
  pl.xlabel("Capital")
  pl.ylabel("Value estimates")
  pl.show()