Commit 8f56145

add mc_policy evaluation notebook

Parent: 3ede6ec

3 files changed, +144 -3 lines

chapter-5/blackjack.py (+2 -2)

@@ -136,10 +136,10 @@ def step(self, action):
         else:
             return self.get_observation(), -1., True

-    def render(self):
+    def render(self, policy=None):
         xticklabels = ["A", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
         yticklabels = range(12, 22)
-        fig, ax = plt.subplots(2, 1, figsize=(7, 25))
+        fig, ax = plt.subplots(2, 1, figsize=(7, 15))
         sns.heatmap(self.values[12:22, 1:, 0], cmap="RdBu", vmin=-1, vmax=1,
                     xticklabels=xticklabels, yticklabels=yticklabels, annot=True, ax=ax[0])
         ax[0].set_title("V(π) with no usable Ace")
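render now accepts an optional policy argument, but the diff only shows the signature change. As a hedged guess at what such a parameter could support, here is a standalone sketch that draws a policy plane as a heatmap in the same style as the value plots. The render_policy name, the (player_sum, dealer_card, usable_ace) indexing, and the 0 = stick / 1 = hit encoding are assumptions for illustration, not code from this commit.

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

def render_policy(policy, ax=None):
    # Hypothetical helper: draw a blackjack policy as a heatmap.
    # `policy` is assumed to be a numpy array indexed by
    # (player_sum, dealer_card, usable_ace) with 0 = stick, 1 = hit;
    # only the "no usable Ace" plane is shown, matching the value plot above.
    xticklabels = ["A", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
    yticklabels = range(12, 22)
    if ax is None:
        _, ax = plt.subplots(figsize=(7, 7))
    actions = policy[12:22, 1:, 0]                # same slice as the value heatmap
    labels = np.where(actions == 1, "H", "S")     # "H" = hit, "S" = stick
    sns.heatmap(actions, cmap="RdBu", vmin=0, vmax=1,
                xticklabels=xticklabels, yticklabels=yticklabels,
                annot=labels, fmt="", ax=ax)
    ax.set_title("Policy with no usable Ace")
    return ax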

chapter-5/mc_policy_evaluation.ipynb (+142)

(Large diffs are not rendered by default.)

chapter-5/mc_policy_evaluation.py (-1)

@@ -42,7 +42,6 @@ def mc_policy_evaluation(env, policy, iterations=10000, first_visit=True):
         # update the action-value state
         # note that we sum the value and average only at the end of the iteration
         env.values[starting_state] += reward
-        policy[obs] = np.argmax()
         counts[starting_state] += 1
     print()
     return env.values / counts
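The removed line was a leftover policy update: np.argmax() is called with no argument, so it would raise a TypeError, and a fixed policy should not be mutated during evaluation anyway. For context, here is a minimal self-contained sketch of a Monte Carlo evaluation loop consistent with the visible fragment. The env interface (reset() returning an observation, step() returning (obs, reward, done)) and the values/counts bookkeeping are assumptions based on these lines and on blackjack.py, not the full file.

import numpy as np

def mc_policy_evaluation(env, policy, iterations=10000, first_visit=True):
    # Sketch: estimate V(s) for a fixed policy by averaging episode returns.
    # Since only the starting state of each episode is updated here, the
    # first_visit flag makes no difference in this simplified version.
    counts = np.zeros_like(env.values)
    for _ in range(iterations):
        obs = env.reset()
        starting_state = obs
        done = False
        ret = 0.0
        while not done:
            action = policy[obs]
            obs, reward, done = env.step(action)
            ret += reward                    # undiscounted return, as in blackjack
        # sum the return for the episode's starting state;
        # the average is taken only at the end (values / counts)
        env.values[starting_state] += ret
        counts[starting_state] += 1
    # the original returns env.values / counts; the guard against
    # division by zero for unvisited states is an addition here
    return env.values / np.maximum(counts, 1)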
