Commit 8f56145

add mc_policy evaluation notebook

Parent: 3ede6ec

3 files changed, +144 -3 lines

chapter-5/blackjack.py (+2 -2)

@@ -136,10 +136,10 @@ def step(self, action):
         else:
             return self.get_observation(), -1., True

-    def render(self):
+    def render(self, policy=None):
         xticklabels = ["A", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
         yticklabels = range(12, 22)
-        fig, ax = plt.subplots(2, 1, figsize=(7, 25))
+        fig, ax = plt.subplots(2, 1, figsize=(7, 15))
         sns.heatmap(self.values[12:22, 1:, 0], cmap="RdBu", vmin=-1, vmax=1,
                     xticklabels=xticklabels, yticklabels=yticklabels, annot=True, ax=ax[0])
         ax[0].set_title("V(π) with no usable Ace")
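render now accepts an optional policy argument, but the diff only shows the signature change. As a hedged guess at what such a parameter could support, here is a standalone sketch that draws a policy plane as a heatmap in the same style as the value plots. The render_policy name, the (player_sum, dealer_card, usable_ace) indexing, and the 0 = stick / 1 = hit encoding are assumptions for illustration, not code from this commit.

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

def render_policy(policy, ax=None):
    # Hypothetical helper: draw a blackjack policy as a heatmap.
    # `policy` is assumed to be a numpy array indexed by
    # (player_sum, dealer_card, usable_ace) with 0 = stick, 1 = hit;
    # only the "no usable Ace" plane is shown, matching the value plot above.
    xticklabels = ["A", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
    yticklabels = range(12, 22)
    if ax is None:
        _, ax = plt.subplots(figsize=(7, 7))
    actions = policy[12:22, 1:, 0]                # same slice as the value heatmap
    labels = np.where(actions == 1, "H", "S")     # "H" = hit, "S" = stick
    sns.heatmap(actions, cmap="RdBu", vmin=0, vmax=1,
                xticklabels=xticklabels, yticklabels=yticklabels,
                annot=labels, fmt="", ax=ax)
    ax.set_title("Policy with no usable Ace")
    return ax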

chapter-5/mc_policy_evaluation.ipynb (+142)

(Large diffs are not rendered by default.)

chapter-5/mc_policy_evaluation.py (-1)

@@ -42,7 +42,6 @@ def mc_policy_evaluation(env, policy, iterations=10000, first_visit=True):
         # update the action-value state
         # note that we sum the value and average only at the end of the iteration
         env.values[starting_state] += reward
-        policy[obs] = np.argmax()
         counts[starting_state] += 1
     print()
     return env.values / counts
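The removed line was a leftover policy update: np.argmax() is called with no argument, so it would raise a TypeError, and a fixed policy should not be mutated during evaluation anyway. For context, here is a minimal self-contained sketch of a Monte Carlo evaluation loop consistent with the visible fragment. The env interface (reset() returning an observation, step() returning (obs, reward, done)) and the values/counts bookkeeping are assumptions based on these lines and on blackjack.py, not the full file.

import numpy as np

def mc_policy_evaluation(env, policy, iterations=10000, first_visit=True):
    # Sketch: estimate V(s) for a fixed policy by averaging episode returns.
    # Since only the starting state of each episode is updated here, the
    # first_visit flag makes no difference in this simplified version.
    counts = np.zeros_like(env.values)
    for _ in range(iterations):
        obs = env.reset()
        starting_state = obs
        done = False
        ret = 0.0
        while not done:
            action = policy[obs]
            obs, reward, done = env.step(action)
            ret += reward                    # undiscounted return, as in blackjack
        # sum the return for the episode's starting state;
        # the average is taken only at the end (values / counts)
        env.values[starting_state] += ret
        counts[starting_state] += 1
    # the original returns env.values / counts; the guard against
    # division by zero for unvisited states is an addition here
    return env.values / np.maximum(counts, 1)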
