
Commit

Merge branch 'sursu-where-q_best'
ShangtongZhang committed Jun 29, 2019
2 parents d593539 + 16f655c
commit af7336a
Showing 1 changed file with 4 additions and 4 deletions.
chapter02/ten_armed_testbed.py (8 changes: 4 additions & 4 deletions)
@@ -61,15 +61,15 @@ def act(self):
             UCB_estimation = self.q_estimation + \
                 self.UCB_param * np.sqrt(np.log(self.time + 1) / (self.action_count + 1e-5))
             q_best = np.max(UCB_estimation)
-            return np.random.choice([action for action, q in enumerate(UCB_estimation) if q == q_best])
+            return np.random.choice(np.where(UCB_estimation == q_best)[0])
 
         if self.gradient:
             exp_est = np.exp(self.q_estimation)
             self.action_prob = exp_est / np.sum(exp_est)
             return np.random.choice(self.indices, p=self.action_prob)
 
         q_best = np.max(self.q_estimation)
-        return np.random.choice([action for action, q in enumerate(self.q_estimation) if q == q_best])
+        return np.random.choice(np.where(self.q_estimation == q_best)[0])
 
     # take an action, update estimation for this action
     def step(self, action):
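The act() change swaps a Python list comprehension for np.where when breaking ties among maximal estimates: both collect every index whose value equals the maximum, and np.random.choice then picks one of those indices uniformly. A minimal standalone sketch of the equivalence (the q array below is hypothetical, not taken from the file):

import numpy as np

q = np.array([0.3, 0.7, 0.7, 0.1])    # hypothetical action-value estimates with a tie
q_best = np.max(q)

# old form: list comprehension over enumerate()
ties_list = [action for action, value in enumerate(q) if value == q_best]

# new form: np.where returns the same indices as a NumPy array
ties_where = np.where(q == q_best)[0]

assert ties_list == list(ties_where)   # both give [1, 2]
action = np.random.choice(ties_where)  # uniform random tie-breaking among the maxima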
@@ -97,8 +97,8 @@ def step(self, action):
 
 
 def simulate(runs, time, bandits):
-    best_action_counts = np.zeros((len(bandits), runs, time))
-    rewards = np.zeros(best_action_counts.shape)
+    rewards = np.zeros((len(bandits), runs, time))
+    best_action_counts = np.zeros(rewards.shape)
     for i, bandit in enumerate(bandits):
         for r in trange(runs):
             bandit.reset()
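The simulate() change only reorders the two allocations so that rewards is built from the explicit (len(bandits), runs, time) shape and best_action_counts mirrors it via rewards.shape; the resulting arrays are identical either way. A tiny sketch with made-up sizes:

import numpy as np

n_bandits, runs, time = 3, 200, 1000           # made-up sizes for illustration

rewards = np.zeros((n_bandits, runs, time))
best_action_counts = np.zeros(rewards.shape)   # derived from rewards, same shape

assert best_action_counts.shape == rewards.shape == (n_bandits, runs, time)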
