Added the decaying e-greedy policy agent. Also print out the kwargs for each progress bar.

ManuelMeraz · ManuelMeraz · commit 63e9b9ea932c · 2019-10-15T06:51:39.000-07:00
diff --git a/rl/agents/policy/decaying_egreedy_policy_agent.py b/rl/agents/policy/decaying_egreedy_policy_agent.py
@@ -30,13 +30,7 @@ def act(self, state: numpy.ndarray, available_actions: numpy.ndarray) -> int:
         :param available_actions: A list of available possible actions (positions on the board to mark)
         :return: an action
         """
-        action, state = self.egreedy_policy(state, available_actions)
-        value = self.value_model(action)
-
-        if value < self.previous_value:
-            self.reset_exploratory_rate()
-
-        return action
+        return self.egreedy_policy(state, available_actions)
 
     def egreedy_policy(self, state: numpy.ndarray, available_actions: numpy.ndarray) -> int:
         """
@@ -50,7 +44,11 @@ def egreedy_policy(self, state: numpy.ndarray, available_actions: numpy.ndarray)
         if e < self.exploratory_rate:
             action: int = numpy.random.choice(available_actions)
         else:
-            action: int = self.greedy_action(state, available_actions)
+            action, state = self.greedy_action(state, available_actions)
+            value = self.value_model(state)
+
+            if value < self.previous_value:
+                self.reset_exploratory_rate()
 
         return action
 
diff --git a/rl/book/chapter_2/bandits.yaml b/rl/book/chapter_2/bandits.yaml
@@ -24,7 +24,7 @@ agents: [
   learning: "WeightedAveraging",
   kwargs: {
     decay_rate: 0.5 ,
-    exploratory_rate: 0.1,
+    exploratory_rate: 0.2,
     learning_rate: 0.1,
   }
 },

Original file line number	Diff line number	Diff line change
`@@ -24,7 +24,7 @@ agents: [`
`24`	`24`	`learning: "WeightedAveraging",`
`25`	`25`	`kwargs: {`
`26`	`26`	`decay_rate: 0.5 ,`
`27`		`- exploratory_rate: 0.1,`
	`27`	`+ exploratory_rate: 0.2,`
`28`	`28`	`learning_rate: 0.1,`
`29`	`29`	`}`
`30`	`30`	`},`