
Commit 14a83e6

committed
All agents must inherit from Agent. Added null learning and null policy agents for cases where only learning, or only acting, is necessary (e.g. random or human agents).
1 parent 3b9130e commit 14a83e6
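
As a rough, illustrative sketch of the intent behind this commit (not code from the commit itself): a driver loop talks only to the Agent interface, and agents that cannot learn, such as random or human agents, still expose learn() because the null learning side turns it into a no-op. The env object and its reset()/step() API below are assumptions for illustration, not part of this repository.

# Illustrative only: `env` and its reset()/step() API are assumed, not defined in this repo.
def play_episode(env, agent):
    state = env.reset()
    agent.reset()
    done = False
    while not done:
        action = agent.act(state=state)           # policy side: random, human, learned, ...
        state, reward, done = env.step(action)
        agent.learn(state=state, reward=reward)   # no-op for agents built on a null learning agent
    return agent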

10 files changed: 81 additions & 76 deletions

rl/agents/agent.py

Lines changed: 21 additions & 2 deletions
@@ -1,6 +1,25 @@
 #! /usr/bin/env python3
-from abc import ABC
+from abc import ABC, abstractmethod


 class Agent(ABC):
-    pass
+    """
+    Agent class serves as an interface definition. Every concrete Agent must
+    implement these four functions: act, learn, render, and reset.
+    """
+
+    @abstractmethod
+    def __init__(self, **kwargs):
+        pass
+
+    @abstractmethod
+    def act(self, **kwargs):
+        pass
+
+    @abstractmethod
+    def learn(self, **kwargs):
+        pass
+
+    @abstractmethod
+    def reset(self, **kwargs):
+        pass
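
For illustration only (this class is hypothetical, not part of the commit): a concrete agent must override every abstract method before it can be instantiated, otherwise Python raises a TypeError.

from rl.agents.agent import Agent


class IdleAgent(Agent):
    """A hypothetical do-nothing agent that satisfies the Agent interface."""

    def __init__(self, **kwargs):
        pass

    def act(self, **kwargs):
        return None  # never selects a real action

    def learn(self, **kwargs):
        pass  # nothing to learn

    def reset(self, **kwargs):
        pass


agent = IdleAgent()  # instantiable; omitting any override above would raise TypeError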

rl/agents/learning/learning_agent.py

Lines changed: 7 additions & 13 deletions
@@ -1,7 +1,6 @@
 #! /usr/bin/env python3
 from abc import abstractmethod
 from collections import defaultdict, Counter
-from typing import Dict, Tuple, Union

 from rl.agents.agent import Agent
 from rl.reprs import Transition
@@ -13,24 +12,19 @@ class LearningAgent(Agent):
     The learning agent implements a learning method and is used for purposes of building a state value map
     """

-    def __init__(self, transitions=None):
+    def __init__(self, state_values=None, transitions=None):
         self.trajectory = []

-        if not transitions:
+        if state_values is None:
+            self.state_values = defaultdict(Value)
+        else:
+            self.state_values = state_values
+
+        if transitions is None:
             self.transitions = defaultdict(Counter)
         else:
             self.transitions = transitions

-    @property
-    @abstractmethod
-    def state_values(self) -> Dict[Tuple[Union[int, float]], Value]:
-        pass
-
-    @state_values.setter
-    @abstractmethod
-    def state_values(self, state_values: Dict[Tuple[Union[int, float]], Value]):
-        pass
-
     @abstractmethod
     def learn_value(self):
         pass

rl/agents/learning/null_learning_agent.py

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+#! /usr/bin/env python3
+
+from rl.agents.learning import LearningAgent
+
+
+class NullLearningAgent(LearningAgent):
+    """
+    The learning agent implements a learning method and is used for purposes of building a state value map
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(args, kwargs)
+
+    def learn_value(self):
+        pass

rl/agents/learning/sample_averaging_agent.py

Lines changed: 1 addition & 14 deletions
@@ -1,5 +1,4 @@
 #! /usr/bin/env python3
-from collections import defaultdict
 from typing import Dict, Tuple, Union

 from rl.agents.learning import LearningAgent
@@ -18,19 +17,7 @@ def __init__(self, state_values: Dict[Tuple[Union[int, float]], Value] = None, t
         Represents an agent learning with temporal difference
         :param state_values: A mapping of states to and their associated values
         """
-        super().__init__(transitions=transitions)
-        if not state_values:
-            self._state_values = defaultdict(Value)
-        else:
-            self._state_values = state_values
-
-    @property
-    def state_values(self) -> Dict[Tuple[Union[int, float]], Value]:
-        return self._state_values
-
-    @state_values.setter
-    def state_values(self, state_values: Dict[Tuple[Union[int, float]], Value]):
-        self._state_values = state_values
+        super().__init__(state_values=state_values, transitions=transitions)

     def learn_value(self):
         """

rl/agents/learning/temporal_difference_agent.py

Lines changed: 1 addition & 15 deletions
@@ -1,5 +1,4 @@
 #! /usr/bin/env python3
-from collections import defaultdict
 from typing import Dict, Tuple, Union

 from rl.agents.learning import LearningAgent
@@ -19,22 +18,9 @@ def __init__(self, learning_rate: float, state_values: Dict[Tuple[Union[int, flo
         :param learning_rate: How much to learn from the most recent action
         :param state_values: A mapping of states to and their associated values
         """
-        super().__init__(transitions=transitions)
-        if not state_values:
-            self._state_values = defaultdict(Value)
-        else:
-            self._state_values = state_values
-
+        super().__init__(state_values=state_values, transitions=transitions)
         self.learning_rate: float = learning_rate

-    @property
-    def state_values(self) -> Dict[Tuple[Union[int, float]], Value]:
-        return self._state_values
-
-    @state_values.setter
-    def state_values(self, state_values: Dict[Tuple[Union[int, float]], Value]):
-        self._state_values = state_values
-
     def learn_value(self):
         """
         Apply temporal difference learning and update the state and values of this agent

rl/agents/learning/temporal_difference_averaging_agent.py

Lines changed: 1 addition & 15 deletions
@@ -1,5 +1,4 @@
 #! /usr/bin/env python3
-from collections import defaultdict
 from typing import Dict, Tuple, Union

 from rl.agents.learning import LearningAgent
@@ -20,19 +19,7 @@ def __init__(self, state_values: Dict[Tuple[Union[int, float]], Value] = None,
         :param learning_rate: How much to learn from the most recent action
         :param state_values: A mapping of states to and their associated values
         """
-        super().__init__(transitions=transitions)
-        if not state_values:
-            self._state_values = defaultdict(Value)
-        else:
-            self._state_values = state_values
-
-    @property
-    def state_values(self) -> Dict[Tuple[Union[int, float]], Value]:
-        return self._state_values
-
-    @state_values.setter
-    def state_values(self, state_values: Dict[Tuple[Union[int, float]], Value]):
-        self._state_values = state_values
+        super().__init__(state_values=state_values, transitions=transitions)

     def learn_value(self):
         """
@@ -46,7 +33,6 @@ def learn_value(self):

         if current_value.value != 0:
             for i in range(-2, -1 * len(self.trajectory), -1):
-
                 previous_transition = self.trajectory[i - 1]
                 previous_value: Value = self.state_values[previous_transition.state]

rl/agents/learning/weighted_averaging_agent.py

Lines changed: 1 addition & 15 deletions
@@ -1,5 +1,4 @@
 #! /usr/bin/env python3
-from collections import defaultdict
 from typing import Dict, Tuple, Union

 from rl.agents.learning import LearningAgent
@@ -18,22 +17,9 @@ def __init__(self, learning_rate, state_values: Dict[Tuple[Union[int, float]], V
         Represents an agent learning with temporal difference
         :param state_values: A mapping of states to and their associated values
         """
+        super().__init__(state_values=state_values, transitions=transitions)
         self.learning_rate = learning_rate

-        super().__init__(transitions=transitions)
-        if not state_values:
-            self._state_values = defaultdict(Value)
-        else:
-            self._state_values = state_values
-
-    @property
-    def state_values(self) -> Dict[Tuple[Union[int, float]], Value]:
-        return self._state_values
-
-    @state_values.setter
-    def state_values(self, state_values: Dict[Tuple[Union[int, float]], Value]):
-        self._state_values = state_values
-
     def learn_value(self):
         """
         Apply temporal difference learning and update the state and values of this agent

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+#! /usr/bin/env python3
+
+import numpy
+
+from rl.agents.policy import PolicyAgent
+
+
+class NullPolicyAgent(PolicyAgent):
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(args, kwargs)
+
+    def act(self, state: numpy.ndarray):
+        """
+        A policy for this agent that maps an state to an action
+        :param state: The state of the environment
+        """
+        pass
+
+    def available_actions(self, state: numpy.ndarray) -> numpy.ndarray:
+        """
+        Given a state, determine the available actions
+        :param state: The state of the environment
+        """
+        pass

rl/tictactoe/base_agent.py

Lines changed: 2 additions & 1 deletion
@@ -4,10 +4,11 @@
 import numpy

 from rl.agents import RandomPolicyAgent
+from rl.agents.learning.null_learning_agent import NullLearningAgent
 from rl.envs.tictactoe import Mark


-class BaseAgent(RandomPolicyAgent):
+class BaseAgent(RandomPolicyAgent, NullLearningAgent):
     def available_actions(self, state: numpy.ndarray) -> numpy.ndarray:
         """
         Determines the available actions for the agent given the state

rl/tictactoe/human_agent.py

Lines changed: 7 additions & 1 deletion
@@ -4,10 +4,16 @@
 import numpy

 from rl.agents import HumanPolicyAgent
+from rl.agents.learning.null_learning_agent import NullLearningAgent
 from rl.envs.tictactoe import Mark


-class HumanAgent(HumanPolicyAgent):
+class HumanAgent(HumanPolicyAgent, NullLearningAgent):
+
+    def __init__(self):
+        HumanPolicyAgent.__init__(self)
+        NullLearningAgent.__init__(self)
+
     def available_actions(self, state: numpy.ndarray) -> numpy.ndarray:
         """
         Determines the available actions for the agent given the state
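
The two tic-tac-toe agents above combine an acting base with NullLearningAgent through multiple inheritance; HumanAgent calls each parent's __init__ explicitly because the two bases take different constructor arguments. A minimal sketch of the same pattern, where ScriptedPolicyAgent is a hypothetical acting-only base invented for illustration:

from rl.agents.learning.null_learning_agent import NullLearningAgent


class ScriptedPolicyAgent:
    """Hypothetical acting-only base that replays a fixed list of actions."""

    def __init__(self, actions):
        self.actions = list(actions)

    def act(self, state):
        return self.actions.pop(0)


class ScriptedAgent(ScriptedPolicyAgent, NullLearningAgent):
    def __init__(self, actions):
        # Initialise each base separately, as HumanAgent does above,
        # because their __init__ signatures differ.
        ScriptedPolicyAgent.__init__(self, actions)
        NullLearningAgent.__init__(self)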
