Add defaults for agent; separate RequiresArgs into util

heronsystems · jtatusko · Jan 7, 2019 · Dec 28, 2018 · Dec 29, 2018 · Dec 29, 2018
commit 15c8dfacec33284ddc3419623e82c0da807fa0cf
diff --git a/adept/agents/__init__.py b/adept/agents/__init__.py
@@ -15,5 +15,3 @@
 
 from .actor_critic import ActorCritic
 from .impala import ActorCriticVtrace
-
-AGENTS = {'ActorCritic': ActorCritic, 'ActorCriticVtrace': ActorCriticVtrace}
diff --git a/adept/agents/actor_critic.py b/adept/agents/actor_critic.py
@@ -12,19 +12,27 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
-from argparse import ArgumentParser
 from collections import OrderedDict
 
 import torch
 from adept.registries.environment import Engines
 from torch.nn import functional as F
 
 from adept.expcaches.rollout import RolloutCache
-from adept.utils.util import listd_to_dlist, parse_bool
-from ._base import Agent
+from adept.utils.util import listd_to_dlist
+from adept.agents.agent_plugin import AgentPlugin
 
 
-class ActorCritic(Agent):
+class ActorCritic(AgentPlugin):
+    defaults = {
+        'nb_rollout': 20,
+        'discount': 0.99,
+        'gae': True,
+        'tau': 1.,
+        'normalize_advantage': False,
+        'entropy_weight': 0.01
+    }
+
     def __init__(
         self,
         network,
@@ -39,7 +47,7 @@ def __init__(
         gae,
         tau,
         normalize_advantage,
-        entropy_weight=0.01
+        entropy_weight
     ):
         self.discount, self.gae, self.tau = discount, gae, tau
         self.normalize_advantage = normalize_advantage
@@ -71,51 +79,7 @@ def from_args(
         return cls(
             network, device, reward_normalizer, gpu_preprocessor, engine,
             action_space, args.nb_env, args.exp_length, args.discount,
-            args.generalized_advantage_estimation, args.tau,
-            args.normalize_advantage
-        )
-
-    @classmethod
-    def add_args(cls, parser: ArgumentParser):
-        parser.add_argument(
-            '-ae',
-            '--exp-length',
-            type=int,
-            default=20,
-            help='Experience length (default: 20)'
-        )
-        parser.add_argument(
-            '-ag',
-            '--generalized-advantage-estimation',
-            type=parse_bool,
-            nargs='?',
-            const=True,
-            default=True,
-            help='Use generalized advantage estimation for the policy loss.'
-                 '(default: True)'
-        )
-        parser.add_argument(
-            '-at',
-            '--tau',
-            type=float,
-            default=1.00,
-            help='parameter for GAE (default: 1.00)'
-        )
-        parser.add_argument(
-            '--entropy-weight',
-            type=float,
-            default=0.01,
-            help='Entropy penalty (default: 0.01)'
-        )
-        parser.add_argument(
-            '--normalize-advantage',
-            type=parse_bool,
-            nargs='?',
-            const=True,
-            default=False,
-            help=
-            'Normalize the advantage when calculating policy loss.'
-            '(default: False)'
+            args.gae, args.tau, args.normalize_advantage, args.entropy_weight
         )
 
     @property

diff --git a/adept/agents/_base.py b/adept/agents/agent_plugin.py
@@ -13,16 +13,16 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 import abc
+from adept.utils.requires_args import RequiresArgs
 
-import torch
 
-
-class Agent(abc.ABC):
+class AgentPlugin(RequiresArgs, metaclass=abc.ABCMeta):
     """
     An Agent interacts with the environment and accumulates experience.
     """
 
-    @abc.abstractclassmethod
+    @classmethod
+    @abc.abstractmethod
     def from_args(
         cls, network, device, reward_normalizer, gpu_preprocessor, engine,
         action_space, args

diff --git a/adept/agents/impala/actor_critic_vtrace.py b/adept/agents/impala/actor_critic_vtrace.py
@@ -21,10 +21,10 @@
 from adept.expcaches.rollout import RolloutCache
 from adept.utils.util import listd_to_dlist, dlist_to_listd
 from adept.networks._base import ModularNetwork
-from .._base import Agent
+from adept.agents.agent_plugin import AgentPlugin
 
 
-class ActorCriticVtrace(Agent):
+class ActorCriticVtrace(AgentPlugin):
     def __init__(
         self,
         network,

diff --git a/adept/environments/__init__.py b/adept/environments/__init__.py
@@ -16,4 +16,4 @@
 from adept.environments.managers.subproc_env_manager import SubProcEnvManager
 from adept.environments.managers.simple_env_manager import SimpleEnvManager
 from adept.environments._metadata import EnvMetaData
-from adept.environments._env_plugin import EnvPlugin
+from adept.environments.env_plugin import EnvPlugin
diff --git a/adept/environments/deepmind_sc2.py b/adept/environments/deepmind_sc2.py
@@ -26,7 +26,7 @@
 )
 from pysc2.lib.static_data import UNIT_TYPES
 
-from adept.environments._env_plugin import EnvPlugin
+from adept.environments.env_plugin import EnvPlugin
 from adept.environments._spaces import Space, Spaces
 from adept.preprocess.observation import ObsPreprocessor
 from adept.preprocess.ops import BaseOp, FlattenSpace, CastToFloat

diff --git a/adept/environments/_env_plugin.py b/adept/environments/env_plugin.py
@@ -13,25 +13,16 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 import abc
-import json
 
-from adept.environments._env import EnvBase
+from adept.utils.requires_args import RequiresArgs
+from adept.environments._env import HasEnvMetaData
 
 
-class EnvPlugin(EnvBase, metaclass=abc.ABCMeta):
+class EnvPlugin(HasEnvMetaData, RequiresArgs, metaclass=abc.ABCMeta):
     """
     Implement this class to add your custom environment. Don't forget to
     implement defaults.
     """
-    defaults = None
-
-    @classmethod
-    def check_defaults(cls):
-        if cls.defaults is None:
-            raise NotImplementedError(
-                'Subclass must define class attribute: defaults'
-            )
-
     def __init__(self, action_space, cpu_preprocessor, gpu_preprocessor):
         """
         :param observation_space: ._spaces.Spaces
@@ -78,28 +69,3 @@ def cpu_preprocessor(self):
     @property
     def gpu_preprocessor(self):
         return self._gpu_preprocessor
-
-    @classmethod
-    def prompt(cls):
-        """
-        Display defaults as JSON, prompt user for changes.
-
-        :return: Dict[str, Any] Updated config dictionary.
-        """
-        if not cls.defaults:
-            return cls.defaults
-
-        user_input = input(
-            '\n{} Defaults:\n{}\nPress ENTER to use defaults. Otherwise, '
-            'modify JSON keys then press ENTER.\n'.format(
-                cls.__name__,
-                json.dumps(cls.defaults, indent=2, sort_keys=True)
-            )
-        )
-
-        # use defaults if no changes specified
-        if user_input == '':
-            return cls.defaults
-
-        updates = json.loads(user_input)
-        return {**cls.defaults, **updates}
diff --git a/adept/environments/openai_gym.py b/adept/environments/openai_gym.py
@@ -21,7 +21,7 @@
 from adept.preprocess.ops import (
     CastToFloat, GrayScaleAndMoveChannel, ResizeTo84x84, Divide255, FrameStack
 )
-from adept.environments._env_plugin import EnvPlugin
+from adept.environments.env_plugin import EnvPlugin
 from ._gym_wrappers import (
     NoopResetEnv, MaxAndSkipEnv, EpisodicLifeEnv, FireResetEnv
 )

diff --git a/tests/environments/__init__.py b/adept/registries/agent.py
diff --git a/adept/registries/environment.py b/adept/registries/environment.py
@@ -13,7 +13,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 from enum import IntEnum
-from adept.environments._env_plugin import EnvPlugin
+from adept.environments import EnvPlugin
 
 
 class Engines(IntEnum):

diff --git a/adept/scripts/local.py b/adept/scripts/local.py
@@ -158,8 +158,11 @@ def main(args, env_registry=EnvPluginRegistry()):
     logger.info('Network Parameter Count: {}'.format(count_parameters(network)))
 
     # construct agent
-    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
-    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+    device = torch.device(
+        "cuda:{}".format(args.gpu_id)
+        if (torch.cuda.is_available() and args.gpu_id >= 0)
+        else "cpu"
+    )
     torch.backends.cudnn.benchmark = True
     agent = make_agent(
         network, device, env.gpu_preprocessor, env.engine, env.action_space,

diff --git a/adept/utils/requires_args.py b/adept/utils/requires_args.py
@@ -0,0 +1,51 @@
+# Copyright (C) 2018 Heron Systems, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+import json
+
+
+class RequiresArgs:
+    defaults = None
+
+    @classmethod
+    def check_defaults(cls):
+        if cls.defaults is None:
+            raise NotImplementedError(
+                'Subclass must define class attribute: defaults'
+            )
+
+    @classmethod
+    def prompt(cls):
+        """
+        Display defaults as JSON, prompt user for changes.
+
+        :return: Dict[str, Any] Updated config dictionary.
+        """
+        if not cls.defaults:
+            return cls.defaults
+
+        user_input = input(
+            '\n{} Defaults:\n{}\nPress ENTER to use defaults. Otherwise, '
+            'modify JSON keys then press ENTER.\n'.format(
+                cls.__name__,
+                json.dumps(cls.defaults, indent=2, sort_keys=True)
+            )
+        )
+
+        # use defaults if no changes specified
+        if user_input == '':
+            return cls.defaults
+
+        updates = json.loads(user_input)
+        return {**cls.defaults, **updates}
diff --git a/adept/utils/script_helpers.py b/adept/utils/script_helpers.py
@@ -36,7 +36,7 @@ def make_network(
     for rank, names in nbr.items():
         for name in names:
             if rank == 1:
-                pathways_by_name[name] = c_networks[args.network_discrete]\
+                pathways_by_name[name] = c_networks[args.net1d]\
                     .from_args(ebn[name].shape, args)
             elif rank == 2:
                 raise NotImplementedError('Rank 2 inputs not implemented')

diff --git a/tests/environments/test_env.py b/tests/environments/test_env.py
@@ -15,5 +15,3 @@
 
 from .actor_critic import ActorCritic
 from .impala import ActorCriticVtrace
-
-AGENTS = {'ActorCritic': ActorCritic, 'ActorCriticVtrace': ActorCriticVtrace}