heronsystems
diff --git a/‎README.md
+7-13 b/‎README.md
+7-13
diff --git a/‎adept/agents/actor_critic.py
+3-5 b/‎adept/agents/actor_critic.py
+3-5
diff --git a/‎adept/agents/agent_module.py
+1-1 b/‎adept/agents/agent_module.py
+1-1
diff --git a/‎adept/agents/agent_registry.py
+3-3 b/‎adept/agents/agent_registry.py
+3-3
diff --git a/‎adept/agents/impala/actor_critic_vtrace.py
+12-37 b/‎adept/agents/impala/actor_critic_vtrace.py
+12-37
diff --git a/‎adept/app.py
+2-2 b/‎adept/app.py
+2-2
diff --git a/‎adept/environments/_spaces.py
+43-26 b/‎adept/environments/_spaces.py
+43-26
@@ -100,30 +100,24 @@ training frames, since we are interested in sample efficiency.
 
 ## API Reference
 ![architecture](images/architecture.png)
+### Containers
+Containers hold all of the application state. Each subprocess gets a container 
+in Towered and IMPALA modes.
 ### Agents
 An Agent acts on and observes the environment.
 Currently only ActorCritic is supported. Other agents, such as DQN or ACER may 
 be added later.
-### Containers
-Containers hold all of the application state. Each subprocess gets a container 
-in Towered and IMPALA modes.
+### Networks
+Networks are not PyTorch modules; they need to implement our abstract 
+NetworkModule or ModularNetwork classes. A ModularNetwork consists of 
+source nets, a body, and heads.
 ### Environments
 Environments run in subprocesses and send their observation, rewards,
 terminals, and infos to the host process. They work pretty much the same way as 
 OpenAI's code.
 ### Experience Caches
 An Experience Cache is a Rollout or Experience Replay that is written to after 
 stepping and read before learning.
-### Modules
-Modules are generally useful PyTorch modules used in Networks.
-### Networks
-Networks are not PyTorch modules, they need to implement our abstract 
-NetworkInterface or ModularNetwork classes. A ModularNetwork consists of a 
-trunk, body, and head. The Trunk can consist of multiple networks for vision 
-or discrete data. It flattens these into an embedding. The Body network 
-operates on the flattened embedding and would typically be an LSTM, Linear 
-layer, or a combination. The Head depends on the Environment and Agent and is 
-created accordingly.
 
 ## Acknowledgements
 We borrow pieces of OpenAI's [gym](https://github.com/openai/gym) and 
 
@@ -65,7 +65,7 @@ def __init__(
         )
         self._device = device
         self.action_space = action_space
-        self._action_keys = list(sorted(action_space.entries_by_name.keys()))
+        self._action_keys = list(sorted(action_space.keys()))
         self._func_id_to_headnames = None
         if self.engine == Engines.SC2:
             from adept.environments.deepmind_sc2 import SC2ActionLookup
@@ -112,10 +112,8 @@ def internals(self, new_internals):
         self._internals = new_internals
 
     @staticmethod
-    def output_shape(action_space):
-        ebn = action_space.entries_by_name
-        actor_outputs = {name: entry.shape[0] for name, entry in ebn.items()}
-        head_dict = {'critic': 1, **actor_outputs}
+    def output_space(action_space):
+        head_dict = {'critic': (1, ), **action_space}
         return head_dict
 
     def act(self, obs):
 
@@ -60,7 +60,7 @@ def internals(self, new_internals):
 
     @staticmethod
     @abc.abstractmethod
-    def output_shape(action_space):
+    def output_space(action_space):
         raise NotImplementedError
 
     @abc.abstractmethod
 
@@ -47,7 +47,7 @@ def register_agent(self, agent_id, agent_class):
         :return:
         """
         assert issubclass(agent_class, AgentModule)
-        agent_class.check_defaults()
+        agent_class.check_args_implemented()
         self._agent_class_by_id[agent_id] = agent_class
 
     def lookup_agent(self, agent_id):
@@ -59,12 +59,12 @@ def lookup_agent(self, agent_id):
         """
         return self._agent_class_by_id[agent_id]
 
-    def lookup_output_shape(self, agent_id, action_space):
+    def lookup_output_space(self, agent_id, action_space):
         """
         For a given agent_id, determine the shapes of the outputs.
 
         :param agent_id: str
         :param action_space:
         :return:
         """
-        return self._agent_class_by_id[agent_id].output_shape(action_space)
+        return self._agent_class_by_id[agent_id].output_space(action_space)
@@ -12,19 +12,21 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
-# Use https://github.com/deepmind/scalable_agent/blob/master/vtrace.py for reference
 from collections import OrderedDict
 import torch
 from torch.nn import functional as F
 
 from adept.environments.env_registry import Engines
 from adept.expcaches.rollout import RolloutCache
 from adept.utils.util import listd_to_dlist, dlist_to_listd
-from adept.networks._base import ModularNetwork
 from adept.agents.agent_module import AgentModule
 
 
 class ActorCriticVtrace(AgentModule):
+    """
+    Reference implementation:
+    https://github.com/deepmind/scalable_agent/blob/master/vtrace.py
+    """
     args = {
         'nb_rollout': 20,
         'discount': 0.99,
@@ -65,7 +67,7 @@ def __init__(
         )
         self._device = device
         self.action_space = action_space
-        self._action_keys = list(sorted(action_space.entries_by_name.keys()))
+        self._action_keys = list(sorted(action_space.keys()))
         self._func_id_to_headnames = None
         if self.engine == Engines.SC2:
             from adept.environments.deepmind_sc2 import SC2ActionLookup
@@ -111,11 +113,8 @@ def internals(self, new_internals):
         self._internals = new_internals
 
     @staticmethod
-    def output_shape(action_space):
-        ebn = action_space.entries_by_name
-        actor_outputs = {name: entry.shape[0] for name, entry in ebn.items()}
-        head_dict = {'critic': 1, **actor_outputs}
-        return head_dict
+    def output_space(action_space):
+        return {'critic': (1, ), **action_space}
 
     def seq_obs_to_pathways(self, obs, device):
         """
@@ -287,40 +286,16 @@ def act_on_host(
         log_probs_of_action = []
         entropies = []
 
-        seq_len, batch_size = terminal_masks.shape
-
         # if network is modular,
         # trunk can be sped up by combining batch & seq dim
         def get_results_generator():
-            if isinstance(self.network, ModularNetwork):
-                pathway_dict = self.gpu_preprocessor(obs, self.device)
-                # flatten obs
-                flat_obs = {
-                    k: v.view(-1, *v.shape[2:])
-                    for k, v in pathway_dict.items()
-                }
-                embeddings = self.network.trunk.forward(flat_obs)
-                # add back in seq dim
-                seq_embeddings = embeddings.view(
-                    seq_len, batch_size, embeddings.shape[-1]
-                )
-
-                def get_results(seq_ind, internals):
-                    embedding = seq_embeddings[seq_ind]
-                    pre_result, internals = self.network.body.forward(
-                        embedding, internals
-                    )
-                    return self.network.head.forward(pre_result, internals)
-
-                return get_results
-            else:
-                obs_on_device = self.seq_obs_to_pathways(obs, self.device)
+            obs_on_device = self.seq_obs_to_pathways(obs, self.device)
 
-                def get_results(seq_ind, internals):
-                    obs_of_seq_ind = obs_on_device[seq_ind]
-                    return self.network(obs_of_seq_ind, internals)
+            def get_results(seq_ind, internals):
+                obs_of_seq_ind = obs_on_device[seq_ind]
+                return self.network(obs_of_seq_ind, internals)
 
-                return get_results
+            return get_results
 
         result_fn = get_results_generator()
         for seq_ind in range(terminal_masks.shape[0]):
 
@@ -55,7 +55,7 @@ def parse_args():
         exit(call(['python', '-m', 'adept.scripts.local'] + argv, env=env))
     elif args['<command>'] == 'towered':
         nb_mpi_proc = input('Enter number of GPU workers [default: 2]\n')
-        nb_mpi_proc = 2 if not nb_mpi_proc else nb_mpi_proc
+        nb_mpi_proc = 2 if not nb_mpi_proc else int(nb_mpi_proc)
         exit(call([
             'mpiexec',
             '-n',
@@ -66,7 +66,7 @@ def parse_args():
         ] + argv, env=env))
     elif args['<command>'] == 'impala':
         nb_mpi_proc = input('Enter number of GPU workers [default: 2]\n')
-        nb_mpi_proc = 2 if not nb_mpi_proc else nb_mpi_proc
+        nb_mpi_proc = 2 if not nb_mpi_proc else int(nb_mpi_proc)
         exit(call([
               'mpiexec',
               '-n',
 
@@ -12,45 +12,62 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
-from collections.__init__ import namedtuple
-
-import numpy as np
 from gym import spaces
 
-Space = namedtuple('Space', ['shape', 'low', 'high', 'dtype'])
-
 
-class Spaces:
+class Space(dict):
     def __init__(self, entries_by_name):
-        self.entries_by_name = entries_by_name
-        self.names_by_rank = {1: [], 2: [], 3: [], 4: []}
-        for name, entry in entries_by_name.items():
-            self.names_by_rank[len(entry.shape)].append(name)
+        super(Space, self).__init__(entries_by_name)
 
     @classmethod
     def from_gym(cls, gym_space):
-        entries_by_name = Spaces._detect_gym_spaces(gym_space)
+        entries_by_name = Space._detect_gym_spaces(gym_space)
         return cls(entries_by_name)
 
     @staticmethod
-    def _detect_gym_spaces(space):
-        if isinstance(space, spaces.Discrete):
-            return {'Discrete': Space([space.n], 0, 1, np.float32)}
-        elif isinstance(space, spaces.MultiDiscrete):
+    def _detect_gym_spaces(gym_space):
+        if isinstance(gym_space, spaces.Discrete):
+            return {'Discrete': (gym_space.n,)}
+        elif isinstance(gym_space, spaces.MultiDiscrete):
             raise NotImplementedError
-        elif isinstance(space, spaces.MultiBinary):
-            return {'MultiBinary': Space([space.n], 0, 1, space.dtype)}
-        elif isinstance(space, spaces.Box):
+        elif isinstance(gym_space, spaces.MultiBinary):
+            return {'MultiBinary': (gym_space.n,)}
+        elif isinstance(gym_space, spaces.Box):
             return {
-                'Box': Space(space.shape, 0., 255., space.dtype)
-            }  # TODO, is it okay to hardcode 0, 255
-        elif isinstance(space, spaces.Dict):
+                'Box': gym_space.shape
+            }
+        elif isinstance(gym_space, spaces.Dict):
             return {
-                name: list(Spaces._detect_gym_spaces(s).values())[0]
-                for name, s in space.spaces.items()
+                name: list(Space._detect_gym_spaces(s).values())[0]
+                for name, s in gym_space.spaces.items()
             }
-        elif isinstance(space, spaces.Tuple):
+        elif isinstance(gym_space, spaces.Tuple):
             return {
-                idx: list(Spaces._detect_gym_spaces(s).values())[0]
-                for idx, s in enumerate(space.spaces)
+                idx: list(Space._detect_gym_spaces(s).values())[0]
+                for idx, s in enumerate(gym_space.spaces)
             }
+
+    @staticmethod
+    def dtypes_from_gym(gym_space):  # returns a dict: entry name -> dtype
+        if isinstance(gym_space, spaces.Discrete):
+            return {'Discrete': gym_space.dtype}
+        elif isinstance(gym_space, spaces.MultiDiscrete):
+            raise NotImplementedError
+        elif isinstance(gym_space, spaces.MultiBinary):
+            return {'MultiBinary': gym_space.dtype}
+        elif isinstance(gym_space, spaces.Box):
+            return {
+                'Box': gym_space.dtype
+            }
+        elif isinstance(gym_space, spaces.Dict):
+            return {  # flat: one dtype per name, as _detect_gym_spaces does
+                name: list(Space.dtypes_from_gym(s).values())[0]
+                for name, s in gym_space.spaces.items()
+            }
+        elif isinstance(gym_space, spaces.Tuple):
+            return {  # flat: one dtype per index, as _detect_gym_spaces does
+                idx: list(Space.dtypes_from_gym(s).values())[0]
+                for idx, s in enumerate(gym_space.spaces)
+            }
+        else:
+            raise NotImplementedError