
Commit eb2a4a1

Hotfix on Gym and the notebooks (#3496)
* Hotfix on Gym and the notebooks
* REMOVING THE PROTO GENERATION TESTS
* REMOVING THE PROTO GENERATION TESTS
* Update notebooks/getting-started-gym.ipynb (Co-Authored-By: Chris Elion <chris.elion@unity3d.com>)
* Trying to enforce nuget version
* Update notebooks/getting-started-gym.ipynb (Co-Authored-By: Chris Elion <chris.elion@unity3d.com>)
* Removing proto_generation
* Update notebooks/getting-started-gym.ipynb (Co-Authored-By: Chris Elion <chris.elion@unity3d.com>)
* Update docstring

Co-authored-by: Chris Elion <celion@gmail.com>
1 parent 298f844 · commit eb2a4a1

File tree: 4 files changed (+43, −35 lines)

.circleci/config.yml

Lines changed: 1 addition & 1 deletion
@@ -273,7 +273,7 @@ workflows:
           # Test python 3.7 with the newest supported versions
           pip_constraints: test_constraints_max_tf2_version.txt
       - markdown_link_check
-      - protobuf_generation_check
+      #- protobuf_generation_check
       - pre-commit
       - deploy:
           name: deploy ml-agents-envs

gym-unity/gym_unity/envs/__init__.py

Lines changed: 8 additions & 7 deletions
@@ -164,7 +164,7 @@ def reset(self) -> Union[List[np.ndarray], np.ndarray]:
         """Resets the state of the environment and returns an initial observation.
         In the case of multi-agent environments, this is a list.
         Returns: observation (object/list): the initial observation of the
-        space.
+        space.
         """
         step_result = self._step(True)
         n_agents = step_result.n_agents()
@@ -250,7 +250,7 @@ def _single_step(self, info: BatchedStepResult) -> GymSingleStepResult:
             default_observation = self._get_vector_obs(info)[0, :]
         else:
             raise UnityGymException(
-                "The Agent does not have vector observations and the environment was not setup"
+                "The Agent does not have vector observations and the environment was not setup "
                 + "to use visual observations."
             )

@@ -345,12 +345,12 @@ def seed(self, seed: Any = None) -> None:
     def _check_agents(self, n_agents: int) -> None:
         if not self._multiagent and n_agents > 1:
             raise UnityGymException(
-                "The environment was launched as a single-agent environment, however"
+                "The environment was launched as a single-agent environment, however "
                 "there is more than one agent in the scene."
             )
         elif self._multiagent and n_agents <= 1:
             raise UnityGymException(
-                "The environment was launched as a mutli-agent environment, however"
+                "The environment was launched as a mutli-agent environment, however "
                 "there is only one agent in the scene."
             )
         if self._n_agents == -1:
@@ -438,7 +438,7 @@ def _step(self, needs_reset: bool = False) -> BatchedStepResult:
         while info.n_agents() - sum(info.done) < self._n_agents:
             if not info.done.all():
                 raise UnityGymException(
-                    "The environment does not have the expected amount of agents."
+                    "The environment does not have the expected amount of agents. "
                     + "Some agents did not request decisions at the same time."
                 )
         for agent_id, reward in zip(info.agent_id, info.reward):
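The exception-message edits above all fix the same Python pitfall: adjacent or `+`-joined string literals are concatenated with no separator, so a message split across source lines needs an explicit trailing space. A minimal standalone sketch of the behavior, not the gym-unity code itself:

# Adjacent string fragments are joined verbatim, so the missing space
# glues "setup" to "to" in the raised message.
broken = (
    "The Agent does not have vector observations and the environment was not setup"
    + "to use visual observations."
)
fixed = (
    "The Agent does not have vector observations and the environment was not setup "
    + "to use visual observations."
)
assert "setupto" in broken
assert "setup to" in fixed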
@@ -543,15 +543,16 @@ def register_new_agent_id(self, agent_id: int) -> float:
     def get_id_permutation(self, agent_ids: List[int]) -> List[int]:
         """
         Get the permutation from new agent ids to the order that preserves the positions of previous agents.
-        The result is a list with each integer from 0 to len(agent_ids)-1 appearing exactly once.
+        The result is a list with each integer from 0 to len(_agent_id_to_gym_index)-1
+        appearing exactly once.
         """
         # Map the new agent ids to the their index
         new_agent_ids_to_index = {
             agent_id: idx for idx, agent_id in enumerate(agent_ids)
         }

         # Make the output list. We don't write to it sequentially, so start with dummy values.
-        new_permutation = [-1] * len(agent_ids)
+        new_permutation = [-1] * len(self._agent_id_to_gym_index)

         # For each agent ID, find the new index of the agent, and write it in the original index.
         for agent_id, original_index in self._agent_id_to_gym_index.items():
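This last hunk also fixes a sizing bug: the permutation has to cover every tracked gym index, not just the ids passed in. Below is a self-contained sketch of the corrected logic with the class state replaced by a plain dict argument; the loop body is inferred from the comment shown in the diff, so treat it as an illustration rather than the shipped implementation.

from typing import Dict, List

def get_id_permutation(
    agent_ids: List[int], agent_id_to_gym_index: Dict[int, int]
) -> List[int]:
    # Map each new agent id to its position in the incoming list.
    new_agent_ids_to_index = {
        agent_id: idx for idx, agent_id in enumerate(agent_ids)
    }
    # Size the output by the tracked index map (the fix), not by agent_ids.
    new_permutation = [-1] * len(agent_id_to_gym_index)
    # Write each previously seen agent's new position at its original gym index.
    for agent_id, original_index in agent_id_to_gym_index.items():
        new_permutation[original_index] = new_agent_ids_to_index[agent_id]
    return new_permutation

# Agents 10 and 11 held gym indices 0 and 1; they now arrive reordered.
print(get_id_permutation([11, 10], {10: 0, 11: 1}))  # [1, 0]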

notebooks/getting-started-gym.ipynb

Lines changed: 26 additions & 8 deletions
@@ -15,14 +15,33 @@
    "source": [
     "## Single-Agent Environments\n",
     "\n",
-    "The first five steps show how to use the `UnityEnv` wrapper with single-agent environments. See below step five for how to use with multi-agent environments."
+    "These steps show how to use the `UnityEnv` wrapper with single-agent environments. See section below for how to use with multi-agent environments."
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 1. Load dependencies\n",
+    "### 1. Set environment parameters\n",
+    "Be sure to set `env_name` to the name of the Unity environment file you want to launch.\n",
+    "\n",
+    "__Note__ : All the example environments have multiple agents by default. You can disable the duplicates in the inspector before building the executable."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "env_name = \"../envs/GridWorldSingle\" # Name of the Unity environment binary to launch"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Load dependencies\n",
     "\n",
     "The following loads the necessary dependencies and checks the Python version (at runtime). ML-Agents Toolkit (v0.3 onwards) requires Python 3."
    ]
@@ -53,8 +72,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 2. Start the environment\n",
-    "`UnityEnv` launches and begins communication with the environment when instantiated. We will be using the `GridWorld` environment. You will need to create an `envs` directory within the `/python` subfolder of the repository, and build the GridWorld environment to that directory. For more information on building Unity environments, see [here](../docs/Learning-Environment-Executable.md)."
+    "### 3. Start the environment\n",
+    "`UnityEnv` launches and begins communication with the environment when instantiated. We will be using the `GridWorld` environment. You will need to create an `envs` directory in the root directory of the repository, and build the GridWorld environment to that directory. For more information on building Unity environments, see [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Learning-Environment-Executable.md)."
    ]
   },
   {
@@ -63,7 +82,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "env_name = \"../envs/GridWorld\" # Name of the Unity environment binary to launch\n",
     "env = UnityEnv(env_name, worker_id=0, use_visual=True)\n",
     "\n",
     "# Examine environment parameters\n",
@@ -74,7 +92,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 3. Examine the observation and state spaces\n",
+    "### 4. Examine the observation and state spaces\n",
     "We can reset the environment to be provided with an initial observation of the environment."
    ]
   },
@@ -103,7 +121,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 4. Take random actions in the environment\n",
+    "### 5. Take random actions in the environment\n",
     "Once we restart an environment, we can step the environment forward and provide actions to all of the agents within the environment. Here we simply choose random actions using the `env.action_space.sample()` function.\n",
     "\n",
     "Once this cell is executed, 10 messages will be printed that detail how much reward will be accumulated for the next 10 episodes. The Unity environment will then pause, waiting for further signals telling it what to do next. Thus, not seeing any animation is expected when running this cell."
@@ -129,7 +147,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 5. Close the environment when finished\n",
+    "### 6. Close the environment when finished\n",
     "When we are finished using an environment, we can close it with the function below."
    ]
   },
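Condensed, the renumbered notebook now flows: set `env_name`, load dependencies, launch the wrapper, act, close. A rough end-to-end sketch of that flow as plain Python; the episode loop is illustrative rather than the notebook verbatim:

from gym_unity.envs import UnityEnv

env_name = "../envs/GridWorldSingle"  # Name of the Unity environment binary to launch

# UnityEnv launches and begins communication with the environment when instantiated.
env = UnityEnv(env_name, worker_id=0, use_visual=True)

# Step the environment with random actions until the episode ends.
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())

env.close()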

notebooks/getting-started.ipynb

Lines changed: 8 additions & 19 deletions
@@ -15,7 +15,7 @@
    "source": [
     "### 1. Set environment parameters\n",
     "\n",
-    "Be sure to set `env_name` to the name of the Unity environment file you want to launch. Ensure that the environment build is in `../envs`."
+    "Be sure to set `env_name` to the name of the Unity environment file you want to launch."
    ]
   },
   {
@@ -24,8 +24,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "env_name = \"../envs/GridWorld\" # Name of the Unity environment binary to launch\n",
-    "train_mode = True # Whether to run the environment in training or inference mode"
+    "env_name = \"../envs/GridWorld\" # Name of the Unity environment binary to launch"
    ]
   },
   {
@@ -77,7 +76,7 @@
    "outputs": [],
    "source": [
     "engine_configuration_channel = EngineConfigurationChannel()\n",
-    "env = UnityEnvironment(base_port = UnityEnvironment.DEFAULT_EDITOR_PORT, file_name=env_name, side_channels = [engine_configuration_channel])\n",
+    "env = UnityEnvironment(file_name=env_name, side_channels = [engine_configuration_channel])\n",
     "\n",
     "#Reset the environment\n",
     "env.reset()\n",
@@ -110,22 +109,19 @@
     "# Examine the number of observations per Agent\n",
     "print(\"Number of observations : \", len(group_spec.observation_shapes))\n",
     "\n",
-    "# Examine the state space for the first observation for all agents\n",
-    "print(\"Agent state looks like: \\n{}\".format(step_result.obs[0]))\n",
-    "\n",
-    "# Examine the state space for the first observation for the first agent\n",
-    "print(\"Agent state looks like: \\n{}\".format(step_result.obs[0][0]))\n",
-    "\n",
     "# Is there a visual observation ?\n",
     "vis_obs = any([len(shape) == 3 for shape in group_spec.observation_shapes])\n",
     "print(\"Is there a visual observation ?\", vis_obs)\n",
     "\n",
     "# Examine the visual observations\n",
     "if vis_obs:\n",
     "    vis_obs_index = next(i for i,v in enumerate(group_spec.observation_shapes) if len(v) == 3)\n",
-    "    print(\"Agent visual observation look like:\")\n",
+    "    print(\"Agent visual observation looks like:\")\n",
     "    obs = step_result.obs[vis_obs_index]\n",
-    "    plt.imshow(obs[0,:,:,:])\n"
+    "    plt.imshow(obs[0,:,:,:])\n",
+    "else:\n",
+    "    # Examine the state space for the first observation for the first agent\n",
+    "    print(\"First Agent observation looks like: \\n{}\".format(step_result.obs[0][0]))"
    ]
   },
   {
@@ -181,13 +177,6 @@
    "source": [
     "env.close()"
    ]
-   },
-   {
-    "cell_type": "code",
-    "execution_count": null,
-    "metadata": {},
-    "outputs": [],
-    "source": []
   }
  ],
  "metadata": {
