Release 2 cherry pick #3971

Merged · 5 commits · May 15, 2020

1 change: 1 addition & 0 deletions com.unity.ml-agents/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to

## [1.0.1-preview] - 2020-05-19
### Bug Fixes
- An issue was fixed where using `--initialize-from` would resume from the past step count. (#3962)
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)

20 changes: 18 additions & 2 deletions com.unity.ml-agents/Editor/BrainParametersDrawer.cs
@@ -124,7 +124,14 @@ static void DrawVectorAction(Rect position, SerializedProperty property)
static void DrawContinuousVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
vecActionSize.arraySize = 1;

// This check is here due to:
// https://fogbugz.unity3d.com/f/cases/1246524/
// If this case has been resolved, please remove this if condition.
if (vecActionSize.arraySize != 1)
{
vecActionSize.arraySize = 1;
}
var continuousActionSize =
vecActionSize.GetArrayElementAtIndex(0);
EditorGUI.PropertyField(
@@ -142,8 +149,17 @@ static void DrawContinuousVectorAction(Rect position, SerializedProperty propert
static void DrawDiscreteVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
vecActionSize.arraySize = EditorGUI.IntField(
var newSize = EditorGUI.IntField(
position, "Branches Size", vecActionSize.arraySize);

// This check is here due to:
// https://fogbugz.unity3d.com/f/cases/1246524/
// If this case has been resolved, please remove this if condition.
if (newSize != vecActionSize.arraySize)
{
vecActionSize.arraySize = newSize;
}

position.y += k_LineHeight;
position.x += 20;
position.width -= 20;
18 changes: 11 additions & 7 deletions com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
@@ -26,10 +26,11 @@ public static AgentInfoActionPairProto ToInfoActionPairProto(this AgentInfo ai)
{
var agentInfoProto = ai.ToAgentInfoProto();

var agentActionProto = new AgentActionProto
var agentActionProto = new AgentActionProto();
if(ai.storedVectorActions != null)
{
VectorActions = { ai.storedVectorActions }
};
agentActionProto.VectorActions.AddRange(ai.storedVectorActions);
}

return new AgentInfoActionPairProto
{
@@ -95,12 +96,14 @@ public static BrainParametersProto ToProto(this BrainParameters bp, string name,
var brainParametersProto = new BrainParametersProto
{
VectorActionSize = { bp.VectorActionSize },
VectorActionSpaceType =
(SpaceTypeProto)bp.VectorActionSpaceType,
VectorActionSpaceType = (SpaceTypeProto) bp.VectorActionSpaceType,
BrainName = name,
IsTraining = isTraining
};
brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
if(bp.VectorActionDescriptions != null)
{
brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
}
return brainParametersProto;
}

@@ -128,13 +131,14 @@ public static BrainParameters ToBrainParameters(this BrainParametersProto bpp)
/// </summary>
public static DemonstrationMetaProto ToProto(this DemonstrationMetaData dm)
{
var demonstrationName = dm.demonstrationName ?? "";
var demoProto = new DemonstrationMetaProto
{
ApiVersion = DemonstrationMetaData.ApiVersion,
MeanReward = dm.meanReward,
NumberSteps = dm.numberSteps,
NumberEpisodes = dm.numberEpisodes,
DemonstrationName = dm.demonstrationName
DemonstrationName = demonstrationName
};
return demoProto;
}
@@ -0,0 +1,37 @@
using NUnit.Framework;
using UnityEngine;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Demonstrations;
using Unity.MLAgents.Sensors;

namespace Unity.MLAgents.Tests
{
[TestFixture]
public class GrpcExtensionsTests
{
[Test]
public void TestDefaultBrainParametersToProto()
{
// Should be able to convert a default instance to proto.
var brain = new BrainParameters();
brain.ToProto("foo", false);
}

[Test]
public void TestDefaultAgentInfoToProto()
{
// Should be able to convert a default instance to proto.
var agentInfo = new AgentInfo();
agentInfo.ToInfoActionPairProto();
agentInfo.ToAgentInfoProto();
}

[Test]
public void TestDefaultDemonstrationMetaDataToProto()
{
// Should be able to convert a default instance to proto.
var demoMetaData = new DemonstrationMetaData();
demoMetaData.ToProto();
}
}
}
13 changes: 8 additions & 5 deletions com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
@@ -13,8 +13,10 @@ internal class TestPolicy : IPolicy
{
public Action OnRequestDecision;
ObservationWriter m_ObsWriter = new ObservationWriter();
public void RequestDecision(AgentInfo info, List<ISensor> sensors) {
foreach(var sensor in sensors){
public void RequestDecision(AgentInfo info, List<ISensor> sensors)
{
foreach (var sensor in sensors)
{
sensor.GetObservationProto(m_ObsWriter);
}
OnRequestDecision?.Invoke();
@@ -517,8 +519,10 @@ public void AssertStackingReset()
agent1.SetPolicy(policy);

StackingSensor sensor = null;
foreach(ISensor s in agent1.sensors){
if (s is StackingSensor){
foreach (ISensor s in agent1.sensors)
{
if (s is StackingSensor)
{
sensor = s as StackingSensor;
}
}
@@ -529,7 +533,6 @@ public void AssertStackingReset()
{
agent1.RequestDecision();
aca.EnvironmentStep();

}

policy.OnRequestDecision = () => SensorTestHelper.CompareObservation(sensor, new[] {18f, 19f, 21f});
4 changes: 2 additions & 2 deletions docs/Training-ML-Agents.md
@@ -445,8 +445,8 @@ Below is a list of included `sampler-type` as part of the toolkit.
`interval_2_max`], ...]
- **sub-arguments** - `intervals`

The implementation of the samplers can be found at
`ml-agents-envs/mlagents_envs/sampler_class.py`.
The implementation of the samplers can be found in the
[sampler_class.py file](../ml-agents/mlagents/trainers/sampler_class.py).

#### Defining a New Sampler Type

6 changes: 2 additions & 4 deletions docs/Using-Tensorboard.md
@@ -21,10 +21,8 @@ session running on port 6006 a new session can be launched on an open port using
the --port option.

**Note:** If you don't assign a `run-id` identifier, `mlagents-learn` uses the
default string, "ppo". All the statistics will be saved to the same sub-folder
and displayed as one session in TensorBoard. After a few runs, the displays can
become difficult to interpret in this situation. You can delete the folders
under the `summaries` directory to clear out old statistics.
default string, "ppo". You can delete the folders under the `results` directory
to clear out old statistics.

On the left side of the TensorBoard window, you can select which of the training
runs you want to display. You can select multiple run-ids to compare statistics.
13 changes: 5 additions & 8 deletions gym-unity/gym_unity/envs/__init__.py
@@ -53,7 +53,6 @@ def __init__(
self._env.step()

self.visual_obs = None
self._n_agents = -1

# Save the step result from the last time all Agents requested decisions.
self._previous_decision_step: DecisionSteps = None
@@ -172,6 +171,7 @@ def step(self, action: List[Any]) -> GymStepResult:

self._env.step()
decision_step, terminal_step = self._env.get_steps(self.name)
self._check_agents(max(len(decision_step), len(terminal_step)))
if len(terminal_step) != 0:
# The agent is done
self.game_over = True
@@ -264,10 +264,11 @@ def seed(self, seed: Any = None) -> None:
logger.warning("Could not seed environment %s", self.name)
return

def _check_agents(self, n_agents: int) -> None:
if self._n_agents > 1:
@staticmethod
def _check_agents(n_agents: int) -> None:
if n_agents > 1:
raise UnityGymException(
"There can only be one Agent in the environment but {n_agents} were detected."
f"There can only be one Agent in the environment but {n_agents} were detected."
)

@property
@@ -290,10 +291,6 @@ def action_space(self):
def observation_space(self):
return self._observation_space

@property
def number_agents(self):
return self._n_agents


class ActionFlattener:
"""
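One of the gym wrapper fixes above is easy to miss: the original exception message was a plain string, so the `{n_agents}` placeholder was printed literally rather than interpolated. A minimal, standalone sketch of the difference (illustrative only, not part of the diff):

```python
n_agents = 3

# Plain string (the old code): the placeholder is emitted verbatim.
plain = "There can only be one Agent in the environment but {n_agents} were detected."

# f-string (the fixed code): the value is interpolated.
formatted = f"There can only be one Agent in the environment but {n_agents} were detected."

print(plain)      # ... but {n_agents} were detected.
print(formatted)  # ... but 3 were detected.
```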
6 changes: 3 additions & 3 deletions ml-agents-envs/mlagents_envs/environment.py
@@ -415,9 +415,9 @@ def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:
expected_shape = (len(self._env_state[behavior_name][0]), spec.action_size)
if action.shape != expected_shape:
raise UnityActionException(
"The behavior {0} needs an input of dimension {1} but received input of dimension {2}".format(
behavior_name, expected_shape, action.shape
)
"The behavior {0} needs an input of dimension {1} for "
"(<number of agents>, <action size>) but received input of "
"dimension {2}".format(behavior_name, expected_shape, action.shape)
)
if action.dtype != expected_type:
action = action.astype(expected_type)
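The reworded exception above spells out the shape `set_actions` expects: one row per agent, one column per action dimension. A hedged usage sketch — `env` and `behavior_name` are assumed to come from an already-connected `UnityEnvironment`, and the action size is illustrative:

```python
import numpy as np

# Assumed setup (not shown): env is a connected, reset UnityEnvironment and
# behavior_name names one of its behaviors.
decision_steps, terminal_steps = env.get_steps(behavior_name)

num_agents = len(decision_steps)
action_size = 2  # illustrative; in practice read from the behavior's action spec

# Expected shape is (<number of agents>, <action size>); anything else now
# raises the clearer UnityActionException message above.
actions = np.zeros((num_agents, action_size), dtype=np.float32)
env.set_actions(behavior_name, actions)
env.step()
```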
1 change: 1 addition & 0 deletions ml-agents/mlagents/trainers/policy/tf_policy.py
@@ -137,6 +137,7 @@ def _load_graph(self, model_path: str, reset_global_steps: bool = False) -> None
)
)
if reset_global_steps:
self._set_step(0)
logger.info(
"Starting training from step 0 and saving to {}.".format(
self.model_path
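This one-line addition is the fix the CHANGELOG entry refers to: the `--initialize-from` path loads the checkpoint with `reset_global_steps=True`, and the policy now zeroes its step counter instead of inheriting the old run's count. A hedged sketch of the intended behavior — `_load_graph` and `get_current_step` are taken from this diff and the tests below; the call sequence and other names are illustrative:

```python
# Resuming the same run keeps the saved step count.
policy._load_graph(model_path, reset_global_steps=False)
assert policy.get_current_step() == previously_saved_step

# Initializing a new run from that checkpoint (the --initialize-from case)
# now restarts the counter at zero.
policy._load_graph(model_path, reset_global_steps=True)
assert policy.get_current_step() == 0
```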
4 changes: 4 additions & 0 deletions ml-agents/mlagents/trainers/tests/test_nn_policy.py
@@ -86,6 +86,7 @@ def test_load_save(dummy_config, tmp_path):
trainer_params["model_path"] = path1
policy = create_policy_mock(trainer_params)
policy.initialize_or_load()
policy._set_step(2000)
policy.save_model(2000)

assert len(os.listdir(tmp_path)) > 0
@@ -94,6 +95,7 @@ def test_load_save(dummy_config, tmp_path):
policy2 = create_policy_mock(trainer_params, load=True, seed=1)
policy2.initialize_or_load()
_compare_two_policies(policy, policy2)
assert policy2.get_current_step() == 2000

# Try initialize from path 1
trainer_params["model_path"] = path2
@@ -102,6 +104,8 @@ def test_load_save(dummy_config, tmp_path):
policy3.initialize_or_load()

_compare_two_policies(policy2, policy3)
# Assert that the steps are 0.
assert policy3.get_current_step() == 0


def _compare_two_policies(policy1: NNPolicy, policy2: NNPolicy) -> None: