Unity-Technologies · vincentpierre · Jan 16, 2020 · Jan 14, 2020 · Jan 15, 2020 · Jan 15, 2020
diff --git a/UnitySDK/Assets/ML-Agents/Editor/AgentEditor.cs b/UnitySDK/Assets/ML-Agents/Editor/AgentEditor.cs
@@ -30,11 +30,6 @@ public override void OnInspectorGUI()
                 maxSteps,
                 new GUIContent(
                     "Max Step", "The per-agent maximum number of steps."));
-            EditorGUILayout.PropertyField(
-                isResetOnDone,
-                new GUIContent(
-                    "Reset On Done",
-                    "If checked, the agent will reset on done. Else, AgentOnDone() will be called."));
             EditorGUILayout.PropertyField(
                 isOdd,
                 new GUIContent(

diff --git a/UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs b/UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
@@ -11,7 +11,6 @@ public class TestAgent : Agent
         public int collectObservationsCalls;
         public int agentActionCalls;
         public int agentResetCalls;
-        public int agentOnDoneCalls;
         public override void InitializeAgent()
         {
             initializeAgentCalls += 1;
@@ -41,11 +40,6 @@ public override void AgentReset()
             agentResetCalls += 1;
         }
 
-        public override void AgentOnDone()
-        {
-            agentOnDoneCalls += 1;
-        }
-
         public override float[] Heuristic()
         {
             return new float[0];
@@ -460,6 +454,7 @@ public void TestAgent()
     [TestFixture]
     public class EditModeTestMiscellaneous
     {
+
         [SetUp]
         public void SetUp()
         {
@@ -469,73 +464,6 @@ public void SetUp()
             }
         }
 
-        [Test]
-        public void TestResetOnDone()
-        {
-            var agentGo1 = new GameObject("TestAgent");
-            agentGo1.AddComponent<TestAgent>();
-            var agent1 = agentGo1.GetComponent<TestAgent>();
-            var agentGo2 = new GameObject("TestAgent");
-            agentGo2.AddComponent<TestAgent>();
-            var agent2 = agentGo2.GetComponent<TestAgent>();
-
-            var aca = Academy.Instance;
-
-            var agentEnableMethod = typeof(Agent).GetMethod(
-                "OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);
-
-            agent1.agentParameters = new AgentParameters();
-            agent2.agentParameters = new AgentParameters();
-            // We use event based so the agent will now try to send anything to the brain
-            agent1.agentParameters.onDemandDecision = false;
-            // agent1 will take an action at every step and request a decision every steps
-            agent1.agentParameters.numberOfActionsBetweenDecisions = 1;
-            // agent2 will request decisions only when RequestDecision is called
-            agent2.agentParameters.onDemandDecision = true;
-            agent1.agentParameters.maxStep = 20;
-            //Here we specify that the agent does not reset when done
-            agent1.agentParameters.resetOnDone = false;
-            agent2.agentParameters.resetOnDone = false;
-
-            agentEnableMethod?.Invoke(agent2, new object[] { });
-            agentEnableMethod?.Invoke(agent1, new object[] { });
-
-            var agent1ResetOnDone = 0;
-            var agent2ResetOnDone = 0;
-            var agent1StepSinceReset = 0;
-            var agent2StepSinceReset = 0;
-
-            for (var i = 0; i < 50; i++)
-            {
-                Assert.AreEqual(i, aca.GetTotalStepCount());
-
-                Assert.AreEqual(agent1StepSinceReset, agent1.GetStepCount());
-                Assert.AreEqual(agent2StepSinceReset, agent2.GetStepCount());
-                Assert.AreEqual(agent1ResetOnDone, agent1.agentOnDoneCalls);
-                Assert.AreEqual(agent2ResetOnDone, agent2.agentOnDoneCalls);
-
-                // we request a decision at each step
-                agent2.RequestDecision();
-                if (agent1ResetOnDone == 0)
-                    agent1StepSinceReset += 1;
-                if (agent2ResetOnDone == 0)
-                    agent2StepSinceReset += 1;
-
-                if ((i > 2) && (i % 21 == 0))
-                {
-                    agent1ResetOnDone = 1;
-                }
-
-                if (i == 31)
-                {
-                    agent2ResetOnDone = 1;
-                    agent2.Done();
-                }
-
-                aca.EnvironmentStep();
-            }
-        }
-
         [Test]
         public void TestCumulativeReward()
         {

diff --git a/UnitySDK/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs b/UnitySDK/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs
@@ -84,10 +84,6 @@ public override float[] Heuristic()
         return new float[] { 0 };
     }
 
-    public override void AgentOnDone()
-    {
-    }
-
     public void FixedUpdate()
     {
         WaitTimeInference();

diff --git a/UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs b/UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs
@@ -67,9 +67,6 @@ public override void AgentReset()
         SetResetParameters();
     }
 
-    public override void AgentOnDone()
-    {
-    }
 
     void FixedUpdate()
     {

diff --git a/UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs b/UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs
@@ -271,10 +271,6 @@ void OnCollisionEnter(Collision collision)
         }
     }
 
-    public override void AgentOnDone()
-    {
-    }
-
     public void SetLaserLengths()
     {
         m_LaserLength = Academy.Instance.FloatProperties.GetPropertyWithDefault("laser_length", 1.0f);

diff --git a/UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs b/UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs
@@ -121,8 +121,4 @@ void OnCollisionEnter(Collision collision)
             Done();
         }
     }
-
-    public override void AgentOnDone()
-    {
-    }
 }
diff --git a/UnitySDK/Assets/ML-Agents/Examples/Template/Scripts/TemplateAgent.cs b/UnitySDK/Assets/ML-Agents/Examples/Template/Scripts/TemplateAgent.cs
@@ -14,8 +14,4 @@ public override void AgentAction(float[] vectorAction)
     public override void AgentReset()
     {
     }
-
-    public override void AgentOnDone()
-    {
-    }
 }
diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs b/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
@@ -77,16 +77,6 @@ public class AgentParameters
         /// </remarks>
         public int maxStep;
 
-        /// <summary>
-        /// Determines the behaviour of the agent when done.
-        /// </summary>
-        /// <remarks>
-        /// If true, the agent will reset when done and start a new episode.
-        /// Otherwise, the agent will remain done and its behavior will be
-        /// dictated by the AgentOnDone method.
-        /// </remarks>
-        public bool resetOnDone = true;
-
         /// <summary>
         /// Whether to enable On Demand Decisions or make a decision at
         /// every step.
@@ -207,10 +197,6 @@ public AgentInfo Info
         /// done has not been communicated (required for On Demand Decisions).
         bool m_HasAlreadyReset;
 
-        /// Flag to signify that an agent is done and should not reset until
-        /// the fact that it is done has been communicated.
-        bool m_Terminate;
-
         /// Unique identifier each agent receives at initialization. It is used
         /// to separate between different agents in the environment.
         int m_Id;
@@ -281,9 +267,18 @@ void OnDisable()
                 Academy.Instance.AgentAct -= AgentStep;
                 Academy.Instance.AgentForceReset -= _AgentReset;
             }
+            NotifyAgentDone();
             m_Brain?.Dispose();
         }
 
+        void NotifyAgentDone()
+        {
+            m_Info.done = true;
+            // Final decision request, carrying the done flag set above. The action callback
+            // is a no-op; the request is only made so Python knows the Agent is disabled.
+            m_Brain?.RequestDecision(m_Info, sensors, (a) => { });
+        }
+
         /// <summary>
         /// Updates the Model for the agent. Any model currently assigned to the
         /// agent will be replaced with the provided one. If the arguments are
@@ -573,51 +568,6 @@ void UpdateSensors()
             }
         }
 
-        /// <summary>
-        /// Generate data for each sensor and store it in the observations input.
-        /// NOTE: At the moment, this is only called during training or when using a DemonstrationRecorder;
-        /// during inference the Sensors are used to write directly to the Tensor data. This will likely change in the
-        /// future to be controlled by the type of brain being used.
-        /// </summary>
-        /// <param name="sensors"> List of ISensors that will be used to generate the data.</param>
-        /// <param name="buffer"> A float array that will be used as buffer when generating the observations. Must
-        /// be at least the same length as the total number of uncompressed floats in the observations</param>
-        /// <param name="adapter"> The WriteAdapter that will be used to write the ISensor data to the observations</param>
-        /// <param name="observations"> A list of observations outputs. This argument will be modified by this method.</param>//
-        public static void GenerateSensorData(List<ISensor> sensors, float[] buffer, WriteAdapter adapter, List<Observation> observations)
-        {
-            int floatsWritten = 0;
-            // Generate data for all Sensors
-            for (var i = 0; i < sensors.Count; i++)
-            {
-                var sensor = sensors[i];
-                if (sensor.GetCompressionType() == SensorCompressionType.None)
-                {
-                    // TODO handle in communicator code instead
-                    adapter.SetTarget(buffer, sensor.GetObservationShape(), floatsWritten);
-                    var numFloats = sensor.Write(adapter);
-                    var floatObs = new Observation
-                    {
-                        FloatData = new ArraySegment<float>(buffer, floatsWritten, numFloats),
-                        Shape = sensor.GetObservationShape(),
-                        CompressionType = sensor.GetCompressionType()
-                    };
-                    observations.Add(floatObs);
-                    floatsWritten += numFloats;
-                }
-                else
-                {
-                    var compressedObs = new Observation
-                    {
-                        CompressedData = sensor.GetCompressedObservation(),
-                        Shape = sensor.GetObservationShape(),
-                        CompressionType = sensor.GetCompressionType()
-                    };
-                    observations.Add(compressedObs);
-                }
-            }
-        }
-
         /// <summary>
         /// Collects the (vector, visual) observations of the agent.
         /// The agent observation describes the current environment from the
@@ -791,15 +741,6 @@ public virtual void AgentAction(float[] vectorAction)
         {
         }
 
-        /// <summary>
-        /// Specifies the agent behavior when done and
-        /// <see cref="AgentParameters.resetOnDone"/> is false. This method can be
-        /// used to remove the agent from the scene.
-        /// </summary>
-        public virtual void AgentOnDone()
-        {
-        }
-
         /// <summary>
         /// Specifies the agent behavior when being reset, which can be due to
         /// the agent or Academy being done (i.e. completion of local or global
@@ -889,29 +830,21 @@ void ResetIfDone()
             // request for a decision and an action
             if (IsDone())
             {
-                if (agentParameters.resetOnDone)
+                if (agentParameters.onDemandDecision)
                 {
-                    if (agentParameters.onDemandDecision)
-                    {
-                        if (!m_HasAlreadyReset)
-                        {
-                            // If event based, the agent can reset as soon
-                            // as it is done
-                            _AgentReset();
-                            m_HasAlreadyReset = true;
-                        }
-                    }
-                    else if (m_RequestDecision)
+                    if (!m_HasAlreadyReset)
                     {
-                        // If not event based, the agent must wait to request a
-                        // decision before resetting to keep multiple agents in sync.
+                        // If event based, the agent can reset as soon
+                        // as it is done
                         _AgentReset();
+                        m_HasAlreadyReset = true;
                     }
                 }
-                else
+                else if (m_RequestDecision)
                 {
-                    m_Terminate = true;
-                    RequestDecision();
+                    // If not event based, the agent must wait to request a
+                    // decision before resetting to keep multiple agents in sync.
+                    _AgentReset();
                 }
             }
         }
@@ -936,20 +869,6 @@ void SendInfo()
         /// Used by the brain to make the agent perform a step.
         void AgentStep()
         {
-            if (m_Terminate)
-            {
-                m_Terminate = false;
-                ResetReward();
-                m_Done = false;
-                m_MaxStepReached = false;
-                m_RequestDecision = false;
-                m_RequestAction = false;
-
-                m_HasAlreadyReset = false;
-                OnDisable();
-                AgentOnDone();
-            }
-
             if ((m_RequestAction) && (m_Brain != null))
             {
                 m_RequestAction = false;
-Original file line number
+Diff line change
@@ Expand Up / @@ -67,9 +67,6 @@ public override void AgentReset() @@
             SetResetParameters();
         }
-        public override void AgentOnDone()
-        {
-        }
         void FixedUpdate()
         {
@@ Expand Down @@