Skip to content

Always reset when agent is done #3222

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jan 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions UnitySDK/Assets/ML-Agents/Editor/AgentEditor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,6 @@ public override void OnInspectorGUI()
maxSteps,
new GUIContent(
"Max Step", "The per-agent maximum number of steps."));
EditorGUILayout.PropertyField(
isResetOnDone,
new GUIContent(
"Reset On Done",
"If checked, the agent will reset on done. Else, AgentOnDone() will be called."));
EditorGUILayout.PropertyField(
isOdd,
new GUIContent(
Expand Down
74 changes: 1 addition & 73 deletions UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ public class TestAgent : Agent
public int collectObservationsCalls;
public int agentActionCalls;
public int agentResetCalls;
public int agentOnDoneCalls;
public override void InitializeAgent()
{
initializeAgentCalls += 1;
Expand Down Expand Up @@ -41,11 +40,6 @@ public override void AgentReset()
agentResetCalls += 1;
}

// Test hook: tallies one more AgentOnDone invocation in agentOnDoneCalls
// so tests can assert how many times the callback ran.
public override void AgentOnDone()
{
    agentOnDoneCalls++;
}

public override float[] Heuristic()
{
return new float[0];
Expand Down Expand Up @@ -460,6 +454,7 @@ public void TestAgent()
[TestFixture]
public class EditModeTestMiscellaneous
{

[SetUp]
public void SetUp()
{
Expand All @@ -469,73 +464,6 @@ public void SetUp()
}
}

// Verifies that agents configured with resetOnDone = false stop counting
// steps once they are done and receive exactly one AgentOnDone call.
[Test]
public void TestResetOnDone()
{
    var firstGo = new GameObject("TestAgent");
    firstGo.AddComponent<TestAgent>();
    var agent1 = firstGo.GetComponent<TestAgent>();
    var secondGo = new GameObject("TestAgent");
    secondGo.AddComponent<TestAgent>();
    var agent2 = secondGo.GetComponent<TestAgent>();

    var academy = Academy.Instance;

    // OnEnableHelper is non-public, so it has to be looked up through reflection.
    var enableHelper = typeof(Agent).GetMethod(
        "OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);

    // agent1 is not on-demand: it takes an action at every step and requests
    // a decision at every step. It is also capped at 20 steps and, like agent2,
    // does not reset when done.
    agent1.agentParameters = new AgentParameters
    {
        onDemandDecision = false,
        numberOfActionsBetweenDecisions = 1,
        maxStep = 20,
        resetOnDone = false
    };
    // agent2 requests decisions only when RequestDecision is called.
    agent2.agentParameters = new AgentParameters
    {
        onDemandDecision = true,
        resetOnDone = false
    };

    // agent2 is enabled before agent1, as in the original ordering.
    enableHelper?.Invoke(agent2, new object[0]);
    enableHelper?.Invoke(agent1, new object[0]);

    var expectedDoneCalls1 = 0;
    var expectedDoneCalls2 = 0;
    var expectedSteps1 = 0;
    var expectedSteps2 = 0;

    for (var step = 0; step < 50; step++)
    {
        Assert.AreEqual(step, academy.GetTotalStepCount());

        Assert.AreEqual(expectedSteps1, agent1.GetStepCount());
        Assert.AreEqual(expectedSteps2, agent2.GetStepCount());
        Assert.AreEqual(expectedDoneCalls1, agent1.agentOnDoneCalls);
        Assert.AreEqual(expectedDoneCalls2, agent2.agentOnDoneCalls);

        // We request a decision at each step.
        agent2.RequestDecision();

        // An agent only keeps accumulating steps while it has not been done yet.
        if (expectedDoneCalls1 == 0)
        {
            expectedSteps1++;
        }
        if (expectedDoneCalls2 == 0)
        {
            expectedSteps2++;
        }

        // Presumably the point at which agent1 has exhausted its maxStep of 20.
        if (step > 2 && step % 21 == 0)
        {
            expectedDoneCalls1 = 1;
        }

        // agent2 is terminated explicitly at step 31.
        if (step == 31)
        {
            expectedDoneCalls2 = 1;
            agent2.Done();
        }

        academy.EnvironmentStep();
    }
}

[Test]
public void TestCumulativeReward()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,6 @@ public override float[] Heuristic()
return new float[] { 0 };
}

// No-op override: this agent performs no additional work in AgentOnDone.
public override void AgentOnDone()
{
}

public void FixedUpdate()
{
WaitTimeInference();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,6 @@ public override void AgentReset()
SetResetParameters();
}

// No-op override: this agent performs no additional work in AgentOnDone.
public override void AgentOnDone()
{
}

void FixedUpdate()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -271,10 +271,6 @@ void OnCollisionEnter(Collision collision)
}
}

// No-op override: this agent performs no additional work in AgentOnDone.
public override void AgentOnDone()
{
}

public void SetLaserLengths()
{
m_LaserLength = Academy.Instance.FloatProperties.GetPropertyWithDefault("laser_length", 1.0f);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,4 @@ void OnCollisionEnter(Collision collision)
Done();
}
}

// No-op override: this agent performs no additional work in AgentOnDone.
public override void AgentOnDone()
{
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,4 @@ public override void AgentAction(float[] vectorAction)
// No-op override: this agent has no state of its own to restore in AgentReset.
public override void AgentReset()
{
}

// No-op override: this agent performs no additional work in AgentOnDone.
public override void AgentOnDone()
{
}
}
117 changes: 18 additions & 99 deletions UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,16 +77,6 @@ public class AgentParameters
/// </remarks>
public int maxStep;

/// <summary>
/// Determines the behaviour of the agent when done.
/// </summary>
/// <remarks>
/// If true, the agent will reset when done and start a new episode.
/// Otherwise, the agent will remain done and its behavior will be
/// dictated by the AgentOnDone method.
/// </remarks>
public bool resetOnDone = true;

/// <summary>
/// Whether to enable On Demand Decisions or make a decision at
/// every step.
Expand Down Expand Up @@ -207,10 +197,6 @@ public AgentInfo Info
/// done has not been communicated (required for On Demand Decisions).
bool m_HasAlreadyReset;

/// Flag to signify that an agent is done and should not reset until
/// the fact that it is done has been communicated.
bool m_Terminate;

/// Unique identifier each agent receives at initialization. It is used
/// to separate between different agents in the environment.
int m_Id;
Expand Down Expand Up @@ -281,9 +267,18 @@ void OnDisable()
Academy.Instance.AgentAct -= AgentStep;
Academy.Instance.AgentForceReset -= _AgentReset;
}
NotifyAgentDone();
m_Brain?.Dispose();
}

/// <summary>
/// Flags the agent's info as done and issues one last decision request
/// so that Python knows the Agent is disabled.
/// </summary>
void NotifyAgentDone()
{
    m_Info.done = true;
    // Final decision request; the callback is an empty lambda that ignores
    // whatever it is handed. Nothing is sent when there is no brain.
    m_Brain?.RequestDecision(m_Info, sensors, _ => { });
}

/// <summary>
/// Updates the Model for the agent. Any model currently assigned to the
/// agent will be replaced with the provided one. If the arguments are
Expand Down Expand Up @@ -573,51 +568,6 @@ void UpdateSensors()
}
}

/// <summary>
/// Produces one <see cref="Observation"/> per sensor and appends it to the observations list.
/// Sensors without compression are written through the adapter into consecutive segments of
/// the buffer; sensors with compression contribute their compressed observation directly.
/// NOTE: At the moment, this is only called during training or when using a DemonstrationRecorder;
/// during inference the Sensors are used to write directly to the Tensor data. This will likely change in the
/// future to be controlled by the type of brain being used.
/// </summary>
/// <param name="sensors"> List of ISensors that will be used to generate the data.</param>
/// <param name="buffer"> A float array that will be used as buffer when generating the observations. Must
/// be at least the same length as the total number of uncompressed floats in the observations</param>
/// <param name="adapter"> The WriteAdapter that will be used to write the ISensor data to the observations</param>
/// <param name="observations"> A list of observations outputs. This argument will be modified by this method.</param>
public static void GenerateSensorData(List<ISensor> sensors, float[] buffer, WriteAdapter adapter, List<Observation> observations)
{
    // Position in buffer at which the next uncompressed observation begins.
    var offset = 0;

    for (var index = 0; index < sensors.Count; index++)
    {
        var current = sensors[index];

        if (current.GetCompressionType() != SensorCompressionType.None)
        {
            // Compressed sensors bypass the float buffer entirely.
            observations.Add(new Observation
            {
                CompressedData = current.GetCompressedObservation(),
                Shape = current.GetObservationShape(),
                CompressionType = current.GetCompressionType()
            });
            continue;
        }

        // TODO handle in communicator code instead
        adapter.SetTarget(buffer, current.GetObservationShape(), offset);
        var written = current.Write(adapter);

        // The observation refers to the slice of buffer just written; it does not copy it.
        observations.Add(new Observation
        {
            FloatData = new ArraySegment<float>(buffer, offset, written),
            Shape = current.GetObservationShape(),
            CompressionType = current.GetCompressionType()
        });
        offset += written;
    }
}

/// <summary>
/// Collects the (vector, visual) observations of the agent.
/// The agent observation describes the current environment from the
Expand Down Expand Up @@ -791,15 +741,6 @@ public virtual void AgentAction(float[] vectorAction)
{
}

/// <summary>
/// Specifies the agent behavior when done and
/// <see cref="AgentParameters.resetOnDone"/> is false. This method can be
/// used to remove the agent from the scene.
/// </summary>
/// <remarks>
/// The base implementation is empty; subclasses override it to add behavior.
/// NOTE(review): in the visible AgentStep code this is invoked right after
/// OnDisable() once a termination is pending; confirm there are no other callers.
/// </remarks>
public virtual void AgentOnDone()
{
}

/// <summary>
/// Specifies the agent behavior when being reset, which can be due to
/// the agent or Academy being done (i.e. completion of local or global
Expand Down Expand Up @@ -889,29 +830,21 @@ void ResetIfDone()
// request for a decision and an action
if (IsDone())
{
if (agentParameters.resetOnDone)
if (agentParameters.onDemandDecision)
{
if (agentParameters.onDemandDecision)
{
if (!m_HasAlreadyReset)
{
// If event based, the agent can reset as soon
// as it is done
_AgentReset();
m_HasAlreadyReset = true;
}
}
else if (m_RequestDecision)
if (!m_HasAlreadyReset)
{
// If not event based, the agent must wait to request a
// decision before resetting to keep multiple agents in sync.
// If event based, the agent can reset as soon
// as it is done
_AgentReset();
m_HasAlreadyReset = true;
}
}
else
else if (m_RequestDecision)
{
m_Terminate = true;
RequestDecision();
// If not event based, the agent must wait to request a
// decision before resetting to keep multiple agents in sync.
_AgentReset();
}
}
}
Expand All @@ -936,20 +869,6 @@ void SendInfo()
/// Used by the brain to make the agent perform a step.
void AgentStep()
{
if (m_Terminate)
{
m_Terminate = false;
ResetReward();
m_Done = false;
m_MaxStepReached = false;
m_RequestDecision = false;
m_RequestAction = false;

m_HasAlreadyReset = false;
OnDisable();
AgentOnDone();
}

if ((m_RequestAction) && (m_Brain != null))
{
m_RequestAction = false;
Expand Down
Loading