Skip to content

Commit 27f7156

Browse files
author
Chris Elion
authored
More misc hybrid action followup (#4777)
1 parent 30ed097 commit 27f7156

20 files changed

+105
-70
lines changed

com.unity.ml-agents/Runtime/Academy.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ public class Academy : IDisposable
8989
/// </item>
9090
/// <item>
9191
/// <term>1.3.0</term>
92-
/// <description>Support action spaces with both continuous and discrete actions.</description>
92+
/// <description>Support both continuous and discrete actions.</description>
9393
/// </item>
9494
/// </list>
9595
/// </remarks>
@@ -590,7 +590,7 @@ void EnvironmentReset()
590590
/// NNModel and the InferenceDevice as provided.
591591
/// </summary>
592592
/// <param name="model">The NNModel the ModelRunner must use.</param>
593-
/// <param name="actionSpec"> Description of the action spaces for the Agent.</param>
593+
/// <param name="actionSpec"> Description of the actions for the Agent.</param>
594594
/// <param name="inferenceDevice">
595595
/// The inference device (CPU or GPU) the ModelRunner will use.
596596
/// </param>

com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
namespace Unity.MLAgents.Actuators
77
{
88
/// <summary>
9-
/// Defines the structure of an Action Space to be used by the Actuator system.
9+
/// Defines the structure of the actions to be used by the Actuator system.
1010
/// </summary>
1111
[Serializable]
1212
public struct ActionSpec
@@ -15,9 +15,9 @@ public struct ActionSpec
1515
int m_NumContinuousActions;
1616

1717
/// <summary>
18-
/// An array of branch sizes for our action space.
18+
/// An array of branch sizes for discrete actions.
1919
///
20-
/// For an IActuator that uses a Discrete <see cref="SpaceType"/>, the number of
20+
/// For an IActuator that uses discrete actions, the number of
2121
/// branches is the Length of the Array and each index contains the branch size.
2222
/// The cumulative sum of the total number of discrete actions can be retrieved
2323
/// by the <see cref="SumOfDiscreteBranchSizes"/> property.
@@ -27,12 +27,12 @@ public struct ActionSpec
2727
public int[] BranchSizes;
2828

2929
/// <summary>
30-
/// The number of actions for a Continuous <see cref="SpaceType"/>.
30+
/// The number of continuous actions that an Agent can take.
3131
/// </summary>
3232
public int NumContinuousActions { get { return m_NumContinuousActions; } set { m_NumContinuousActions = value; } }
3333

3434
/// <summary>
35-
/// The number of branches for a Discrete <see cref="SpaceType"/>.
35+
/// The number of branches for discrete actions that an Agent can take.
3636
/// </summary>
3737
public int NumDiscreteActions { get { return BranchSizes == null ? 0 : BranchSizes.Length; } }
3838

@@ -57,12 +57,11 @@ public static ActionSpec MakeContinuous(int numActions)
5757
/// Creates a Discrete <see cref="ActionSpec"/> with the array of branch sizes that
5858
/// represents the action space.
5959
/// </summary>
60-
/// <param name="branchSizes">The array of branch sizes for the discrete action space. Each index
60+
/// <param name="branchSizes">The array of branch sizes for the discrete actions. Each index
6161
/// contains the number of actions available for that branch.</param>
6262
/// <returns>A Discrete ActionSpec initialized with the array of branch sizes.</returns>
6363
public static ActionSpec MakeDiscrete(params int[] branchSizes)
6464
{
65-
var numActions = branchSizes.Length;
6665
var actuatorSpace = new ActionSpec(0, branchSizes);
6766
return actuatorSpace;
6867
}

com.unity.ml-agents/Runtime/Actuators/ActuatorComponent.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ public abstract class ActuatorComponent : MonoBehaviour
1515
public abstract IActuator CreateActuator();
1616

1717
/// <summary>
18-
/// The specification of the Action space for this ActuatorComponent.
18+
/// The specification of the possible actions for this ActuatorComponent.
1919
/// This must produce the same results as the corresponding IActuator's ActionSpec.
2020
/// </summary>
2121
/// <seealso cref="ActionSpec"/>

com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ void ReadyActuatorsForExecution()
7373
}
7474

7575
/// <summary>
76-
/// This method validates that all <see cref="IActuator"/>s have unique names and equivalent action space types
76+
/// This method validates that all <see cref="IActuator"/>s have unique names
7777
/// if the `DEBUG` preprocessor macro is defined, and allocates the appropriate buffers to manage the actions for
7878
/// all of the <see cref="IActuator"/>s that may live on a particular object.
7979
/// </summary>
@@ -90,7 +90,6 @@ internal void ReadyActuatorsForExecution(IList<IActuator> actuators, int numCont
9090
}
9191
#if DEBUG
9292
// Make sure the names are actually unique
93-
// Make sure all Actuators have the same SpaceType
9493
ValidateActuators();
9594
#endif
9695

@@ -272,7 +271,7 @@ void SortActuators()
272271
}
273272

274273
/// <summary>
275-
/// Validates that the IActuators managed by this object have unique names and equivalent action space types.
274+
/// Validates that the IActuators managed by this object have unique names.
276275
/// Each Actuator needs to have a unique name in order for this object to ensure that the storage of action
277276
/// buffers, and execution of Actuators remains deterministic across different sessions of running.
278277
/// </summary>

com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ public void Clear()
121121
/// <summary>
122122
/// Check if the <see cref="ActionBuffers"/> is empty.
123123
/// </summary>
124+
/// <returns>Whether the buffers are empty.</returns>
124125
public bool IsEmpty()
125126
{
126127
return ContinuousActions.IsEmpty() && DiscreteActions.IsEmpty();

com.unity.ml-agents/Runtime/Actuators/IActuator.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ namespace Unity.MLAgents.Actuators
66
public interface IActuator : IActionReceiver
77
{
88
/// <summary>
9-
/// The specification of the Action space for this IActuator.
9+
/// The specification of the actions for this IActuator.
1010
/// </summary>
1111
/// <seealso cref="ActionSpec"/>
1212
ActionSpec ActionSpec { get; }

com.unity.ml-agents/Runtime/Agent.cs

Lines changed: 46 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ namespace Unity.MLAgents
1919
internal struct AgentInfo
2020
{
2121
/// <summary>
22-
/// Keeps track of the last vector action taken by the Brain.
22+
/// Keeps track of the last actions taken by the Brain.
2323
/// </summary>
24-
public ActionBuffers storedVectorActions;
24+
public ActionBuffers storedActions;
2525

2626
/// <summary>
2727
/// For discrete control, specifies the actions that the agent cannot take.
@@ -52,17 +52,17 @@ internal struct AgentInfo
5252

5353
public void ClearActions()
5454
{
55-
storedVectorActions.Clear();
55+
storedActions.Clear();
5656
}
5757

5858
public void CopyActions(ActionBuffers actionBuffers)
5959
{
60-
var continuousActions = storedVectorActions.ContinuousActions;
60+
var continuousActions = storedActions.ContinuousActions;
6161
for (var i = 0; i < actionBuffers.ContinuousActions.Length; i++)
6262
{
6363
continuousActions[i] = actionBuffers.ContinuousActions[i];
6464
}
65-
var discreteActions = storedVectorActions.DiscreteActions;
65+
var discreteActions = storedActions.DiscreteActions;
6666
for (var i = 0; i < actionBuffers.DiscreteActions.Length; i++)
6767
{
6868
discreteActions[i] = actionBuffers.DiscreteActions[i];
@@ -438,7 +438,7 @@ public void LazyInitialize()
438438
InitializeSensors();
439439
}
440440

441-
m_Info.storedVectorActions = new ActionBuffers(
441+
m_Info.storedActions = new ActionBuffers(
442442
new float[m_ActuatorManager.NumContinuousActions],
443443
new int[m_ActuatorManager.NumDiscreteActions]
444444
);
@@ -557,7 +557,7 @@ void NotifyAgentDone(DoneReason doneReason)
557557
m_CumulativeReward = 0f;
558558
m_RequestAction = false;
559559
m_RequestDecision = false;
560-
m_Info.storedVectorActions.Clear();
560+
m_Info.storedActions.Clear();
561561
}
562562

563563
/// <summary>
@@ -886,12 +886,22 @@ public virtual void Initialize() { }
886886
/// <seealso cref="IActionReceiver.OnActionReceived"/>
887887
public virtual void Heuristic(in ActionBuffers actionsOut)
888888
{
889+
var brainParams = m_PolicyFactory.BrainParameters;
890+
var actionSpec = brainParams.ActionSpec;
891+
// For continuous and discrete actions together, we don't need to fall back to the legacy method
892+
if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
893+
{
894+
Debug.LogWarning("Heuristic method called but not implemented. Clearing ActionBuffers.");
895+
actionsOut.Clear();
896+
return;
897+
}
898+
889899
// Disable deprecation warnings so we can call the legacy overload.
890900
#pragma warning disable CS0618
891901

892902
// The default implementation of Heuristic calls the
893903
// obsolete version for backward compatibility
894-
switch (m_PolicyFactory.BrainParameters.VectorActionSpaceType)
904+
switch (brainParams.VectorActionSpaceType)
895905
{
896906
case SpaceType.Continuous:
897907
Heuristic(actionsOut.ContinuousActions.Array);
@@ -1038,7 +1048,7 @@ void SendInfoToBrain()
10381048
CollectObservations(collectObservationsSensor);
10391049
}
10401050
}
1041-
using (TimerStack.Instance.Scoped("CollectDiscreteActionMasks"))
1051+
using (TimerStack.Instance.Scoped("WriteActionMask"))
10421052
{
10431053
m_ActuatorManager.WriteActionMask();
10441054
}
@@ -1135,7 +1145,7 @@ public ReadOnlyCollection<float> GetObservations()
11351145
}
11361146

11371147
/// <summary>
1138-
/// Implement `CollectDiscreteActionMasks()` to collects the masks for discrete
1148+
/// Implement `WriteDiscreteActionMask()` to collect the masks for discrete
11391149
/// actions. When using discrete actions, the agent will not perform the masked
11401150
/// action.
11411151
/// </summary>
@@ -1144,7 +1154,7 @@ public ReadOnlyCollection<float> GetObservations()
11441154
/// </param>
11451155
/// <remarks>
11461156
/// When using Discrete Control, you can prevent the Agent from using a certain
1147-
/// action by masking it with <see cref="DiscreteActionMasker.SetMask(int, IEnumerable{int})"/>.
1157+
/// action by masking it with <see cref="IDiscreteActionMask.WriteMask(int, IEnumerable{int})"/>.
11481158
///
11491159
/// See [Agents - Actions] for more information on masking actions.
11501160
///
@@ -1168,30 +1178,29 @@ public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
11681178
/// on the provided action.
11691179
/// </summary>
11701180
/// <remarks>
1171-
/// An action is passed to this function in the form of an array vector. Your
1172-
/// implementation must use the array to direct the agent's behavior for the
1181+
/// An action is passed to this function in the form of an <see cref="ActionBuffers"/>.
1182+
/// Your implementation must use its contents to direct the agent's behavior for the
11731183
/// current step.
11741184
///
1175-
/// You decide how many elements you need in the action array to control your
1185+
/// You decide how many elements you need in the ActionBuffers to control your
11761186
/// agent and what each element means. For example, if you want to apply a
11771187
/// force to move an agent around the environment, you can arbitrarily pick
1178-
/// three values in the action array to use as the force components. During
1179-
/// training, the agent's policy learns to set those particular elements of
1188+
/// three values in ActionBuffers.ContinuousActions array to use as the force components.
1189+
/// During training, the agent's policy learns to set those particular elements of
11801190
/// the array to maximize the training rewards the agent receives. (Of course,
11811191
/// if you implement a <see cref="Heuristic(in ActionBuffers)"/> function, it must use the same
11821192
/// elements of the action array for the same purpose since there is no learning
11831193
/// involved.)
11841194
///
1185-
/// Actions for an agent can be either *Continuous* or *Discrete*. Specify which
1186-
/// type of action space an agent uses, along with the size of the action array,
1187-
/// in the <see cref="BrainParameters"/> of the agent's associated
1195+
/// An Agent can use continuous and/or discrete actions. Configure this along with the size
1196+
/// of the action array, in the <see cref="BrainParameters"/> of the agent's associated
11881197
/// <see cref="BehaviorParameters"/> component.
11891198
///
1190-
/// When an agent uses the continuous action space, the values in the action
1199+
/// When an agent uses continuous actions, the values in the ActionBuffers.ContinuousActions
11911200
/// array are floating point numbers. You should clamp the values to the range,
11921201
/// -1..1, to increase numerical stability during training.
11931202
///
1194-
/// When an agent uses the discrete action space, the values in the action array
1203+
/// When an agent uses discrete actions, the values in the ActionBuffers.DiscreteActions array
11951204
/// are integers that each represent a specific, discrete action. For example,
11961205
/// you could define a set of discrete actions such as:
11971206
///
@@ -1204,24 +1213,23 @@ public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
12041213
/// </code>
12051214
///
12061215
/// When making a decision, the agent picks one of the five actions and puts the
1207-
/// corresponding integer value in the action vector. For example, if the agent
1208-
/// decided to move left, the action vector parameter would contain an array with
1216+
/// corresponding integer value in the ActionBuffers.DiscreteActions array. For example, if the agent
1217+
/// decided to move left, the ActionBuffers.DiscreteActions parameter would be an array with
12091218
/// a single element with the value 1.
12101219
///
12111220
/// You can define multiple sets, or branches, of discrete actions to allow an
12121221
/// agent to perform simultaneous, independent actions. For example, you could
12131222
/// use one branch for movement and another branch for throwing a ball left, right,
12141223
/// up, or down, to allow the agent to do both in the same step.
12151224
///
1216-
/// The action vector of a discrete action space contains one element for each
1217-
/// branch. The value of each element is the integer representing the chosen
1218-
/// action for that branch. The agent always chooses one action for each
1219-
/// branch.
1225+
/// The ActionBuffers.DiscreteActions array of an agent with discrete actions contains one
1226+
/// element for each branch. The value of each element is the integer representing the
1227+
/// chosen action for that branch. The agent always chooses one action for each branch.
12201228
///
1221-
/// When you use the discrete action space, you can prevent the training process
1229+
/// When you use discrete actions, you can prevent the training process
12221230
/// or the neural network model from choosing specific actions in a step by
1223-
/// implementing the <see cref="CollectDiscreteActionMasks(DiscreteActionMasker)"/>
1224-
/// function. For example, if your agent is next to a wall, you could mask out any
1231+
/// implementing the <see cref="WriteDiscreteActionMask(IDiscreteActionMask)"/>
1232+
/// method. For example, if your agent is next to a wall, you could mask out any
12251233
/// actions that would result in the agent trying to move into the wall.
12261234
///
12271235
/// For more information about implementing agent actions see [Agents - Actions].
@@ -1233,6 +1241,14 @@ public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
12331241
/// </param>
12341242
public virtual void OnActionReceived(ActionBuffers actions)
12351243
{
1244+
var actionSpec = m_PolicyFactory.BrainParameters.ActionSpec;
1245+
// For continuous and discrete actions together, we don't need to fall back to the legacy method
1246+
if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
1247+
{
1248+
// Nothing implemented.
1249+
return;
1250+
}
1251+
12361252
if (!actions.ContinuousActions.IsEmpty())
12371253
{
12381254
m_LegacyActionCache = actions.ContinuousActions.Array;

com.unity.ml-agents/Runtime/Agent.deprecated.cs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,14 @@ public virtual void OnActionReceived(float[] vectorAction) { }
4242
[Obsolete("GetAction has been deprecated, please use GetStoredActionBuffers instead.")]
4343
public float[] GetAction()
4444
{
45-
var storedAction = m_Info.storedVectorActions;
45+
var actionSpec = m_PolicyFactory.BrainParameters.ActionSpec;
46+
// For continuous and discrete actions together, this shouldn't be called because we can only return one.
47+
if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
48+
{
49+
Debug.LogWarning("Agent.GetAction() when both continuous and discrete actions are in use. Use Agent.GetStoredActionBuffers() instead.");
50+
}
51+
52+
var storedAction = m_Info.storedActions;
4653
if (!storedAction.ContinuousActions.IsEmpty())
4754
{
4855
return storedAction.ContinuousActions.Array;

com.unity.ml-agents/Runtime/Analytics/InferenceAnalytics.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ public static bool IsAnalyticsEnabled()
8484
/// <param name="behaviorName">The BehaviorName of the Agent using the model</param>
8585
/// <param name="inferenceDevice">Whether inference is being performed on the CPU or GPU</param>
8686
/// <param name="sensors">List of ISensors for the Agent. Used to generate information about the observation space.</param>
87-
/// <param name="actionSpec">ActionSpec for the Agent. Used to generate information about the action space.</param>
87+
/// <param name="actionSpec">ActionSpec for the Agent. Used to generate information about the actions.</param>
8888
/// <returns></returns>
8989
public static void InferenceModelSet(
9090
NNModel nnModel,

0 commit comments

Comments
 (0)