@@ -19,9 +19,9 @@ namespace Unity.MLAgents
     internal struct AgentInfo
     {
         /// <summary>
-        /// Keeps track of the last vector action taken by the Brain.
+        /// Keeps track of the last actions taken by the Brain.
         /// </summary>
-        public ActionBuffers storedVectorActions;
+        public ActionBuffers storedActions;
 
         /// <summary>
         /// For discrete control, specifies the actions that the agent cannot take.
@@ -52,17 +52,17 @@ internal struct AgentInfo
 
         public void ClearActions()
         {
-            storedVectorActions.Clear();
+            storedActions.Clear();
         }
 
         public void CopyActions(ActionBuffers actionBuffers)
         {
-            var continuousActions = storedVectorActions.ContinuousActions;
+            var continuousActions = storedActions.ContinuousActions;
             for (var i = 0; i < actionBuffers.ContinuousActions.Length; i++)
             {
                 continuousActions[i] = actionBuffers.ContinuousActions[i];
             }
-            var discreteActions = storedVectorActions.DiscreteActions;
+            var discreteActions = storedActions.DiscreteActions;
             for (var i = 0; i < actionBuffers.DiscreteActions.Length; i++)
             {
                 discreteActions[i] = actionBuffers.DiscreteActions[i];
@@ -438,7 +438,7 @@ public void LazyInitialize()
                 InitializeSensors();
             }
 
-            m_Info.storedVectorActions = new ActionBuffers(
+            m_Info.storedActions = new ActionBuffers(
                 new float[m_ActuatorManager.NumContinuousActions],
                 new int[m_ActuatorManager.NumDiscreteActions]
             );
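For context on the layout this initializes: the stored buffers hold one float per continuous action and one integer (the chosen index) per discrete branch. A minimal sketch, not part of this commit, using hypothetical sizes of two continuous actions and one discrete branch:

    using Unity.MLAgents.Actuators;
    using UnityEngine;

    static class ActionBufferLayoutSketch
    {
        public static void Show()
        {
            // Hypothetical sizes: 2 continuous actions, 1 discrete branch.
            var buffers = new ActionBuffers(new float[2], new int[1]);
            Debug.Log($"{buffers.ContinuousActions.Length} continuous, {buffers.DiscreteActions.Length} discrete");
        }
    }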
@@ -557,7 +557,7 @@ void NotifyAgentDone(DoneReason doneReason)
             m_CumulativeReward = 0f;
             m_RequestAction = false;
             m_RequestDecision = false;
-            m_Info.storedVectorActions.Clear();
+            m_Info.storedActions.Clear();
         }
 
         /// <summary>
@@ -886,12 +886,22 @@ public virtual void Initialize() { }
         /// <seealso cref="IActionReceiver.OnActionReceived"/>
         public virtual void Heuristic(in ActionBuffers actionsOut)
         {
+            var brainParams = m_PolicyFactory.BrainParameters;
+            var actionSpec = brainParams.ActionSpec;
+            // For continuous and discrete actions together, we don't need to fall back to the legacy method
+            if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
+            {
+                Debug.LogWarning("Heuristic method called but not implemented. Clearing ActionBuffers.");
+                actionsOut.Clear();
+                return;
+            }
+
             // Disable deprecation warnings so we can call the legacy overload.
 #pragma warning disable CS0618
 
             // The default implementation of Heuristic calls the
             // obsolete version for backward compatibility
-            switch (m_PolicyFactory.BrainParameters.VectorActionSpaceType)
+            switch (brainParams.VectorActionSpaceType)
             {
                 case SpaceType.Continuous:
                     Heuristic(actionsOut.ContinuousActions.Array);
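With this change, an Agent that defines both continuous and discrete actions no longer falls back to the legacy single-space Heuristic; it logs a warning and clears the buffers unless the subclass overrides Heuristic(in ActionBuffers). A rough sketch of such an override, not taken from this commit — the agent class, input mapping, and action layout (two continuous axes plus one jump branch) are hypothetical:

    using Unity.MLAgents;
    using Unity.MLAgents.Actuators;
    using UnityEngine;

    public class HybridHeuristicAgent : Agent
    {
        public override void Heuristic(in ActionBuffers actionsOut)
        {
            // Hypothetical layout: continuous[0..1] = movement axes, discrete branch 0 = jump.
            var continuousActions = actionsOut.ContinuousActions;
            continuousActions[0] = Input.GetAxis("Horizontal");
            continuousActions[1] = Input.GetAxis("Vertical");

            var discreteActions = actionsOut.DiscreteActions;
            discreteActions[0] = Input.GetKey(KeyCode.Space) ? 1 : 0;
        }
    }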
@@ -1038,7 +1048,7 @@ void SendInfoToBrain()
                     CollectObservations(collectObservationsSensor);
                 }
             }
-            using (TimerStack.Instance.Scoped("CollectDiscreteActionMasks"))
+            using (TimerStack.Instance.Scoped("WriteActionMask"))
             {
                 m_ActuatorManager.WriteActionMask();
             }
@@ -1135,7 +1145,7 @@ public ReadOnlyCollection<float> GetObservations()
         }
 
         /// <summary>
-        /// Implement `CollectDiscreteActionMasks()` to collects the masks for discrete
+        /// Implement `WriteDiscreteActionMask()` to collect the masks for discrete
         /// actions. When using discrete actions, the agent will not perform the masked
         /// action.
         /// </summary>
@@ -1144,7 +1154,7 @@ public ReadOnlyCollection<float> GetObservations()
         /// </param>
         /// <remarks>
         /// When using Discrete Control, you can prevent the Agent from using a certain
-        /// action by masking it with <see cref="DiscreteActionMasker.SetMask(int, IEnumerable{int})"/>.
+        /// action by masking it with <see cref="IDiscreteActionMask.WriteMask(int, IEnumerable{int})"/>.
         ///
         /// See [Agents - Actions] for more information on masking actions.
         ///
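As an illustration of the renamed masking API referenced above, a subclass can mask individual actions inside WriteDiscreteActionMask via IDiscreteActionMask.WriteMask. This is a sketch only; WallAwareAgent, AtLeftWall, and the meaning of action index 1 on branch 0 are assumptions, not part of this commit:

    using Unity.MLAgents;
    using Unity.MLAgents.Actuators;

    public class WallAwareAgent : Agent
    {
        // Hypothetical helper: reports whether the agent is flush against the left wall.
        bool AtLeftWall() => false;

        public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
        {
            // Assume branch 0, action index 1 means "move left"; masking it keeps the
            // policy from choosing that action this step.
            if (AtLeftWall())
            {
                actionMask.WriteMask(0, new[] { 1 });
            }
        }
    }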
@@ -1168,30 +1178,29 @@ public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
         /// on the provided action.
         /// </summary>
         /// <remarks>
-        /// An action is passed to this function in the form of an array vector. Your
-        /// implementation must use the array to direct the agent's behavior for the
+        /// An action is passed to this function in the form of an <seealso cref="ActionBuffers"/>.
+        /// Your implementation must use the array to direct the agent's behavior for the
         /// current step.
         ///
-        /// You decide how many elements you need in the action array to control your
+        /// You decide how many elements you need in the ActionBuffers to control your
         /// agent and what each element means. For example, if you want to apply a
         /// force to move an agent around the environment, you can arbitrarily pick
-        /// three values in the action array to use as the force components. During
-        /// training, the agent's policy learns to set those particular elements of
+        /// three values in ActionBuffers.ContinuousActions array to use as the force components.
+        /// During training, the agent's policy learns to set those particular elements of
         /// the array to maximize the training rewards the agent receives. (Of course,
         /// if you implement a <seealso cref="Heuristic(in ActionBuffers)"/> function, it must use the same
         /// elements of the action array for the same purpose since there is no learning
         /// involved.)
         ///
-        /// Actions for an agent can be either *Continuous* or *Discrete*. Specify which
-        /// type of action space an agent uses, along with the size of the action array,
-        /// in the <see cref="BrainParameters"/> of the agent's associated
+        /// An Agent can use continuous and/or discrete actions. Configure this along with the size
+        /// of the action array, in the <see cref="BrainParameters"/> of the agent's associated
         /// <see cref="BehaviorParameters"/> component.
         ///
-        /// When an agent uses the continuous action space, the values in the action
+        /// When an agent uses continuous actions, the values in the ActionBuffers.ContinuousActions
         /// array are floating point numbers. You should clamp the values to the range,
         /// -1..1, to increase numerical stability during training.
         ///
-        /// When an agent uses the discrete action space, the values in the action array
+        /// When an agent uses discrete actions, the values in the ActionBuffers.DiscreteActions array
         /// are integers that each represent a specific, discrete action. For example,
         /// you could define a set of discrete actions such as:
         ///
@@ -1204,24 +1213,23 @@ public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
         /// </code>
         ///
         /// When making a decision, the agent picks one of the five actions and puts the
-        /// corresponding integer value in the action vector. For example, if the agent
-        /// decided to move left, the action vector parameter would contain an array with
+        /// corresponding integer value in the ActionBuffers.DiscreteActions array. For example, if the agent
+        /// decided to move left, the ActionBuffers.DiscreteActions parameter would be an array with
         /// a single element with the value 1.
         ///
         /// You can define multiple sets, or branches, of discrete actions to allow an
         /// agent to perform simultaneous, independent actions. For example, you could
         /// use one branch for movement and another branch for throwing a ball left, right,
         /// up, or down, to allow the agent to do both in the same step.
         ///
-        /// The action vector of a discrete action space contains one element for each
-        /// branch. The value of each element is the integer representing the chosen
-        /// action for that branch. The agent always chooses one action for each
-        /// branch.
+        /// The ActionBuffers.DiscreteActions array of an agent with discrete actions contains one
+        /// element for each branch. The value of each element is the integer representing the
+        /// chosen action for that branch. The agent always chooses one action for each branch.
         ///
-        /// When you use the discrete action space, you can prevent the training process
+        /// When you use discrete actions, you can prevent the training process
         /// or the neural network model from choosing specific actions in a step by
-        /// implementing the <see cref="CollectDiscreteActionMasks(DiscreteActionMasker)"/>
-        /// function. For example, if your agent is next to a wall, you could mask out any
+        /// implementing the <see cref="WriteDiscreteActionMask(IDiscreteActionMask)"/>
+        /// method. For example, if your agent is next to a wall, you could mask out any
         /// actions that would result in the agent trying to move into the wall.
         ///
         /// For more information about implementing agent actions see [Agents - Actions].
@@ -1233,6 +1241,14 @@ public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
         /// </param>
         public virtual void OnActionReceived(ActionBuffers actions)
         {
+            var actionSpec = m_PolicyFactory.BrainParameters.ActionSpec;
+            // For continuous and discrete actions together, we don't need to fall back to the legacy method
+            if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
+            {
+                // Nothing implemented.
+                return;
+            }
+
             if (!actions.ContinuousActions.IsEmpty())
             {
                 m_LegacyActionCache = actions.ContinuousActions.Array;
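To show how the reworded documentation plays out in practice, here is a rough sketch of an OnActionReceived override that consumes both segments of the ActionBuffers. The agent class, Rigidbody fields, and action meanings are hypothetical and not part of this commit:

    using Unity.MLAgents;
    using Unity.MLAgents.Actuators;
    using UnityEngine;

    public class HybridActionAgent : Agent
    {
        public Rigidbody body;          // assumed to be assigned in the Inspector
        public float forceScale = 10f;  // hypothetical tuning values
        public float jumpImpulse = 5f;

        public override void OnActionReceived(ActionBuffers actions)
        {
            // Continuous values are floats; clamp to [-1, 1] as the docs recommend.
            var moveX = Mathf.Clamp(actions.ContinuousActions[0], -1f, 1f);
            var moveZ = Mathf.Clamp(actions.ContinuousActions[1], -1f, 1f);
            body.AddForce(new Vector3(moveX, 0f, moveZ) * forceScale);

            // Each discrete branch holds one chosen integer; assume branch 0: 1 = jump.
            if (actions.DiscreteActions[0] == 1)
            {
                body.AddForce(Vector3.up * jumpImpulse, ForceMode.Impulse);
            }
        }
    }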