Skip to content

Commit 6dc68df

Browse files
Author: Chris Elion
Don't drop multiple stats from the same step (#4236)
1 parent 3380908 commit 6dc68df

File tree

10 files changed

+82
-64
lines changed

Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/FoodCollectorArea.prefab

Lines changed: 37 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2178,17 +2178,18 @@ MonoBehaviour:
21782178
m_Name:
21792179
m_EditorClassIdentifier:
21802180
m_BrainParameters:
2181-
vectorObservationSize: 4
2182-
numStackedVectorObservations: 1
2183-
vectorActionSize: 03000000030000000300000002000000
2184-
vectorActionDescriptions: []
2185-
vectorActionSpaceType: 0
2181+
VectorObservationSize: 4
2182+
NumStackedVectorObservations: 1
2183+
VectorActionSize: 03000000030000000300000002000000
2184+
VectorActionDescriptions: []
2185+
VectorActionSpaceType: 0
21862186
m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
21872187
m_InferenceDevice: 0
21882188
m_BehaviorType: 0
21892189
m_BehaviorName: FoodCollector
21902190
TeamId: 0
21912191
m_UseChildSensors: 1
2192+
m_ObservableAttributeHandling: 0
21922193
--- !u!114 &114176228333253036
21932194
MonoBehaviour:
21942195
m_ObjectHideFlags: 0
@@ -2204,7 +2205,7 @@ MonoBehaviour:
22042205
agentParameters:
22052206
maxStep: 0
22062207
hasUpgradedFromAgentParameters: 1
2207-
maxStep: 5000
2208+
MaxStep: 5000
22082209
area: {fileID: 1819751139121548}
22092210
turnSpeed: 300
22102211
moveSpeed: 2
@@ -2213,7 +2214,7 @@ MonoBehaviour:
22132214
goodMaterial: {fileID: 2100000, guid: c67450f290f3e4897bc40276a619e78d, type: 2}
22142215
frozenMaterial: {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
22152216
myLaser: {fileID: 1081721624670010}
2216-
contribute: 0
2217+
contribute: 1
22172218
useVectorObs: 1
22182219
--- !u!114 &114725457980523372
22192220
MonoBehaviour:
@@ -2260,7 +2261,6 @@ MonoBehaviour:
22602261
m_EditorClassIdentifier:
22612262
DecisionPeriod: 5
22622263
TakeActionsBetweenDecisions: 1
2263-
offsetStep: 0
22642264
--- !u!114 &1222199865870203693
22652265
MonoBehaviour:
22662266
m_ObjectHideFlags: 0
@@ -2273,6 +2273,7 @@ MonoBehaviour:
22732273
m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
22742274
m_Name:
22752275
m_EditorClassIdentifier:
2276+
debugCommandLineOverride:
22762277
--- !u!1 &1482701732800114
22772278
GameObject:
22782279
m_ObjectHideFlags: 0
@@ -2517,17 +2518,18 @@ MonoBehaviour:
25172518
m_Name:
25182519
m_EditorClassIdentifier:
25192520
m_BrainParameters:
2520-
vectorObservationSize: 4
2521-
numStackedVectorObservations: 1
2522-
vectorActionSize: 03000000030000000300000002000000
2523-
vectorActionDescriptions: []
2524-
vectorActionSpaceType: 0
2521+
VectorObservationSize: 4
2522+
NumStackedVectorObservations: 1
2523+
VectorActionSize: 03000000030000000300000002000000
2524+
VectorActionDescriptions: []
2525+
VectorActionSpaceType: 0
25252526
m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
25262527
m_InferenceDevice: 0
25272528
m_BehaviorType: 0
25282529
m_BehaviorName: FoodCollector
25292530
TeamId: 0
25302531
m_UseChildSensors: 1
2532+
m_ObservableAttributeHandling: 0
25312533
--- !u!114 &114711827726849508
25322534
MonoBehaviour:
25332535
m_ObjectHideFlags: 0
@@ -2543,7 +2545,7 @@ MonoBehaviour:
25432545
agentParameters:
25442546
maxStep: 0
25452547
hasUpgradedFromAgentParameters: 1
2546-
maxStep: 5000
2548+
MaxStep: 5000
25472549
area: {fileID: 1819751139121548}
25482550
turnSpeed: 300
25492551
moveSpeed: 2
@@ -2599,7 +2601,6 @@ MonoBehaviour:
25992601
m_EditorClassIdentifier:
26002602
DecisionPeriod: 5
26012603
TakeActionsBetweenDecisions: 1
2602-
offsetStep: 0
26032604
--- !u!1 &1528397385587768
26042605
GameObject:
26052606
m_ObjectHideFlags: 0
@@ -2848,17 +2849,18 @@ MonoBehaviour:
28482849
m_Name:
28492850
m_EditorClassIdentifier:
28502851
m_BrainParameters:
2851-
vectorObservationSize: 4
2852-
numStackedVectorObservations: 1
2853-
vectorActionSize: 03000000030000000300000002000000
2854-
vectorActionDescriptions: []
2855-
vectorActionSpaceType: 0
2852+
VectorObservationSize: 4
2853+
NumStackedVectorObservations: 1
2854+
VectorActionSize: 03000000030000000300000002000000
2855+
VectorActionDescriptions: []
2856+
VectorActionSpaceType: 0
28562857
m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
28572858
m_InferenceDevice: 0
28582859
m_BehaviorType: 0
28592860
m_BehaviorName: FoodCollector
28602861
TeamId: 0
28612862
m_UseChildSensors: 1
2863+
m_ObservableAttributeHandling: 0
28622864
--- !u!114 &114542632553128056
28632865
MonoBehaviour:
28642866
m_ObjectHideFlags: 0
@@ -2874,7 +2876,7 @@ MonoBehaviour:
28742876
agentParameters:
28752877
maxStep: 0
28762878
hasUpgradedFromAgentParameters: 1
2877-
maxStep: 5000
2879+
MaxStep: 5000
28782880
area: {fileID: 1819751139121548}
28792881
turnSpeed: 300
28802882
moveSpeed: 2
@@ -2930,7 +2932,6 @@ MonoBehaviour:
29302932
m_EditorClassIdentifier:
29312933
DecisionPeriod: 5
29322934
TakeActionsBetweenDecisions: 1
2933-
offsetStep: 0
29342935
--- !u!1 &1617924810425504
29352936
GameObject:
29362937
m_ObjectHideFlags: 0
@@ -3442,17 +3443,18 @@ MonoBehaviour:
34423443
m_Name:
34433444
m_EditorClassIdentifier:
34443445
m_BrainParameters:
3445-
vectorObservationSize: 4
3446-
numStackedVectorObservations: 1
3447-
vectorActionSize: 03000000030000000300000002000000
3448-
vectorActionDescriptions: []
3449-
vectorActionSpaceType: 0
3446+
VectorObservationSize: 4
3447+
NumStackedVectorObservations: 1
3448+
VectorActionSize: 03000000030000000300000002000000
3449+
VectorActionDescriptions: []
3450+
VectorActionSpaceType: 0
34503451
m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
34513452
m_InferenceDevice: 0
34523453
m_BehaviorType: 0
34533454
m_BehaviorName: FoodCollector
34543455
TeamId: 0
34553456
m_UseChildSensors: 1
3457+
m_ObservableAttributeHandling: 0
34563458
--- !u!114 &114189751434580810
34573459
MonoBehaviour:
34583460
m_ObjectHideFlags: 0
@@ -3468,7 +3470,7 @@ MonoBehaviour:
34683470
agentParameters:
34693471
maxStep: 0
34703472
hasUpgradedFromAgentParameters: 1
3471-
maxStep: 5000
3473+
MaxStep: 5000
34723474
area: {fileID: 1819751139121548}
34733475
turnSpeed: 300
34743476
moveSpeed: 2
@@ -3524,7 +3526,6 @@ MonoBehaviour:
35243526
m_EditorClassIdentifier:
35253527
DecisionPeriod: 5
35263528
TakeActionsBetweenDecisions: 1
3527-
offsetStep: 0
35283529
--- !u!1 &1688105343773098
35293530
GameObject:
35303531
m_ObjectHideFlags: 0
@@ -3759,17 +3760,18 @@ MonoBehaviour:
37593760
m_Name:
37603761
m_EditorClassIdentifier:
37613762
m_BrainParameters:
3762-
vectorObservationSize: 4
3763-
numStackedVectorObservations: 1
3764-
vectorActionSize: 03000000030000000300000002000000
3765-
vectorActionDescriptions: []
3766-
vectorActionSpaceType: 0
3763+
VectorObservationSize: 4
3764+
NumStackedVectorObservations: 1
3765+
VectorActionSize: 03000000030000000300000002000000
3766+
VectorActionDescriptions: []
3767+
VectorActionSpaceType: 0
37673768
m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
37683769
m_InferenceDevice: 0
37693770
m_BehaviorType: 0
37703771
m_BehaviorName: FoodCollector
37713772
TeamId: 0
37723773
m_UseChildSensors: 1
3774+
m_ObservableAttributeHandling: 0
37733775
--- !u!114 &114235147148547996
37743776
MonoBehaviour:
37753777
m_ObjectHideFlags: 0
@@ -3785,7 +3787,7 @@ MonoBehaviour:
37853787
agentParameters:
37863788
maxStep: 0
37873789
hasUpgradedFromAgentParameters: 1
3788-
maxStep: 5000
3790+
MaxStep: 5000
37893791
area: {fileID: 1819751139121548}
37903792
turnSpeed: 300
37913793
moveSpeed: 2
@@ -3841,7 +3843,6 @@ MonoBehaviour:
38413843
m_EditorClassIdentifier:
38423844
DecisionPeriod: 5
38433845
TakeActionsBetweenDecisions: 1
3844-
offsetStep: 0
38453846
--- !u!1 &1729825611722018
38463847
GameObject:
38473848
m_ObjectHideFlags: 0

com.unity.ml-agents/CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,15 @@ and this project adheres to
99
## [Unreleased]
1010

1111
### Major Changes
12+
#### com.unity.ml-agents (C#)
13+
#### ml-agents / ml-agents-envs / gym-unity (Python)
1214

1315
### Minor Changes
16+
#### com.unity.ml-agents (C#)
17+
#### ml-agents / ml-agents-envs / gym-unity (Python)
18+
- StatsSideChannel now stores multiple values per key. This means that multiple
19+
calls to `StatsRecorder.Add()` with the same key in the same step will no
20+
longer overwrite each other. (#4236)
1421
- Model checkpoints are now also saved as .nn files during training. (#4127)
1522
- Model checkpoint info is saved in TrainingStatus.json after training is concluded (#4127)
1623

@@ -20,6 +27,7 @@ and this project adheres to
2027
recursively (for example, by an Agent's CollectObservations method).
2128
Previously, this would result in an infinite loop and cause the editor to hang.
2229
(#4226)
30+
#### ml-agents / ml-agents-envs / gym-unity (Python)
2331

2432
## [1.2.0-preview] - 2020-07-15
2533

com.unity.ml-agents/Runtime/StatsRecorder.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ public enum StatAggregationMethod
99
{
1010
/// <summary>
1111
/// Values within the summary period are averaged before reporting.
12-
/// Note that values from the same C# environment in the same step may replace each other.
1312
/// </summary>
1413
Average = 0,
1514

docs/Python-API.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ in python, run:
6767
from mlagents_envs.environment import UnityEnvironment
6868
# This is a non-blocking call that only loads the environment.
6969
env = UnityEnvironment(file_name="3DBall", seed=1, side_channels=[])
70-
# Start interacting with the evironment.
70+
# Start interacting with the environment.
7171
env.reset()
7272
behavior_names = env.behavior_specs.keys()
7373
...

ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1-
from mlagents_envs.side_channel import SideChannel, IncomingMessage
21
import uuid
3-
from typing import Dict, Tuple
2+
from typing import Tuple, List, Mapping
43
from enum import Enum
4+
from collections import defaultdict
5+
6+
from mlagents_envs.side_channel import SideChannel, IncomingMessage
57

68

79
# Determines the behavior of how multiple stats within the same summary period are combined.
@@ -13,6 +15,10 @@ class StatsAggregationMethod(Enum):
1315
MOST_RECENT = 1
1416

1517

18+
StatList = List[Tuple[float, StatsAggregationMethod]]
19+
EnvironmentStats = Mapping[str, StatList]
20+
21+
1622
class StatsSideChannel(SideChannel):
1723
"""
1824
Side channel that receives (string, float) pairs from the environment, so that they can eventually
@@ -24,7 +30,7 @@ def __init__(self) -> None:
2430
# UUID('a1d8f7b7-cec8-50f9-b78b-d3e165a78520')
2531
super().__init__(uuid.UUID("a1d8f7b7-cec8-50f9-b78b-d3e165a78520"))
2632

27-
self.stats: Dict[str, Tuple[float, StatsAggregationMethod]] = {}
33+
self.stats: EnvironmentStats = defaultdict(list)
2834

2935
def on_message_received(self, msg: IncomingMessage) -> None:
3036
"""
@@ -36,13 +42,13 @@ def on_message_received(self, msg: IncomingMessage) -> None:
3642
val = msg.read_float32()
3743
agg_type = StatsAggregationMethod(msg.read_int32())
3844

39-
self.stats[key] = (val, agg_type)
45+
self.stats[key].append((val, agg_type))
4046

41-
def get_and_reset_stats(self) -> Dict[str, Tuple[float, StatsAggregationMethod]]:
47+
def get_and_reset_stats(self) -> EnvironmentStats:
4248
"""
4349
Returns the current stats, and resets the internal storage of the stats.
4450
:return:
4551
"""
4652
s = self.stats
47-
self.stats = {}
53+
self.stats = defaultdict(list)
4854
return s

ml-agents-envs/mlagents_envs/tests/test_side_channel.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,6 @@ def test_stats_channel():
253253
stats = receiver.get_and_reset_stats()
254254

255255
assert len(stats) == 1
256-
val, method = stats["stats-1"]
256+
val, method = stats["stats-1"][0]
257257
assert val - 42.0 < 1e-8
258258
assert method == StatsAggregationMethod.MOST_RECENT

ml-agents/mlagents/trainers/agent_processor.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99
TerminalSteps,
1010
TerminalStep,
1111
)
12-
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
12+
from mlagents_envs.side_channel.stats_side_channel import (
13+
StatsAggregationMethod,
14+
EnvironmentStats,
15+
)
1316
from mlagents.trainers.trajectory import Trajectory, AgentExperience
1417
from mlagents.trainers.policy.tf_policy import TFPolicy
1518
from mlagents.trainers.policy import Policy
@@ -306,7 +309,7 @@ def __init__(
306309
self.publish_trajectory_queue(self.trajectory_queue)
307310

308311
def record_environment_stats(
309-
self, env_stats: Dict[str, Tuple[float, StatsAggregationMethod]], worker_id: int
312+
self, env_stats: EnvironmentStats, worker_id: int
310313
) -> None:
311314
"""
312315
Pass stats from the environment to the StatsReporter.
@@ -316,11 +319,12 @@ def record_environment_stats(
316319
:param worker_id:
317320
:return:
318321
"""
319-
for stat_name, (val, agg_type) in env_stats.items():
320-
if agg_type == StatsAggregationMethod.AVERAGE:
321-
self.stats_reporter.add_stat(stat_name, val)
322-
elif agg_type == StatsAggregationMethod.MOST_RECENT:
323-
# In order to prevent conflicts between multiple environments,
324-
# only stats from the first environment are recorded.
325-
if worker_id == 0:
326-
self.stats_reporter.set_stat(stat_name, val)
322+
for stat_name, value_list in env_stats.items():
323+
for val, agg_type in value_list:
324+
if agg_type == StatsAggregationMethod.AVERAGE:
325+
self.stats_reporter.add_stat(stat_name, val)
326+
elif agg_type == StatsAggregationMethod.MOST_RECENT:
327+
# In order to prevent conflicts between multiple environments,
328+
# only stats from the first environment are recorded.
329+
if worker_id == 0:
330+
self.stats_reporter.set_stat(stat_name, val)

ml-agents/mlagents/trainers/env_manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
BehaviorSpec,
77
BehaviorName,
88
)
9-
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
9+
from mlagents_envs.side_channel.stats_side_channel import EnvironmentStats
1010

1111
from mlagents.trainers.policy.tf_policy import TFPolicy
1212
from mlagents.trainers.agent_processor import AgentManager, AgentManagerQueue
@@ -23,7 +23,7 @@ class EnvironmentStep(NamedTuple):
2323
current_all_step_result: AllStepResult
2424
worker_id: int
2525
brain_name_to_action_info: Dict[BehaviorName, ActionInfo]
26-
environment_stats: Dict[str, Tuple[float, StatsAggregationMethod]]
26+
environment_stats: EnvironmentStats
2727

2828
@property
2929
def name_behavior_ids(self) -> Iterable[BehaviorName]:

0 commit comments

Comments
 (0)