Don't drop multiple stats from the same step (#4236)

Chris Elion · web-flow · commit 6dc68df34838 · 2020-07-16T15:56:33.000-07:00
diff --git a/Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/FoodCollectorArea.prefab b/Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/FoodCollectorArea.prefab
@@ -2178,17 +2178,18 @@ MonoBehaviour:
   m_Name: 
   m_EditorClassIdentifier: 
   m_BrainParameters:
-    vectorObservationSize: 4
-    numStackedVectorObservations: 1
-    vectorActionSize: 03000000030000000300000002000000
-    vectorActionDescriptions: []
-    vectorActionSpaceType: 0
+    VectorObservationSize: 4
+    NumStackedVectorObservations: 1
+    VectorActionSize: 03000000030000000300000002000000
+    VectorActionDescriptions: []
+    VectorActionSpaceType: 0
   m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
   m_InferenceDevice: 0
   m_BehaviorType: 0
   m_BehaviorName: FoodCollector
   TeamId: 0
   m_UseChildSensors: 1
+  m_ObservableAttributeHandling: 0
 --- !u!114 &114176228333253036
 MonoBehaviour:
   m_ObjectHideFlags: 0
@@ -2204,7 +2205,7 @@ MonoBehaviour:
   agentParameters:
     maxStep: 0
   hasUpgradedFromAgentParameters: 1
-  maxStep: 5000
+  MaxStep: 5000
   area: {fileID: 1819751139121548}
   turnSpeed: 300
   moveSpeed: 2
@@ -2213,7 +2214,7 @@ MonoBehaviour:
   goodMaterial: {fileID: 2100000, guid: c67450f290f3e4897bc40276a619e78d, type: 2}
   frozenMaterial: {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
   myLaser: {fileID: 1081721624670010}
-  contribute: 0
+  contribute: 1
   useVectorObs: 1
 --- !u!114 &114725457980523372
 MonoBehaviour:
@@ -2260,7 +2261,6 @@ MonoBehaviour:
   m_EditorClassIdentifier: 
   DecisionPeriod: 5
   TakeActionsBetweenDecisions: 1
-  offsetStep: 0
 --- !u!114 &1222199865870203693
 MonoBehaviour:
   m_ObjectHideFlags: 0
@@ -2273,6 +2273,7 @@ MonoBehaviour:
   m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
   m_Name: 
   m_EditorClassIdentifier: 
+  debugCommandLineOverride: 
 --- !u!1 &1482701732800114
 GameObject:
   m_ObjectHideFlags: 0
@@ -2517,17 +2518,18 @@ MonoBehaviour:
   m_Name: 
   m_EditorClassIdentifier: 
   m_BrainParameters:
-    vectorObservationSize: 4
-    numStackedVectorObservations: 1
-    vectorActionSize: 03000000030000000300000002000000
-    vectorActionDescriptions: []
-    vectorActionSpaceType: 0
+    VectorObservationSize: 4
+    NumStackedVectorObservations: 1
+    VectorActionSize: 03000000030000000300000002000000
+    VectorActionDescriptions: []
+    VectorActionSpaceType: 0
   m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
   m_InferenceDevice: 0
   m_BehaviorType: 0
   m_BehaviorName: FoodCollector
   TeamId: 0
   m_UseChildSensors: 1
+  m_ObservableAttributeHandling: 0
 --- !u!114 &114711827726849508
 MonoBehaviour:
   m_ObjectHideFlags: 0
@@ -2543,7 +2545,7 @@ MonoBehaviour:
   agentParameters:
     maxStep: 0
   hasUpgradedFromAgentParameters: 1
-  maxStep: 5000
+  MaxStep: 5000
   area: {fileID: 1819751139121548}
   turnSpeed: 300
   moveSpeed: 2
@@ -2599,7 +2601,6 @@ MonoBehaviour:
   m_EditorClassIdentifier: 
   DecisionPeriod: 5
   TakeActionsBetweenDecisions: 1
-  offsetStep: 0
 --- !u!1 &1528397385587768
 GameObject:
   m_ObjectHideFlags: 0
@@ -2848,17 +2849,18 @@ MonoBehaviour:
   m_Name: 
   m_EditorClassIdentifier: 
   m_BrainParameters:
-    vectorObservationSize: 4
-    numStackedVectorObservations: 1
-    vectorActionSize: 03000000030000000300000002000000
-    vectorActionDescriptions: []
-    vectorActionSpaceType: 0
+    VectorObservationSize: 4
+    NumStackedVectorObservations: 1
+    VectorActionSize: 03000000030000000300000002000000
+    VectorActionDescriptions: []
+    VectorActionSpaceType: 0
   m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
   m_InferenceDevice: 0
   m_BehaviorType: 0
   m_BehaviorName: FoodCollector
   TeamId: 0
   m_UseChildSensors: 1
+  m_ObservableAttributeHandling: 0
 --- !u!114 &114542632553128056
 MonoBehaviour:
   m_ObjectHideFlags: 0
@@ -2874,7 +2876,7 @@ MonoBehaviour:
   agentParameters:
     maxStep: 0
   hasUpgradedFromAgentParameters: 1
-  maxStep: 5000
+  MaxStep: 5000
   area: {fileID: 1819751139121548}
   turnSpeed: 300
   moveSpeed: 2
@@ -2930,7 +2932,6 @@ MonoBehaviour:
   m_EditorClassIdentifier: 
   DecisionPeriod: 5
   TakeActionsBetweenDecisions: 1
-  offsetStep: 0
 --- !u!1 &1617924810425504
 GameObject:
   m_ObjectHideFlags: 0
@@ -3442,17 +3443,18 @@ MonoBehaviour:
   m_Name: 
   m_EditorClassIdentifier: 
   m_BrainParameters:
-    vectorObservationSize: 4
-    numStackedVectorObservations: 1
-    vectorActionSize: 03000000030000000300000002000000
-    vectorActionDescriptions: []
-    vectorActionSpaceType: 0
+    VectorObservationSize: 4
+    NumStackedVectorObservations: 1
+    VectorActionSize: 03000000030000000300000002000000
+    VectorActionDescriptions: []
+    VectorActionSpaceType: 0
   m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
   m_InferenceDevice: 0
   m_BehaviorType: 0
   m_BehaviorName: FoodCollector
   TeamId: 0
   m_UseChildSensors: 1
+  m_ObservableAttributeHandling: 0
 --- !u!114 &114189751434580810
 MonoBehaviour:
   m_ObjectHideFlags: 0
@@ -3468,7 +3470,7 @@ MonoBehaviour:
   agentParameters:
     maxStep: 0
   hasUpgradedFromAgentParameters: 1
-  maxStep: 5000
+  MaxStep: 5000
   area: {fileID: 1819751139121548}
   turnSpeed: 300
   moveSpeed: 2
@@ -3524,7 +3526,6 @@ MonoBehaviour:
   m_EditorClassIdentifier: 
   DecisionPeriod: 5
   TakeActionsBetweenDecisions: 1
-  offsetStep: 0
 --- !u!1 &1688105343773098
 GameObject:
   m_ObjectHideFlags: 0
@@ -3759,17 +3760,18 @@ MonoBehaviour:
   m_Name: 
   m_EditorClassIdentifier: 
   m_BrainParameters:
-    vectorObservationSize: 4
-    numStackedVectorObservations: 1
-    vectorActionSize: 03000000030000000300000002000000
-    vectorActionDescriptions: []
-    vectorActionSpaceType: 0
+    VectorObservationSize: 4
+    NumStackedVectorObservations: 1
+    VectorActionSize: 03000000030000000300000002000000
+    VectorActionDescriptions: []
+    VectorActionSpaceType: 0
   m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
   m_InferenceDevice: 0
   m_BehaviorType: 0
   m_BehaviorName: FoodCollector
   TeamId: 0
   m_UseChildSensors: 1
+  m_ObservableAttributeHandling: 0
 --- !u!114 &114235147148547996
 MonoBehaviour:
   m_ObjectHideFlags: 0
@@ -3785,7 +3787,7 @@ MonoBehaviour:
   agentParameters:
     maxStep: 0
   hasUpgradedFromAgentParameters: 1
-  maxStep: 5000
+  MaxStep: 5000
   area: {fileID: 1819751139121548}
   turnSpeed: 300
   moveSpeed: 2
@@ -3841,7 +3843,6 @@ MonoBehaviour:
   m_EditorClassIdentifier: 
   DecisionPeriod: 5
   TakeActionsBetweenDecisions: 1
-  offsetStep: 0
 --- !u!1 &1729825611722018
 GameObject:
   m_ObjectHideFlags: 0
diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
@@ -9,8 +9,15 @@ and this project adheres to
 ## [Unreleased]
 
 ### Major Changes
+#### com.unity.ml-agents (C#)
+#### ml-agents / ml-agents-envs / gym-unity (Python)
 
 ### Minor Changes
+#### com.unity.ml-agents (C#)
+#### ml-agents / ml-agents-envs / gym-unity (Python)
+- StatsSideChannel now stores multiple values per key. This means that multiple
+calls to `StatsRecorder.Add()` with the same key in the same step will no
+longer overwrite each other. (#4236)
 - Model checkpoints are now also saved as .nn files during training. (#4127)
 - Model checkpoint info is saved in TrainingStatus.json after training is concluded (#4127)
 
@@ -20,6 +27,7 @@ and this project adheres to
 recursively (for example, by an Agent's CollectObservations method).
 Previously, this would result in an infinite loop and cause the editor to hang.
 (#4226)
+#### ml-agents / ml-agents-envs / gym-unity (Python)
 
 ## [1.2.0-preview] - 2020-07-15
 
diff --git a/com.unity.ml-agents/Runtime/StatsRecorder.cs b/com.unity.ml-agents/Runtime/StatsRecorder.cs
@@ -9,7 +9,6 @@ public enum StatAggregationMethod
     {
         /// <summary>
         /// Values within the summary period are averaged before reporting.
-        /// Note that values from the same C# environment in the same step may replace each other.
         /// </summary>
         Average = 0,
 
diff --git a/docs/Python-API.md b/docs/Python-API.md
@@ -67,7 +67,7 @@ in python, run:
 from mlagents_envs.environment import UnityEnvironment
 # This is a non-blocking call that only loads the environment.
 env = UnityEnvironment(file_name="3DBall", seed=1, side_channels=[])
-# Start interacting with the evironment.
+# Start interacting with the environment.
 env.reset()
 behavior_names = env.behavior_specs.keys()
 ...
diff --git a/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py b/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py
@@ -1,7 +1,9 @@
-from mlagents_envs.side_channel import SideChannel, IncomingMessage
 import uuid
-from typing import Dict, Tuple
+from typing import Tuple, List, Mapping
 from enum import Enum
+from collections import defaultdict
+
+from mlagents_envs.side_channel import SideChannel, IncomingMessage
 
 
 # Determines the behavior of how multiple stats within the same summary period are combined.
@@ -13,6 +15,10 @@ class StatsAggregationMethod(Enum):
     MOST_RECENT = 1
 
 
+StatList = List[Tuple[float, StatsAggregationMethod]]
+EnvironmentStats = Mapping[str, StatList]
+
+
 class StatsSideChannel(SideChannel):
     """
     Side channel that receives (string, float) pairs from the environment, so that they can eventually
@@ -24,7 +30,7 @@ def __init__(self) -> None:
         # UUID('a1d8f7b7-cec8-50f9-b78b-d3e165a78520')
         super().__init__(uuid.UUID("a1d8f7b7-cec8-50f9-b78b-d3e165a78520"))
 
-        self.stats: Dict[str, Tuple[float, StatsAggregationMethod]] = {}
+        self.stats: EnvironmentStats = defaultdict(list)
 
     def on_message_received(self, msg: IncomingMessage) -> None:
         """
@@ -36,13 +42,13 @@ def on_message_received(self, msg: IncomingMessage) -> None:
         val = msg.read_float32()
         agg_type = StatsAggregationMethod(msg.read_int32())
 
-        self.stats[key] = (val, agg_type)
+        self.stats[key].append((val, agg_type))
 
-    def get_and_reset_stats(self) -> Dict[str, Tuple[float, StatsAggregationMethod]]:
+    def get_and_reset_stats(self) -> EnvironmentStats:
         """
         Returns the current stats, and resets the internal storage of the stats.
         :return:
         """
         s = self.stats
-        self.stats = {}
+        self.stats = defaultdict(list)
         return s
diff --git a/ml-agents-envs/mlagents_envs/tests/test_side_channel.py b/ml-agents-envs/mlagents_envs/tests/test_side_channel.py
@@ -253,6 +253,6 @@ def test_stats_channel():
     stats = receiver.get_and_reset_stats()
 
     assert len(stats) == 1
-    val, method = stats["stats-1"]
+    val, method = stats["stats-1"][0]
     assert val - 42.0 < 1e-8
     assert method == StatsAggregationMethod.MOST_RECENT
diff --git a/ml-agents/mlagents/trainers/agent_processor.py b/ml-agents/mlagents/trainers/agent_processor.py
@@ -9,7 +9,10 @@
     TerminalSteps,
     TerminalStep,
 )
-from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
+from mlagents_envs.side_channel.stats_side_channel import (
+    StatsAggregationMethod,
+    EnvironmentStats,
+)
 from mlagents.trainers.trajectory import Trajectory, AgentExperience
 from mlagents.trainers.policy.tf_policy import TFPolicy
 from mlagents.trainers.policy import Policy
@@ -306,7 +309,7 @@ def __init__(
         self.publish_trajectory_queue(self.trajectory_queue)
 
     def record_environment_stats(
-        self, env_stats: Dict[str, Tuple[float, StatsAggregationMethod]], worker_id: int
+        self, env_stats: EnvironmentStats, worker_id: int
     ) -> None:
         """
         Pass stats from the environment to the StatsReporter.
@@ -316,11 +319,12 @@ def record_environment_stats(
         :param worker_id:
         :return:
         """
-        for stat_name, (val, agg_type) in env_stats.items():
-            if agg_type == StatsAggregationMethod.AVERAGE:
-                self.stats_reporter.add_stat(stat_name, val)
-            elif agg_type == StatsAggregationMethod.MOST_RECENT:
-                # In order to prevent conflicts between multiple environments,
-                # only stats from the first environment are recorded.
-                if worker_id == 0:
-                    self.stats_reporter.set_stat(stat_name, val)
+        for stat_name, value_list in env_stats.items():
+            for val, agg_type in value_list:
+                if agg_type == StatsAggregationMethod.AVERAGE:
+                    self.stats_reporter.add_stat(stat_name, val)
+                elif agg_type == StatsAggregationMethod.MOST_RECENT:
+                    # In order to prevent conflicts between multiple environments,
+                    # only stats from the first environment are recorded.
+                    if worker_id == 0:
+                        self.stats_reporter.set_stat(stat_name, val)
diff --git a/ml-agents/mlagents/trainers/env_manager.py b/ml-agents/mlagents/trainers/env_manager.py
@@ -6,7 +6,7 @@
     BehaviorSpec,
     BehaviorName,
 )
-from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
+from mlagents_envs.side_channel.stats_side_channel import EnvironmentStats
 
 from mlagents.trainers.policy.tf_policy import TFPolicy
 from mlagents.trainers.agent_processor import AgentManager, AgentManagerQueue
@@ -23,7 +23,7 @@ class EnvironmentStep(NamedTuple):
     current_all_step_result: AllStepResult
     worker_id: int
     brain_name_to_action_info: Dict[BehaviorName, ActionInfo]
-    environment_stats: Dict[str, Tuple[float, StatsAggregationMethod]]
+    environment_stats: EnvironmentStats
 
     @property
     def name_behavior_ids(self) -> Iterable[BehaviorName]:
diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py
diff --git a/ml-agents/mlagents/trainers/tests/test_agent_processor.py b/ml-agents/mlagents/trainers/tests/test_agent_processor.py

Original file line number	Diff line number	Diff line change
`@@ -9,7 +9,6 @@ public enum StatAggregationMethod`
`9`	`9`	`{`
`10`	`10`	`/// <summary>`
`11`	`11`	`/// Values within the summary period are averaged before reporting.`
`12`		`- /// Note that values from the same C# environment in the same step may replace each other.`
`13`	`12`	`/// </summary>`
`14`	`13`	`Average = 0,`
`15`	`14`