Stats SideChannel (for custom TensorBoard metrics) (#3660)

Chris Elion · web-flow · commit 399ad3c20228 · 2020-03-24T17:46:59.000-07:00
diff --git a/Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorSettings.cs b/Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorSettings.cs
@@ -2,6 +2,7 @@
 using UnityEngine;
 using UnityEngine.UI;
 using MLAgents;
+using MLAgents.SideChannels;
 
 public class FoodCollectorSettings : MonoBehaviour
 {
@@ -13,9 +14,12 @@ public class FoodCollectorSettings : MonoBehaviour
     public int totalScore;
     public Text scoreText;
 
+    StatsSideChannel m_statsSideChannel;
+
     public void Awake()
     {
         Academy.Instance.OnEnvironmentReset += EnvironmentReset;
+        m_statsSideChannel = Academy.Instance.GetSideChannel<StatsSideChannel>();
     }
 
     public void EnvironmentReset()
@@ -44,5 +48,13 @@ void ClearObjects(GameObject[] objects)
     public void Update()
     {
         scoreText.text = $"Score: {totalScore}";
+
+        // Send stats via SideChannel so that they'll appear in TensorBoard.
+        // These values get averaged every summary_frequency steps, so we don't
+        // need to send every Update() call.
+        if ((Time.frameCount % 100)== 0)
+        {
+            m_statsSideChannel?.AddStat("TotalScore", totalScore);
+        }
     }
 }
diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 ### Minor Changes
  - Format of console output has changed slightly and now matches the name of the model/summary directory. (#3630, #3616)
  - Raise the wall in CrawlerStatic scene to prevent Agent from falling off. (#3650)
+ - Added a feature to allow sending stats from C# environments to TensorBoard (and other python StatsWriters). To do this from your code, use `Academy.Instance.GetSideChannel<StatsSideChannel>().AddStat(key, value)` (#3660)
  - Renamed 'Generalization' feature to 'Environment Parameter Randomization'.
  - Fixed an issue where specifying `vis_encode_type` was required only for SAC. (#3677)
  - The way that UnityEnvironment decides the port was changed. If no port is specified, the behavior will depend on the `file_name` parameter. If it is `None`, 5004 (the editor port) will be used; otherwise 5005 (the base environment port) will be used.
diff --git a/com.unity.ml-agents/Runtime/Academy.cs b/com.unity.ml-agents/Runtime/Academy.cs
@@ -235,6 +235,33 @@ public void UnregisterSideChannel(SideChannel channel)
             Communicator?.UnregisterSideChannel(channel);
         }
 
+        /// <summary>
+        /// Returns the SideChannel of Type T if there is one registered, or null if there isn't.
+        /// If there are multiple SideChannels of the same type registered, the returned instance is arbitrary.
+        /// </summary>
+        /// <typeparam name="T"></typeparam>
+        /// <returns></returns>
+        public T GetSideChannel<T>() where T: SideChannel
+        {
+            return Communicator?.GetSideChannel<T>();
+        }
+
+        /// <summary>
+        /// Returns all SideChannels of Type T that are registered. Use <see cref="GetSideChannel{T}()"/> if possible,
+        /// as that does not make any memory allocations.
+        /// </summary>
+        /// <typeparam name="T"></typeparam>
+        /// <returns></returns>
+        public List<T> GetSideChannels<T>() where T: SideChannel
+        {
+            if (Communicator == null)
+            {
+                // Make sure we return a non-null List.
+                return new List<T>();
+            }
+            return Communicator.GetSideChannels<T>();
+        }
+
         /// <summary>
         /// Disable stepping of the Academy during the FixedUpdate phase. If this is called, the Academy must be
         /// stepped manually by the user by calling Academy.EnvironmentStep().
@@ -334,6 +361,7 @@ void InitializeEnvironment()
             {
                 Communicator.RegisterSideChannel(new EngineConfigurationChannel());
                 Communicator.RegisterSideChannel(floatProperties);
+                Communicator.RegisterSideChannel(new StatsSideChannel());
                 // We try to exchange the first message with Python. If this fails, it means
                 // no Python Process is ready to train the environment. In this case, the
                 //environment must use Inference.
diff --git a/com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs b/com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs
@@ -167,5 +167,21 @@ internal interface ICommunicator : IDisposable
         /// </summary>
         /// <param name="sideChannel"> The side channel to be unregistered.</param>
         void UnregisterSideChannel(SideChannel sideChannel);
+
+        /// <summary>
+        /// Returns the SideChannel of Type T if there is one registered, or null if there isn't.
+        /// If there are multiple SideChannels of the same type registered, the returned instance is arbitrary.
+        /// </summary>
+        /// <typeparam name="T"></typeparam>
+        /// <returns></returns>
+        T GetSideChannel<T>() where T : SideChannel;
+
+        /// <summary>
+        /// Returns all SideChannels of Type T that are registered. Use <see cref="GetSideChannel{T}()"/> if possible,
+        /// as that does not make any memory allocations.
+        /// </summary>
+        /// <typeparam name="T"></typeparam>
+        /// <returns></returns>
+        List<T> GetSideChannels<T>() where T : SideChannel;
     }
 }
diff --git a/com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs b/com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs
@@ -544,6 +544,34 @@ public void UnregisterSideChannel(SideChannel sideChannel)
             }
         }
 
+        /// <inheritdoc/>
+        public T GetSideChannel<T>() where T: SideChannel
+        {
+            foreach (var sc in m_SideChannels.Values)
+            {
+                if (sc.GetType() == typeof(T))
+                {
+                    return (T) sc;
+                }
+            }
+            return null;
+        }
+
+        /// <inheritdoc/>
+        public List<T> GetSideChannels<T>() where T: SideChannel
+        {
+            var output = new List<T>();
+
+            foreach (var sc in m_SideChannels.Values)
+            {
+                if (sc.GetType() == typeof(T))
+                {
+                    output.Add((T) sc);
+                }
+            }
+            return output;
+        }
+
         /// <summary>
         /// Grabs the messages that the registered side channels will send to Python at the current step
         /// into a singe byte array.
diff --git a/com.unity.ml-agents/Runtime/SideChannels/EngineConfigurationChannel.cs b/com.unity.ml-agents/Runtime/SideChannels/EngineConfigurationChannel.cs
@@ -8,12 +8,13 @@ namespace MLAgents.SideChannels
     /// </summary>
     public class EngineConfigurationChannel : SideChannel
     {
-        private const string k_EngineConfigId = "e951342c-4f7e-11ea-b238-784f4387d1f7";
+        const string k_EngineConfigId = "e951342c-4f7e-11ea-b238-784f4387d1f7";
 
         /// <summary>
-        /// Initializes the side channel.
+        /// Initializes the side channel. The constructor is internal because only one instance is
+        /// supported at a time, and is created by the Academy.
         /// </summary>
-        public EngineConfigurationChannel()
+        internal EngineConfigurationChannel()
         {
             ChannelId = new Guid(k_EngineConfigId);
         }
diff --git a/com.unity.ml-agents/Runtime/SideChannels/StatsSideChannel.cs b/com.unity.ml-agents/Runtime/SideChannels/StatsSideChannel.cs
@@ -0,0 +1,72 @@
+using System;
+namespace MLAgents.SideChannels
+{
+    /// <summary>
+    /// Determines the behavior of how multiple stats within the same summary period are combined.
+    /// </summary>
+    public enum StatAggregationMethod
+    {
+        /// <summary>
+        /// Values within the summary period are averaged before reporting.
+        /// Note that values from the same C# environment in the same step may replace each other.
+        /// </summary>
+        Average = 0,
+
+        /// <summary>
+        /// Only the most recent value is reported.
+        /// To avoid conflicts between multiple environments, the ML Agents environment will only
+        /// keep stats from worker index 0.
+        /// </summary>
+        MostRecent = 1
+    }
+
+    /// <summary>
+    /// Add stats (key-value pairs) for reporting. The ML Agents environment will send these to a StatsReporter
+    /// instance, which means the values will appear in the Tensorboard summary, as well as trainer gauges.
+    /// Note that stats are only written every summary_frequency steps; see <see cref="StatAggregationMethod"/>
+    /// for options on how multiple values are handled.
+    /// </summary>
+    public class StatsSideChannel : SideChannel
+    {
+        const string k_StatsSideChannelDefaultId = "a1d8f7b7-cec8-50f9-b78b-d3e165a78520";
+
+        /// <summary>
+        /// Initializes the side channel with the default stats channel ID.
+        /// The constructor is internal because only one instance is
+        /// supported at a time, and is created by the Academy.
+        /// </summary>
+        internal StatsSideChannel()
+        {
+            ChannelId = new Guid(k_StatsSideChannelDefaultId);
+        }
+
+        /// <summary>
+        /// Add a stat value for reporting. This will appear in the Tensorboard summary and trainer gauges.
+        /// You can nest stats in Tensorboard with "/".
+        /// Note that stats are only written to Tensorboard each summary_frequency steps; how multiple
+        /// values received within that period are combined depends on <paramref name="aggregationMethod"/>.
+        /// For MostRecent, only stats from worker index 0 are used, to avoid conflicts between environments.
+        /// </summary>
+        /// <param name="key">The stat name. You can nest stats in Tensorboard by using "/".</param>
+        /// <param name="value">The stat value.</param>
+        /// <param name="aggregationMethod">How multiple values should be treated.</param>
+        public void AddStat(
+            string key, float value, StatAggregationMethod aggregationMethod = StatAggregationMethod.Average
+            )
+        {
+            using (var msg = new OutgoingMessage())
+            {
+                msg.WriteString(key);
+                msg.WriteFloat32(value);
+                msg.WriteInt32((int)aggregationMethod);
+                QueueMessageToSend(msg);
+            }
+        }
+
+        /// <inheritdoc/>
+        public override void OnMessageReceived(IncomingMessage msg)
+        {
+            throw new UnityAgentsException("StatsSideChannel should never receive messages.");
+        }
+    }
+}
diff --git a/com.unity.ml-agents/Runtime/SideChannels/StatsSideChannel.cs.meta b/com.unity.ml-agents/Runtime/SideChannels/StatsSideChannel.cs.meta
diff --git a/docs/Using-Tensorboard.md b/docs/Using-Tensorboard.md
@@ -87,3 +87,10 @@ The ML-Agents training program saves the following statistics:
   taken between two observations.
 
 * `Losses/Cloning Loss` (BC) - The mean magnitude of the behavioral cloning loss. Corresponds to how well the model imitates the demonstration data.
+
+## Custom Metrics from C#
+To get custom metrics from a C# environment into Tensorboard, you can use the StatsSideChannel:
+```csharp
+var statsSideChannel = Academy.Instance.GetSideChannel<StatsSideChannel>();
+statsSideChannel.AddStat("MyMetric", 1.0f);
+```
diff --git a/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py b/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py
@@ -0,0 +1,48 @@
+from mlagents_envs.side_channel import SideChannel, IncomingMessage
+import uuid
+from typing import Dict, Tuple
+from enum import Enum
+
+
+# Determines the behavior of how multiple stats within the same summary period are combined.
+class StatsAggregationMethod(Enum):
+    # Values within the summary period are averaged before reporting.
+    AVERAGE = 0
+
+    # Only the most recent value is reported.
+    MOST_RECENT = 1
+
+
+class StatsSideChannel(SideChannel):
+    """
+    Side channel that receives (string, float) pairs from the environment, so that they can eventually
+    be passed to a StatsReporter.
+    """
+
+    def __init__(self) -> None:
+        # >>> uuid.uuid5(uuid.NAMESPACE_URL, "com.unity.ml-agents/StatsSideChannel")
+        # UUID('a1d8f7b7-cec8-50f9-b78b-d3e165a78520')
+        super().__init__(uuid.UUID("a1d8f7b7-cec8-50f9-b78b-d3e165a78520"))
+
+        self.stats: Dict[str, Tuple[float, StatsAggregationMethod]] = {}
+
+    def on_message_received(self, msg: IncomingMessage) -> None:
+        """
+        Receive the message from the environment, and save it for later retrieval.
+        :param msg:
+        :return:
+        """
+        key = msg.read_string()
+        val = msg.read_float32()
+        agg_type = StatsAggregationMethod(msg.read_int32())
+
+        self.stats[key] = (val, agg_type)
+
+    def get_and_reset_stats(self) -> Dict[str, Tuple[float, StatsAggregationMethod]]:
+        """
+        Returns the current stats, and resets the internal storage of the stats.
+        :return:
+        """
+        s = self.stats
+        self.stats = {}
+        return s
diff --git a/ml-agents/mlagents/trainers/agent_processor.py b/ml-agents/mlagents/trainers/agent_processor.py
@@ -3,6 +3,7 @@
 from collections import defaultdict, Counter, deque
 
 from mlagents_envs.base_env import BatchedStepResult, StepResult
+from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
 from mlagents.trainers.trajectory import Trajectory, AgentExperience
 from mlagents.trainers.policy.tf_policy import TFPolicy
 from mlagents.trainers.policy import Policy
@@ -267,3 +268,23 @@ def __init__(
             self.behavior_id
         )
         self.publish_trajectory_queue(self.trajectory_queue)
+
+    def record_environment_stats(
+        self, env_stats: Dict[str, Tuple[float, StatsAggregationMethod]], worker_id: int
+    ) -> None:
+        """
+        Pass stats from the environment to the StatsReporter.
+        Depending on the StatsAggregationMethod, either StatsReporter.add_stat or StatsReporter.set_stat is used.
+        The worker_id is used to determine whether StatsReporter.set_stat should be used.
+        :param env_stats:
+        :param worker_id:
+        :return:
+        """
+        for stat_name, (val, agg_type) in env_stats.items():
+            if agg_type == StatsAggregationMethod.AVERAGE:
+                self.stats_reporter.add_stat(stat_name, val)
+            elif agg_type == StatsAggregationMethod.MOST_RECENT:
+                # In order to prevent conflicts between multiple environments,
+                # only stats from the first environment are recorded.
+                if worker_id == 0:
+                    self.stats_reporter.set_stat(stat_name, val)
diff --git a/ml-agents/mlagents/trainers/env_manager.py b/ml-agents/mlagents/trainers/env_manager.py
@@ -1,7 +1,8 @@
 from abc import ABC, abstractmethod
 import logging
-from typing import List, Dict, NamedTuple, Iterable
+from typing import List, Dict, NamedTuple, Iterable, Tuple
 from mlagents_envs.base_env import BatchedStepResult, AgentGroupSpec, AgentGroup
+from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
 from mlagents.trainers.brain import BrainParameters
 from mlagents.trainers.policy.tf_policy import TFPolicy
 from mlagents.trainers.agent_processor import AgentManager, AgentManagerQueue
@@ -17,14 +18,15 @@ class EnvironmentStep(NamedTuple):
     current_all_step_result: AllStepResult
     worker_id: int
     brain_name_to_action_info: Dict[AgentGroup, ActionInfo]
+    environment_stats: Dict[str, Tuple[float, StatsAggregationMethod]]
 
     @property
     def name_behavior_ids(self) -> Iterable[AgentGroup]:
         return self.current_all_step_result.keys()
 
     @staticmethod
     def empty(worker_id: int) -> "EnvironmentStep":
-        return EnvironmentStep({}, worker_id, {})
+        return EnvironmentStep({}, worker_id, {}, {})
 
 
 class EnvManager(ABC):
@@ -108,4 +110,8 @@ def _process_step_infos(self, step_infos: List[EnvironmentStep]) -> int:
                         name_behavior_id, ActionInfo.empty()
                     ),
                 )
+
+                self.agent_managers[name_behavior_id].record_environment_stats(
+                    step_info.environment_stats, step_info.worker_id
+                )
         return len(step_infos)
diff --git a/ml-agents/mlagents/trainers/simple_env_manager.py b/ml-agents/mlagents/trainers/simple_env_manager.py
@@ -31,7 +31,9 @@ def _step(self) -> List[EnvironmentStep]:
         self.env.step()
         all_step_result = self._generate_all_results()
 
-        step_info = EnvironmentStep(all_step_result, 0, self.previous_all_action_info)
+        step_info = EnvironmentStep(
+            all_step_result, 0, self.previous_all_action_info, {}
+        )
         self.previous_step = step_info
         return [step_info]
 
@@ -43,7 +45,7 @@ def _reset_env(
                 self.shared_float_properties.set_property(k, v)
         self.env.reset()
         all_step_result = self._generate_all_results()
-        self.previous_step = EnvironmentStep(all_step_result, 0, {})
+        self.previous_step = EnvironmentStep(all_step_result, 0, {}, {})
         return [self.previous_step]
 
     @property
diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py
diff --git a/ml-agents/mlagents/trainers/tests/test_agent_processor.py b/ml-agents/mlagents/trainers/tests/test_agent_processor.py
diff --git a/ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py b/ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py

Original file line number	Diff line number	Diff line change
`@@ -2,6 +2,7 @@`
`2`	`2`	`using UnityEngine;`
`3`	`3`	`using UnityEngine.UI;`
`4`	`4`	`using MLAgents;`
	`5`	`+using MLAgents.SideChannels;`
`5`	`6`
`6`	`7`	`public class FoodCollectorSettings : MonoBehaviour`
`7`	`8`	`{`
`@@ -13,9 +14,12 @@ public class FoodCollectorSettings : MonoBehaviour`
`13`	`14`	`public int totalScore;`
`14`	`15`	`public Text scoreText;`
`15`	`16`
	`17`	`+ StatsSideChannel m_statsSideChannel;`
	`18`	`+`
`16`	`19`	`public void Awake()`
`17`	`20`	`{`
`18`	`21`	`Academy.Instance.OnEnvironmentReset += EnvironmentReset;`
	`22`	`+ m_statsSideChannel = Academy.Instance.GetSideChannel<StatsSideChannel>();`
`19`	`23`	`}`
`20`	`24`
`21`	`25`	`public void EnvironmentReset()`
`@@ -44,5 +48,13 @@ void ClearObjects(GameObject[] objects)`
`44`	`48`	`public void Update()`
`45`	`49`	`{`
`46`	`50`	`scoreText.text = $"Score: {totalScore}";`
	`51`	`+`
	`52`	`+ // Send stats via SideChannel so that they'll appear in TensorBoard.`
	`53`	`+ // These values get averaged every summary_frequency steps, so we don't`
	`54`	`+ // need to send every Update() call.`
	`55`	`+ if ((Time.frameCount % 100)== 0)`
	`56`	`+ {`
	`57`	`+ m_statsSideChannel?.AddStat("TotalScore", totalScore);`
	`58`	`+ }`
`47`	`59`	`}`
`48`	`60`	`}`
Original file line number	Diff line number	Diff line change
`@@ -8,12 +8,13 @@ namespace MLAgents.SideChannels`
`8`	`8`	`/// </summary>`
`9`	`9`	`public class EngineConfigurationChannel : SideChannel`
`10`	`10`	`{`
`11`		`- private const string k_EngineConfigId = "e951342c-4f7e-11ea-b238-784f4387d1f7";`
	`11`	`+ const string k_EngineConfigId = "e951342c-4f7e-11ea-b238-784f4387d1f7";`
`12`	`12`
`13`	`13`	`/// <summary>`
`14`		`- /// Initializes the side channel.`
	`14`	`+ /// Initializes the side channel. The constructor is internal because only one instance is`
	`15`	`+ /// supported at a time, and is created by the Academy.`
`15`	`16`	`/// </summary>`
`16`		`- public EngineConfigurationChannel()`
	`17`	`+ internal EngineConfigurationChannel()`
`17`	`18`	`{`
`18`	`19`	`ChannelId = new Guid(k_EngineConfigId);`
`19`	`20`	`}`