Remove TrainerMetrics and add CSVWriter using new StatsWriter API (#3108)

Ervin T · web-flow · commit b04395c69afd · 2019-12-20T15:42:38.000-08:00
diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py
@@ -17,7 +17,7 @@
 from mlagents.trainers.exception import TrainerError
 from mlagents.trainers.meta_curriculum import MetaCurriculum
 from mlagents.trainers.trainer_util import load_config, TrainerFactory
-from mlagents.trainers.stats import TensorboardWriter, StatsReporter
+from mlagents.trainers.stats import TensorboardWriter, CSVWriter, StatsReporter
 from mlagents_envs.environment import UnityEnvironment
 from mlagents.trainers.sampler_class import SamplerManager
 from mlagents.trainers.exception import SamplerException
@@ -250,9 +250,15 @@ def run_training(
     trainer_config = load_config(trainer_config_path)
     port = options.base_port + (sub_id * options.num_envs)
 
-    # Configure Tensorboard Writers and StatsReporter
+    # Configure CSV, Tensorboard Writers and StatsReporter
+    # We assume reward and episode length are needed in the CSV.
+    csv_writer = CSVWriter(
+        summaries_dir,
+        required_fields=["Environment/Cumulative Reward", "Environment/Episode Length"],
+    )
     tb_writer = TensorboardWriter(summaries_dir)
     StatsReporter.add_writer(tb_writer)
+    StatsReporter.add_writer(csv_writer)
 
     if options.env_path is None:
         port = 5004  # This is the in Editor Training Port
diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py
@@ -177,10 +177,6 @@ def update_policy(self):
         The reward signal generators must be updated in this method at their own pace.
         """
         buffer_length = self.update_buffer.num_experiences
-        self.trainer_metrics.start_policy_update_timer(
-            number_experiences=buffer_length,
-            mean_return=float(np.mean(self.cumulative_returns_since_policy_update)),
-        )
         self.cumulative_returns_since_policy_update.clear()
 
         # Make sure batch_size is a multiple of sequence length. During training, we
@@ -221,7 +217,6 @@ def update_policy(self):
             for stat, val in update_stats.items():
                 self.stats_reporter.add_stat(stat, val)
         self.clear_update_buffer()
-        self.trainer_metrics.end_policy_update()
 
 
 def discount_rewards(r, gamma=0.99, value_next=0.0):
diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py
@@ -207,13 +207,8 @@ def update_policy(self) -> None:
         If reward_signal_train_interval is met, update the reward signals from the buffer.
         """
         if self.step % self.train_interval == 0:
-            self.trainer_metrics.start_policy_update_timer(
-                number_experiences=self.update_buffer.num_experiences,
-                mean_return=float(np.mean(self.cumulative_returns_since_policy_update)),
-            )
             self.update_sac_policy()
             self.update_reward_signals()
-            self.trainer_metrics.end_policy_update()
 
     def update_sac_policy(self) -> None:
         """
diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py
@@ -2,19 +2,28 @@
 from typing import List, Dict, NamedTuple
 import numpy as np
 import abc
+import csv
 import os
 
 from mlagents.tf_utils import tf
 
 
+class StatsSummary(NamedTuple):
+    mean: float
+    std: float
+    num: int
+
+
 class StatsWriter(abc.ABC):
     """
     A StatsWriter abstract class. A StatsWriter takes in a category, key, scalar value, and step
     and writes it out by some method.
     """
 
     @abc.abstractmethod
-    def write_stats(self, category: str, key: str, value: float, step: int) -> None:
+    def write_stats(
+        self, category: str, values: Dict[str, StatsSummary], step: int
+    ) -> None:
         pass
 
     @abc.abstractmethod
@@ -24,15 +33,23 @@ def write_text(self, category: str, text: str, step: int) -> None:
 
 class TensorboardWriter(StatsWriter):
     def __init__(self, base_dir: str):
+        """
+        A StatsWriter that writes to a Tensorboard summary.
+        :param base_dir: The directory within which to place all the summaries. Tensorboard files will be written to a
+        {base_dir}/{category} directory.
+        """
         self.summary_writers: Dict[str, tf.summary.FileWriter] = {}
         self.base_dir: str = base_dir
 
-    def write_stats(self, category: str, key: str, value: float, step: int) -> None:
+    def write_stats(
+        self, category: str, values: Dict[str, StatsSummary], step: int
+    ) -> None:
         self._maybe_create_summary_writer(category)
-        summary = tf.Summary()
-        summary.value.add(tag="{}".format(key), simple_value=value)
-        self.summary_writers[category].add_summary(summary, step)
-        self.summary_writers[category].flush()
+        for key, value in values.items():
+            summary = tf.Summary()
+            summary.value.add(tag="{}".format(key), simple_value=value.mean)
+            self.summary_writers[category].add_summary(summary, step)
+            self.summary_writers[category].flush()
 
     def _maybe_create_summary_writer(self, category: str) -> None:
         if category not in self.summary_writers:
@@ -47,10 +64,59 @@ def write_text(self, category: str, text: str, step: int) -> None:
         self.summary_writers[category].add_summary(text, step)
 
 
-class StatsSummary(NamedTuple):
-    mean: float
-    std: float
-    num: int
+class CSVWriter(StatsWriter):
+    def __init__(self, base_dir: str, required_fields: List[str] = None):
+        """
+        A StatsWriter that writes to a CSV file.
+        :param base_dir: The directory within which to place the CSV file, which will be {base_dir}/{category}.csv.
+        :param required_fields: If provided, the CSV writer won't write until these fields have statistics to write for
+        them.
+        """
+        # We need to keep track of the fields in the CSV, as all rows need the same fields.
+        self.csv_fields: Dict[str, List[str]] = {}
+        self.required_fields = required_fields if required_fields else []
+        self.base_dir: str = base_dir
+
+    def write_stats(
+        self, category: str, values: Dict[str, StatsSummary], step: int
+    ) -> None:
+        if self._maybe_create_csv_file(category, list(values.keys())):
+            row = [str(step)]
+            # Only record the stats that showed up in the first valid row
+            for key in self.csv_fields[category]:
+                _val = values.get(key, None)
+                row.append(str(_val.mean) if _val else "None")
+            with open(self._get_filepath(category), "a") as file:
+                writer = csv.writer(file)
+                writer.writerow(row)
+
+    def _maybe_create_csv_file(self, category: str, keys: List[str]) -> bool:
+        """
+        If no CSV file exists and the keys have the required values,
+        make the CSV file and write the title row.
+        Returns True if there is now (or already is) a valid CSV file.
+        """
+        if category not in self.csv_fields:
+            summary_dir = self.base_dir
+            os.makedirs(summary_dir, exist_ok=True)
+            # Only store if the row contains the required fields
+            if all(item in keys for item in self.required_fields):
+                self.csv_fields[category] = keys
+                with open(self._get_filepath(category), "w") as file:
+                    title_row = ["Steps"]
+                    title_row.extend(keys)
+                    writer = csv.writer(file)
+                    writer.writerow(title_row)
+                return True
+            return False
+        return True
+
+    def _get_filepath(self, category: str) -> str:
+        file_dir = os.path.join(self.base_dir, category + ".csv")
+        return file_dir
+
+    def write_text(self, category: str, text: str, step: int) -> None:
+        pass
 
 
 class StatsReporter:
@@ -87,11 +153,13 @@ def write_stats(self, step: int) -> None:
         :param category: The category which to write out the stats.
         :param step: Training step which to write these stats as.
         """
+        values: Dict[str, StatsSummary] = {}
         for key in StatsReporter.stats_dict[self.category]:
             if len(StatsReporter.stats_dict[self.category][key]) > 0:
-                stat_mean = float(np.mean(StatsReporter.stats_dict[self.category][key]))
-                for writer in StatsReporter.writers:
-                    writer.write_stats(self.category, key, stat_mean, step)
+                stat_summary = self.get_stats_summaries(key)
+                values[key] = stat_summary
+        for writer in StatsReporter.writers:
+            writer.write_stats(self.category, values, step)
         del StatsReporter.stats_dict[self.category]
 
     def write_text(self, text: str, step: int) -> None:
diff --git a/ml-agents/mlagents/trainers/tests/test_stats.py b/ml-agents/mlagents/trainers/tests/test_stats.py
@@ -2,8 +2,14 @@
 import os
 import pytest
 import tempfile
+import csv
 
-from mlagents.trainers.stats import StatsReporter, TensorboardWriter
+from mlagents.trainers.stats import (
+    StatsReporter,
+    TensorboardWriter,
+    CSVWriter,
+    StatsSummary,
+)
 
 
 def test_stat_reporter_add_summary_write():
@@ -35,8 +41,12 @@ def test_stat_reporter_add_summary_write():
     # Test write_stats
     step = 10
     statsreporter1.write_stats(step)
-    mock_writer1.write_stats.assert_called_once_with("category1", "key1", 4.5, step)
-    mock_writer2.write_stats.assert_called_once_with("category1", "key1", 4.5, step)
+    mock_writer1.write_stats.assert_called_once_with(
+        "category1", {"key1": statssummary1}, step
+    )
+    mock_writer2.write_stats.assert_called_once_with(
+        "category1", {"key1": statssummary1}, step
+    )
 
 
 def test_stat_reporter_text():
@@ -61,7 +71,8 @@ def test_tensorboard_writer(mock_filewriter, mock_summary):
     category = "category1"
     with tempfile.TemporaryDirectory(prefix="unittest-") as base_dir:
         tb_writer = TensorboardWriter(base_dir)
-        tb_writer.write_stats("category1", "key1", 1.0, 10)
+        statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
+        tb_writer.write_stats("category1", {"key1": statssummary1}, 10)
 
         # Test that the filewriter has been created and the directory has been created.
         filewriter_dir = "{basedir}/{category}".format(
@@ -78,3 +89,43 @@ def test_tensorboard_writer(mock_filewriter, mock_summary):
             mock_summary.return_value, 10
         )
         mock_filewriter.return_value.flush.assert_called_once()
+
+
+def test_csv_writer():
+    # Test write_stats
+    category = "category1"
+    with tempfile.TemporaryDirectory(prefix="unittest-") as base_dir:
+        csv_writer = CSVWriter(base_dir, required_fields=["key1", "key2"])
+        statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
+        csv_writer.write_stats("category1", {"key1": statssummary1}, 10)
+
+        # Path at which the CSV file is expected once the required fields are present.
+        filewriter_dir = "{basedir}/{category}.csv".format(
+            basedir=base_dir, category=category
+        )
+        # The required keys weren't in the stats
+        assert not os.path.exists(filewriter_dir)
+
+        csv_writer.write_stats(
+            "category1", {"key1": statssummary1, "key2": statssummary1}, 10
+        )
+        csv_writer.write_stats(
+            "category1", {"key1": statssummary1, "key2": statssummary1}, 20
+        )
+
+        # The required keys were in the stats
+        assert os.path.exists(filewriter_dir)
+
+        with open(filewriter_dir) as csv_file:
+            csv_reader = csv.reader(csv_file, delimiter=",")
+            line_count = 0
+            for row in csv_reader:
+                if line_count == 0:
+                    assert "key1" in row
+                    assert "key2" in row
+                    assert "Steps" in row
+                    line_count += 1
+                else:
+                    assert len(row) == 3
+                    line_count += 1
+            assert line_count == 3
diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_metrics.py b/ml-agents/mlagents/trainers/tests/test_trainer_metrics.py
diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_util.py b/ml-agents/mlagents/trainers/tests/test_trainer_util.py
@@ -5,7 +5,6 @@
 
 import mlagents.trainers.trainer_util as trainer_util
 from mlagents.trainers.trainer_util import load_config, _load_config
-from mlagents.trainers.trainer_metrics import TrainerMetrics
 from mlagents.trainers.ppo.trainer import PPOTrainer
 from mlagents.trainers.exception import TrainerConfigError
 from mlagents.trainers.brain import BrainParameters
@@ -119,7 +118,6 @@ def mock_constructor(
         run_id,
         multi_gpu,
     ):
-        self.trainer_metrics = TrainerMetrics("", "")
         assert brain == brain_params_mock
         assert trainer_parameters == expected_config
         assert reward_buff_cap == expected_reward_buff_cap
@@ -178,7 +176,6 @@ def mock_constructor(
         run_id,
         multi_gpu,
     ):
-        self.trainer_metrics = TrainerMetrics("", "")
         assert brain == brain_params_mock
         assert trainer_parameters == expected_config
         assert reward_buff_cap == expected_reward_buff_cap
diff --git a/ml-agents/mlagents/trainers/trainer.py b/ml-agents/mlagents/trainers/trainer.py
@@ -8,7 +8,6 @@
 
 from mlagents_envs.exception import UnityException
 from mlagents_envs.timers import set_gauge
-from mlagents.trainers.trainer_metrics import TrainerMetrics
 from mlagents.trainers.tf_policy import TFPolicy
 from mlagents.trainers.stats import StatsReporter
 from mlagents.trainers.trajectory import Trajectory
@@ -52,9 +51,6 @@ def __init__(
         self.stats_reporter = StatsReporter(self.summary_path)
         self.cumulative_returns_since_policy_update: List[float] = []
         self.is_training = training
-        self.trainer_metrics = TrainerMetrics(
-            path=self.summary_path + ".csv", brain_name=self.brain_name
-        )
         self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap)
         self.policy: TFPolicy = None  # type: ignore  # this will always get set
         self.step: int = 0
@@ -170,13 +166,6 @@ def export_model(self) -> None:
         """
         self.policy.export_model()
 
-    def write_training_metrics(self) -> None:
-        """
-        Write training metrics to a CSV  file
-        :return:
-        """
-        self.trainer_metrics.write_training_metrics()
-
     def write_summary(self, global_step: int, delta_train_start: float) -> None:
         """
         Saves training statistics to Tensorboard.
diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py
diff --git a/ml-agents/mlagents/trainers/trainer_metrics.py b/ml-agents/mlagents/trainers/trainer_metrics.py