[Tune] Post-Experiment Tools (ray-project#4351)
Adi Zimmerman authored and richardliaw committed May 4, 2019
1 parent 406c429 commit 36b71d1
Showing 11 changed files with 290 additions and 192 deletions.
6 changes: 6 additions & 0 deletions doc/source/tune-package-ref.rst
@@ -37,6 +37,12 @@ ray.tune.suggest
    :private-members:
    :show-inheritance:

ray.tune.analysis
-----------------

.. autoclass:: ray.tune.analysis.ExperimentAnalysis
    :members:


ray.tune.logger
---------------
25 changes: 14 additions & 11 deletions doc/source/tune-usage.rst
@@ -327,10 +327,21 @@ The following fields will automatically show up on the console output, if provided:
Example_0: TERMINATED [pid=68248], 179 s, 2 iter, 60000 ts, 94 rew
Logging, Analyzing, and Visualizing Results
-------------------------------------------

All results reported by the trainable will be logged locally to a unique directory per experiment, e.g. ``~/ray_results/my_experiment`` in the above example. On a cluster, incremental results will be synced to local disk on the head node.

Tune provides an ``ExperimentAnalysis`` object for analyzing results. It can be constructed by passing the path to the experiment directory:

.. code-block:: python

    from ray.tune.analysis import ExperimentAnalysis
    ea = ExperimentAnalysis("~/ray_results/my_experiment")
    trials_dataframe = ea.dataframe()

You can check out `experiment_analysis.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/analysis/experiment_analysis.py>`__ for more interesting analysis operations.
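As a sketch of what you can then do with the resulting DataFrame: the ``mean_accuracy`` column and the sample rows below are hypothetical stand-ins for whatever metrics your trainable actually reports.

```python
import pandas as pd

# Hypothetical stand-in for ExperimentAnalysis.dataframe(); the
# `mean_accuracy` column exists only if your trainable reported it.
trials_dataframe = pd.DataFrame([
    {"trial_id": "a1", "mean_accuracy": 0.91, "training_iteration": 10},
    {"trial_id": "b2", "mean_accuracy": 0.87, "training_iteration": 10},
    {"trial_id": "c3", "mean_accuracy": 0.94, "training_iteration": 10},
])

# Rank trials by the reported metric, best first.
best_first = trials_dataframe.sort_values("mean_accuracy", ascending=False)
print(best_first.iloc[0]["trial_id"])  # trial with the highest accuracy
```

From here, ordinary pandas operations (filtering, grouping, plotting) apply.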

To visualize learning in tensorboard, install TensorFlow:

@@ -355,14 +366,6 @@ To use rllab's VisKit (you may have to install some dependencies), run:
.. image:: ray-tune-viskit.png

Finally, to view the results with a `parallel coordinates visualization <https://en.wikipedia.org/wiki/Parallel_coordinates>`__, open `ParallelCoordinatesVisualization.ipynb <https://github.com/ray-project/ray/blob/master/python/ray/tune/ParallelCoordinatesVisualization.ipynb>`__ as follows and run its cells:

.. code-block:: bash

    $ cd $RAY_HOME/python/ray/tune
    $ jupyter-notebook ParallelCoordinatesVisualization.ipynb

.. image:: ray-tune-parcoords.png

Custom Loggers
~~~~~~~~~~~~~~
2 changes: 1 addition & 1 deletion doc/source/tune.rst
@@ -27,7 +27,7 @@ Features

* Mix and match different hyperparameter optimization approaches - such as using `HyperOpt with HyperBand`_ or `Nevergrad with HyperBand`_.

* Visualize results with `TensorBoard <https://www.tensorflow.org/get_started/summaries_and_tensorboard>`__ and `rllab's VisKit <https://github.com/vitchyr/viskit>`__.

* Scale to running on a large distributed cluster without changing your code.

129 changes: 0 additions & 129 deletions python/ray/tune/ParallelCoordinatesVisualization.ipynb

This file was deleted.

7 changes: 7 additions & 0 deletions python/ray/tune/analysis/__init__.py
@@ -0,0 +1,7 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from ray.tune.analysis.experiment_analysis import ExperimentAnalysis

__all__ = ["ExperimentAnalysis"]
108 changes: 108 additions & 0 deletions python/ray/tune/analysis/experiment_analysis.py
@@ -0,0 +1,108 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy
import glob
import json
import logging
import os
import pandas as pd

from ray.tune.error import TuneError
from ray.tune.util import flatten_dict

logger = logging.getLogger(__name__)

UNNEST_KEYS = ("config", "last_result")


def unnest_checkpoints(checkpoints):
    checkpoint_dicts = []
    for g in checkpoints:
        checkpoint = copy.deepcopy(g)
        for key in UNNEST_KEYS:
            if key not in checkpoint:
                continue
            try:
                unnest_dict = flatten_dict(checkpoint.pop(key))
                checkpoint.update(unnest_dict)
            except Exception:
                logger.debug("Failed to flatten dict.")
        checkpoint = flatten_dict(checkpoint)
        checkpoint_dicts.append(checkpoint)
    return checkpoint_dicts
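To illustrate what this unnesting produces, here is a minimal, self-contained sketch that uses a simplified stand-in for ``ray.tune.util.flatten_dict`` (the real helper may differ in delimiter and edge-case handling):

```python
import copy

def flatten_dict_sketch(d, delimiter="/"):
    # Simplified stand-in for ray.tune.util.flatten_dict: nested keys
    # are joined with a delimiter so each checkpoint becomes one flat row.
    flat = {}
    for key, value in d.items():
        if isinstance(value, dict):
            for subkey, subvalue in flatten_dict_sketch(value, delimiter).items():
                flat[key + delimiter + subkey] = subvalue
        else:
            flat[key] = value
    return flat

checkpoint = {"trial_id": "a1", "config": {"lr": 0.01, "momentum": 0.9}}
flat = copy.deepcopy(checkpoint)
flat.update(flatten_dict_sketch(flat.pop("config")))  # "config" is an UNNEST_KEY
flat = flatten_dict_sketch(flat)                      # final pass over the record
print(flat)  # {'trial_id': 'a1', 'lr': 0.01, 'momentum': 0.9}
```

Each checkpoint thus becomes a single flat dict, which is what makes the ``pd.DataFrame`` construction below straightforward.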


class ExperimentAnalysis(object):
    """Analyze results from a Tune experiment.

    Parameters:
        experiment_path (str): Path to where experiment is located.
            Corresponds to Experiment.local_dir/Experiment.name.

    Example:
        >>> tune.run(my_trainable, name="my_exp", local_dir="~/tune_results")
        >>> analysis = ExperimentAnalysis(
        >>>     experiment_path="~/tune_results/my_exp")
    """

    def __init__(self, experiment_path):
        experiment_path = os.path.expanduser(experiment_path)
        if not os.path.isdir(experiment_path):
            raise TuneError(
                "{} is not a valid directory.".format(experiment_path))
        experiment_state_paths = glob.glob(
            os.path.join(experiment_path, "experiment_state*.json"))
        if not experiment_state_paths:
            raise TuneError("No experiment state found!")
        experiment_filename = max(
            list(experiment_state_paths))  # if more than one, pick latest
        with open(os.path.join(experiment_path, experiment_filename)) as f:
            self._experiment_state = json.load(f)

        if "checkpoints" not in self._experiment_state:
            raise TuneError("Experiment state invalid; no checkpoints found.")
        self._checkpoints = self._experiment_state["checkpoints"]
        self._scrubbed_checkpoints = unnest_checkpoints(self._checkpoints)

    def dataframe(self):
        """Returns a pandas.DataFrame object constructed from the trials."""
        return pd.DataFrame(self._scrubbed_checkpoints)

    def stats(self):
        """Returns a dictionary of the statistics of the experiment."""
        return self._experiment_state.get("stats")

    def runner_data(self):
        """Returns a dictionary of the TrialRunner data."""
        return self._experiment_state.get("runner_data")

    def trial_dataframe(self, trial_id):
        """Returns a pandas.DataFrame constructed from one trial."""
        for checkpoint in self._checkpoints:
            if checkpoint["trial_id"] == trial_id:
                logdir = checkpoint["logdir"]
                progress = max(glob.glob(os.path.join(logdir, "progress.csv")))
                return pd.read_csv(progress)
        raise ValueError("Trial id {} not found".format(trial_id))

    def get_best_trainable(self, metric, trainable_cls):
        """Returns the best Trainable based on the experiment metric."""
        return trainable_cls(config=self.get_best_config(metric))

    def get_best_config(self, metric):
        """Retrieve the best config from the best trial."""
        return self._get_best_trial(metric)["config"]

    def _get_best_trial(self, metric):
        """Retrieve the best trial based on the experiment metric."""
        return max(
            self._checkpoints, key=lambda d: d["last_result"].get(metric, 0))

    def _get_sorted_trials(self, metric):
        """Retrieve trials in sorted order based on the experiment metric."""
        return sorted(
            self._checkpoints,
            key=lambda d: d["last_result"].get(metric, 0),
            reverse=True)
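The best-trial selection can be sketched in isolation, using plain dicts in place of Tune checkpoint records; ``episode_reward_mean`` is just an example metric name, not something every experiment logs.

```python
# Plain-dict stand-ins for Tune checkpoint records; the metric name is
# an arbitrary example chosen for illustration.
checkpoints = [
    {"trial_id": "a1", "last_result": {"episode_reward_mean": 10.0}},
    {"trial_id": "b2", "last_result": {"episode_reward_mean": 42.0}},
    {"trial_id": "c3", "last_result": {}},  # missing metric counts as 0
]

def get_best_trial(checkpoints, metric):
    # Mirrors the _get_best_trial logic: take the max over last_result,
    # defaulting to 0 when a trial never reported the metric.
    return max(checkpoints, key=lambda d: d["last_result"].get(metric, 0))

best = get_best_trial(checkpoints, "episode_reward_mean")
print(best["trial_id"])  # b2
```

Note that the 0 default means trials that never reported the metric silently rank last rather than raising an error.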
