[tune] Ray Tune API cleanup #1454

Merged
merged 22 commits on Jan 25, 2018
2 changes: 2 additions & 0 deletions doc/source/rllib.rst
@@ -262,6 +262,7 @@ in the ``config`` section of the experiments.

.. code-block:: python

import ray
from ray.tune.tune import run_experiments
from ray.tune.variant_generator import grid_search

@@ -286,6 +287,7 @@ in the ``config`` section of the experiments.
# put additional experiments to run concurrently here
}

ray.init()
run_experiments(experiment)

Contributing to RLlib
81 changes: 50 additions & 31 deletions doc/source/tune.rst
@@ -16,8 +16,9 @@ You can find the code for Ray Tune `here on GitHub <https://github.com/ray-proje
Getting Started
---------------

::
.. code-block:: python

import ray
from ray.tune import register_trainable, grid_search, run_experiments

def my_func(config, reporter):
Expand All @@ -30,6 +31,7 @@ Getting Started

register_trainable("my_func", my_func)

ray.init()
run_experiments({
Contributor: I believe you need to modify rllib.rst too

Contributor Author: Fixed

"my_experiment": {
"run": "my_func",
@@ -67,7 +69,7 @@ Ray Tune logs trial results to a unique directory per experiment, e.g. ``~/ray_r

To visualize learning in tensorboard, run:

::
.. code-block:: bash

$ pip install tensorboard
$ tensorboard --logdir=~/ray_results/my_experiment
@@ -76,7 +78,7 @@ To visualize learning in tensorboard, run:

To use rllab's VisKit (you may have to install some dependencies), run:

::
.. code-block:: bash

$ git clone https://github.com/rll/rllab.git
$ python rllab/rllab/viskit/frontend.py ~/ray_results/my_experiment
@@ -85,7 +87,7 @@ To use rllab's VisKit (you may have to install some dependencies), run:

Finally, to view the results with a `parallel coordinates visualization <https://en.wikipedia.org/wiki/Parallel_coordinates>`__, open `ParallelCoordinatesVisualization.ipynb <https://github.com/ray-project/ray/blob/master/python/ray/tune/ParallelCoordinatesVisualization.ipynb>`__ as follows and run its cells:

::
.. code-block:: bash
Contributor: Cathy did suggest offering a visualization here

Contributor Author: Let's do that as a separate task.


$ cd $RAY_HOME/python/ray/tune
$ jupyter-notebook ParallelCoordinatesVisualization.ipynb
@@ -97,7 +99,7 @@ In the above example, we specified a grid search over two parameters using the `

The following shows grid search over two nested parameters combined with random sampling from two lambda functions. Note that the value of ``beta`` depends on the value of ``alpha``, which is represented by referencing ``spec.config.alpha`` in the lambda function. This lets you specify conditional parameter distributions.

::
.. code-block:: python

"config": {
"alpha": lambda spec: np.random.uniform(100),
@@ -118,44 +120,61 @@ Early Stopping

To reduce costs, long-running trials can often be early stopped if their initial performance is not promising. Ray Tune allows early stopping algorithms to be plugged in on top of existing grid or random searches. This can be enabled by setting the ``scheduler`` parameter of ``run_experiments``, e.g.

::
.. code-block:: python

run_experiments({...}, scheduler=MedianStoppingRule())
run_experiments({...}, scheduler=HyperBandScheduler())

Currently we support the following early stopping algorithms, or you can write your own that implements the `TrialScheduler <https://github.com/ray-project/ray/blob/master/python/ray/tune/trial_scheduler.py>`__ interface:
An example of this can be found in `hyperband_example.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/hyperband_example.py>`__. The progress of one such HyperBand run is shown below.

Note that some trial schedulers such as HyperBand require your Trainable to support checkpointing, which is described in the next section. Checkpointing enables the scheduler to multiplex many concurrent trials onto a limited size cluster.

::

== Status ==
Using HyperBand: num_stopped=0 total_brackets=5
Round #0:
Bracket(n=5, r=100, completed=80%): {'PAUSED': 4, 'PENDING': 1}
Bracket(n=8, r=33, completed=23%): {'PAUSED': 4, 'PENDING': 4}
Bracket(n=15, r=11, completed=4%): {'RUNNING': 2, 'PAUSED': 2, 'PENDING': 11}
Bracket(n=34, r=3, completed=0%): {'RUNNING': 2, 'PENDING': 32}
Bracket(n=81, r=1, completed=0%): {'PENDING': 38}
Resources used: 4/4 CPUs, 0/0 GPUs
Result logdir: /home/eric/ray_results/hyperband_test
PAUSED trials:
- my_class_0_height=99,width=43: PAUSED [pid=11664], 0 s, 100 ts, 97.1 rew
- my_class_11_height=85,width=81: PAUSED [pid=11771], 0 s, 33 ts, 32.8 rew
- my_class_12_height=0,width=52: PAUSED [pid=11785], 0 s, 33 ts, 0 rew
- my_class_19_height=44,width=88: PAUSED [pid=11811], 0 s, 11 ts, 5.47 rew
- my_class_27_height=96,width=84: PAUSED [pid=11840], 0 s, 11 ts, 12.5 rew
... 5 more not shown
PENDING trials:
- my_class_10_height=12,width=25: PENDING
- my_class_13_height=90,width=45: PENDING
- my_class_14_height=69,width=45: PENDING
- my_class_15_height=41,width=11: PENDING
- my_class_16_height=57,width=69: PENDING
... 81 more not shown
RUNNING trials:
- my_class_23_height=75,width=51: RUNNING [pid=11843], 0 s, 1 ts, 1.47 rew
- my_class_26_height=16,width=48: RUNNING
- my_class_31_height=40,width=10: RUNNING
- my_class_53_height=28,width=96: RUNNING

Currently we support the following early stopping algorithms, or you can write your own that implements the `TrialScheduler <https://github.com/ray-project/ray/blob/master/python/ray/tune/trial_scheduler.py>`__ interface.

.. autoclass:: ray.tune.median_stopping_rule.MedianStoppingRule
.. autoclass:: ray.tune.hyperband.HyperBandScheduler
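
As a rough illustration of that plug-in point, the sketch below subclasses ``FIFOScheduler`` (so trial selection is inherited) and stops any trial whose mean reward falls under a threshold. It assumes the ``on_trial_result(trial_runner, trial, result)`` hook and the ``CONTINUE`` / ``STOP`` constants from ``trial_scheduler.py``, plus an ``episode_reward_mean`` field on the reported result; the class name and threshold are invented for the example.

.. code-block:: python

    from ray.tune.trial_scheduler import FIFOScheduler, TrialScheduler

    class StopBelowThreshold(FIFOScheduler):
        """Hypothetical scheduler: stop trials whose reward is too low."""

        def __init__(self, threshold=0.0):
            FIFOScheduler.__init__(self)
            self._threshold = threshold

        def on_trial_result(self, trial_runner, trial, result):
            # `result` is the result reported by the trial; the
            # episode_reward_mean field is assumed here for illustration.
            if result.episode_reward_mean < self._threshold:
                return TrialScheduler.STOP
            return TrialScheduler.CONTINUE

    # Usage (sketch): run_experiments({...}, scheduler=StopBelowThreshold(10.0))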

Checkpointing support
---------------------
Trial Checkpointing
-------------------

To enable checkpoint / resume, the full ``Trainable`` API must be implemented (though as shown in the examples above, you can get away with just supplying a ``train(config, reporter)`` func if you don't need checkpointing). Implementing this interface is required to support resource multiplexing in schedulers such as HyperBand. For example, all `RLlib agents <https://github.com/ray-project/ray/blob/master/python/ray/rllib/agent.py>`__ implement the ``Trainable`` API.
To enable checkpoint / resume, you must subclass ``Trainable`` and implement its ``_train``, ``_save``, and ``_restore`` abstract methods `(example) <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/hyperband_example.py>`__. Implementing this interface is required to support resource multiplexing in schedulers such as HyperBand.

.. autoclass:: ray.tune.trainable.Trainable
:members:
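
As a sketch of that interface (not the exact code from ``hyperband_example.py``), a checkpointable trainable could look like the following; it assumes ``_train`` still returns the ``TrainingResult`` tuple from ``ray.tune.result``, and the checkpoint file name is arbitrary.

.. code-block:: python

    import json
    import os

    from ray.tune.result import TrainingResult
    from ray.tune.trainable import Trainable

    class MyTrainable(Trainable):
        """Hypothetical trainable that only counts iterations, but can be
        saved and restored so HyperBand may pause and resume it."""

        def _train(self):
            # Lazily initialize state so only the three abstract methods
            # need to be defined.
            self.timestep = getattr(self, "timestep", 0) + 1
            return TrainingResult(
                timesteps_this_iter=1,
                episode_reward_mean=float(self.timestep))

        def _save(self, checkpoint_dir):
            path = os.path.join(checkpoint_dir, "checkpoint.json")
            with open(path, "w") as f:
                json.dump({"timestep": self.timestep}, f)
            return path

        def _restore(self, checkpoint_path):
            with open(checkpoint_path) as f:
                self.timestep = json.load(f)["timestep"]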

Resource Allocation
-------------------

Ray Tune runs each trial as a Ray actor, allocating the specified GPU and CPU ``resources`` to each actor (defaulting to 1 CPU per trial). A trial will not be scheduled unless at least that amount of resources is available in the cluster, preventing the cluster from being overloaded.

If your trainable function / class creates further Ray actors or tasks that also consume CPU / GPU resources, you will also want to set ``driver_cpu_limit`` or ``driver_gpu_limit`` to tell Ray not to assign the entire resource reservation to your top-level trainable function, as described in `trial.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/trial.py>`__.

Command-line JSON/YAML API
--------------------------

The JSON config passed to ``run_experiments`` can also be put in a JSON or YAML file, and the experiments run using the ``tune.py`` script. This supports the same functionality as the Python API, e.g.:

::

cd ray/python/tune
./tune.py -f examples/tune_mnist_ray.yaml --scheduler=MedianStoppingRule


For more examples of experiments described by YAML files, see `RLlib tuned examples <https://github.com/ray-project/ray/tree/master/python/ray/rllib/tuned_examples>`__.

Running in a large cluster
--------------------------

The ``run_experiments`` also takes any arguments that ``ray.init()`` does. This can be used to pass in the redis address of a multi-node Ray cluster. For more details, check out the `tune.py script <https://github.com/ray-project/ray/blob/master/python/ray/tune/tune.py>`__.
If your trainable function / class creates further Ray actors or tasks that also consume CPU / GPU resources, you will also want to set ``driver_cpu_limit`` or ``driver_gpu_limit`` to tell Ray not to assign the entire resource reservation to your top-level trainable function, as described in `trial.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/trial.py>`__. For example, if a trainable class requires 1 GPU itself, but will launch 4 actors each using another GPU, then it should set ``"gpu": 5, "driver_gpu_limit": 1``.
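
To make that last case concrete, a hypothetical experiment spec might look like the sketch below. The experiment name, trainable name, and stopping criterion are invented, and nesting these keys under ``resources`` is an assumption based on the paragraph above.

.. code-block:: python

    import ray
    from ray.tune import run_experiments

    ray.init()
    run_experiments({
        "resource_demo": {                 # hypothetical experiment name
            "run": "my_trainable",         # assumed to be registered elsewhere
            "stop": {"training_iteration": 10},
            "resources": {
                # The trial itself keeps 1 of the 5 GPUs; the remaining 4 are
                # left for the actors it launches internally.
                "cpu": 1,
                "gpu": 5,
                "driver_gpu_limit": 1,
            },
        },
    })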
2 changes: 2 additions & 0 deletions examples/carla/a3c_lane_keep.py
@@ -2,6 +2,7 @@
from __future__ import division
from __future__ import print_function

import ray
from ray.tune import register_env, run_experiments

from env import CarlaEnv, ENV_CONFIG
@@ -25,6 +26,7 @@
register_env(env_name, lambda env_config: CarlaEnv(env_config))
register_carla_model()

ray.init()
run_experiments({
"carla-a3c": {
"run": "A3C",
2 changes: 2 additions & 0 deletions examples/carla/dqn_lane_keep.py
@@ -2,6 +2,7 @@
from __future__ import division
from __future__ import print_function

import ray
from ray.tune import register_env, run_experiments

from env import CarlaEnv, ENV_CONFIG
@@ -25,6 +26,7 @@
register_env(env_name, lambda env_config: CarlaEnv(env_config))
register_carla_model()

ray.init()
run_experiments({
"carla-dqn": {
"run": "DQN",
2 changes: 2 additions & 0 deletions examples/carla/ppo_lane_keep.py
@@ -2,6 +2,7 @@
from __future__ import division
from __future__ import print_function

import ray
from ray.tune import register_env, run_experiments

from env import CarlaEnv, ENV_CONFIG
@@ -25,6 +26,7 @@
register_env(env_name, lambda env_config: CarlaEnv(env_config))
register_carla_model()

ray.init()
run_experiments({
"carla-ppo": {
"run": "PPO",
3 changes: 2 additions & 1 deletion examples/carla/train_a3c.py
@@ -27,6 +27,7 @@
register_carla_model()
redis_address = ray.services.get_node_ip_address() + ":6379"

ray.init(redis_address=redis_address)
run_experiments({
"carla-a3c": {
"run": "A3C",
@@ -50,4 +51,4 @@
"num_workers": 2,
},
},
}, redis_address=redis_address)
})
2 changes: 2 additions & 0 deletions examples/carla/train_dqn.py
@@ -2,6 +2,7 @@
from __future__ import division
from __future__ import print_function

import ray
from ray.tune import register_env, run_experiments

from env import CarlaEnv, ENV_CONFIG
@@ -23,6 +24,7 @@
register_env(env_name, lambda env_config: CarlaEnv(env_config))
register_carla_model()

ray.init()
run_experiments({
"carla-dqn": {
"run": "DQN",
4 changes: 3 additions & 1 deletion examples/carla/train_ppo.py
@@ -2,6 +2,7 @@
from __future__ import division
from __future__ import print_function

import ray
from ray.tune import register_env, run_experiments

from env import CarlaEnv, ENV_CONFIG
@@ -22,6 +23,7 @@
register_env(env_name, lambda env_config: CarlaEnv(env_config))
register_carla_model()

ray.init(redirect_output=True)
run_experiments({
"carla": {
"run": "PPO",
@@ -55,4 +57,4 @@
}
},
},
}, redirect_output=True)
})
2 changes: 2 additions & 0 deletions examples/custom_env/custom_env.py
@@ -8,6 +8,7 @@
from gym.spaces import Discrete, Box
from gym.envs.registration import EnvSpec

import ray
from ray.tune import run_experiments
from ray.tune.registry import register_env

@@ -41,6 +42,7 @@ def _step(self, action):
if __name__ == "__main__":
env_creator_name = "corridor"
register_env(env_creator_name, lambda config: SimpleCorridor(config))
ray.init()
run_experiments({
"demo": {
"run": "PPO",
5 changes: 3 additions & 2 deletions python/ray/rllib/__init__.py
@@ -2,15 +2,16 @@
from __future__ import division
from __future__ import print_function

# Note: do not introduce unnecessary library dependencies here, e.g. gym
# Note: do not introduce unnecessary library dependencies here, e.g. gym.
# This file is imported from the tune module in order to register RLlib agents.
from ray.tune.registry import register_trainable
from ray.rllib.agent import get_agent_class


def _register_all():
for key in [
"PPO", "ES", "DQN", "A3C", "BC", "__fake", "__sigmoid_fake_data"]:
try:
from ray.rllib.agent import get_agent_class
register_trainable(key, get_agent_class(key))
except ImportError as e:
print("Warning: could not import {}: {}".format(key, e))