Skip to content

Commit

Permalink
[RLlib] AlgorithmConfig: Next steps (volume 01); Algos, RolloutWorker…
Browse files Browse the repository at this point in the history
…, PolicyMap, WorkerSet use AlgorithmConfig objects under the hood. (ray-project#29395)
  • Loading branch information
sven1977 authored Oct 26, 2022
1 parent 37de814 commit 182744b
Show file tree
Hide file tree
Showing 94 changed files with 2,566 additions and 1,997 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ handle any checkpoints created with Ray 2.0 or any version up to ``V``.

.. code-block:: shell
$ mode rllib_checkpoint.json
$ more rllib_checkpoint.json
{"type": "Algorithm", "checkpoint_version": "1.0"}
Now, let's check out the `policies/` sub-directory:
Expand Down
7 changes: 4 additions & 3 deletions rllib/algorithms/a2c/a2c.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,12 @@ class A2CConfig(A3CConfig):
>>> from ray import tune
>>> config = A2CConfig().training(lr=0.01, grad_clip=30.0)\
... .resources(num_gpus=0)\
... .rollouts(num_rollout_workers=2)
... .rollouts(num_rollout_workers=2)\
... .environment("CartPole-v1")
>>> print(config.to_dict())
>>> # Build an Algorithm object from the config and run 1 training iteration.
>>> trainer = config.build(env="CartPole-v1")
>>> trainer.train()
>>> algo = config.build()
>>> algo.train()
Example:
>>> import ray.air as air
Expand Down
25 changes: 13 additions & 12 deletions rllib/algorithms/a2c/tests/test_a2c.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@ def test_a2c_compilation(self):
# Test against all frameworks.
for _ in framework_iterator(config, with_eager_tracing=True):
for env in ["CartPole-v0", "Pendulum-v1", "PongDeterministic-v0"]:
trainer = config.build(env=env)
config.environment(env)
algo = config.build()
for i in range(num_iterations):
results = trainer.train()
results = algo.train()
check_train_results(results)
print(results)
check_compute_single_action(trainer)
trainer.stop()
check_compute_single_action(algo)
algo.stop()

def test_a2c_exec_impl(self):
config = (
Expand All @@ -43,12 +44,12 @@ def test_a2c_exec_impl(self):
)

for _ in framework_iterator(config):
trainer = config.build()
results = trainer.train()
algo = config.build()
results = algo.train()
check_train_results(results)
print(results)
check_compute_single_action(trainer)
trainer.stop()
check_compute_single_action(algo)
algo.stop()

def test_a2c_exec_impl_microbatch(self):
config = (
Expand All @@ -59,12 +60,12 @@ def test_a2c_exec_impl_microbatch(self):
)

for _ in framework_iterator(config):
trainer = config.build()
results = trainer.train()
algo = config.build()
results = algo.train()
check_train_results(results)
print(results)
check_compute_single_action(trainer)
trainer.stop()
check_compute_single_action(algo)
algo.stop()


if __name__ == "__main__":
Expand Down
7 changes: 4 additions & 3 deletions rllib/algorithms/a3c/a3c.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,12 @@ class A3CConfig(AlgorithmConfig):
>>> from ray import tune
>>> config = A3CConfig().training(lr=0.01, grad_clip=30.0)\
... .resources(num_gpus=0)\
... .rollouts(num_rollout_workers=4)
... .rollouts(num_rollout_workers=4)\
... .environment("CartPole-v1")
>>> print(config.to_dict())
>>> # Build an Algorithm object from the config and run 1 training iteration.
>>> trainer = config.build(env="CartPole-v1")
>>> trainer.train()
>>> algo = config.build()
>>> algo.train()
Example:
>>> config = A3CConfig()
Expand Down
16 changes: 8 additions & 8 deletions rllib/algorithms/a3c/tests/test_a3c.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@ def test_a3c_compilation(self):
for env in ["CartPole-v1", "Pendulum-v1", "PongDeterministic-v0"]:
print("env={}".format(env))
config.model["use_lstm"] = env == "CartPole-v1"
trainer = config.build(env=env)
algo = config.build(env=env)
for i in range(num_iterations):
results = trainer.train()
results = algo.train()
check_train_results(results)
print(results)
check_compute_single_action(
trainer, include_state=config.model["use_lstm"]
algo, include_state=config.model["use_lstm"]
)
trainer.stop()
algo.stop()

def test_a3c_entropy_coeff_schedule(self):
"""Test A3C entropy coeff schedule support."""
Expand Down Expand Up @@ -78,17 +78,17 @@ def _step_n_times(trainer, n: int):

# Test against all frameworks.
for _ in framework_iterator(config):
trainer = config.build(env="CartPole-v1")
algo = config.build(env="CartPole-v1")

coeff = _step_n_times(trainer, 1) # 20 timesteps
coeff = _step_n_times(algo, 1) # 20 timesteps
# Should be close to the starting coeff of 0.01
self.assertGreaterEqual(coeff, 0.005)

coeff = _step_n_times(trainer, 10) # 200 timesteps
coeff = _step_n_times(algo, 10) # 200 timesteps
# Should have annealed to the final coeff of 0.0001.
self.assertLessEqual(coeff, 0.00011)

trainer.stop()
algo.stop()


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 182744b

Please sign in to comment.