Skip to content

Commit

Permalink
[RLlib] AlgorithmConfig: Next steps (volume 01); Algos, RolloutWorker…
Browse files Browse the repository at this point in the history
…, PolicyMap, WorkerSet use AlgorithmConfig objects under the hood. (ray-project#29395)
  • Loading branch information
sven1977 authored Oct 26, 2022
1 parent 37de814 commit 182744b
Show file tree
Hide file tree
Showing 94 changed files with 2,566 additions and 1,997 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ handle any checkpoints created with Ray 2.0 or any version up to ``V``.

.. code-block:: shell
$ mode rllib_checkpoint.json
$ more rllib_checkpoint.json
{"type": "Algorithm", "checkpoint_version": "1.0"}
Now, let's check out the `policies/` sub-directory:
Expand Down
7 changes: 4 additions & 3 deletions rllib/algorithms/a2c/a2c.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,12 @@ class A2CConfig(A3CConfig):
>>> from ray import tune
>>> config = A2CConfig().training(lr=0.01, grad_clip=30.0)\
... .resources(num_gpus=0)\
... .rollouts(num_rollout_workers=2)
... .rollouts(num_rollout_workers=2)\
... .environment("CartPole-v1")
>>> print(config.to_dict())
>>> # Build an Algorithm object from the config and run 1 training iteration.
>>> trainer = config.build(env="CartPole-v1")
>>> trainer.train()
>>> algo = config.build()
>>> algo.train()
Example:
>>> import ray.air as air
Expand Down
25 changes: 13 additions & 12 deletions rllib/algorithms/a2c/tests/test_a2c.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@ def test_a2c_compilation(self):
# Test against all frameworks.
for _ in framework_iterator(config, with_eager_tracing=True):
for env in ["CartPole-v0", "Pendulum-v1", "PongDeterministic-v0"]:
trainer = config.build(env=env)
config.environment(env)
algo = config.build()
for i in range(num_iterations):
results = trainer.train()
results = algo.train()
check_train_results(results)
print(results)
check_compute_single_action(trainer)
trainer.stop()
check_compute_single_action(algo)
algo.stop()

def test_a2c_exec_impl(self):
config = (
Expand All @@ -43,12 +44,12 @@ def test_a2c_exec_impl(self):
)

for _ in framework_iterator(config):
trainer = config.build()
results = trainer.train()
algo = config.build()
results = algo.train()
check_train_results(results)
print(results)
check_compute_single_action(trainer)
trainer.stop()
check_compute_single_action(algo)
algo.stop()

def test_a2c_exec_impl_microbatch(self):
config = (
Expand All @@ -59,12 +60,12 @@ def test_a2c_exec_impl_microbatch(self):
)

for _ in framework_iterator(config):
trainer = config.build()
results = trainer.train()
algo = config.build()
results = algo.train()
check_train_results(results)
print(results)
check_compute_single_action(trainer)
trainer.stop()
check_compute_single_action(algo)
algo.stop()


if __name__ == "__main__":
Expand Down
7 changes: 4 additions & 3 deletions rllib/algorithms/a3c/a3c.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,12 @@ class A3CConfig(AlgorithmConfig):
>>> from ray import tune
>>> config = A3CConfig().training(lr=0.01, grad_clip=30.0)\
... .resources(num_gpus=0)\
... .rollouts(num_rollout_workers=4)
... .rollouts(num_rollout_workers=4)\
... .environment("CartPole-v1")
>>> print(config.to_dict())
>>> # Build an Algorithm object from the config and run 1 training iteration.
>>> trainer = config.build(env="CartPole-v1")
>>> trainer.train()
>>> algo = config.build()
>>> algo.train()
Example:
>>> config = A3CConfig()
Expand Down
16 changes: 8 additions & 8 deletions rllib/algorithms/a3c/tests/test_a3c.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@ def test_a3c_compilation(self):
for env in ["CartPole-v1", "Pendulum-v1", "PongDeterministic-v0"]:
print("env={}".format(env))
config.model["use_lstm"] = env == "CartPole-v1"
trainer = config.build(env=env)
algo = config.build(env=env)
for i in range(num_iterations):
results = trainer.train()
results = algo.train()
check_train_results(results)
print(results)
check_compute_single_action(
trainer, include_state=config.model["use_lstm"]
algo, include_state=config.model["use_lstm"]
)
trainer.stop()
algo.stop()

def test_a3c_entropy_coeff_schedule(self):
"""Test A3C entropy coeff schedule support."""
Expand Down Expand Up @@ -78,17 +78,17 @@ def _step_n_times(trainer, n: int):

# Test against all frameworks.
for _ in framework_iterator(config):
trainer = config.build(env="CartPole-v1")
algo = config.build(env="CartPole-v1")

coeff = _step_n_times(trainer, 1) # 20 timesteps
coeff = _step_n_times(algo, 1) # 20 timesteps
# Should be close to the starting coeff of 0.01
self.assertGreaterEqual(coeff, 0.005)

coeff = _step_n_times(trainer, 10) # 200 timesteps
coeff = _step_n_times(algo, 10) # 200 timesteps
# Should have annealed to the final coeff of 0.0001.
self.assertLessEqual(coeff, 0.00011)

trainer.stop()
algo.stop()


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 182744b

Please sign in to comment.