[RLlib] MADDPG: Move into main algorithms folder and add proper unit and learning tests. #24579

Merged: 18 commits, May 24, 2022 (changes shown from 16 commits).
30 changes: 20 additions & 10 deletions rllib/BUILD
@@ -379,6 +379,17 @@ py_test(
args = ["--yaml-dir=tuned_examples/impala"]
)

# MADDPG
py_test(
name = "learning_tests_two_step_game_maddpg",
main = "tests/run_regression_tests.py",
tags = ["team:ml", "tf_only", "no_tf_eager_tracing", "learning_tests", "learning_tests_discrete"],
Member: Could we maybe instead add this to the weekly CI? I feel like we don't need to support this algorithm as widely until there is a customer use case.

Contributor Author: Good question. I think we should move toward:
  • CI tests: small tasks that can be learned in 1-2 minutes (CartPole, TwoStepGame, maybe Pendulum or a simpler continuous-action problem)
  • nightly tests: hard tasks, requiring a GPU and ~1 h to complete on more than one worker

Contributor Author: I'll leave this test here for now to roughly match the other algorithms' coverage (every algorithm has CartPole/Pendulum tests in the CI, which is fine).
(A sketch of the weekly-CI variant discussed above follows this rule.)

size = "large",
srcs = ["tests/run_regression_tests.py"],
data = ["tuned_examples/maddpg/two-step-game-maddpg.yaml"],
args = ["--yaml-dir=tuned_examples/maddpg", "--framework=tf"]
)
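Picking up the review suggestion above: moving this job out of the per-commit CI would mostly be a tagging change. This is a sketch only; the "weekly" tag is hypothetical, and which tags map to which CI schedule is defined outside this file:

py_test(
    name = "learning_tests_two_step_game_maddpg",
    main = "tests/run_regression_tests.py",
    # Hypothetical "weekly" tag in place of "learning_tests"; the rest of
    # the rule is unchanged from the one added above.
    tags = ["team:ml", "tf_only", "no_tf_eager_tracing", "weekly"],
    size = "large",
    srcs = ["tests/run_regression_tests.py"],
    data = ["tuned_examples/maddpg/two-step-game-maddpg.yaml"],
    args = ["--yaml-dir=tuned_examples/maddpg", "--framework=tf"],
)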

# Working, but takes a long time to learn (>15min).
# Removed due to Higher API conflicts with Pytorch-Import tests
## MB-MPO
@@ -729,7 +740,7 @@ py_test(
py_test(
name = "test_dreamer",
tags = ["team:ml", "trainers_dir"],
size = "small",
size = "medium",
srcs = ["algorithms/dreamer/tests/test_dreamer.py"]
)

@@ -775,6 +786,14 @@ py_test(
srcs = ["algorithms/marwil/tests/test_bc.py"]
)

# MADDPGTrainer
py_test(
name = "test_maddpg",
tags = ["team:ml", "trainers_dir"],
size = "medium",
srcs = ["algorithms/maddpg/tests/test_maddpg.py"]
)

# MAMLTrainer
py_test(
name = "test_maml",
@@ -2957,15 +2976,6 @@ py_test(
args = ["--as-test", "--mixed-torch-tf", "--stop-reward=450.0"]
)

py_test(
name = "examples/two_step_game_maddpg",
main = "examples/two_step_game.py",
tags = ["team:ml", "examples", "examples_T"],
size = "medium",
srcs = ["examples/two_step_game.py"],
args = ["--as-test", "--stop-reward=7.1", "--run=MADDPG"]
)

py_test(
name = "examples/two_step_game_pg_tf",
main = "examples/two_step_game.py",
20 changes: 18 additions & 2 deletions rllib/agents/maddpg/__init__.py
@@ -1,3 +1,19 @@
from ray.rllib.agents.maddpg.maddpg import MADDPGTrainer, DEFAULT_CONFIG
from ray.rllib.algorithms.maddpg.maddpg import (
MADDPGTrainer,
MADDPGTFPolicy,
DEFAULT_CONFIG,
)

__all__ = ["MADDPGTrainer", "DEFAULT_CONFIG"]
__all__ = [
"MADDPGTrainer",
"MADDPGTFPolicy",
"DEFAULT_CONFIG",
]

from ray.rllib.utils.deprecation import deprecation_warning

deprecation_warning(
"ray.rllib.agents.maddpg",
"ray.rllib.algorithms.maddpg",
error=False,
)
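The shim above keeps the old import path alive while steering users to the new one. A minimal sketch of the intended usage (the exact warning text comes from ray.rllib.utils.deprecation and may differ):

# Old path: still resolves after this PR, but logs a deprecation
# warning pointing at ray.rllib.algorithms.maddpg.
from ray.rllib.agents.maddpg import MADDPGTrainer, DEFAULT_CONFIG

# New, preferred path:
from ray.rllib.algorithms.maddpg import MADDPGTrainer, DEFAULT_CONFIG  # noqa: F811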
File renamed without changes.
3 changes: 3 additions & 0 deletions rllib/algorithms/maddpg/__init__.py
@@ -0,0 +1,3 @@
from ray.rllib.agents.maddpg.maddpg import MADDPGTrainer, DEFAULT_CONFIG

__all__ = ["MADDPGTrainer", "DEFAULT_CONFIG"]
@@ -77,6 +77,8 @@
"capacity": int(1e6),
# How many steps of the model to sample before learning starts.
"learning_starts": 1024 * 25,
# Force lockstep replay mode for MADDPG.
"replay_mode": "lockstep",
},
# Observation compression. Note that compression makes simulation slow in
# MPE.
@@ -86,10 +88,6 @@
# timesteps. Otherwise, the replay will proceed at the native ratio
# determined by (train_batch_size / rollout_fragment_length).
"training_intensity": None,
# Force lockstep replay mode for MADDPG.
"multiagent": merge_dicts(COMMON_CONFIG["multiagent"], {
"replay_mode": "lockstep",
}),

# === Optimization ===
# Learning rate for the critic (Q-function) optimizer.
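The lockstep flag moves from config["multiagent"] into the replay buffer settings. Lockstep replay samples all agents' transitions for the same timesteps together, which MADDPG's centralized critics require. A minimal sketch of reading the setting back; the enclosing "replay_buffer_config" key name is an assumption based on the hunk context:

import ray.rllib.agents.maddpg as maddpg

config = maddpg.DEFAULT_CONFIG.copy()
# Assumed key name: the diff adds "replay_mode" next to "capacity" and
# "learning_starts" in the buffer section; it is no longer nested under
# config["multiagent"].
print(config["replay_buffer_config"]["replay_mode"])  # -> "lockstep"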
@@ -69,11 +69,11 @@ def _make_continuous_space(space):
)

obs_space_n = [
_make_continuous_space(space)
_make_continuous_space(space or obs_space)
for _, (_, space, _, _) in config["multiagent"]["policies"].items()
]
act_space_n = [
_make_continuous_space(space)
_make_continuous_space(space or act_space)
for _, (_, _, space, _) in config["multiagent"]["policies"].items()
]

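The change falls back to the trainer-level spaces when a policy spec leaves its own space unset: "space or obs_space" (and "space or act_space") substitutes the env's space for a None entry before conversion, where previously the helper received None and failed. A sketch of the helper's likely shape, assuming the usual Discrete-to-one-hot-Box widening (not copied from the source):

import numpy as np
from gym.spaces import Box, Discrete

def _make_continuous_space(space):
    # MADDPG's centralized critic consumes continuous inputs, so Discrete
    # spaces are widened to one-hot Box spaces; Box passes through as-is.
    if isinstance(space, Box):
        return space
    elif isinstance(space, Discrete):
        return Box(low=np.zeros((space.n,)), high=np.ones((space.n,)))
    raise ValueError(f"Space {space} is not supported by MADDPG.")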
57 changes: 57 additions & 0 deletions rllib/algorithms/maddpg/tests/test_maddpg.py
@@ -0,0 +1,57 @@
import unittest

import ray
import ray.rllib.agents.maddpg as maddpg
from ray.rllib.examples.env.two_step_game import TwoStepGame
from ray.rllib.policy.policy import PolicySpec
from ray.rllib.utils.test_utils import (
check_train_results,
framework_iterator,
)


class TestMADDPG(unittest.TestCase):
@classmethod
def setUpClass(cls) -> None:
ray.init()

@classmethod
def tearDownClass(cls) -> None:
ray.shutdown()

def test_maddpg_compilation(self):
"""Test whether an MADDPGTrainer can be built with all frameworks."""
config = maddpg.DEFAULT_CONFIG.copy()
config["env"] = TwoStepGame
config["env_config"] = {
"actions_are_logits": True,
}
config["multiagent"] = {
"policies": {
"pol1": PolicySpec(
config={"agent_id": 0},
),
"pol2": PolicySpec(
config={"agent_id": 1},
),
},
"policy_mapping_fn": (lambda aid, **kwargs: "pol2" if aid else "pol1"),
}

num_iterations = 1

# Only working for tf right now.
for _ in framework_iterator(config, frameworks="tf"):
trainer = maddpg.MADDPGTrainer(config)
for i in range(num_iterations):
results = trainer.train()
check_train_results(results)
print(results)
trainer.stop()


if __name__ == "__main__":
import pytest
import sys

sys.exit(pytest.main(["-v", __file__]))
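Note that the PolicySpec entries above set only config={"agent_id": ...} and leave the observation/action spaces as None, which is exactly the case the space-fallback change earlier in this PR handles. The file runs directly via its __main__ block or through pytest:

pytest -v rllib/algorithms/maddpg/tests/test_maddpg.py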
4 changes: 2 additions & 2 deletions rllib/tuned_examples/maddpg/two-step-game-maddpg.yaml
@@ -1,8 +1,8 @@
two-step-game-qmix-with-qmix-mixer:
two-step-game-maddpg:
env: ray.rllib.examples.env.two_step_game.TwoStepGame
run: MADDPG
stop:
episode_reward_mean: 8.0
episode_reward_mean: 7.2
timesteps_total: 20000
config:
# MADDPG only supports tf for now.
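Mirroring the BUILD rule added above, the renamed tuned example can be exercised locally with the regression-test runner (paths relative to the rllib/ directory, as in the BUILD args):

python tests/run_regression_tests.py --yaml-dir=tuned_examples/maddpg --framework=tf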