[RLlib] MADDPG: Move into main algorithms folder and add proper unit and learning tests. #24579

Merged: 18 commits, May 24, 2022 (changes shown from 16 commits).
30 changes: 20 additions & 10 deletions rllib/BUILD
@@ -379,6 +379,17 @@ py_test(
args = ["--yaml-dir=tuned_examples/impala"]
)

# MADDPG
py_test(
name = "learning_tests_two_step_game_maddpg",
main = "tests/run_regression_tests.py",
tags = ["team:ml", "tf_only", "no_tf_eager_tracing", "learning_tests", "learning_tests_discrete"],
Member: Could we maybe instead add this to the weekly CI? I feel like we don't need to support this algorithm as widely until there is a customer use case.

Contributor Author: Good question. I think we should move toward:
  • CI tests: small tasks that can be learned in 1-2 minutes (CartPole, TwoStepGame, maybe Pendulum or a simpler continuous-action problem)
  • nightly tests: hard tasks, requiring a GPU and ~1 h to complete on more than one worker

Contributor Author: I'll leave this test here for now to roughly match the other algorithms' coverage (every algorithm has CartPole/Pendulum tests in the CI, which is fine).
(A sketch of the weekly-CI variant discussed above follows this rule.)

size = "large",
srcs = ["tests/run_regression_tests.py"],
data = ["tuned_examples/maddpg/two-step-game-maddpg.yaml"],
args = ["--yaml-dir=tuned_examples/maddpg", "--framework=tf"]
)
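Picking up the review suggestion above: moving this job out of the per-commit CI would mostly be a tagging change. This is a sketch only; the "weekly" tag is hypothetical, and which tags map to which CI schedule is defined outside this file:

py_test(
    name = "learning_tests_two_step_game_maddpg",
    main = "tests/run_regression_tests.py",
    # Hypothetical "weekly" tag in place of "learning_tests"; the rest of
    # the rule is unchanged from the one added above.
    tags = ["team:ml", "tf_only", "no_tf_eager_tracing", "weekly"],
    size = "large",
    srcs = ["tests/run_regression_tests.py"],
    data = ["tuned_examples/maddpg/two-step-game-maddpg.yaml"],
    args = ["--yaml-dir=tuned_examples/maddpg", "--framework=tf"],
)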

# Working, but takes a long time to learn (>15min).
# Removed due to Higher API conflicts with Pytorch-Import tests
## MB-MPO
@@ -729,7 +740,7 @@ py_test(
py_test(
name = "test_dreamer",
tags = ["team:ml", "trainers_dir"],
size = "small",
size = "medium",
srcs = ["algorithms/dreamer/tests/test_dreamer.py"]
)

@@ -775,6 +786,14 @@ py_test(
srcs = ["algorithms/marwil/tests/test_bc.py"]
)

# MADDPGTrainer
py_test(
name = "test_maddpg",
tags = ["team:ml", "trainers_dir"],
size = "medium",
srcs = ["algorithms/maddpg/tests/test_maddpg.py"]
)

# MAMLTrainer
py_test(
name = "test_maml",
@@ -2957,15 +2976,6 @@ py_test(
args = ["--as-test", "--mixed-torch-tf", "--stop-reward=450.0"]
)

py_test(
name = "examples/two_step_game_maddpg",
main = "examples/two_step_game.py",
tags = ["team:ml", "examples", "examples_T"],
size = "medium",
srcs = ["examples/two_step_game.py"],
args = ["--as-test", "--stop-reward=7.1", "--run=MADDPG"]
)

py_test(
name = "examples/two_step_game_pg_tf",
main = "examples/two_step_game.py",
20 changes: 18 additions & 2 deletions rllib/agents/maddpg/__init__.py
@@ -1,3 +1,19 @@
from ray.rllib.agents.maddpg.maddpg import MADDPGTrainer, DEFAULT_CONFIG
from ray.rllib.algorithms.maddpg.maddpg import (
MADDPGTrainer,
MADDPGTFPolicy,
DEFAULT_CONFIG,
)

__all__ = ["MADDPGTrainer", "DEFAULT_CONFIG"]
__all__ = [
"MADDPGTrainer",
"MADDPGTFPolicy",
"DEFAULT_CONFIG",
]

from ray.rllib.utils.deprecation import deprecation_warning

deprecation_warning(
"ray.rllib.agents.maddpg",
"ray.rllib.algorithms.maddpg",
error=False,
)
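The shim above keeps the old import path alive while steering users to the new one. A minimal sketch of the intended usage (the exact warning text comes from ray.rllib.utils.deprecation and may differ):

# Old path: still resolves after this PR, but logs a deprecation
# warning pointing at ray.rllib.algorithms.maddpg.
from ray.rllib.agents.maddpg import MADDPGTrainer, DEFAULT_CONFIG

# New, preferred path:
from ray.rllib.algorithms.maddpg import MADDPGTrainer, DEFAULT_CONFIG  # noqa: F811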
File renamed without changes.
3 changes: 3 additions & 0 deletions rllib/algorithms/maddpg/__init__.py
@@ -0,0 +1,3 @@
from ray.rllib.agents.maddpg.maddpg import MADDPGTrainer, DEFAULT_CONFIG

__all__ = ["MADDPGTrainer", "DEFAULT_CONFIG"]
@@ -77,6 +77,8 @@
"capacity": int(1e6),
# How many steps of the model to sample before learning starts.
"learning_starts": 1024 * 25,
# Force lockstep replay mode for MADDPG.
"replay_mode": "lockstep",
},
# Observation compression. Note that compression makes simulation slow in
# MPE.
@@ -86,10 +88,6 @@
# timesteps. Otherwise, the replay will proceed at the native ratio
# determined by (train_batch_size / rollout_fragment_length).
"training_intensity": None,
# Force lockstep replay mode for MADDPG.
"multiagent": merge_dicts(COMMON_CONFIG["multiagent"], {
"replay_mode": "lockstep",
}),

# === Optimization ===
# Learning rate for the critic (Q-function) optimizer.
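The lockstep flag moves from config["multiagent"] into the replay buffer settings. Lockstep replay samples all agents' transitions for the same timesteps together, which MADDPG's centralized critics require. A minimal sketch of reading the setting back; the enclosing "replay_buffer_config" key name is an assumption based on the hunk context:

import ray.rllib.agents.maddpg as maddpg

config = maddpg.DEFAULT_CONFIG.copy()
# Assumed key name: the diff adds "replay_mode" next to "capacity" and
# "learning_starts" in the buffer section; it is no longer nested under
# config["multiagent"].
print(config["replay_buffer_config"]["replay_mode"])  # -> "lockstep"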
@@ -69,11 +69,11 @@ def _make_continuous_space(space):
)

obs_space_n = [
_make_continuous_space(space)
_make_continuous_space(space or obs_space)
for _, (_, space, _, _) in config["multiagent"]["policies"].items()
]
act_space_n = [
_make_continuous_space(space)
_make_continuous_space(space or act_space)
for _, (_, _, space, _) in config["multiagent"]["policies"].items()
]

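The change falls back to the trainer-level spaces when a policy spec leaves its own space unset: "space or obs_space" (and "space or act_space") substitutes the env's space for a None entry before conversion, where previously the helper received None and failed. A sketch of the helper's likely shape, assuming the usual Discrete-to-one-hot-Box widening (not copied from the source):

import numpy as np
from gym.spaces import Box, Discrete

def _make_continuous_space(space):
    # MADDPG's centralized critic consumes continuous inputs, so Discrete
    # spaces are widened to one-hot Box spaces; Box passes through as-is.
    if isinstance(space, Box):
        return space
    elif isinstance(space, Discrete):
        return Box(low=np.zeros((space.n,)), high=np.ones((space.n,)))
    raise ValueError(f"Space {space} is not supported by MADDPG.")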
57 changes: 57 additions & 0 deletions rllib/algorithms/maddpg/tests/test_maddpg.py
@@ -0,0 +1,57 @@
import unittest

import ray
import ray.rllib.agents.maddpg as maddpg
from ray.rllib.examples.env.two_step_game import TwoStepGame
from ray.rllib.policy.policy import PolicySpec
from ray.rllib.utils.test_utils import (
check_train_results,
framework_iterator,
)


class TestMADDPG(unittest.TestCase):
@classmethod
def setUpClass(cls) -> None:
ray.init()

@classmethod
def tearDownClass(cls) -> None:
ray.shutdown()

def test_maddpg_compilation(self):
"""Test whether an MADDPGTrainer can be built with all frameworks."""
config = maddpg.DEFAULT_CONFIG.copy()
config["env"] = TwoStepGame
config["env_config"] = {
"actions_are_logits": True,
}
config["multiagent"] = {
"policies": {
"pol1": PolicySpec(
config={"agent_id": 0},
),
"pol2": PolicySpec(
config={"agent_id": 1},
),
},
"policy_mapping_fn": (lambda aid, **kwargs: "pol2" if aid else "pol1"),
}

num_iterations = 1

# Only working for tf right now.
for _ in framework_iterator(config, frameworks="tf"):
trainer = maddpg.MADDPGTrainer(config)
for i in range(num_iterations):
results = trainer.train()
check_train_results(results)
print(results)
trainer.stop()


if __name__ == "__main__":
import pytest
import sys

sys.exit(pytest.main(["-v", __file__]))
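Note that the PolicySpec entries above set only config={"agent_id": ...} and leave the observation/action spaces as None, which is exactly the case the space-fallback change earlier in this PR handles. The file runs directly via its __main__ block or through pytest:

pytest -v rllib/algorithms/maddpg/tests/test_maddpg.py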
4 changes: 2 additions & 2 deletions rllib/tuned_examples/maddpg/two-step-game-maddpg.yaml
@@ -1,8 +1,8 @@
two-step-game-qmix-with-qmix-mixer:
two-step-game-maddpg:
env: ray.rllib.examples.env.two_step_game.TwoStepGame
run: MADDPG
stop:
episode_reward_mean: 8.0
episode_reward_mean: 7.2
timesteps_total: 20000
config:
# MADDPG only supports tf for now.
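Mirroring the BUILD rule added above, the renamed tuned example can be exercised locally with the regression-test runner (paths relative to the rllib/ directory, as in the BUILD args):

python tests/run_regression_tests.py --yaml-dir=tuned_examples/maddpg --framework=tf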