[RLlib] Fix broken RLlib tests in master. (ray-project#7894)

aannadi · Apr 5, 2020 · 82c2d9f
1 parent 38fad27
commit 82c2d9f
Show file tree

Hide file tree

Showing 6 changed files with 22 additions and 54 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -192,7 +192,7 @@ matrix:
         - ./ci/suppress_output ./ci/travis/install-ray.sh
       script:
         - if [ $RAY_CI_RLLIB_AFFECTED != "1" ]; then exit; fi
-        - travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=streamed rllib/...
+        - travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=errors rllib/...
 
     # RLlib: Learning tests with tf=1.x (from rllib/tuned_examples/regression_tests/*.yaml).
     # Requested by Edi (MS): Test all learning capabilities with tf1.x
@@ -213,7 +213,7 @@ matrix:
         - ./ci/suppress_output ./ci/travis/install-ray.sh
       script:
         - if [ $RAY_CI_RLLIB_FULL_AFFECTED != "1" ]; then exit; fi
-        - travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=streamed rllib/...
+        - travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=errors rllib/...
 
     # RLlib: Quick Agent train.py runs (compilation & running, no(!) learning).
     # Agent single tests (compilation, loss-funcs, etc..).

diff --git a/rllib/BUILD b/rllib/BUILD
@@ -41,12 +41,23 @@
 # --------------------------------------------------------------------
 
 py_test(
-    name = "run_regression_tests",
+    name = "run_regression_tests_cartpole",
     main = "tests/run_regression_tests.py",
-    tags = ["learning_tests"],
+    tags = ["learning_tests", "learning_tests_cartpole"],
     size = "enormous",  # = 60min timeout
     srcs = ["tests/run_regression_tests.py"],
-    data = glob(["tuned_examples/regression_tests/*.yaml"]),
+    data = glob(["tuned_examples/regression_tests/cartpole*.yaml"]),
+    # Pass `BAZEL` option and the path to look for yaml regression files.
+    args = ["BAZEL", "tuned_examples/regression_tests"]
+)
+
+py_test(
+    name = "run_regression_tests_pendulum",
+    main = "tests/run_regression_tests.py",
+    tags = ["learning_tests", "learning_tests_pendulum"],
+    size = "enormous",  # = 60min timeout
+    srcs = ["tests/run_regression_tests.py"],
+    data = glob(["tuned_examples/regression_tests/pendulum*.yaml"]),
     # Pass `BAZEL` option and the path to look for yaml regression files.
     args = ["BAZEL", "tuned_examples/regression_tests"]
 )

diff --git a/rllib/agents/dqn/tests/test_dqn.py b/rllib/agents/dqn/tests/test_dqn.py
@@ -97,37 +97,17 @@ def test_dqn_exploration_and_soft_q_config(self):
                 actions.append(trainer.compute_action(obs))
             check(np.std(actions), 0.0, false=True)
 
-            if eager_mode_ctx:
-                eager_mode_ctx.__exit__(None, None, None)
-
     def test_dqn_parameter_noise_exploration(self):
         """Tests, whether a DQN Agent works with ParameterNoise."""
         obs = np.array(0)
+        core_config = dqn.DEFAULT_CONFIG.copy()
+        core_config["num_workers"] = 0  # Run locally.
+        core_config["env_config"] = {"is_slippery": False, "map_name": "4x4"}
 
-        for fw in ["eager", "tf", "torch"]:
-            if fw == "torch":
-                continue
-            print("framework={}".format(fw))
-
-            core_config = dqn.DEFAULT_CONFIG.copy()
-            core_config["num_workers"] = 0  # Run locally.
-            core_config["env_config"] = {
-                "is_slippery": False,
-                "map_name": "4x4"
-            }
-            core_config["eager"] = fw == "eager"
-            core_config["use_pytorch"] = fw == "torch"
+        for fw in framework_iterator(core_config, ["tf", "eager"]):
 
             config = core_config.copy()
 
-            eager_mode_ctx = None
-            if fw == "tf":
-                assert not tf.executing_eagerly()
-            elif fw == "eager":
-                eager_mode_ctx = eager_mode()
-                eager_mode_ctx.__enter__()
-                assert tf.executing_eagerly()
-
             # DQN with ParameterNoise exploration (config["explore"]=True).
             # ----
             config["exploration_config"] = {"type": "ParameterNoise"}
@@ -258,9 +238,6 @@ def test_dqn_parameter_noise_exploration(self):
                 a = trainer.compute_action(obs, explore=True)
                 check(a, a_)
 
-            if eager_mode_ctx:
-                eager_mode_ctx.__exit__(None, None, None)
-
     def _get_current_noise(self, policy, fw):
         # If noise not even created yet, return 0.0.
         if policy.exploration.noise is None:

diff --git a/rllib/tests/run_regression_tests.py b/rllib/tests/run_regression_tests.py
@@ -51,7 +51,7 @@
 
         passed = False
         for i in range(3):
-            trials = run_experiments(experiments, resume=False)
+            trials = run_experiments(experiments, resume=False, verbose=1)
 
             for t in trials:
                 if (t.last_result["episode_reward_mean"] >=

diff --git a/rllib/tuned_examples/regression_tests/cartpole-ppo-tf-multi-gpu.yaml b/rllib/tuned_examples/regression_tests/cartpole-ppo-tf-multi-gpu.yaml
diff --git a/rllib/utils/exploration/tests/test_explorations.py b/rllib/utils/exploration/tests/test_explorations.py
@@ -68,7 +68,7 @@ def do_test_explorations(run,
             # Make sure actions drawn are different
             # (around some mean value), given constant observations.
             actions = []
-            for _ in range(50):
+            for _ in range(100):
                 actions.append(
                     trainer.compute_action(
                         observation=dummy_obs,