[train] New persistence mode: Migrate 🐙 Tune tests and examples (medium) (ray-project#39081)

justinvyu · matthewdeng · commit 926b8b169330 · 2023-08-29T20:58:41.000-07:00
Signed-off-by: Justin Yu <justinvyu@anyscale.com>
diff --git a/.buildkite/pipeline.ml.yml b/.buildkite/pipeline.ml.yml
@@ -293,8 +293,7 @@
     - TUNE_TESTING=1 DATA_PROCESSING_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
-      --test_tag_filters=medium_instance,-soft_imports,-gpu_only,-rllib,-multinode,-new_storage
-      --test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
+      --test_tag_filters=medium_instance,-soft_imports,-gpu_only,-rllib,-multinode
       python/ray/tune/...
 
 - label: ":octopus: :spiral_note_pad: New output: Tune tests and examples (small)"
@@ -320,9 +319,8 @@
     - TUNE_TESTING=1 DATA_PROCESSING_TESTING=1 ./ci/env/install-dependencies.sh
     - ./ci/env/env_info.sh
     - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
-      --test_tag_filters=medium_instance,-soft_imports,-gpu_only,-rllib,-multinode,-new_storage
+      --test_tag_filters=medium_instance,-soft_imports,-gpu_only,-rllib,-multinode
       --test_env=AIR_VERBOSITY=1
-      --test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
       python/ray/tune/...
 
 
@@ -369,18 +367,6 @@
       --test_env=RAY_AIR_NEW_PERSISTENCE_MODE=1
       python/ray/tune/...
 
-- label: ":octopus: :floppy_disk: New persistence mode: Tune tests and examples (medium)"
-  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TUNE_AFFECTED"]
-  instance_size: medium
-  commands:
-    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
-    - TUNE_TESTING=1 DATA_PROCESSING_TESTING=1 ./ci/env/install-dependencies.sh
-    - ./ci/env/env_info.sh
-    - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
-      --test_tag_filters=medium_instance,-soft_imports,-gpu_only,-rllib,-multinode,-no_new_storage
-      --test_env=RAY_AIR_NEW_PERSISTENCE_MODE=1
-      python/ray/tune/...
-
 
 ###### END STORAGE REFACTOR
 
diff --git a/python/ray/tune/BUILD b/python/ray/tune/BUILD
@@ -238,7 +238,7 @@ py_test(
     size = "large",
     srcs = ["tests/test_sample.py"],
     deps = [":tune_lib"],
-    tags = ["team:ml", "exclusive", "medium_instance", "no_new_storage"],
+    tags = ["team:ml", "exclusive", "medium_instance"],
 )
 
 py_test(
@@ -254,7 +254,7 @@ py_test(
     size = "large",
     srcs = ["tests/test_searchers.py"],
     deps = [":tune_lib"],
-    tags = ["team:ml", "exclusive", "medium_instance", "no_new_storage"],
+    tags = ["team:ml", "exclusive", "medium_instance"],
 )
 
 py_test(
@@ -334,15 +334,15 @@ py_test(
     size = "large",
     srcs = ["tests/test_trial_scheduler.py"],
     deps = [":tune_lib"],
-    tags = ["team:ml", "exclusive", "medium_instance", "new_storage"],
+    tags = ["team:ml", "exclusive", "medium_instance"],
 )
 
 py_test(
     name = "test_trial_scheduler_pbt",
     size = "large",
     srcs = ["tests/test_trial_scheduler_pbt.py"],
     deps = [":tune_lib"],
-    tags = ["team:ml", "exclusive", "medium_instance", "new_storage"],
+    tags = ["team:ml", "exclusive", "medium_instance"],
 )
 
 py_test(
@@ -390,7 +390,7 @@ py_test(
     size = "large",
     srcs = ["tests/test_tuner.py"],
     deps = [":tune_lib"],
-    tags = ["team:ml", "exclusive", "medium_instance", "no_new_storage"],
+    tags = ["team:ml", "exclusive", "medium_instance"],
 )
 
 py_test(
diff --git a/python/ray/tune/search/searcher.py b/python/ray/tune/search/searcher.py
@@ -229,12 +229,22 @@ def add_evaluated_trials(
         # lazy imports to avoid circular dependencies
         from ray.tune.experiment import Trial
         from ray.tune.analysis import ExperimentAnalysis
+        from ray.tune.analysis.experiment_analysis import NewExperimentAnalysis
         from ray.tune.result import DONE
 
-        if isinstance(trials_or_analysis, Trial):
-            trials_or_analysis = [trials_or_analysis]
-        elif isinstance(trials_or_analysis, ExperimentAnalysis):
-            trials_or_analysis = trials_or_analysis.trials
+        if isinstance(trials_or_analysis, (list, tuple)):
+            trials = trials_or_analysis
+        elif isinstance(trials_or_analysis, Trial):
+            trials = [trials_or_analysis]
+        elif isinstance(
+            trials_or_analysis, (ExperimentAnalysis, NewExperimentAnalysis)
+        ):
+            trials = trials_or_analysis.trials
+        else:
+            raise NotImplementedError(
+                "Expected input to be a `Trial`, a list of `Trial`s, or "
+                f"`ExperimentAnalysis`, got: {trials_or_analysis}"
+            )
 
         any_trial_had_metric = False
 
@@ -261,7 +271,7 @@ def trial_to_points(trial: Trial) -> Dict[str, Any]:
                 intermediate_values=None,  # we do not save those
             )
 
-        for trial in trials_or_analysis:
+        for trial in trials:
             kwargs = trial_to_points(trial)
             if kwargs:
                 self.add_evaluated_point(**kwargs)
diff --git a/python/ray/tune/tests/test_sample.py b/python/ray/tune/tests/test_sample.py
@@ -12,14 +12,14 @@
 
 import ray
 import ray.tune.search.sample
-from ray import tune
+from ray import train, tune
 from ray.tune import Experiment
 from ray.tune.search.util import logger
 from ray.tune.search.variant_generator import generate_variants
 
 
 def _mock_objective(config):
-    tune.report(**config)
+    train.report(config)
 
 
 def assertDictAlmostEqual(a, b):
diff --git a/python/ray/tune/tests/test_searchers.py b/python/ray/tune/tests/test_searchers.py
@@ -2,13 +2,16 @@
 from copy import deepcopy
 import numpy as np
 import os
+from packaging.version import Version
+import pandas
+import pytest
 import shutil
 import tempfile
 import unittest
 from unittest.mock import patch
 
 import ray
-from ray import tune
+from ray import train, tune
 from ray.air.constants import TRAINING_ITERATION
 from ray.tune.search import ConcurrencyLimiter
 
@@ -18,21 +21,21 @@ def _invalid_objective(config):
     metric = "point" if "point" in config else "report"
 
     if config[metric] > 4:
-        tune.report(float("inf"))
+        train.report({"_metric": float("inf")})
     elif config[metric] > 3:
-        tune.report(float("-inf"))
+        train.report({"_metric": float("-inf")})
     elif config[metric] > 2:
-        tune.report(np.nan)
+        train.report({"_metric": np.nan})
     else:
-        tune.report(float(config[metric]) or 0.1)
+        train.report({"_metric": float(config[metric]) or 0.1})
 
 
 def _multi_objective(config):
-    tune.report(a=config["a"] * 100, b=config["b"] * -100, c=config["c"])
+    train.report(dict(a=config["a"] * 100, b=config["b"] * -100, c=config["c"]))
 
 
 def _dummy_objective(config):
-    tune.report(metric=config["report"])
+    train.report(dict(metric=config["report"]))
 
 
 class InvalidValuesTest(unittest.TestCase):
@@ -224,6 +227,9 @@ def testDragonfly(self):
         self.assertCorrectExperimentOutput(out)
 
     def testHEBO(self):
+        if Version(pandas.__version__) >= Version("2.0.0"):
+            pytest.skip("HEBO does not support pandas>=2.0.0")
+
         from ray.tune.search.hebo import HEBOSearch
 
         with self.check_searcher_checkpoint_errors_scope():
@@ -507,6 +513,9 @@ def dbr_space(trial):
             dbr_searcher.add_evaluated_point(point, 1.0)
 
     def testHEBO(self):
+        if Version(pandas.__version__) >= Version("2.0.0"):
+            pytest.skip("HEBO does not support pandas>=2.0.0")
+
         from ray.tune.search.hebo import HEBOSearch
 
         searcher = HEBOSearch(
@@ -684,6 +693,9 @@ def testDragonfly(self):
         self._restore(searcher)
 
     def testHEBO(self):
+        if Version(pandas.__version__) >= Version("2.0.0"):
+            pytest.skip("HEBO does not support pandas>=2.0.0")
+
         from ray.tune.search.hebo import HEBOSearch
 
         searcher = HEBOSearch(
@@ -816,7 +828,6 @@ def testOptuna(self):
 
 
 if __name__ == "__main__":
-    import pytest
     import sys
 
     sys.exit(pytest.main(["-v", __file__]))
diff --git a/python/ray/tune/tests/test_tuner.py b/python/ray/tune/tests/test_tuner.py
@@ -3,7 +3,6 @@
 from unittest.mock import patch
 
 import pytest
-import shutil
 import unittest
 from typing import Optional
 
@@ -23,7 +22,6 @@
 from ray.train.trainer import BaseTrainer
 from ray.train.xgboost import XGBoostTrainer
 from ray.tune import Callback, CLIReporter
-from ray.tune.result import DEFAULT_RESULTS_DIR
 from ray.tune.tune_config import TuneConfig
 from ray.tune.tuner import Tuner
 
@@ -106,6 +104,12 @@ def gen_dataset_func_eager():
 class TunerTest(unittest.TestCase):
     """The e2e test for hparam tuning using Tuner API."""
 
+    @pytest.fixture(autouse=True)
+    def local_dir(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("RAY_AIR_LOCAL_CACHE_DIR", str(tmp_path / "ray_results"))
+        self.local_dir = str(tmp_path / "ray_results")
+        yield self.local_dir
+
     def setUp(self):
         ray.init()
 
@@ -114,9 +118,6 @@ def tearDown(self):
 
     def test_tuner_with_xgboost_trainer(self):
         """Test a successful run."""
-        shutil.rmtree(
-            os.path.join(DEFAULT_RESULTS_DIR, "test_tuner"), ignore_errors=True
-        )
         trainer = XGBoostTrainer(
             label_column="target",
             params={},
@@ -156,10 +157,6 @@ def test_tuner_with_xgboost_trainer(self):
     def test_tuner_with_xgboost_trainer_driver_fail_and_resume(self):
         # So that we have some global checkpointing happening.
         os.environ["TUNE_GLOBAL_CHECKPOINT_S"] = "1"
-        shutil.rmtree(
-            os.path.join(DEFAULT_RESULTS_DIR, "test_tuner_driver_fail"),
-            ignore_errors=True,
-        )
         trainer = XGBoostTrainer(
             label_column="target",
             params={},
@@ -211,18 +208,16 @@ def on_step_end(self, iteration, trials, **kwargs):
             tuner.fit()
 
         # Test resume
-        restore_path = os.path.join(DEFAULT_RESULTS_DIR, "test_tuner_driver_fail")
-        tuner = Tuner.restore(restore_path, trainable=trainer)
+        restore_path = os.path.join(self.local_dir, "test_tuner_driver_fail")
+        tuner = Tuner.restore(restore_path, trainable=trainer, param_space=param_space)
         # A hack before we figure out RunConfig semantics across resumes.
         tuner._local_tuner._run_config.callbacks = None
         results = tuner.fit()
         assert len(results) == 4
+        assert not results.errors
 
     def test_tuner_with_torch_trainer(self):
         """Test a successful run using torch trainer."""
-        shutil.rmtree(
-            os.path.join(DEFAULT_RESULTS_DIR, "test_tuner_torch"), ignore_errors=True
-        )
         # The following two should be tunable.
         config = {"lr": 1e-2, "hidden_size": 1, "batch_size": 4, "epochs": 10}
         scaling_config = ScalingConfig(num_workers=1, use_gpu=False)
@@ -387,6 +382,8 @@ def test_nonserializable_trainable():
         Tuner(lambda config: print(lock))
 
 
+# TODO(justinvyu): [chdir_to_trial_dir]
+@pytest.mark.skip("chdir_to_trial_dir is not implemented yet.")
 @pytest.mark.parametrize("runtime_env", [{}, {"working_dir": "."}])
 def test_tuner_no_chdir_to_trial_dir(shutdown_only, chdir_tmpdir, runtime_env):
     """Tests that setting `chdir_to_trial_dir=False` in `TuneConfig` allows for
diff --git a/python/ray/tune/tests/test_tuner_restore.py b/python/ray/tune/tests/test_tuner_restore.py