Skip to content

Commit 19ec5b5

Browse files
[train] Turn on new persistence mode by default (ray-project#38844)
Signed-off-by: Justin Yu <justinvyu@anyscale.com>
Signed-off-by: Matthew Deng <matt@anyscale.com>
Signed-off-by: matthewdeng <matt@anyscale.com>
Co-authored-by: Matthew Deng <matt@anyscale.com>
1 parent 420ce95 commit 19ec5b5

12 files changed, with 82 additions and 30 deletions.

.buildkite/pipeline.build.yml

+4-1
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,7 @@
9595
--test_env=DOCKER_CERT_PATH=/certs/client
9696
--test_env=DOCKER_TLS_CERTDIR=/certs
9797
--test_env=RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=0
98+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
9899
$(cat test_shard.txt)
99100

100101
- label: ":serverless: Serve Tests (streaming and routing FFs off)"
@@ -129,6 +130,7 @@
129130
--test_env=DOCKER_TLS_CERTDIR=/certs
130131
--test_env=RAY_SERVE_ENABLE_NEW_ROUTING=0
131132
--test_env=RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=0
133+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
132134
$(cat test_shard.txt)
133135

134136
- label: ":python: Minimal install Python {{matrix}}"
@@ -211,6 +213,7 @@
211213
--test_env=CONDA_SHLVL
212214
--test_env=CONDA_PREFIX
213215
--test_env=CONDA_DEFAULT_ENV
216+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
214217
python/ray/tests/...
215218

216219
- label: ":book: Doctest (CPU)"
@@ -227,7 +230,6 @@
227230
- ./ci/env/env_info.sh
228231
- bazel test --config=ci $(./scripts/bazel_export_options)
229232
--test_tag_filters=doctest,-gpu
230-
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=1
231233
python/ray/... doc/...
232234

233235
- label: ":python: Ray on Spark Test"
@@ -479,6 +481,7 @@
479481
--test_env=DOCKER_TLS_VERIFY=1
480482
--test_env=DOCKER_CERT_PATH=/certs/client
481483
--test_env=DOCKER_TLS_CERTDIR=/certs
484+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
482485

483486
- label: ":hadoop: Ray AIR HDFS tests"
484487
conditions: ["RAY_CI_ML_AFFECTED"]

.buildkite/pipeline.build_py37.yml

+3
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
} > test_shard.txt
2727
- cat test_shard.txt
2828
- bazel test --config=ci $(./ci/run/bazel_export_options)
29+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
2930
$(cat test_shard.txt)
3031

3132

@@ -55,6 +56,7 @@
5556
} > test_shard.txt
5657
- cat test_shard.txt
5758
- bazel test --config=ci $(./ci/run/bazel_export_options)
59+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
5860
$(cat test_shard.txt)
5961

6062

@@ -139,4 +141,5 @@
139141
--test_env=DOCKER_TLS_VERIFY=1
140142
--test_env=DOCKER_CERT_PATH=/certs/client
141143
--test_env=DOCKER_TLS_CERTDIR=/certs
144+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
142145
$(cat test_shard.txt)

.buildkite/pipeline.build_redis.yml

+1
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
- DL=1 ./ci/env/install-dependencies.sh
2323
- ./ci/env/env_info.sh
2424
- ./ci/ci.sh test_large --test_env=TEST_EXTERNAL_REDIS=1
25+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
2526

2627
- label: ":redis: (External Redis) (Medium A-J)"
2728
conditions: ["RAY_CI_PYTHON_AFFECTED"]

.buildkite/pipeline.gpu.yml

+4-1
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
--test_tag_filters=gpu
2727
--test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
2828
--test_env=RLLIB_NUM_GPUS=1
29+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
2930
rllib/...
3031

3132

@@ -41,7 +42,9 @@
4142
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
4243
- ./ci/env/install-dependencies.sh
4344
- pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt
44-
- bazel test --config=ci $(./ci/run/bazel_export_options) --test_tag_filters=gpu python/ray/serve/...
45+
- bazel test --config=ci $(./ci/run/bazel_export_options) --test_tag_filters=gpu
46+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
47+
python/ray/serve/...
4548

4649
# Todo: enable once tests pass
4750
#- label: ":tv: :brain: RLlib: GPU Examples {C/D}"

.buildkite/pipeline.gpu_large.yml

+16-7
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,9 @@
88
- pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt
99
- ./ci/env/install-horovod.sh
1010
- ./ci/env/env_info.sh
11-
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,gpu_only,-ray_air python/ray/train/...
11+
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,gpu_only,-ray_air
12+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
13+
python/ray/train/...
1214

1315
- label: ":tv: :database: :steam_locomotive: Datasets Train Integration GPU Tests and Examples (Python 3.7)"
1416
conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
@@ -17,7 +19,9 @@
1719
- TRAIN_TESTING=1 DATA_PROCESSING_TESTING=1 ./ci/env/install-dependencies.sh
1820
- pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt
1921
- ./ci/env/env_info.sh
20-
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=datasets_train,-doctest doc/...
22+
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=datasets_train,-doctest
23+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
24+
doc/...
2125

2226
- label: ":tv: :brain: RLlib: Multi-GPU Tests"
2327
conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
@@ -45,7 +49,9 @@
4549
- pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt
4650
- ./ci/env/install-horovod.sh
4751
- ./ci/env/env_info.sh
48-
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu python/ray/air/... python/ray/train/...
52+
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu
53+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
54+
python/ray/air/... python/ray/train/...
4955

5056
- label: ":tv: :book: Doc GPU tests and examples"
5157
conditions:
@@ -59,7 +65,9 @@
5965
# TODO(amogkam): Remove when https://github.com/ray-project/ray/issues/36011
6066
# is resolved.
6167
- pip install -U transformers
62-
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,-timeseries_libs,-post_wheel_build,-doctest doc/...
68+
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,-timeseries_libs,-post_wheel_build,-doctest
69+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
70+
doc/...
6371

6472
- label: ":book: Doctest (GPU)"
6573
commands:
@@ -75,8 +83,7 @@
7583
- pip install transformers==4.30.2 datasets==2.14.0
7684
- ./ci/env/env_info.sh
7785
- bazel test --config=ci $(./scripts/bazel_export_options)
78-
--test_tag_filters=doctest,-cpu
79-
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=1
86+
--test_tag_filters=doctest,-cpu
8087
python/ray/... doc/...
8188

8289
- label: ":zap: :python: Lightning 2.0 Train GPU tests"
@@ -90,4 +97,6 @@
9097
- pip uninstall -y pytorch-lightning
9198
- pip install lightning==2.0.4 pytorch-lightning==2.0.4 # todo move to requirements-test.txt
9299
- ./ci/env/env_info.sh
93-
- bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=ptl_v2 python/ray/train/...
100+
- bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=ptl_v2
101+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
102+
python/ray/train/...

.buildkite/pipeline.ml.yml

+30-7
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,11 @@
88
- DATA_PROCESSING_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
99
- ./ci/env/env_info.sh
1010
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-gpu,-hdfs
11+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
1112
python/ray/air/...
12-
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=ray_air python/ray/data/...
13+
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=ray_air
14+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
15+
python/ray/data/...
1316

1417
- label: ":airplane: AIR/ML release smoke tests"
1518
conditions:
@@ -26,6 +29,7 @@
2629
- bazel test --config=ci $(./ci/run/bazel_export_options)
2730
--build_tests_only
2831
--test_tag_filters=team:ml
32+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
2933
release/...
3034

3135

@@ -42,8 +46,10 @@
4246
- ./ci/run/run_bazel_test_with_sharding.sh
4347
--config=ci $(./ci/run/bazel_export_options)
4448
--test_tag_filters=-gpu_only,-gpu,-minimal,-tune,-doctest,-new_storage
49+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
4550
python/ray/train/...
4651

52+
4753
# Currently empty test suite
4854
#- label: ":steam_locomotive: :octopus: Train + Tune tests and examples"
4955
# conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
@@ -242,7 +248,9 @@
242248
- RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
243249
- ./ci/env/env_info.sh
244250
- ./ci/run/run_bazel_test_with_sharding.sh --config=ci $(./ci/run/bazel_export_options) --build_tests_only
245-
--test_tag_filters=examples,-multi_gpu,-gpu --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
251+
--test_tag_filters=examples,-multi_gpu,-gpu --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
252+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
253+
rllib/...
246254

247255
- label: ":brain: RLlib: tests/ dir"
248256
conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
@@ -277,6 +285,7 @@
277285
- ./ci/run/run_bazel_test_with_sharding.sh
278286
--config=ci $(./ci/run/bazel_export_options) --build_tests_only
279287
--test_tag_filters=-medium_instance,-soft_imports,-gpu_only,-rllib,-multinode,-new_storage
288+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
280289
python/ray/tune/...
281290

282291
- label: ":octopus: Tune tests and examples (medium)"
@@ -288,6 +297,7 @@
288297
- ./ci/env/env_info.sh
289298
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
290299
--test_tag_filters=medium_instance,-soft_imports,-gpu_only,-rllib,-multinode,-new_storage
300+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
291301
python/ray/tune/...
292302

293303
- label: ":octopus: :spiral_note_pad: New output: Tune tests and examples (small)"
@@ -302,6 +312,7 @@
302312
--config=ci $(./ci/run/bazel_export_options) --build_tests_only
303313
--test_tag_filters=-medium_instance,-soft_imports,-gpu_only,-rllib,-multinode,-new_storage
304314
--test_env=AIR_VERBOSITY=1
315+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
305316
python/ray/tune/...
306317

307318
- label: ":octopus: :spiral_note_pad: New output: Tune tests and examples (medium)"
@@ -314,6 +325,7 @@
314325
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
315326
--test_tag_filters=medium_instance,-soft_imports,-gpu_only,-rllib,-multinode,-new_storage
316327
--test_env=AIR_VERBOSITY=1
328+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
317329
python/ray/tune/...
318330

319331

@@ -325,6 +337,7 @@
325337
- TUNE_TESTING=1 ./ci/env/install-dependencies.sh
326338
- ./ci/env/env_info.sh
327339
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-gpu_only,rllib
340+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
328341
python/ray/tune/...
329342

330343
- label: ":octopus: ML library integrations tests and examples."
@@ -334,8 +347,12 @@
334347
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
335348
- TUNE_TESTING=1 INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh
336349
- ./ci/env/env_info.sh
337-
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only python/ray/tests/xgboost/...
338-
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only python/ray/tests/horovod/...
350+
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
351+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
352+
python/ray/tests/xgboost/...
353+
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only
354+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
355+
python/ray/tests/horovod/...
339356

340357

341358

@@ -524,7 +541,9 @@
524541
# (see https://github.com/ray-project/ray/pull/38432/)
525542
- pip install "transformers==4.30.2" "datasets==2.14.0"
526543
- ./ci/env/env_info.sh
527-
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-timeseries_libs,-external,-ray_air,-gpu,-post_wheel_build,-doctest,-datasets_train,-highly_parallel,-new_storage doc/...
544+
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-timeseries_libs,-external,-ray_air,-gpu,-post_wheel_build,-doctest,-datasets_train,-highly_parallel,-new_storage
545+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
546+
doc/...
528547

529548
- label: ":book: Doc tests and examples with time series libraries"
530549
conditions:
@@ -545,7 +564,9 @@
545564
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
546565
- DOC_TESTING=1 ./ci/env/install-dependencies.sh
547566
- ./ci/env/env_info.sh
548-
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=ray_air,-external,-timeseries_libs,-gpu,-post_wheel_build,-doctest doc/...
567+
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=ray_air,-external,-timeseries_libs,-gpu,-post_wheel_build,-doctest
568+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
569+
doc/...
549570

550571
- label: ":book: Doc examples for external code "
551572
conditions: ["RAY_CI_PYTHON_AFFECTED", "RAY_CI_TUNE_AFFECTED", "RAY_CI_DOC_AFFECTED", "RAY_CI_SERVE_AFFECTED", "RAY_CI_ML_AFFECTED"]
@@ -554,7 +575,9 @@
554575
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
555576
- DOC_TESTING=1 ./ci/env/install-dependencies.sh
556577
- ./ci/env/env_info.sh
557-
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=external,-timeseries_libs,-gpu,-post_wheel_build,-doctest doc/...
578+
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=external,-timeseries_libs,-gpu,-post_wheel_build,-doctest
579+
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
580+
doc/...
558581

559582

560583
- label: ":exploding_death_star: RLlib Contrib: A3C Tests"

.buildkite/pipeline.windows.yml

+1
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ steps:
3737
- . ./ci/ci.sh init
3838
- ./ci/ci.sh build
3939
- export RAY_ENABLE_WINDOWS_OR_OSX_CLUSTER="1"
40+
- export RAY_AIR_NEW_PERSISTENCE_MODE="0"
4041
- if [ "${BUILDKITE_PARALLEL_JOB}" = "0" ]; then ./ci/ci.sh test_core; fi
4142
# The next command will be sharded into $parallelism shards.
4243
- ./ci/ci.sh test_python

ci/ray_ci/container.py

+4-1
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,10 @@ def _run_tests_in_docker(test_targets: List[str], team: str) -> subprocess.Popen
5858
]
5959
)
6060
commands.append(
61-
"bazel test --config=ci $(./ci/run/bazel_export_options) "
61+
"bazel test --config=ci "
62+
# TODO(matthewdeng): Remove this env var as part of #38570.
63+
"--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0 "
64+
"$(./ci/run/bazel_export_options) "
6265
f"{' '.join(test_targets)}",
6366
)
6467
return subprocess.Popen(_docker_run_bash_script("\n".join(commands), team))

python/ray/air/tests/test_air_usage.py

+1
Original file line number | Diff line number | Diff line change
@@ -202,6 +202,7 @@ def test_tag_env_vars(ray_start_4_cpus, mock_record, tuner):
202202
variables are ignored."""
203203
env_vars_to_record = {
204204
"RAY_AIR_LOCAL_CACHE_DIR": "~/ray_results",
205+
"RAY_AIR_NEW_PERSISTENCE_MODE": "0",
205206
"TUNE_DISABLE_AUTO_CALLBACK_SYNCER": "1",
206207
}
207208
untracked_env_vars = {"RANDOM_USER_ENV_VAR": "asdf"}

python/ray/air/tests/test_experiment_restore.py

+16-12
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import json
2+
import os
23

34
import numpy as np
45
import pandas as pd
@@ -101,18 +102,21 @@ def test_experiment_restore(tmp_path, runner_type):
101102

102103
total_iters = iters_per_trial * num_trials
103104

104-
env = {
105-
"RUNNER_TYPE": runner_type,
106-
"STORAGE_PATH": str(storage_path),
107-
"EXP_NAME": exp_name,
108-
"CALLBACK_DUMP_FILE": str(callback_dump_file),
109-
"RUN_STARTED_MARKER": str(run_started_marker),
110-
"TIME_PER_ITER_S": str(time_per_iter_s),
111-
"ITERATIONS_PER_TRIAL": str(iters_per_trial),
112-
"NUM_TRIALS": str(num_trials),
113-
"MAX_CONCURRENT_TRIALS": str(max_concurrent),
114-
"CSV_DATA_FILE": csv_file,
115-
}
105+
env = os.environ.copy()
106+
env.update(
107+
{
108+
"RUNNER_TYPE": runner_type,
109+
"STORAGE_PATH": str(storage_path),
110+
"EXP_NAME": exp_name,
111+
"CALLBACK_DUMP_FILE": str(callback_dump_file),
112+
"RUN_STARTED_MARKER": str(run_started_marker),
113+
"TIME_PER_ITER_S": str(time_per_iter_s),
114+
"ITERATIONS_PER_TRIAL": str(iters_per_trial),
115+
"NUM_TRIALS": str(num_trials),
116+
"MAX_CONCURRENT_TRIALS": str(max_concurrent),
117+
"CSV_DATA_FILE": csv_file,
118+
}
119+
)
116120

117121
# Pass criteria
118122
no_interrupts_runtime = 16.0

python/ray/tests/test_usage_stats.py

+1
Original file line number | Diff line number | Diff line change
@@ -1208,6 +1208,7 @@ def run_usage_stats_server(reporter):
12081208
expected_payload["tune_searcher"] = "BasicVariantGenerator"
12091209
expected_payload["air_storage_configuration"] = "driver"
12101210
expected_payload["air_entrypoint"] = "Tuner.fit"
1211+
expected_payload["air_env_vars"] = '["RAY_AIR_NEW_PERSISTENCE_MODE"]'
12111212
assert payload["extra_usage_tags"] == expected_payload
12121213
assert payload["total_num_nodes"] == 1
12131214
assert payload["total_num_running_jobs"] == 1

python/ray/train/_internal/storage.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ def _use_storage_context() -> bool:
4343
# Whether to enable the new simple persistence mode.
4444
from ray.train.constants import RAY_AIR_NEW_PERSISTENCE_MODE
4545

46-
return bool(int(os.environ.get(RAY_AIR_NEW_PERSISTENCE_MODE, "0")))
46+
return bool(int(os.environ.get(RAY_AIR_NEW_PERSISTENCE_MODE, "1")))
4747

4848

4949
class _ExcludingLocalFilesystem(LocalFileSystem):

0 commit comments

Comments
 (0)