Skip to content

Commit 8794357

Browse files
committed
[train] enable new persistence mode for core and serve tests (ray-project#38938)
Signed-off-by: Matthew Deng <matt@anyscale.com>
1 parent c62ddd2 commit 8794357

File tree

10 files changed

+39
-27
lines changed

10 files changed

+39
-27
lines changed

.buildkite/pipeline.build.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,6 @@
9595
--test_env=DOCKER_CERT_PATH=/certs/client
9696
--test_env=DOCKER_TLS_CERTDIR=/certs
9797
--test_env=RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=0
98-
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
9998
$(cat test_shard.txt)
10099

101100
- label: ":serverless: Serve Tests (streaming and routing FFs off)"
@@ -130,7 +129,6 @@
130129
--test_env=DOCKER_TLS_CERTDIR=/certs
131130
--test_env=RAY_SERVE_ENABLE_NEW_ROUTING=0
132131
--test_env=RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=0
133-
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
134132
$(cat test_shard.txt)
135133

136134
- label: ":python: Minimal install Python {{matrix}}"
@@ -213,7 +211,6 @@
213211
--test_env=CONDA_SHLVL
214212
--test_env=CONDA_PREFIX
215213
--test_env=CONDA_DEFAULT_ENV
216-
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
217214
python/ray/tests/...
218215

219216
- label: ":book: Doctest (CPU)"

.buildkite/pipeline.build_py37.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,5 +141,4 @@
141141
--test_env=DOCKER_TLS_VERIFY=1
142142
--test_env=DOCKER_CERT_PATH=/certs/client
143143
--test_env=DOCKER_TLS_CERTDIR=/certs
144-
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
145144
$(cat test_shard.txt)

.buildkite/pipeline.build_redis.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
- DL=1 ./ci/env/install-dependencies.sh
2323
- ./ci/env/env_info.sh
2424
- ./ci/ci.sh test_large --test_env=TEST_EXTERNAL_REDIS=1
25-
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
2625

2726
- label: ":redis: (External Redis) (Medium A-J)"
2827
conditions: ["RAY_CI_PYTHON_AFFECTED"]

.buildkite/pipeline.gpu.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@
4343
- ./ci/env/install-dependencies.sh
4444
- pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt
4545
- bazel test --config=ci $(./ci/run/bazel_export_options) --test_tag_filters=gpu
46-
--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0
4746
python/ray/serve/...
4847

4948
# Todo: enable once tests pass

ci/ray_ci/container.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,6 @@ def _run_tests_in_docker(test_targets: List[str], team: str) -> subprocess.Popen
5959
)
6060
commands.append(
6161
"bazel test --config=ci "
62-
# TODO(matthewdeng): Remove this env var as part of #38570.
63-
"--test_env=RAY_AIR_NEW_PERSISTENCE_MODE=0 "
6462
"$(./ci/run/bazel_export_options) "
6563
f"{' '.join(test_targets)}",
6664
)

python/ray/serve/tests/test_air_integrations.py

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os
22
import tempfile
3-
from typing import List, Optional
3+
from typing import Any, Dict, List, Optional
44

55
import numpy as np
66
import pandas as pd
@@ -10,6 +10,7 @@
1010
from fastapi import Depends, FastAPI
1111

1212
import ray
13+
import ray.cloudpickle as ray_pickle
1314
from ray import serve
1415
from ray.train import Checkpoint
1516
from ray.serve.air_integrations import _BatchingManager
@@ -128,6 +129,22 @@ def test_unpack_dataframe(self, batched_df, expected):
128129
)
129130

130131

132+
def create_dict_checkpoint(
133+
data: Dict[str, Any], directory: Optional[str] = None
134+
) -> Checkpoint:
135+
if not directory:
136+
directory = tempfile.mkdtemp()
137+
with open(os.path.join(directory, "data.pkl"), "wb") as f:
138+
ray_pickle.dump(data, f)
139+
return Checkpoint.from_directory(directory)
140+
141+
142+
def load_dict_checkpoint(checkpoint: Checkpoint) -> Dict[str, Any]:
143+
with checkpoint.as_directory() as checkpoint_dir:
144+
with open(os.path.join(checkpoint_dir, "data.pkl"), "rb") as f:
145+
return ray_pickle.load(f)
146+
147+
131148
class AdderPredictor(Predictor):
132149
def __init__(self, increment: int, do_double: bool) -> None:
133150
self.increment = increment
@@ -137,7 +154,7 @@ def __init__(self, increment: int, do_double: bool) -> None:
137154
def from_checkpoint(
138155
cls, checkpoint: Checkpoint, do_double: bool = False
139156
) -> "AdderPredictor":
140-
return cls(checkpoint.to_dict()["increment"], do_double)
157+
return cls(load_dict_checkpoint(checkpoint)["increment"], do_double)
141158

142159
def predict(
143160
self, data: np.ndarray, override_increment: Optional[int] = None
@@ -170,7 +187,7 @@ async def __call__(self, request: Request):
170187
return self.predictor.predict(np.array(data["array"]))
171188

172189
AdderDeployment.options(name="Adder").deploy(
173-
checkpoint=Checkpoint.from_dict({"increment": 2}),
190+
checkpoint=create_dict_checkpoint({"increment": 2}),
174191
)
175192
resp = ray.get(send_request.remote(json={"array": [40]}))
176193
assert resp == [{"value": 42, "batch_size": 1}]
@@ -189,7 +206,7 @@ async def __call__(self, request: Request):
189206
)
190207

191208
AdderDeployment.options(name="Adder").deploy(
192-
checkpoint=Checkpoint.from_dict({"increment": 2}),
209+
checkpoint=create_dict_checkpoint({"increment": 2}),
193210
)
194211

195212
resp = ray.get(send_request.remote(json={"array": [40]}))
@@ -207,7 +224,7 @@ async def __call__(self, request: Request):
207224
return self.predictor.predict(np.array(data["array"]))
208225

209226
AdderDeployment.options(name="Adder").deploy(
210-
checkpoint=Checkpoint.from_dict({"increment": 2}),
227+
checkpoint=create_dict_checkpoint({"increment": 2}),
211228
)
212229
resp = ray.get(send_request.remote(json={"array": [40]}))
213230
assert resp == [{"value": 84, "batch_size": 1}]
@@ -226,7 +243,7 @@ async def __call__(self, requests: List[Request]):
226243
return self.predictor.predict(batch)
227244

228245
AdderDeployment.options(name="Adder").deploy(
229-
checkpoint=Checkpoint.from_dict({"increment": 2}),
246+
checkpoint=create_dict_checkpoint({"increment": 2}),
230247
)
231248

232249
refs = [send_request.remote(json={"array": [40]}) for _ in range(2)]
@@ -250,8 +267,7 @@ async def predict(self, data=Depends(json_to_ndarray)):
250267

251268
def test_air_integrations_in_pipeline(serve_instance):
252269
path = tempfile.mkdtemp()
253-
uri = f"file://{path}/test_uri"
254-
Checkpoint.from_dict({"increment": 2}).to_uri(uri)
270+
create_dict_checkpoint({"increment": 2}, path)
255271

256272
@serve.deployment
257273
class AdderDeployment:
@@ -263,7 +279,7 @@ async def __call__(self, data):
263279

264280
with InputNode() as dag_input:
265281
m1 = AdderDeployment.bind(
266-
checkpoint=Checkpoint.from_uri(uri),
282+
checkpoint=Checkpoint.from_directory(path),
267283
)
268284
dag = m1.__call__.bind(dag_input)
269285
deployments = build(Ingress.bind(dag), "")
@@ -278,8 +294,7 @@ async def __call__(self, data):
278294

279295
def test_air_integrations_reconfigure(serve_instance):
280296
path = tempfile.mkdtemp()
281-
uri = f"file://{path}/test_uri"
282-
Checkpoint.from_dict({"increment": 2}).to_uri(uri)
297+
create_dict_checkpoint({"increment": 2}, path)
283298

284299
@serve.deployment
285300
class AdderDeployment:
@@ -288,7 +303,7 @@ def __init__(self, checkpoint: Checkpoint):
288303

289304
def reconfigure(self, config):
290305
self.predictor = AdderPredictor.from_checkpoint(
291-
Checkpoint.from_dict(config["checkpoint"])
306+
create_dict_checkpoint(config["checkpoint"])
292307
)
293308

294309
async def __call__(self, data):
@@ -300,7 +315,7 @@ async def __call__(self, data):
300315

301316
with InputNode() as dag_input:
302317
m1 = AdderDeployment.options(user_config=additional_config).bind(
303-
checkpoint=Checkpoint.from_uri(uri),
318+
checkpoint=Checkpoint.from_directory(path),
304319
)
305320
dag = m1.__call__.bind(dag_input)
306321
deployments = build(Ingress.bind(dag), "")

python/ray/serve/tests/test_air_integrations_gpu.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,14 @@ def __init__(self, checkpoint):
3838
async def __call__(self, data):
3939
return self.predictor.predict(data)
4040

41+
import tempfile
42+
43+
tmpdir = tempfile.mkdtemp()
44+
checkpoint = Checkpoint.from_directory(tmpdir)
45+
4146
serve.run(
4247
DAGDriver.bind(
43-
DummyGPUDeployment.options(name="GPU").bind(Checkpoint.from_dict({"x": 1})),
48+
DummyGPUDeployment.options(name="GPU").bind(checkpoint),
4449
http_adapter=json_to_ndarray,
4550
)
4651
)

python/ray/tests/test_multi_node_3.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,14 +277,15 @@ def test_run_driver_twice(ray_start_regular):
277277
address_info = ray_start_regular
278278
driver_script = """
279279
import ray
280+
import ray.train
280281
import ray.tune as tune
281282
import os
282283
import time
283284
284-
def train_func(config, reporter): # add a reporter arg
285+
def train_func(config):
285286
for i in range(2):
286287
time.sleep(0.1)
287-
reporter(timesteps_total=i, mean_accuracy=i+97) # report metrics
288+
ray.train.report(dict(timesteps_total=i, mean_accuracy=i+97)) # report metrics
288289
289290
os.environ["TUNE_RESUME_PROMPT_OFF"] = "True"
290291
ray.init(address="{}", namespace="default_test_namespace")

python/ray/tests/test_task_events.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,7 @@ def test_parent_task_id_tune_e2e(shutdown_only):
434434
script = """
435435
import numpy as np
436436
import ray
437+
import ray.train
437438
from ray import tune
438439
import time
439440
@@ -448,7 +449,7 @@ def train_function(config):
448449
for i in range(5):
449450
loss = config["mean"] * np.random.randn() + ray.get(
450451
train_step_1.remote())
451-
tune.report(loss=loss, nodes=ray.nodes())
452+
ray.train.report(dict(loss=loss, nodes=ray.nodes()))
452453
453454
454455
def tune_function():

python/ray/tests/test_usage_stats.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1206,9 +1206,7 @@ def run_usage_stats_server(reporter):
12061206
if os.environ.get("RAY_MINIMAL") != "1":
12071207
expected_payload["tune_scheduler"] = "FIFOScheduler"
12081208
expected_payload["tune_searcher"] = "BasicVariantGenerator"
1209-
expected_payload["air_storage_configuration"] = "driver"
12101209
expected_payload["air_entrypoint"] = "Tuner.fit"
1211-
expected_payload["air_env_vars"] = '["RAY_AIR_NEW_PERSISTENCE_MODE"]'
12121210
assert payload["extra_usage_tags"] == expected_payload
12131211
assert payload["total_num_nodes"] == 1
12141212
assert payload["total_num_running_jobs"] == 1

0 commit comments

Comments (0)