Skip to content

Commit 0661cfe

Browse files
hijkzzz and youkaichao authored
Fix v1 supported oracle for worker-cls and worker-extension-cls (#15324)
Signed-off-by: youkaichao <youkaichao@gmail.com>
Co-authored-by: youkaichao <youkaichao@gmail.com>
1 parent a827aa8 commit 0661cfe

File tree

3 files changed

+7
-12
lines changed

3 files changed

+7
-12
lines changed

.buildkite/test-pipeline.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -148,8 +148,8 @@ steps:
148148
# TODO: create a dedicated test section for multi-GPU example tests
149149
# when we have multiple distributed example tests
150150
- pushd ../examples/offline_inference
151-
- python3 rlhf.py
152-
- RAY_DEDUP_LOGS=0 python3 rlhf_colocate.py
151+
- VLLM_ENABLE_V1_MULTIPROCESSING=0 python3 rlhf.py
152+
- VLLM_ENABLE_V1_MULTIPROCESSING=0 RAY_DEDUP_LOGS=0 python3 rlhf_colocate.py
153153
- popd
154154

155155
- label: Metrics, Tracing Test # 10min

vllm/engine/arg_utils.py

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1459,16 +1459,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
14591459
recommend_to_remove=False)
14601460
return False
14611461

1462-
if self.worker_cls != EngineArgs.worker_cls:
1463-
_raise_or_fallback(feature_name="--worker-cls",
1464-
recommend_to_remove=False)
1465-
return False
1466-
1467-
if self.worker_extension_cls != EngineArgs.worker_extension_cls:
1468-
_raise_or_fallback(feature_name="--worker-extension-cls",
1469-
recommend_to_remove=False)
1470-
return False
1471-
14721462
if self.num_scheduler_steps != EngineArgs.num_scheduler_steps:
14731463
_raise_or_fallback(feature_name="--num-scheduler-steps",
14741464
recommend_to_remove=True)

vllm/utils.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2170,6 +2170,11 @@ def _maybe_force_spawn():
21702170
if cuda_is_initialized():
21712171
reason = "CUDA is initialized"
21722172
elif is_in_ray_actor():
2173+
# even if we choose to spawn, we need to pass the ray address
2174+
# to the subprocess so that it knows how to connect to the ray cluster.
2175+
# env vars are inherited by subprocesses, even if we use spawn.
2176+
import ray
2177+
os.environ["RAY_ADDRESS"] = ray.get_runtime_context().gcs_address
21732178
reason = "In a Ray actor and can only be spawned"
21742179

21752180
if reason is not None:

0 commit comments

Comments
 (0)