Skip to content

Commit 0229c38

Browse files
FlorianJoncour authored
Better integration with Ray Serve (#1821)
Co-authored-by: FlorianJoncour <florian@zetta-sys.com>
1 parent a7b3e33 commit 0229c38

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

vllm/engine/llm_engine.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
SchedulerConfig)
88
from vllm.core.scheduler import Scheduler, SchedulerOutputs
99
from vllm.engine.arg_utils import EngineArgs
10-
from vllm.engine.ray_utils import RayWorker, initialize_cluster, ray
10+
from vllm.engine.ray_utils import RayWorkerVllm, initialize_cluster, ray
1111
from vllm.logger import init_logger
1212
from vllm.outputs import RequestOutput
1313
from vllm.sampling_params import SamplingParams
@@ -162,12 +162,12 @@ def _init_workers_ray(self, placement_group: "PlacementGroup",
162162
continue
163163
worker = ray.remote(
164164
num_cpus=0,
165-
num_gpus=1,
165+
num_gpus=self.cache_config.gpu_memory_utilization,
166166
scheduling_strategy=PlacementGroupSchedulingStrategy(
167167
placement_group=placement_group,
168168
placement_group_capture_child_tasks=True),
169169
**ray_remote_kwargs,
170-
)(RayWorker).remote(self.model_config.trust_remote_code)
170+
)(RayWorkerVllm).remote(self.model_config.trust_remote_code)
171171
self.workers.append(worker)
172172

173173
# Initialize torch distributed process group for the workers.

vllm/engine/ray_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import ray
1111
from ray.air.util.torch_dist import TorchDistributedWorker
1212

13-
class RayWorker(TorchDistributedWorker):
13+
class RayWorkerVllm(TorchDistributedWorker):
1414
"""Ray wrapper for vllm.worker.Worker, allowing Worker to be
1515
lazily initialized after Ray sets CUDA_VISIBLE_DEVICES."""
1616

@@ -36,7 +36,7 @@ def execute_method(self, method, *args, **kwargs):
3636
"`pip install ray pandas pyarrow`.")
3737
ray = None
3838
TorchDistributedWorker = None
39-
RayWorker = None
39+
RayWorkerVllm = None
4040

4141
if TYPE_CHECKING:
4242
from ray.util.placement_group import PlacementGroup

0 commit comments

Comments (0)