Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better integration with Ray Serve #1821

Merged
merged 2 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions vllm/engine/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
SchedulerConfig)
from vllm.core.scheduler import Scheduler, SchedulerOutputs
from vllm.engine.arg_utils import EngineArgs
from vllm.engine.ray_utils import RayWorker, initialize_cluster, ray
from vllm.engine.ray_utils import RayWorkerVllm, initialize_cluster, ray
from vllm.logger import init_logger
from vllm.outputs import RequestOutput
from vllm.sampling_params import SamplingParams
Expand Down Expand Up @@ -162,12 +162,12 @@ def _init_workers_ray(self, placement_group: "PlacementGroup",
continue
worker = ray.remote(
num_cpus=0,
num_gpus=1,
num_gpus=self.cache_config.gpu_memory_utilization,
scheduling_strategy=PlacementGroupSchedulingStrategy(
placement_group=placement_group,
placement_group_capture_child_tasks=True),
**ray_remote_kwargs,
)(RayWorker).remote(self.model_config.trust_remote_code)
)(RayWorkerVllm).remote(self.model_config.trust_remote_code)
self.workers.append(worker)

# Initialize torch distributed process group for the workers.
Expand Down
4 changes: 2 additions & 2 deletions vllm/engine/ray_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import ray
from ray.air.util.torch_dist import TorchDistributedWorker

class RayWorker(TorchDistributedWorker):
class RayWorkerVllm(TorchDistributedWorker):
"""Ray wrapper for vllm.worker.Worker, allowing Worker to be
lazily initialized after Ray sets CUDA_VISIBLE_DEVICES."""

Expand All @@ -36,7 +36,7 @@ def execute_method(self, method, *args, **kwargs):
"`pip install ray pandas pyarrow`.")
ray = None
TorchDistributedWorker = None
RayWorker = None
RayWorkerVllm = None

if TYPE_CHECKING:
from ray.util.placement_group import PlacementGroup
Expand Down
Loading