Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better integration with Ray Serve #1821

Merged
merged 2 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions vllm/engine/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
SchedulerConfig)
from vllm.core.scheduler import Scheduler, SchedulerOutputs
from vllm.engine.arg_utils import EngineArgs
from vllm.engine.ray_utils import RayWorker, initialize_cluster, ray
from vllm.engine.ray_utils import RayWorkerVllm, initialize_cluster, ray
from vllm.logger import init_logger
from vllm.outputs import RequestOutput
from vllm.sampling_params import SamplingParams
Expand Down Expand Up @@ -162,12 +162,12 @@ def _init_workers_ray(self, placement_group: "PlacementGroup",
continue
worker = ray.remote(
num_cpus=0,
num_gpus=1,
num_gpus=self.cache_config.gpu_memory_utilization,
scheduling_strategy=PlacementGroupSchedulingStrategy(
placement_group=placement_group,
placement_group_capture_child_tasks=True),
**ray_remote_kwargs,
)(RayWorker).remote(self.model_config.trust_remote_code)
)(RayWorkerVllm).remote(self.model_config.trust_remote_code)
self.workers.append(worker)

# Initialize torch distributed process group for the workers.
Expand Down
4 changes: 2 additions & 2 deletions vllm/engine/ray_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import ray
from ray.air.util.torch_dist import TorchDistributedWorker

class RayWorker(TorchDistributedWorker):
class RayWorkerVllm(TorchDistributedWorker):
"""Ray wrapper for vllm.worker.Worker, allowing Worker to be
lazily initialized after Ray sets CUDA_VISIBLE_DEVICES."""

Expand All @@ -36,7 +36,7 @@ def execute_method(self, method, *args, **kwargs):
"`pip install ray pandas pyarrow`.")
ray = None
TorchDistributedWorker = None
RayWorker = None
RayWorkerVllm = None

if TYPE_CHECKING:
from ray.util.placement_group import PlacementGroup
Expand Down
Loading