From c21d8b875cc127652efb46b9539643574b51eb81 Mon Sep 17 00:00:00 2001
From: youkaichao
Date: Wed, 19 Jun 2024 01:05:00 -0700
Subject: [PATCH] [misc][distributed] use 127.0.0.1 for single-node (#5619)

---
 vllm/executor/multiproc_gpu_executor.py |  7 +++++--
 vllm/executor/ray_gpu_executor.py       | 10 ++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/vllm/executor/multiproc_gpu_executor.py b/vllm/executor/multiproc_gpu_executor.py
index 8385e56f88b39..e63e5a3a027fa 100644
--- a/vllm/executor/multiproc_gpu_executor.py
+++ b/vllm/executor/multiproc_gpu_executor.py
@@ -10,7 +10,7 @@
 from vllm.logger import init_logger
 from vllm.sequence import ExecuteModelRequest, SamplerOutput
 from vllm.utils import (cuda_device_count_stateless,
-                        get_distributed_init_method, get_ip, get_open_port,
+                        get_distributed_init_method, get_open_port,
                         get_vllm_instance_id, make_async)
 
 logger = init_logger(__name__)
@@ -37,8 +37,11 @@ def _init_executor(self) -> None:
         assert world_size <= cuda_device_count_stateless(), (
             "please set tensor_parallel_size to less than max local gpu count")
 
+        # Multiprocessing-based executor does not support multi-node setting.
+        # Since it only works for single node, we can use the loopback address
+        # 127.0.0.1 for communication.
         distributed_init_method = get_distributed_init_method(
-            get_ip(), get_open_port())
+            "127.0.0.1", get_open_port())
 
         if world_size == 1:
             self.workers = []
diff --git a/vllm/executor/ray_gpu_executor.py b/vllm/executor/ray_gpu_executor.py
index 843332e5ea0c8..fc83c552888a6 100644
--- a/vllm/executor/ray_gpu_executor.py
+++ b/vllm/executor/ray_gpu_executor.py
@@ -161,6 +161,16 @@ def _init_workers_ray(self, placement_group: "PlacementGroup",
         self._run_workers("update_environment_variables",
                           all_args=all_args_to_update_environment_variables)
 
+        if len(node_gpus) == 1:
+            # in single node case, we don't need to get the IP address.
+            # the loopback address is sufficient
+            # NOTE: a node may have several IP addresses, one for each
+            # network interface. `get_ip()` might return any of them,
+            # while they might not work for communication inside the node
+            # if the network setup is complicated. Using the loopback address
+            # solves this issue, as it always works for communication inside
+            # the node.
+            driver_ip = "127.0.0.1"
         distributed_init_method = get_distributed_init_method(
             driver_ip, get_open_port())
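
Editor's note: the rationale in both hunks hinges on how the distributed init
method string is built and why loopback is safe on a single node. The sketch
below is a minimal, self-contained illustration of that idea; the helper
bodies are assumptions that mirror the names used in the patch
(vllm.utils.get_open_port and vllm.utils.get_distributed_init_method), not
vLLM's actual implementation. It assumes torch.distributed-style TCP init
URLs of the form tcp://<ip>:<port>.

    # Illustrative sketch only: helper bodies are assumed, not copied from vLLM.
    import socket

    def get_open_port() -> int:
        # Binding to port 0 lets the OS pick a free port; read back the
        # assigned port number from the socket.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.bind(("127.0.0.1", 0))
            return s.getsockname()[1]

    def get_distributed_init_method(ip: str, port: int) -> str:
        # torch.distributed accepts TCP init methods of the form
        # tcp://<ip>:<port>.
        return f"tcp://{ip}:{port}"

    if __name__ == "__main__":
        # Single-node case: every worker runs on this machine, so the loopback
        # address is always reachable, no matter how many network interfaces
        # the node has or how they are configured. By contrast, get_ip()
        # (dropped by this patch on the single-node path) could return any
        # interface's address, which may not accept local connections under a
        # complicated network setup.
        print(get_distributed_init_method("127.0.0.1", get_open_port()))
        # e.g. tcp://127.0.0.1:54321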