From 537c9755a736b4e206107a99e1c8961448a3d63b Mon Sep 17 00:00:00 2001
From: Zhuohan Li
Date: Sun, 18 Feb 2024 14:39:00 -0800
Subject: [PATCH] [Minor] Small fix to make distributed init logic in worker
 look cleaner (#2905)

---
 vllm/worker/worker.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py
index 29e4b16fe2594..9df518d155ec2 100644
--- a/vllm/worker/worker.py
+++ b/vllm/worker/worker.py
@@ -93,8 +93,6 @@ def init_model(self, cupy_port: Optional[int] = None) -> None:
         # Initialize the distributed environment.
         init_distributed_environment(self.parallel_config, self.rank,
                                      cupy_port, self.distributed_init_method)
-        if not self.parallel_config.disable_custom_all_reduce:
-            init_custom_ar()
 
         # Initialize the model.
         set_random_seed(self.model_config.seed)
@@ -288,6 +286,10 @@ def init_distributed_environment(
     ensure_model_parallel_initialized(parallel_config.tensor_parallel_size,
                                       parallel_config.pipeline_parallel_size)
 
+    # Initialize a custom fast all-reduce implementation.
+    if not parallel_config.disable_custom_all_reduce:
+        init_custom_ar()
+
 
 def _check_if_gpu_supports_dtype(torch_dtype: torch.dtype):
     # Check if the GPU supports the dtype.
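
For context, the sketch below shows the shape of the initialization path after this patch: Worker.init_model delegates all distributed setup, including the custom all-reduce, to init_distributed_environment. This is a minimal, self-contained sketch, not vLLM's actual code; the ParallelConfig stub and the helper bodies are assumptions, while the function names and the disable_custom_all_reduce check come from the diff above.

# A minimal sketch of the post-patch init flow. Only the names
# init_distributed_environment, ensure_model_parallel_initialized,
# init_custom_ar, and disable_custom_all_reduce come from the diff;
# everything else here is an assumed stand-in.
from dataclasses import dataclass
from typing import Optional

import torch
import torch.distributed


@dataclass
class ParallelConfig:
    # Assumed subset of vLLM's ParallelConfig: just the fields the
    # diff touches.
    world_size: int
    tensor_parallel_size: int
    pipeline_parallel_size: int
    disable_custom_all_reduce: bool = False


def ensure_model_parallel_initialized(tensor_parallel_size: int,
                                      pipeline_parallel_size: int) -> None:
    # Stub: the real helper builds tensor-/pipeline-parallel groups.
    pass


def init_custom_ar() -> None:
    # Stub: the real helper sets up the custom fast all-reduce kernel.
    pass


def init_distributed_environment(
        parallel_config: ParallelConfig,
        rank: int,
        cupy_port: Optional[int] = None,
        distributed_init_method: Optional[str] = None) -> None:
    """After this patch, every piece of distributed setup lives here."""
    # cupy_port is accepted for parity with the diff's signature; the
    # CuPy/NCCL setup it drives in the real code is omitted here.
    if not torch.distributed.is_initialized():
        torch.distributed.init_process_group(
            backend="nccl",
            world_size=parallel_config.world_size,
            rank=rank,
            init_method=distributed_init_method)

    ensure_model_parallel_initialized(parallel_config.tensor_parallel_size,
                                      parallel_config.pipeline_parallel_size)

    # Moved here from Worker.init_model: the custom all-reduce is
    # initialized alongside the rest of the distributed state.
    if not parallel_config.disable_custom_all_reduce:
        init_custom_ar()

The change is purely structural: callers of init_distributed_environment no longer need to remember a separate init_custom_ar() step, and Worker.init_model shrinks to seeding and model setup.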