[Minor] Small fix to make the distributed init logic in the worker look cleaner

vllm-project · zhuohan123 · Feb 18, 2024 · Feb 18, 2024 · Feb 18, 2024 · a1700ca3e1a0621a4798b9d9b361cc3d0cae1e71
commit a1700ca3e1a0621a4798b9d9b361cc3d0cae1e71
@@ -93,8 +93,6 @@ def init_model(self, cupy_port: Optional[int] = None) -> None:
         # Initialize the distributed environment.
         init_distributed_environment(self.parallel_config, self.rank,
                                      cupy_port, self.distributed_init_method)
-        if not self.parallel_config.disable_custom_all_reduce:
-            init_custom_ar()
         # Initialize the model.
         set_random_seed(self.model_config.seed)
 
@@ -288,6 +286,10 @@ def init_distributed_environment(
     ensure_model_parallel_initialized(parallel_config.tensor_parallel_size,
                                       parallel_config.pipeline_parallel_size)
 
+    # Initialize a custom fast all-reduce implementation.
+    if not parallel_config.disable_custom_all_reduce:
+        init_custom_ar()
+
 
 def _check_if_gpu_supports_dtype(torch_dtype: torch.dtype):
     # Check if the GPU supports the dtype.