diff --git a/vllm/config.py b/vllm/config.py index c35b6302b2cfa..27c61d4d50439 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -388,16 +388,26 @@ def _verify_args(self) -> None: if self.pipeline_parallel_size > 1: raise NotImplementedError( "Pipeline parallelism is not supported yet.") - if is_hip(): + if not self.disable_custom_all_reduce and self.world_size > 1: + if is_hip(): + self.disable_custom_all_reduce = True + logger.info( + "Disabled the custom all-reduce kernel because it is not " + "supported on AMD GPUs.") + elif self.pipeline_parallel_size > 1: + self.disable_custom_all_reduce = True + logger.info( + "Disabled the custom all-reduce kernel because it is not " + "supported with pipeline parallelism.") + + # FIXME(woosuk): Fix the stability issues and re-enable the custom + # all-reduce kernel. + if not self.disable_custom_all_reduce and self.world_size > 1: self.disable_custom_all_reduce = True logger.info( - "Disabled the custom all-reduce kernel because it is not " - "supported on AMD GPUs.") - elif self.pipeline_parallel_size > 1: - self.disable_custom_all_reduce = True - logger.info( - "Disabled the custom all-reduce kernel because it is not " - "supported with pipeline parallelism.") + "Custom all-reduce kernels are temporarily disabled due to " + "stability issues. We will re-enable them once the issues are " + "resolved.") class SchedulerConfig: