Skip to content

Commit 22f88f2

Browse files
WoosukKwon authored and jimpang committed
Disable custom all reduce by default (vllm-project#2808)
1 parent e61f4d2 commit 22f88f2

File tree

1 file changed

+18
-8
lines changed

1 file changed

+18
-8
lines changed

vllm/config.py

Lines changed: 18 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -388,16 +388,26 @@ def _verify_args(self) -> None:
388388
if self.pipeline_parallel_size > 1:
389389
raise NotImplementedError(
390390
"Pipeline parallelism is not supported yet.")
391-
if is_hip():
391+
if not self.disable_custom_all_reduce and self.world_size > 1:
392+
if is_hip():
393+
self.disable_custom_all_reduce = True
394+
logger.info(
395+
"Disabled the custom all-reduce kernel because it is not "
396+
"supported on AMD GPUs.")
397+
elif self.pipeline_parallel_size > 1:
398+
self.disable_custom_all_reduce = True
399+
logger.info(
400+
"Disabled the custom all-reduce kernel because it is not "
401+
"supported with pipeline parallelism.")
402+
403+
# FIXME(woosuk): Fix the stability issues and re-enable the custom
404+
# all-reduce kernel.
405+
if not self.disable_custom_all_reduce and self.world_size > 1:
392406
self.disable_custom_all_reduce = True
393407
logger.info(
394-
"Disabled the custom all-reduce kernel because it is not "
395-
"supported on AMD GPUs.")
396-
elif self.pipeline_parallel_size > 1:
397-
self.disable_custom_all_reduce = True
398-
logger.info(
399-
"Disabled the custom all-reduce kernel because it is not "
400-
"supported with pipeline parallelism.")
408+
"Custom all-reduce kernels are temporarily disabled due to "
409+
"stability issues. We will re-enable them once the issues are "
410+
"resolved.")
401411

402412

403413
class SchedulerConfig:

0 commit comments

Comments
 (0)