Add one more condition: `should_custom_ar` returns False when `self.disabled` is set

vllm-project · youkaichao · Sep 17, 2024 · Sep 13, 2024 · Sep 16, 2024 · Sep 16, 2024
commit 7da3dc87264bfbde6c9c9eabdd1aac9c12fb8046
diff --git a/vllm/distributed/device_communicators/custom_all_reduce.py b/vllm/distributed/device_communicators/custom_all_reduce.py
@@ -230,6 +230,8 @@ def register_graph_buffers(self):
         ops.register_graph_buffers(self._ptr, handles, offsets)
 
     def should_custom_ar(self, inp: torch.Tensor):
+        if self.disabled:
+            return False
         inp_size = inp.numel() * inp.element_size()
         # custom allreduce requires input byte size to be multiples of 16
         if inp_size % 16 != 0: