vllm-project · DarkLight1337 · Jun 3, 2025 · May 30, 2025 · May 30, 2025 · May 30, 2025
diff --git a/vllm/distributed/device_communicators/all2all.py b/vllm/distributed/device_communicators/all2all.py
@@ -83,6 +83,10 @@ def __init__(self, cpu_group):
         assert has_pplx, "pplx_kernels not found. Please follow https://github.com/vllm-project/vllm/blob/main/tools/ep_kernels/README.md to install pplx_kernels."  # noqa
         super().__init__(cpu_group)
 
+        # TODO(tms): Disable pplx-a2a intranode as it fails with the error:
+        # failed: cuda error /app/pplx/csrc/all_to_all/intranode.cpp:84 'invalid resource handle' # noqa
+        self.internode = True
+
         if self.internode:
             # inter-node communication needs nvshmem,
             # intra-node communication uses p2p mapping directly

@@ -269,9 +269,13 @@ def init_prepare_finalize(self, moe: MoEConfig,
                 hidden_dim_scale_bytes=(0 if moe.in_dtype.itemsize != 1 else (
                     (moe.hidden_dim + moe.block_size - 1) // moe.block_size *
                     torch.float32.itemsize)),
-                group_name=all2all_manager.cpu_group.group_name,
             )
 
+            # Intranode pplx a2a takes a group name while internode does not.
+            if not all2all_manager.internode:
+                all_to_all_args[
+                    "group_name"] = all2all_manager.cpu_group.group_name
+
             handle = all2all_manager.get_handle(all_to_all_args)
 
             prepare_finalize = PplxPrepareAndFinalize(