[Bugfix] LoRA : Fix the order in which the kernels process LoRAs (vllm-project#16040)

varun-sundar-rabindranath · Varun Sundar Rabindranath · yangw-dev · commit e5b22045c58a · 2025-04-21T10:08:11.000-07:00
Signed-off-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
Signed-off-by: Yang Wang <elainewy@meta.com>
diff --git a/vllm/lora/ops/triton_ops/lora_kernel_metadata.py b/vllm/lora/ops/triton_ops/lora_kernel_metadata.py
@@ -111,7 +111,7 @@ def prepare_tensors(self, token_lora_mapping: torch.Tensor) -> None:
 
         # active_lora_ids, num_tokens_per_lora
         lora_ids, num_tokens_per_lora = torch.unique(token_lora_mapping,
-                                                     sorted=False,
+                                                     sorted=True,
                                                      return_counts=True)
         self.active_lora_ids[:lora_ids.size(0)].copy_(lora_ids,
                                                       non_blocking=True)