We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent ff30c1b · commit 80ecbbf · Copy full SHA for 80ecbbf
vllm/v1/attention/backends/gdn_attn.py
@@ -229,7 +229,7 @@ def build( # type: ignore[override]
229
spec_token_masks = torch.repeat_interleave(
230
spec_sequence_masks, query_lens
231
)
232
- indx = torch.argsort(spec_token_masks)
+ indx = torch.argsort(spec_token_masks, stable=True)
233
num_non_spec_tokens = num_prefill_tokens + num_decode_tokens
234
non_spec_token_indx = indx[:num_non_spec_tokens]
235
spec_token_indx = indx[num_non_spec_tokens:]
0 commit comments