Update server/text_generation_server/models/flash_causal_lm.py
Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
drbh and Narsil authored Oct 17, 2024
1 parent 8d7448d commit 3e0a82d
Showing 1 changed file with 1 addition and 1 deletion.
server/text_generation_server/models/flash_causal_lm.py (2 changes: 1 addition & 1 deletion)
@@ -1922,7 +1922,7 @@ def generate_token(
             batch.adapter_meta.adapter_indices = next_adapter_indices
 
         if prefill and prefill_logprobs:
-            # Get prefill logprobs with inplace softmax (avoid copying the `out` tensor (max_batch_size * vocab_size))
+            # Get prefill logprobs with inplace softmax (avoid copying the `out` tensor (max_batch_prefill_tokens * vocab_size))
            torch.log_softmax(out, -1, out=out)
            prefill_logprobs_tensor = out
            prefill_logprobs = torch.gather(
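
For context, the updated comment refers to the in-place variant of log_softmax: writing the result back into `out` avoids allocating a second tensor of shape (max_batch_prefill_tokens, vocab_size). Below is a minimal sketch of the pattern; the concrete shapes and the `prefill_token_ids` index tensor are illustrative assumptions, not the exact variables used in generate_token.

import torch

# Illustrative shapes: during prefill, `out` holds one logits row per prompt token,
# so its first dimension scales with max_batch_prefill_tokens, not max_batch_size.
num_prefill_tokens, vocab_size = 6, 32000
out = torch.randn(num_prefill_tokens, vocab_size)

# Hypothetical target ids: the token whose logprob we want at each prefill position.
prefill_token_ids = torch.randint(0, vocab_size, (num_prefill_tokens,))

# In-place log-softmax: the normalized logprobs overwrite `out`, so no extra
# (num_prefill_tokens, vocab_size) buffer is allocated.
torch.log_softmax(out, -1, out=out)
prefill_logprobs_tensor = out

# Pick out the logprob of each target id, one scalar per prefill position.
prefill_logprobs = torch.gather(
    prefill_logprobs_tensor, 1, prefill_token_ids.unsqueeze(1)
).squeeze(1)

print(prefill_logprobs.shape)  # torch.Size([6])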
