Skip to content

Commit 5a6c81b

Browse files
authored
Remove eos tokens from output by default (#2611)
1 parent 51cd22c commit 5a6c81b

File tree

1 file changed

+12
-4
lines changed

1 file changed

+12
-4
lines changed

vllm/engine/llm_engine.py

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -908,13 +908,13 @@ def _check_stop(self, seq: Sequence,
908908
"""Stop the finished sequences."""
909909
for stop_str in sampling_params.stop:
910910
if seq.output_text.endswith(stop_str):
911-
if not sampling_params.include_stop_str_in_output:
912-
# Truncate the output text so that the stop string is
913-
# not included in the output.
914-
seq.output_text = seq.output_text[:-len(stop_str)]
911+
self._finalize_sequence(seq, sampling_params, stop_str)
915912
seq.status = SequenceStatus.FINISHED_STOPPED
916913
return
917914
if seq.get_last_token_id() in sampling_params.stop_token_ids:
915+
stop_str = self.get_tokenizer_for_seq(seq).convert_ids_to_tokens(
916+
seq.get_last_token_id())
917+
self._finalize_sequence(seq, sampling_params, stop_str)
918918
seq.status = SequenceStatus.FINISHED_STOPPED
919919
return
920920

@@ -934,6 +934,14 @@ def _check_stop(self, seq: Sequence,
934934
seq.status = SequenceStatus.FINISHED_STOPPED
935935
return
936936

937+
def _finalize_sequence(self, seq: Sequence,
938+
sampling_params: SamplingParams,
939+
stop_string: str) -> None:
940+
if not sampling_params.include_stop_str_in_output and stop_string:
941+
# Truncate the output text so that the stop string is
942+
# not included in the output.
943+
seq.output_text = seq.output_text[:-len(stop_string)]
944+
937945
def add_lora(self, lora_request: LoRARequest) -> bool:
938946
assert lora_request.lora_int_id > 0, "lora_id must be greater than 0."
939947
return self._run_workers(

0 commit comments

Comments
 (0)