22
33import asyncio
44from dataclasses import dataclass
5- from typing import Dict , List , Optional
5+ from typing import Dict , List , Optional , Union
66
77from vllm .outputs import RequestOutput
88from vllm .sampling_params import RequestOutputKind
@@ -164,6 +164,7 @@ def process_outputs(
164164
165165 new_token_ids = engine_core_output .new_token_ids
166166 finish_reason = engine_core_output .finish_reason
167+ stop_reason = engine_core_output .stop_reason
167168
168169 # TODO(andy): prompt logprobs + chunked prefill can
169170 # result in engine core returning an output for a
@@ -181,9 +182,10 @@ def process_outputs(
181182
182183 # 2) Detokenize the token ids into text and check for stop
183184 # strings.
184- stop_reason = req_state .detokenizer .update (new_token_ids )
185- if stop_reason :
185+ stop_string = req_state .detokenizer .update (new_token_ids )
186+ if stop_string and finish_reason != FinishReason . STOP :
186187 finish_reason = FinishReason .STOP
188+ stop_reason = stop_string
187189
188190 # 3) Compute sample and prompt logprobs for request,
189191 # if required.
@@ -250,7 +252,7 @@ def _make_request_output(
250252 request_state : RequestState ,
251253 new_token_ids : List [int ],
252254 finish_reason : Optional [FinishReason ],
253- stop_reason : Optional [ str ],
255+ stop_reason : Union [ int , str , None ],
254256 ) -> Optional [RequestOutput ]:
255257
256258 finished = finish_reason is not None
0 commit comments