Skip to content

Commit 96ee352

Browse files
committed
remove first empty chunk from stream response
Signed-off-by: NickLucche <nlucches@redhat.com>
1 parent 121b226 commit 96ee352

File tree

1 file changed

+1
-23
lines changed

1 file changed

+1
-23
lines changed

vllm/entrypoints/openai/serving_transcription.py

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,6 @@ async def transcription_stream_generator(
325325
created_time = int(time.time())
326326
model_name = request.model
327327
chunk_object_type: Final = "transcription.chunk"
328-
first_iteration = True
329328

330329
completion_tokens = 0
331330
num_prompt_tokens = 0
@@ -352,30 +351,9 @@ async def transcription_stream_generator(
352351
# We need to do it here, because if there are exceptions in
353352
# the result_generator, it needs to be sent as the FIRST
354353
# response (by the try...catch).
355-
if first_iteration:
356-
# First delta message.
357-
choice_data = TranscriptionResponseStreamChoice(
358-
delta=DeltaMessage(content="", ), finish_reason=None)
359-
chunk = TranscriptionStreamResponse(
360-
id=request_id,
361-
object=chunk_object_type,
362-
created=created_time,
363-
choices=[choice_data],
364-
model=model_name)
365-
366-
# if continuous usage stats are requested, add it
367-
if include_continuous_usage:
368-
chunk.usage = UsageInfo(
369-
prompt_tokens=num_prompt_tokens,
370-
completion_tokens=0,
371-
total_tokens=num_prompt_tokens)
372-
373-
data = chunk.model_dump_json(exclude_unset=True)
374-
yield f"data: {data}\n\n"
375-
376-
first_iteration = False
377354

378355
# Just one output (n=1) supported.
356+
assert len(res.outputs) == 1
379357
output = res.outputs[0]
380358

381359
delta_message = DeltaMessage(content=output.text)

0 commit comments

Comments (0)