@@ -1,5 +1,23 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+ """
+ This script demonstrates how to use the vLLM API server to perform audio
+ transcription with the `openai/whisper-large-v3` model.
+
+ Before running this script, you must start the vLLM server with the following command:
+
+ vllm serve openai/whisper-large-v3
+
+ Requirements:
+ - vLLM with audio support
+ - openai Python SDK
+ - httpx for streaming support
+
+ The script performs:
+ 1. Synchronous transcription using the OpenAI-compatible API.
+ 2. Streaming transcription using a raw HTTP request to the vLLM server.
+ """
+
import asyncio
import json
@@ -21,6 +39,9 @@
def sync_openai():
+     """
+     Perform synchronous transcription using the OpenAI-compatible API.
+     """
    with open(str(mary_had_lamb), "rb") as f:
        transcription = client.audio.transcriptions.create(
            file=f,
@@ -37,11 +58,11 @@ def sync_openai():
        print("transcription result:", transcription.text)


- sync_openai()
-
-
# OpenAI Transcription API client does not support streaming.
async def stream_openai_response():
+     """
+     Perform streaming transcription using vLLM's raw HTTP streaming API.
+     """
    data = {
        "language": "en",
        "stream": True,
@@ -68,7 +89,15 @@ async def stream_openai_response():
                        # Extract and print the content
                        content = chunk["choices"][0].get("delta", {}).get("content")
                        print(content, end="")
+     print()  # Final newline after stream ends
+
+
+ def main():
+     sync_openai()
+
+     # Run the asynchronous function
+     asyncio.run(stream_openai_response())
- # Run the asynchronous function
- asyncio.run(stream_openai_response())
+ if __name__ == "__main__":
+     main()
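For readers who want to try the two flows described in the new module docstring without the hunks that are folded out of the diff above (the OpenAI client setup and the body of the streaming request are not shown), here is a minimal, self-contained sketch. The base URL, API key, audio path, helper names, and request fields below are assumptions for a default local `vllm serve openai/whisper-large-v3` deployment, not code from this change.

# Sketch only: placeholder client setup for a local vLLM server.
import asyncio
import json

import httpx
from openai import OpenAI

client = OpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")


def sync_transcription_sketch(audio_path: str = "audio.wav") -> None:
    # Synchronous transcription through the OpenAI-compatible endpoint.
    with open(audio_path, "rb") as f:
        transcription = client.audio.transcriptions.create(
            model="openai/whisper-large-v3",
            file=f,
            language="en",
        )
    print("transcription result:", transcription.text)


async def streaming_transcription_sketch(audio_path: str = "audio.wav") -> None:
    # The OpenAI client does not stream transcriptions, so POST the multipart
    # request directly and read the server-sent event lines one by one.
    url = "http://localhost:8000/v1/audio/transcriptions"
    data = {"language": "en", "stream": True, "model": "openai/whisper-large-v3"}
    async with httpx.AsyncClient() as http:
        with open(audio_path, "rb") as f:
            async with http.stream("POST", url, files={"file": f}, data=data) as resp:
                async for line in resp.aiter_lines():
                    if not line.startswith("data: "):
                        continue
                    payload = line[len("data: "):]
                    if payload.strip() == "[DONE]":
                        break
                    chunk = json.loads(payload)
                    content = chunk["choices"][0].get("delta", {}).get("content")
                    print(content or "", end="")
    print()  # final newline after the stream ends


if __name__ == "__main__":
    sync_transcription_sketch()
    asyncio.run(streaming_transcription_sketch())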