@@ -2,14 +2,12 @@
 import importlib
 import inspect
 import re
-import signal
+from argparse import Namespace
 from contextlib import asynccontextmanager
 from http import HTTPStatus
-from typing import Optional, Set
+from typing import Any, Optional, Set
 
-import fastapi
-import uvicorn
-from fastapi import APIRouter, Request
+from fastapi import APIRouter, FastAPI, Request
 from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, Response, StreamingResponse
@@ -38,6 +36,7 @@
 from vllm.entrypoints.openai.serving_tokenization import (
     OpenAIServingTokenization)
 from vllm.logger import init_logger
+from vllm.server import serve_http
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import FlexibleArgumentParser
 from vllm.version import __version__ as VLLM_VERSION
@@ -57,7 +56,7 @@
 
 
 @asynccontextmanager
-async def lifespan(app: fastapi.FastAPI):
+async def lifespan(app: FastAPI):
 
     async def _force_log():
         while True:
@@ -75,7 +74,7 @@ async def _force_log():
 router = APIRouter()
 
 
-def mount_metrics(app: fastapi.FastAPI):
+def mount_metrics(app: FastAPI):
     # Add prometheus asgi middleware to route /metrics requests
     metrics_route = Mount("/metrics", make_asgi_app())
     # Workaround for 307 Redirect for /metrics
@@ -165,8 +164,8 @@ async def create_embedding(request: EmbeddingRequest, raw_request: Request):
         return JSONResponse(content=generator.model_dump())
 
 
-def build_app(args):
-    app = fastapi.FastAPI(lifespan=lifespan)
+def build_app(args: Namespace) -> FastAPI:
+    app = FastAPI(lifespan=lifespan)
     app.include_router(router)
     app.root_path = args.root_path
 
@@ -214,11 +213,8 @@ async def authentication(request: Request, call_next):
     return app
 
 
-async def build_server(
-    args,
-    llm_engine: Optional[AsyncLLMEngine] = None,
-    **uvicorn_kwargs,
-) -> uvicorn.Server:
+async def init_app(args: Namespace,
+                   llm_engine: Optional[AsyncLLMEngine] = None) -> FastAPI:
     app = build_app(args)
 
     if args.served_model_name is not None:
@@ -281,14 +277,17 @@ async def build_server(
     )
     app.root_path = args.root_path
 
-    logger.info("Available routes are:")
-    for route in app.routes:
-        if not hasattr(route, 'methods'):
-            continue
-        methods = ', '.join(route.methods)
-        logger.info("Route: %s, Methods: %s", route.path, methods)
+    return app
+
+
+async def run_server(args: Namespace,
+                     llm_engine: Optional[AsyncLLMEngine] = None,
+                     **uvicorn_kwargs: Any) -> None:
+    logger.info("vLLM API server version %s", VLLM_VERSION)
+    logger.info("args: %s", args)
 
-    config = uvicorn.Config(
+    app = await init_app(args, llm_engine)
+    await serve_http(
         app,
         host=args.host,
         port=args.port,
@@ -301,36 +300,6 @@ async def build_server(
         **uvicorn_kwargs,
     )
 
-    return uvicorn.Server(config)
-
-
-async def run_server(args, llm_engine=None, **uvicorn_kwargs) -> None:
-    logger.info("vLLM API server version %s", VLLM_VERSION)
-    logger.info("args: %s", args)
-
-    server = await build_server(
-        args,
-        llm_engine,
-        **uvicorn_kwargs,
-    )
-
-    loop = asyncio.get_running_loop()
-
-    server_task = loop.create_task(server.serve())
-
-    def signal_handler() -> None:
-        # prevents the uvicorn signal handler to exit early
-        server_task.cancel()
-
-    loop.add_signal_handler(signal.SIGINT, signal_handler)
-    loop.add_signal_handler(signal.SIGTERM, signal_handler)
-
-    try:
-        await server_task
-    except asyncio.CancelledError:
-        print("Gracefully stopping http server")
-        await server.shutdown()
-
 
 if __name__ == "__main__":
     # NOTE(simon):
@@ -339,4 +308,5 @@ def signal_handler() -> None:
         description="vLLM OpenAI-Compatible RESTful API server.")
     parser = make_arg_parser(parser)
     args = parser.parse_args()
+
     asyncio.run(run_server(args))
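
This diff imports serve_http from a new vllm.server module that it does not show. Reconstructed from the logic deleted above (the uvicorn.Config/Server setup, the SIGINT/SIGTERM handlers, and the graceful-shutdown path), a minimal sketch of what that extracted helper plausibly looks like follows. The module path, the exact signature, and the use of print for route logging are assumptions, not the real implementation:

# Hypothetical reconstruction of the extracted helper (e.g. vllm/server/launch.py).
# Everything here is inferred from the code this diff deletes; the real
# vllm.server.serve_http may differ.
import asyncio
import signal
from typing import Any

import uvicorn
from fastapi import FastAPI


async def serve_http(app: FastAPI, **uvicorn_kwargs: Any) -> None:
    # Route logging, moved out of the old build_server ("Available routes are:").
    for route in app.routes:
        methods = getattr(route, "methods", None)
        if methods is None:
            continue
        print(f"Route: {route.path}, Methods: {', '.join(methods)}")

    config = uvicorn.Config(app, **uvicorn_kwargs)
    server = uvicorn.Server(config)

    loop = asyncio.get_running_loop()
    server_task = loop.create_task(server.serve())

    def signal_handler() -> None:
        # Prevents the uvicorn signal handler from exiting early.
        server_task.cancel()

    loop.add_signal_handler(signal.SIGINT, signal_handler)
    loop.add_signal_handler(signal.SIGTERM, signal_handler)

    try:
        await server_task
    except asyncio.CancelledError:
        print("Gracefully stopping http server")
        await server.shutdown()

With this split, init_app returns a plain FastAPI app that tests and other entrypoints can embed directly, optionally injecting an existing AsyncLLMEngine, while the uvicorn lifecycle handling lives in one reusable place instead of being duplicated in every API server.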