22import  importlib 
33import  inspect 
44import  re 
5- import  signal 
5+ from   argparse   import  Namespace 
66from  contextlib  import  asynccontextmanager 
77from  http  import  HTTPStatus 
88from  multiprocessing  import  Process 
99from  typing  import  AsyncIterator , Set 
1010
11- import  fastapi 
12- import  uvicorn 
13- from  fastapi  import  APIRouter , Request 
11+ from  fastapi  import  APIRouter , FastAPI , Request 
1412from  fastapi .exceptions  import  RequestValidationError 
1513from  fastapi .middleware .cors  import  CORSMiddleware 
1614from  fastapi .responses  import  JSONResponse , Response , StreamingResponse 
2220from  vllm .engine .arg_utils  import  AsyncEngineArgs 
2321from  vllm .engine .async_llm_engine  import  AsyncLLMEngine 
2422from  vllm .engine .protocol  import  AsyncEngineClient 
23+ from  vllm .entrypoints .launcher  import  serve_http 
2524from  vllm .entrypoints .logger  import  RequestLogger 
2625from  vllm .entrypoints .openai .cli_args  import  make_arg_parser 
2726# yapf conflicts with isort for this block 
@@ -71,7 +70,7 @@ def model_is_embedding(model_name: str) -> bool:
7170
7271
7372@asynccontextmanager  
74- async  def  lifespan (app : fastapi . FastAPI ):
73+ async  def  lifespan (app : FastAPI ):
7574
7675    async  def  _force_log ():
7776        while  True :
@@ -135,7 +134,7 @@ async def build_async_engine_client(args) -> AsyncIterator[AsyncEngineClient]:
135134router  =  APIRouter ()
136135
137136
138- def  mount_metrics (app : fastapi . FastAPI ):
137+ def  mount_metrics (app : FastAPI ):
139138    # Add prometheus asgi middleware to route /metrics requests 
140139    metrics_route  =  Mount ("/metrics" , make_asgi_app ())
141140    # Workaround for 307 Redirect for /metrics 
@@ -225,8 +224,8 @@ async def create_embedding(request: EmbeddingRequest, raw_request: Request):
225224        return  JSONResponse (content = generator .model_dump ())
226225
227226
228- def  build_app (args ) :
229-     app  =  fastapi . FastAPI (lifespan = lifespan )
227+ def  build_app (args :  Namespace )  ->   FastAPI :
228+     app  =  FastAPI (lifespan = lifespan )
230229    app .include_router (router )
231230    app .root_path  =  args .root_path 
232231
@@ -274,11 +273,10 @@ async def authentication(request: Request, call_next):
274273    return  app 
275274
276275
277- async  def  build_server (
276+ async  def  init_app (
278277    async_engine_client : AsyncEngineClient ,
279-     args ,
280-     ** uvicorn_kwargs ,
281- ) ->  uvicorn .Server :
278+     args : Namespace ,
279+ ) ->  FastAPI :
282280    app  =  build_app (args )
283281
284282    if  args .served_model_name  is  not None :
@@ -334,62 +332,31 @@ async def build_server(
334332    )
335333    app .root_path  =  args .root_path 
336334
337-     logger .info ("Available routes are:" )
338-     for  route  in  app .routes :
339-         if  not  hasattr (route , 'methods' ):
340-             continue 
341-         methods  =  ', ' .join (route .methods )
342-         logger .info ("Route: %s, Methods: %s" , route .path , methods )
343- 
344-     config  =  uvicorn .Config (
345-         app ,
346-         host = args .host ,
347-         port = args .port ,
348-         log_level = args .uvicorn_log_level ,
349-         timeout_keep_alive = TIMEOUT_KEEP_ALIVE ,
350-         ssl_keyfile = args .ssl_keyfile ,
351-         ssl_certfile = args .ssl_certfile ,
352-         ssl_ca_certs = args .ssl_ca_certs ,
353-         ssl_cert_reqs = args .ssl_cert_reqs ,
354-         ** uvicorn_kwargs ,
355-     )
356- 
357-     return  uvicorn .Server (config )
335+     return  app 
358336
359337
360338async  def  run_server (args , ** uvicorn_kwargs ) ->  None :
361339    logger .info ("vLLM API server version %s" , VLLM_VERSION )
362340    logger .info ("args: %s" , args )
363341
364-     shutdown_task  =  None 
365342    async  with  build_async_engine_client (args ) as  async_engine_client :
366- 
367-         server  =  await  build_server (
368-             async_engine_client ,
369-             args ,
343+         app  =  await  init_app (async_engine_client , args )
344+ 
345+         shutdown_task  =  await  serve_http (
346+             app ,
347+             host = args .host ,
348+             port = args .port ,
349+             log_level = args .uvicorn_log_level ,
350+             timeout_keep_alive = TIMEOUT_KEEP_ALIVE ,
351+             ssl_keyfile = args .ssl_keyfile ,
352+             ssl_certfile = args .ssl_certfile ,
353+             ssl_ca_certs = args .ssl_ca_certs ,
354+             ssl_cert_reqs = args .ssl_cert_reqs ,
370355            ** uvicorn_kwargs ,
371356        )
372357
373-         loop  =  asyncio .get_running_loop ()
374- 
375-         server_task  =  loop .create_task (server .serve ())
376- 
377-         def  signal_handler () ->  None :
378-             # prevents the uvicorn signal handler to exit early 
379-             server_task .cancel ()
380- 
381-         loop .add_signal_handler (signal .SIGINT , signal_handler )
382-         loop .add_signal_handler (signal .SIGTERM , signal_handler )
383- 
384-         try :
385-             await  server_task 
386-         except  asyncio .CancelledError :
387-             logger .info ("Gracefully stopping http server" )
388-             shutdown_task  =  server .shutdown ()
389- 
390-     if  shutdown_task :
391-         # NB: Await server shutdown only after the backend context is exited 
392-         await  shutdown_task 
358+     # NB: Await server shutdown only after the backend context is exited 
359+     await  shutdown_task 
393360
394361
395362if  __name__  ==  "__main__" :
@@ -399,4 +366,5 @@ def signal_handler() -> None:
399366        description = "vLLM OpenAI-Compatible RESTful API server." )
400367    parser  =  make_arg_parser (parser )
401368    args  =  parser .parse_args ()
369+ 
402370    asyncio .run (run_server (args ))
0 commit comments