@@ -2,12 +2,14 @@
 import importlib
 import inspect
 import re
-from argparse import Namespace
+import signal
 from contextlib import asynccontextmanager
 from http import HTTPStatus
-from typing import Any, Optional, Set
+from typing import Optional, Set
 
-from fastapi import APIRouter, FastAPI, Request
+import fastapi
+import uvicorn
+from fastapi import APIRouter, Request
 from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, Response, StreamingResponse
@@ -36,7 +38,6 @@
 from vllm.entrypoints.openai.serving_tokenization import (
     OpenAIServingTokenization)
 from vllm.logger import init_logger
-from vllm.server import serve_http
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import FlexibleArgumentParser
 from vllm.version import __version__ as VLLM_VERSION
@@ -56,7 +57,7 @@
 
 
 @asynccontextmanager
-async def lifespan(app: FastAPI):
+async def lifespan(app: fastapi.FastAPI):
 
     async def _force_log():
         while True:
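
For reference, the lifespan hook whose annotation changes here follows FastAPI's standard pattern: an async context manager that starts background work before the app serves and tears it down afterwards. A minimal self-contained sketch of that pattern, where the heartbeat coroutine is a hypothetical stand-in for the _force_log() stats loop:

    import asyncio
    from contextlib import asynccontextmanager

    import fastapi

    @asynccontextmanager
    async def lifespan(app: fastapi.FastAPI):
        # Hypothetical background task; _force_log() above plays this
        # role by periodically logging engine stats.
        async def heartbeat():
            while True:
                await asyncio.sleep(10)
                print("still serving")

        task = asyncio.create_task(heartbeat())
        try:
            yield  # requests are handled while the task runs
        finally:
            task.cancel()

    app = fastapi.FastAPI(lifespan=lifespan)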
@@ -74,7 +75,7 @@ async def _force_log():
 router = APIRouter()
 
 
-def mount_metrics(app: FastAPI):
+def mount_metrics(app: fastapi.FastAPI):
     # Add prometheus asgi middleware to route /metrics requests
     metrics_route = Mount("/metrics", make_asgi_app())
     # Workaround for 307 Redirect for /metrics
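
mount_metrics() attaches the Prometheus ASGI app to the main application; a rough sketch of that mounting pattern, assuming prometheus_client and starlette are available (the path_regex workaround for the 307 redirect mentioned in the comment is omitted here):

    import fastapi
    from prometheus_client import make_asgi_app
    from starlette.routing import Mount

    app = fastapi.FastAPI()
    # Serve Prometheus metrics under /metrics on the same app.
    metrics_route = Mount("/metrics", make_asgi_app())
    app.routes.append(metrics_route)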
@@ -164,8 +165,8 @@ async def create_embedding(request: EmbeddingRequest, raw_request: Request):
         return JSONResponse(content=generator.model_dump())
 
 
-def build_app(args: Namespace) -> FastAPI:
-    app = FastAPI(lifespan=lifespan)
+def build_app(args):
+    app = fastapi.FastAPI(lifespan=lifespan)
     app.include_router(router)
     app.root_path = args.root_path
@@ -213,8 +214,11 @@ async def authentication(request: Request, call_next):
     return app
 
 
-async def init_app(args: Namespace,
-                   llm_engine: Optional[AsyncLLMEngine] = None) -> FastAPI:
+async def build_server(
+    args,
+    llm_engine: Optional[AsyncLLMEngine] = None,
+    **uvicorn_kwargs,
+) -> uvicorn.Server:
     app = build_app(args)
 
     if args.served_model_name is not None:
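
The rename from init_app to build_server also changes the contract: the function now returns a configured but unstarted uvicorn.Server instead of a FastAPI app, so callers control startup and shutdown themselves. A hedged usage sketch, assuming it runs inside the module above with args produced by the file's argument parser:

    import asyncio

    async def main(args) -> None:
        # build_server() configures uvicorn but does not bind or serve.
        server = await build_server(args)
        await server.serve()  # returns after shutdown or cancellation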
@@ -277,17 +281,14 @@ async def init_app(args: Namespace,
     )
     app.root_path = args.root_path
 
-    return app
-
-
-async def run_server(args: Namespace,
-                     llm_engine: Optional[AsyncLLMEngine] = None,
-                     **uvicorn_kwargs: Any) -> None:
-    logger.info("vLLM API server version %s", VLLM_VERSION)
-    logger.info("args: %s", args)
+    logger.info("Available routes are:")
+    for route in app.routes:
+        if not hasattr(route, 'methods'):
+            continue
+        methods = ', '.join(route.methods)
+        logger.info("Route: %s, Methods: %s", route.path, methods)
 
-    app = await init_app(args, llm_engine)
-    await serve_http(
+    config = uvicorn.Config(
         app,
         host=args.host,
         port=args.port,
@@ -300,6 +301,36 @@ async def run_server(args: Namespace,
         **uvicorn_kwargs,
     )
 
+    return uvicorn.Server(config)
+
+
+async def run_server(args, llm_engine=None, **uvicorn_kwargs) -> None:
+    logger.info("vLLM API server version %s", VLLM_VERSION)
+    logger.info("args: %s", args)
+
+    server = await build_server(
+        args,
+        llm_engine,
+        **uvicorn_kwargs,
+    )
+
+    loop = asyncio.get_running_loop()
+
+    server_task = loop.create_task(server.serve())
+
+    def signal_handler() -> None:
+        # prevents the uvicorn signal handler from exiting early
+        server_task.cancel()
+
+    loop.add_signal_handler(signal.SIGINT, signal_handler)
+    loop.add_signal_handler(signal.SIGTERM, signal_handler)
+
+    try:
+        await server_task
+    except asyncio.CancelledError:
+        print("Gracefully stopping http server")
+        await server.shutdown()
+
 
 if __name__ == "__main__":
     # NOTE(simon):
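
The new run_server() installs loop-level SIGINT/SIGTERM handlers that cancel the serving task, rather than letting uvicorn's default handlers exit before cleanup. The same pattern in a minimal self-contained form, where serve() is a sleeping stand-in for uvicorn's server.serve():

    import asyncio
    import signal

    async def main() -> None:
        async def serve() -> None:  # stand-in for server.serve()
            while True:
                await asyncio.sleep(1)

        loop = asyncio.get_running_loop()
        server_task = loop.create_task(serve())

        def signal_handler() -> None:
            # Cancelling unwinds `await server_task` with CancelledError,
            # so cleanup runs before the process exits.
            server_task.cancel()

        loop.add_signal_handler(signal.SIGINT, signal_handler)
        loop.add_signal_handler(signal.SIGTERM, signal_handler)

        try:
            await server_task
        except asyncio.CancelledError:
            print("Gracefully stopping")  # e.g. await server.shutdown()

    asyncio.run(main())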
@@ -308,5 +339,4 @@ async def run_server(args: Namespace,
         description="vLLM OpenAI-Compatible RESTful API server.")
     parser = make_arg_parser(parser)
     args = parser.parse_args()
-
     asyncio.run(run_server(args))