
Commit a4a6776

DarkLight1337 authored and Alvant committed
[Frontend] Factor out code for running uvicorn (vllm-project#6828)
Signed-off-by: Alvant <alvasian@yandex.ru>
1 parent 7bd7dae commit a4a6776

File tree

4 files changed: +116 -75 lines changed

vllm/entrypoints/api_server.py

Lines changed: 50 additions & 24 deletions
@@ -5,21 +5,23 @@
 We are also not going to accept PRs modifying this file, please
 change `vllm/entrypoints/openai/api_server.py` instead.
 """
-
+import asyncio
 import json
 import ssl
-from typing import AsyncGenerator
+from argparse import Namespace
+from typing import Any, AsyncGenerator, Optional
 
-import uvicorn
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse, Response, StreamingResponse
 
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.logger import init_logger
 from vllm.sampling_params import SamplingParams
+from vllm.server import serve_http
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import FlexibleArgumentParser, random_uuid
+from vllm.version import __version__ as VLLM_VERSION
 
 logger = init_logger("vllm.entrypoints.api_server")
 
@@ -81,6 +83,50 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
     return JSONResponse(ret)
 
 
+def build_app(args: Namespace) -> FastAPI:
+    global app
+
+    app.root_path = args.root_path
+    return app
+
+
+async def init_app(
+    args: Namespace,
+    llm_engine: Optional[AsyncLLMEngine] = None,
+) -> FastAPI:
+    app = build_app(args)
+
+    global engine
+
+    engine_args = AsyncEngineArgs.from_cli_args(args)
+    engine = (llm_engine
+              if llm_engine is not None else AsyncLLMEngine.from_engine_args(
+                  engine_args, usage_context=UsageContext.API_SERVER))
+
+    return app
+
+
+async def run_server(args: Namespace,
+                     llm_engine: Optional[AsyncLLMEngine] = None,
+                     **uvicorn_kwargs: Any) -> None:
+    logger.info("vLLM API server version %s", VLLM_VERSION)
+    logger.info("args: %s", args)
+
+    app = await init_app(args, llm_engine)
+    await serve_http(
+        app,
+        host=args.host,
+        port=args.port,
+        log_level=args.log_level,
+        timeout_keep_alive=TIMEOUT_KEEP_ALIVE,
+        ssl_keyfile=args.ssl_keyfile,
+        ssl_certfile=args.ssl_certfile,
+        ssl_ca_certs=args.ssl_ca_certs,
+        ssl_cert_reqs=args.ssl_cert_reqs,
+        **uvicorn_kwargs,
+    )
+
+
 if __name__ == "__main__":
     parser = FlexibleArgumentParser()
     parser.add_argument("--host", type=str, default=None)
@@ -105,25 +151,5 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
     parser.add_argument("--log-level", type=str, default="debug")
     parser = AsyncEngineArgs.add_cli_args(parser)
     args = parser.parse_args()
-    engine_args = AsyncEngineArgs.from_cli_args(args)
-    engine = AsyncLLMEngine.from_engine_args(
-        engine_args, usage_context=UsageContext.API_SERVER)
-
-    app.root_path = args.root_path
 
-    logger.info("Available routes are:")
-    for route in app.routes:
-        if not hasattr(route, 'methods'):
-            continue
-        methods = ', '.join(route.methods)
-        logger.info("Route: %s, Methods: %s", route.path, methods)
-
-    uvicorn.run(app,
-                host=args.host,
-                port=args.port,
-                log_level=args.log_level,
-                timeout_keep_alive=TIMEOUT_KEEP_ALIVE,
-                ssl_keyfile=args.ssl_keyfile,
-                ssl_certfile=args.ssl_certfile,
-                ssl_ca_certs=args.ssl_ca_certs,
-                ssl_cert_reqs=args.ssl_cert_reqs)
+    asyncio.run(run_server(args))
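
With run_server() and init_app() factored out, the demo server can also be driven from another asyncio program instead of `python -m vllm.entrypoints.api_server`. A minimal sketch (not part of this commit; the model name and the manually filled server options are illustrative assumptions that mirror the fields defined by the module's `__main__` parser):

import asyncio
import ssl

from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.api_server import run_server
from vllm.utils import FlexibleArgumentParser

# Engine options come from the standard engine CLI surface; the model name is
# an assumption for illustration only.
parser = AsyncEngineArgs.add_cli_args(FlexibleArgumentParser())
args = parser.parse_args(["--model", "facebook/opt-125m"])

# Fields the module's own __main__ parser would normally add (values assumed
# here); run_server() forwards them to serve_http()/uvicorn.
args.host = "127.0.0.1"
args.port = 8000
args.log_level = "debug"
args.root_path = None
args.ssl_keyfile = None
args.ssl_certfile = None
args.ssl_ca_certs = None
args.ssl_cert_reqs = ssl.CERT_NONE

asyncio.run(run_server(args))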

vllm/entrypoints/openai/api_server.py

Lines changed: 21 additions & 51 deletions
@@ -2,14 +2,12 @@
 import importlib
 import inspect
 import re
-import signal
+from argparse import Namespace
 from contextlib import asynccontextmanager
 from http import HTTPStatus
-from typing import Optional, Set
+from typing import Any, Optional, Set
 
-import fastapi
-import uvicorn
-from fastapi import APIRouter, Request
+from fastapi import APIRouter, FastAPI, Request
 from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, Response, StreamingResponse
@@ -38,6 +36,7 @@
 from vllm.entrypoints.openai.serving_tokenization import (
     OpenAIServingTokenization)
 from vllm.logger import init_logger
+from vllm.server import serve_http
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import FlexibleArgumentParser
 from vllm.version import __version__ as VLLM_VERSION
@@ -57,7 +56,7 @@
 
 
 @asynccontextmanager
-async def lifespan(app: fastapi.FastAPI):
+async def lifespan(app: FastAPI):
 
     async def _force_log():
         while True:
@@ -75,7 +74,7 @@ async def _force_log():
 router = APIRouter()
 
 
-def mount_metrics(app: fastapi.FastAPI):
+def mount_metrics(app: FastAPI):
     # Add prometheus asgi middleware to route /metrics requests
     metrics_route = Mount("/metrics", make_asgi_app())
     # Workaround for 307 Redirect for /metrics
@@ -165,8 +164,8 @@ async def create_embedding(request: EmbeddingRequest, raw_request: Request):
         return JSONResponse(content=generator.model_dump())
 
 
-def build_app(args):
-    app = fastapi.FastAPI(lifespan=lifespan)
+def build_app(args: Namespace) -> FastAPI:
+    app = FastAPI(lifespan=lifespan)
     app.include_router(router)
     app.root_path = args.root_path
 
@@ -214,11 +213,8 @@ async def authentication(request: Request, call_next):
     return app
 
 
-async def build_server(
-    args,
-    llm_engine: Optional[AsyncLLMEngine] = None,
-    **uvicorn_kwargs,
-) -> uvicorn.Server:
+async def init_app(args: Namespace,
+                   llm_engine: Optional[AsyncLLMEngine] = None) -> FastAPI:
    app = build_app(args)
 
     if args.served_model_name is not None:
@@ -281,14 +277,17 @@ async def build_server(
     )
     app.root_path = args.root_path
 
-    logger.info("Available routes are:")
-    for route in app.routes:
-        if not hasattr(route, 'methods'):
-            continue
-        methods = ', '.join(route.methods)
-        logger.info("Route: %s, Methods: %s", route.path, methods)
+    return app
+
+
+async def run_server(args: Namespace,
+                     llm_engine: Optional[AsyncLLMEngine] = None,
+                     **uvicorn_kwargs: Any) -> None:
+    logger.info("vLLM API server version %s", VLLM_VERSION)
+    logger.info("args: %s", args)
 
-    config = uvicorn.Config(
+    app = await init_app(args, llm_engine)
+    await serve_http(
         app,
         host=args.host,
         port=args.port,
@@ -301,36 +300,6 @@ async def build_server(
         **uvicorn_kwargs,
     )
 
-    return uvicorn.Server(config)
-
-
-async def run_server(args, llm_engine=None, **uvicorn_kwargs) -> None:
-    logger.info("vLLM API server version %s", VLLM_VERSION)
-    logger.info("args: %s", args)
-
-    server = await build_server(
-        args,
-        llm_engine,
-        **uvicorn_kwargs,
-    )
-
-    loop = asyncio.get_running_loop()
-
-    server_task = loop.create_task(server.serve())
-
-    def signal_handler() -> None:
-        # prevents the uvicorn signal handler to exit early
-        server_task.cancel()
-
-    loop.add_signal_handler(signal.SIGINT, signal_handler)
-    loop.add_signal_handler(signal.SIGTERM, signal_handler)
-
-    try:
-        await server_task
-    except asyncio.CancelledError:
-        print("Gracefully stopping http server")
-        await server.shutdown()
-
 
 if __name__ == "__main__":
     # NOTE(simon):
@@ -339,4 +308,5 @@ def signal_handler() -> None:
         description="vLLM OpenAI-Compatible RESTful API server.")
     parser = make_arg_parser(parser)
     args = parser.parse_args()
+
     asyncio.run(run_server(args))
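
Since run_server() now accepts an optional llm_engine, the OpenAI-compatible server can be embedded in an existing asyncio application and reuse an already-constructed engine. A rough sketch (not part of this commit; the model name and CLI values are illustrative, and the import path for make_arg_parser is assumed):

import asyncio

from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.entrypoints.openai.api_server import run_server
from vllm.entrypoints.openai.cli_args import make_arg_parser  # assumed path
from vllm.usage.usage_lib import UsageContext
from vllm.utils import FlexibleArgumentParser

# Same argument surface the __main__ block builds.
parser = make_arg_parser(FlexibleArgumentParser())
args = parser.parse_args(["--model", "facebook/opt-125m", "--port", "8000"])

# Build the engine up front; run_server() skips engine construction when an
# engine is passed in via llm_engine.
engine = AsyncLLMEngine.from_engine_args(
    AsyncEngineArgs.from_cli_args(args),
    usage_context=UsageContext.OPENAI_API_SERVER)

asyncio.run(run_server(args, llm_engine=engine))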

vllm/server/__init__.py

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+from .launch import serve_http
+
+__all__ = ["serve_http"]

vllm/server/launch.py

Lines changed: 42 additions & 0 deletions
@@ -0,0 +1,42 @@
+import asyncio
+import signal
+from typing import Any
+
+import uvicorn
+from fastapi import FastAPI
+
+from vllm.logger import init_logger
+
+logger = init_logger(__name__)
+
+
+async def serve_http(app: FastAPI, **uvicorn_kwargs: Any) -> None:
+    logger.info("Available routes are:")
+    for route in app.routes:
+        methods = getattr(route, "methods", None)
+        path = getattr(route, "path", None)
+
+        if methods is None or path is None:
+            continue
+
+        logger.info("Route: %s, Methods: %s", path, ', '.join(methods))
+
+    config = uvicorn.Config(app, **uvicorn_kwargs)
+    server = uvicorn.Server(config)
+
+    loop = asyncio.get_running_loop()
+
+    server_task = loop.create_task(server.serve())
+
+    def signal_handler() -> None:
+        # prevents the uvicorn signal handler to exit early
+        server_task.cancel()
+
+    loop.add_signal_handler(signal.SIGINT, signal_handler)
+    loop.add_signal_handler(signal.SIGTERM, signal_handler)
+
+    try:
+        await server_task
+    except asyncio.CancelledError:
+        logger.info("Gracefully stopping http server")
+        await server.shutdown()
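
The new serve_http() helper is not tied to vLLM's own apps: it takes any FastAPI app plus whatever keyword arguments uvicorn.Config accepts, and wraps uvicorn.Server with the SIGINT/SIGTERM handling and graceful shutdown shown above. A minimal reuse sketch (hypothetical usage, not part of this commit):

import asyncio

from fastapi import FastAPI

from vllm.server import serve_http

app = FastAPI()


@app.get("/ping")
async def ping() -> dict:
    # A trivial route so the "Available routes" startup log has an entry.
    return {"pong": True}


if __name__ == "__main__":
    # host, port and log_level are forwarded verbatim to uvicorn.Config.
    asyncio.run(serve_http(app, host="127.0.0.1", port=8080, log_level="info"))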
