Skip to content

Commit

Permalink
[Frontend] Pass pre-created socket to uvicorn (vllm-project#13113)
Browse files Browse the repository at this point in the history
  • Loading branch information
russellb authored and kerthcet committed Feb 21, 2025
1 parent 080fd83 commit b521f93
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 6 deletions.
1 change: 1 addition & 0 deletions vllm/entrypoints/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ async def run_server(args: Namespace,

shutdown_task = await serve_http(
app,
sock=None,
host=args.host,
port=args.port,
log_level=args.log_level,
Expand Down
9 changes: 6 additions & 3 deletions vllm/entrypoints/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

import asyncio
import signal
import socket
from http import HTTPStatus
from typing import Any
from typing import Any, Optional

import uvicorn
from fastapi import FastAPI, Request, Response
Expand All @@ -17,7 +18,8 @@
logger = init_logger(__name__)


async def serve_http(app: FastAPI, **uvicorn_kwargs: Any):
async def serve_http(app: FastAPI, sock: Optional[socket.socket],
**uvicorn_kwargs: Any):
logger.info("Available routes are:")
for route in app.routes:
methods = getattr(route, "methods", None)
Expand All @@ -34,7 +36,8 @@ async def serve_http(app: FastAPI, **uvicorn_kwargs: Any):

loop = asyncio.get_running_loop()

server_task = loop.create_task(server.serve())
server_task = loop.create_task(
server.serve(sockets=[sock] if sock else None))

def signal_handler() -> None:
# prevents the uvicorn signal handler from exiting early
Expand Down
13 changes: 10 additions & 3 deletions vllm/entrypoints/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import re
import signal
import socket
import sys
import tempfile
import uuid
from argparse import Namespace
Expand Down Expand Up @@ -831,6 +830,7 @@ def create_server_socket(addr: Tuple[str, int]) -> socket.socket:

sock = socket.socket(family=family, type=socket.SOCK_STREAM)
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
sock.bind(addr)

return sock
Expand Down Expand Up @@ -878,8 +878,17 @@ def signal_handler(*_) -> None:
model_config = await engine_client.get_model_config()
await init_app_state(engine_client, model_config, app.state, args)

def _listen_addr(a: str) -> str:
    """Return the host string to show in the startup log URL.

    IPv6 addresses are wrapped in square brackets; an empty host
    falls back to "0.0.0.0"; anything else is returned unchanged.
    """
    if not is_valid_ipv6_address(a):
        # Not IPv6: use the host as given, or the wildcard when it is empty.
        return a if a else "0.0.0.0"
    return f"[{a}]"

logger.info("Starting vLLM API server on http://%s:%d",
_listen_addr(sock_addr[0]), sock_addr[1])

shutdown_task = await serve_http(
app,
sock=sock,
host=args.host,
port=args.port,
log_level=args.uvicorn_log_level,
Expand All @@ -888,8 +897,6 @@ def signal_handler(*_) -> None:
ssl_certfile=args.ssl_certfile,
ssl_ca_certs=args.ssl_ca_certs,
ssl_cert_reqs=args.ssl_cert_reqs,
# Workaround to work on macOS
fd=sock.fileno() if sys.platform.startswith("darwin") else None,
**uvicorn_kwargs,
)

Expand Down

0 comments on commit b521f93

Please sign in to comment.