From ad2542d346559b3109c2235e5a0d1acf4ccdd53f Mon Sep 17 00:00:00 2001 From: Jason Cox Date: Sun, 3 Mar 2024 00:00:29 -0500 Subject: [PATCH] Add vLLM version info to logs and openai API server (#3161) --- vllm/engine/llm_engine.py | 3 ++- vllm/entrypoints/openai/api_server.py | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index e84fda5640e4d..c9bd89a1b18f4 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -7,6 +7,7 @@ from typing import (TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union) +import vllm from vllm.lora.request import LoRARequest from vllm.config import (CacheConfig, DeviceConfig, ModelConfig, ParallelConfig, SchedulerConfig, LoRAConfig) @@ -85,7 +86,7 @@ def __init__( log_stats: bool, ) -> None: logger.info( - "Initializing an LLM engine with config: " + f"Initializing an LLM engine (v{vllm.__version__}) with config: " f"model={model_config.model!r}, " f"tokenizer={model_config.tokenizer!r}, " f"tokenizer_mode={model_config.tokenizer_mode}, " diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 3777e0f3a0601..993a834e5a720 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -15,6 +15,7 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse, StreamingResponse, Response +import vllm from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.entrypoints.openai.protocol import CompletionRequest, ChatCompletionRequest, ErrorResponse @@ -168,6 +169,12 @@ async def show_available_models(): return JSONResponse(content=models.model_dump()) +@app.get("/version") +async def show_version(): + ver = {"version": vllm.__version__} + return JSONResponse(content=ver) + + @app.post("/v1/chat/completions") async def create_chat_completion(request: ChatCompletionRequest, raw_request: Request): @@ -231,6 +238,7 @@ async def authentication(request: Request, call_next): f"Invalid middleware {middleware}. Must be a function or a class." ) + logger.info(f"vLLM API server version {vllm.__version__}") logger.info(f"args: {args}") if args.served_model_name is not None: