diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index e380212a4d76b..bbd61a2c5dd59 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -82,6 +82,7 @@ class ModelCard(OpenAIBaseModel): owned_by: str = "vllm" root: Optional[str] = None parent: Optional[str] = None + max_model_len: Optional[int] = None permission: List[ModelPermission] = Field(default_factory=list) diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index 066acdf1c019a..ae659d19c878b 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -62,6 +62,7 @@ async def show_available_models(self) -> ModelList: """Show available models. Right now we only have one model.""" model_cards = [ ModelCard(id=served_model_name, + max_model_len=self.max_model_len, root=self.served_model_names[0], permission=[ModelPermission()]) for served_model_name in self.served_model_names