From 5b6b8ed2123049c568b743fb1ed7a441cba1e759 Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> Date: Sat, 1 Jun 2024 19:35:41 -0400 Subject: [PATCH] [BugFix] Prevent `LLM.encode` for non-generation Models (#5184) Co-authored-by: mgoin --- vllm/entrypoints/llm.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index 8a4245f93679b..7f1c6ef9603a8 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -285,6 +285,11 @@ def generate( considered legacy and may be deprecated in the future. You should instead pass them via the ``inputs`` parameter. """ + if self.llm_engine.model_config.embedding_mode: + raise ValueError( + "LLM.generate() is only supported for generation models " + "(XForCausalLM).") + if prompt_token_ids is not None or multi_modal_data is not None: inputs = self._convert_v1_inputs( prompts=cast(Optional[Union[str, List[str]]], prompts), @@ -429,6 +434,11 @@ def encode( considered legacy and may be deprecated in the future. You should instead pass them via the ``inputs`` parameter. """ + if not self.llm_engine.model_config.embedding_mode: + raise ValueError( + "LLM.encode() is only supported for embedding models (XModel)." + ) + if prompt_token_ids is not None or multi_modal_data is not None: inputs = self._convert_v1_inputs( prompts=cast(Optional[Union[str, List[str]]], prompts),