[VLM] Merged multi-modal processor for GLM4V (vllm-project#12449)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
martihns24 · Feb 8, 2025 · 86222a3 · 86222a3
1 parent fe743b7
commit 86222a3
Show file tree

Hide file tree

Showing 4 changed files with 222 additions and 167 deletions.
diff --git a/docs/source/models/supported_models.md b/docs/source/models/supported_models.md
@@ -719,7 +719,7 @@ See [this page](#generative-models) for more information on how to use generativ
   * `THUDM/glm-4v-9b` etc.
   * ✅︎
   * ✅︎
-  *
+  * ✅︎
 - * `H2OVLChatModel`
   * H2OVL
   * T + I<sup>E+</sup>

diff --git a/examples/offline_inference/vision_language.py b/examples/offline_inference/vision_language.py
@@ -106,7 +106,9 @@ def run_glm4v(question: str, modality: str):
               trust_remote_code=True,
               enforce_eager=True,
               disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache)
-    prompt = question
+    prompt = f"<|user|>\n<|begin_of_image|><|endoftext|><|end_of_image|>\
+        {question}<|assistant|>"
+
     stop_token_ids = [151329, 151336, 151338]
     return llm, prompt, stop_token_ids
 

diff --git a/tests/models/multimodal/processing/test_common.py b/tests/models/multimodal/processing/test_common.py
@@ -147,6 +147,7 @@ def _test_processing_correctness(
     "facebook/chameleon-7b",
     "deepseek-ai/deepseek-vl2-tiny",
     "adept/fuyu-8b",
+    "THUDM/glm-4v-9b",
     "h2oai/h2ovl-mississippi-800m",
     "OpenGVLab/InternVL2-1B",
     "HuggingFaceM4/Idefics3-8B-Llama3",