review: update comments for clarity

Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>
vllm-project · Feb 27, 2025 · 0543ce3
1 parent 3fbf643
commit 0543ce3
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 2 deletions.
diff --git a/tests/models/encoder_decoder/vision_language/test_mllama.py b/tests/models/encoder_decoder/vision_language/test_mllama.py
@@ -479,7 +479,8 @@ def test_regression(vllm_runner, image_assets, model, dtype, max_tokens,
 
         # Regression tests for https://github.com/vllm-project/vllm/issues/10648
 
-        # Number of image groups is greater than the number of images provided
+        # Number of groups of image tokens is greater than the number of images
+        # provided (the whitespace between the tags is necessary)
         prompt = "<|begin_of_text|><|image|> <|image|> Compare the two images"  # noqa: E501
         image = stop_sign
         with pytest.raises(ValueError):

diff --git a/vllm/model_executor/models/mllama.py b/vllm/model_executor/models/mllama.py
@@ -178,7 +178,8 @@ def apply(
     ) -> MultiModalEncDecInputs:
         mm_inputs = super().apply(prompt, mm_data, hf_processor_mm_kwargs)
 
-        # Check that the number of image tokens matches the number of images
+        # Check that the number of image tokens in the decoder prompt matches
+        # the number of images provided in mm_data
         num_image_tokens = mm_inputs['prompt_token_ids'].count(
             self.info.get_hf_config().image_token_index)
         image_data = mm_data.get("image", [])