From 5390a7eeec0fb0890cec8fac45decd3ebaac9690 Mon Sep 17 00:00:00 2001
From: Chang Su <chang.s.su@oracle.com>
Date: Thu, 20 Jun 2024 21:45:34 -0700
Subject: [PATCH] [BugFix] Fix test_phi3v.py (#5725)

---
 tests/conftest.py          |  4 +++-
 tests/models/test_phi3v.py | 10 ++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 5bbfd87f0bb3b..67885b93285c5 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -233,11 +233,13 @@ def generate_greedy(
         prompts: List[str],
         max_tokens: int,
         images: Optional[List[Image.Image]] = None,
+        **kwargs,
     ) -> List[Tuple[List[int], str]]:
         outputs = self.generate(prompts,
                                 do_sample=False,
                                 max_new_tokens=max_tokens,
-                                images=images)
+                                images=images,
+                                **kwargs)
 
         return [(output_ids[0], output_str[0])
                 for output_ids, output_str in outputs]
diff --git a/tests/models/test_phi3v.py b/tests/models/test_phi3v.py
index 1732e8f08a897..23454759827d5 100644
--- a/tests/models/test_phi3v.py
+++ b/tests/models/test_phi3v.py
@@ -77,7 +77,7 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str],
 # numeric difference for longer context and test can't pass
 @pytest.mark.parametrize("model_and_config", model_and_vl_config)
 @pytest.mark.parametrize("dtype", [target_dtype])
-@pytest.mark.parametrize("max_tokens", [8])
+@pytest.mark.parametrize("max_tokens", [128])
 def test_models(hf_runner, vllm_runner, hf_images, vllm_images,
                 model_and_config, dtype: str, max_tokens: int) -> None:
     """Inference result should be the same between hf and vllm.
@@ -95,9 +95,11 @@ def test_models(hf_runner, vllm_runner, hf_images, vllm_images,
     hf_model_kwargs = {"_attn_implementation": "eager"}
     with hf_runner(model_id, dtype=dtype,
                    model_kwargs=hf_model_kwargs) as hf_model:
-        hf_outputs = hf_model.generate_greedy(HF_IMAGE_PROMPTS,
-                                              max_tokens,
-                                              images=hf_images)
+        hf_outputs = hf_model.generate_greedy(
+            HF_IMAGE_PROMPTS,
+            max_tokens,
+            images=hf_images,
+            eos_token_id=hf_model.processor.tokenizer.eos_token_id)
 
     vllm_image_prompts = [
         p.replace("<|image_1|>",