@@ -6,11 +6,10 @@
 
 from vllm.multimodal.utils import encode_image_base64, fetch_image
 
-from ...utils import VLLM_PATH, RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer
 
-MODEL_NAME = "llava-hf/llava-1.5-7b-hf"
-LLAVA_CHAT_TEMPLATE = VLLM_PATH / "examples/template_llava.jinja"
-assert LLAVA_CHAT_TEMPLATE.exists()
+MODEL_NAME = "microsoft/Phi-3.5-vision-instruct"
+MAXIMUM_IMAGES = 2
 
 # Test different image extensions (JPG/PNG) and formats (gray/RGB/RGBA)
 TEST_IMAGE_URLS = [
@@ -24,13 +24,9 @@
 @pytest.fixture(scope="module")
 def server():
     args = [
-        "--dtype",
-        "bfloat16",
-        "--max-model-len",
-        "4096",
-        "--enforce-eager",
-        "--chat-template",
-        str(LLAVA_CHAT_TEMPLATE),
+        "--dtype", "bfloat16", "--max-model-len", "4096", "--max-num-seqs",
+        "5", "--enforce-eager", "--trust-remote-code", "--limit-mm-per-prompt",
+        f"image={MAXIMUM_IMAGES}"
     ]
 
     with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
@@ -84,7 +79,7 @@ async def test_single_chat_session_image(client: openai.AsyncOpenAI,
     choice = chat_completion.choices[0]
     assert choice.finish_reason == "length"
     assert chat_completion.usage == openai.types.CompletionUsage(
-        completion_tokens=10, prompt_tokens=596, total_tokens=606)
+        completion_tokens=10, prompt_tokens=772, total_tokens=782)
 
     message = choice.message
     message = chat_completion.choices[0].message
@@ -139,7 +134,7 @@ async def test_single_chat_session_image_base64encoded(
     choice = chat_completion.choices[0]
     assert choice.finish_reason == "length"
     assert chat_completion.usage == openai.types.CompletionUsage(
-        completion_tokens=10, prompt_tokens=596, total_tokens=606)
+        completion_tokens=10, prompt_tokens=772, total_tokens=782)
 
     message = choice.message
     message = chat_completion.choices[0].message
@@ -217,47 +212,53 @@ async def test_chat_streaming_image(client: openai.AsyncOpenAI,
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
-@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
+@pytest.mark.parametrize(
+    "image_urls",
+    [TEST_IMAGE_URLS[:i] for i in range(2, len(TEST_IMAGE_URLS))])
 async def test_multi_image_input(client: openai.AsyncOpenAI, model_name: str,
-                                 image_url: str):
+                                 image_urls: List[str]):
 
     messages = [{
         "role":
         "user",
         "content": [
-            {
-                "type": "image_url",
-                "image_url": {
-                    "url": image_url
-                }
-            },
-            {
+            *({
                 "type": "image_url",
                 "image_url": {
                     "url": image_url
                 }
-            },
+            } for image_url in image_urls),
             {
                 "type": "text",
                 "text": "What's in this image?"
            },
         ],
     }]
 
-    with pytest.raises(openai.BadRequestError):  # test multi-image input
-        await client.chat.completions.create(
+    if len(image_urls) > MAXIMUM_IMAGES:
+        with pytest.raises(openai.BadRequestError):  # test multi-image input
+            await client.chat.completions.create(
+                model=model_name,
+                messages=messages,
+                max_tokens=10,
+                temperature=0.0,
+            )
+
+        # the server should still work afterwards
+        completion = await client.completions.create(
+            model=model_name,
+            prompt=[0, 0, 0, 0, 0],
+            max_tokens=5,
+            temperature=0.0,
+        )
+        completion = completion.choices[0].text
+        assert completion is not None and len(completion) >= 0
+    else:
+        chat_completion = await client.chat.completions.create(
             model=model_name,
             messages=messages,
            max_tokens=10,
            temperature=0.0,
         )
-
-    # the server should still work afterwards
-    completion = await client.completions.create(
-        model=model_name,
-        prompt=[0, 0, 0, 0, 0],
-        max_tokens=5,
-        temperature=0.0,
-    )
-    completion = completion.choices[0].text
-    assert completion is not None and len(completion) >= 0
+        message = chat_completion.choices[0].message
+        assert message.content is not None and len(message.content) >= 0
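For readers who want to reproduce this behavior by hand, below is a minimal client-side sketch of what the multi-image test above exercises. It is not part of the test suite: it assumes a vLLM OpenAI-compatible server is already listening on http://localhost:8000 and was started with the same flags as the fixture (in particular --limit-mm-per-prompt image=2); the base URL, API key, and placeholder image URLs are assumptions, while the message structure and model name are taken from the test. Sending more image parts than the per-prompt limit is expected to raise openai.BadRequestError rather than return a completion, which is exactly what the parametrized test asserts for lists longer than MAXIMUM_IMAGES.

# Minimal sketch, not part of the test suite. Assumes a vLLM OpenAI-compatible
# server is already running at http://localhost:8000 and was started with
# --limit-mm-per-prompt image=2 (server address and API key are assumptions).
import asyncio
from typing import List

import openai

client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1",
                            api_key="EMPTY")


async def ask_about_images(image_urls: List[str]) -> str:
    # One image_url content part per URL, then a single text part,
    # mirroring the message construction in the test above.
    messages = [{
        "role": "user",
        "content": [
            *({"type": "image_url", "image_url": {"url": url}}
              for url in image_urls),
            {"type": "text", "text": "What's in these images?"},
        ],
    }]
    # With more image parts than the server's per-prompt limit, this call is
    # expected to raise openai.BadRequestError (HTTP 400) instead of returning.
    chat_completion = await client.chat.completions.create(
        model="microsoft/Phi-3.5-vision-instruct",
        messages=messages,
        max_tokens=10,
        temperature=0.0,
    )
    return chat_completion.choices[0].message.content


if __name__ == "__main__":
    # Placeholder URLs; any two reachable images within the limit should work.
    print(asyncio.run(ask_about_images([
        "https://example.com/cat.jpg",
        "https://example.com/dog.png",
    ])))

The generator unpacking (*(... for image_url in image_urls)) is the same construction the updated test uses to emit one image_url part per URL before the trailing text part.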