Skip to content

Commit b13ee63

Browse files
authored
Enable new model unit tests (UTs) on XPU and fix some failures on XPU (#41386)
* enable new model uts to xpu and fix some failures on xpu
  Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
* add more
  Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
* fix style
  Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
* Update test_modeling_internvl.py
* Update test_modeling_llava.py
* Update test_modeling_qwen2_5_omni.py
* Update test_modeling_llava_next_video.py
* Update test_modeling_qwen3.py
* Update test_modeling_whisper.py
* Update test_modeling_whisper.py
* Update test_modeling_llava.py
* Update test_modeling_llava.py
* Update test_modeling_qwen2_5_omni.py
* fix style
  Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
---------
Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
1 parent 1c5ac89 commit b13ee63

File tree

11 files changed

+145
-26
lines changed

11 files changed

+145
-26
lines changed

tests/models/cohere2_vision/test_modeling_cohere2_vision.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -223,7 +223,7 @@ def test_model_integration_forward(self):
223223

224224
EXPECTED_LOGITS = Expectations(
225225
{
226-
("xpu", 3): [0.4109, 0.1532, 0.8018, 2.1328, 0.5483],
226+
("xpu", 3): [2.4297, 1.6836, 1.8779, 2.1895, 1.9395],
227227
# 4-bit
228228
("cuda", 7): [0.1097, 0.3481, 3.8340, 9.7969, 2.0488],
229229
("cuda", 8): [2.4277, 1.6875, 1.8789, 2.1875, 1.9375],
@@ -264,6 +264,7 @@ def test_model_integration_generate_text_only(self):
264264

265265
expected_outputs = Expectations(
266266
{
267+
("xpu", 3): "<|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|>",
267268
("cuda", 8): "<|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|>",
268269
}
269270
) # fmt: skip
@@ -298,6 +299,7 @@ def test_model_integration_generate_chat_template(self):
298299

299300
expected_outputs = Expectations(
300301
{
302+
("xpu", 3): '<|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|>',
301303
("cuda", 8): '<|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|>',
302304
}
303305
) # fmt: skip
@@ -344,6 +346,7 @@ def test_model_integration_batched_generate(self):
344346
decoded_output = processor.decode(output[0, inputs["input_ids"].shape[1] :], skip_special_tokens=True)
345347
expected_outputs = Expectations(
346348
{
349+
("xpu", 3): 'Dock stretches to calm',
347350
("cuda", 8): 'Dock stretches to calm',
348351
}
349352
) # fmt: skip
@@ -360,6 +363,7 @@ def test_model_integration_batched_generate(self):
360363

361364
expected_outputs = Expectations(
362365
{
366+
("xpu", 3): 'The image depicts a',
363367
("cuda", 8): 'The image depicts a',
364368
}
365369
) # fmt: skip
@@ -418,6 +422,7 @@ def test_model_integration_batched_generate_multi_image(self):
418422
# Batching seems to alter the output slightly, but it is also the case in the original implementation. This seems to be expected: https://github.com/huggingface/transformers/issues/23017#issuecomment-1649630232
419423
expected_outputs = Expectations(
420424
{
425+
("xpu", 3): '<|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|>',
421426
("cuda", 8): '<|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|>',
422427
}
423428
) # fmt: skip
@@ -433,6 +438,7 @@ def test_model_integration_batched_generate_multi_image(self):
433438
decoded_output = processor.decode(output[1, inputs["input_ids"].shape[1] :], skip_special_tokens=True)
434439
expected_outputs = Expectations(
435440
{
441+
("xpu", 3): '<|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|>',
436442
("cuda", 8): '<|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|><|CHATBOT_TOKEN|>',
437443
}
438444
) # fmt: skip

tests/models/ernie4_5/test_modeling_ernie4_5.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,7 @@ def test_ernie4_5_0p3B(self):
7979
"""
8080
expected_texts = Expectations(
8181
{
82+
("xpu", 3): "User: Hey, are you conscious? Can you talk to me?\nAssistant: Hey! I'm here to help you with whatever you need. Are you feeling a bit overwhelmed or stressed? I'm here to listen and provide support.",
8283
("cuda", None): "User: Hey, are you conscious? Can you talk to me?\nAssistant: Hey! I'm here to help you with whatever you need. Are you feeling a bit overwhelmed or stressed? I'm here to listen and provide support.",
8384
}
8485
) # fmt: skip

tests/models/internvl/test_modeling_internvl.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -645,7 +645,7 @@ def test_llama_small_model_integration_forward(self):
645645

646646
expected_logits_all = Expectations(
647647
{
648-
("xpu", 3): [-9.8750, -0.5703, 1.4297, -10.3125, -10.3125],
648+
("xpu", 3): [-9.8828, -0.4954, 1.4561, -10.3438, -10.3438],
649649
("cuda", 7): [-9.8750, -0.4861, 1.4648, -10.3359, -10.3359],
650650
("cuda", 8): [-9.8906, -0.4995, 1.4473, -10.3359, -10.3438],
651651
("rocm", (9, 4)): [ -9.8828, -0.5005, 1.4697, -10.3438, -10.3438],
@@ -680,6 +680,7 @@ def test_llama_small_model_integration_generate_text_only(self):
680680

681681
expected_outputs = Expectations(
682682
{
683+
("xpu", 3): "Autumn leaves fall,\nNature's breath, a season's sigh,\nSilent woods awake.",
683684
("cuda", 7): "Autumn leaves fall,\nNature's breath, a gentle sigh,\nSilent whispers.",
684685
("cuda", 8): "Autumn leaves fall,\nNature's breath, a silent sigh,\nWinter's chill approaches.",
685686
}
@@ -920,7 +921,7 @@ def test_llama_small_model_integration_interleaved_images_videos(self):
920921
# Batching seems to alter the output slightly, but it is also the case in the original implementation. This seems to be expected: https://github.com/huggingface/transformers/issues/23017#issuecomment-1649630232
921922
expected_outputs = Expectations(
922923
{
923-
("xpu", 3): "user\n\n\nWhat are the difference between these two images?\nassistant\nI apologize for the confusion in my previous response. After re-examining the images, I can see that they are actually",
924+
("xpu", 3): "user\n\n\nWhat are the difference between these two images?\nassistant\nI apologize for the confusion in my previous response. Upon closer inspection, the differences between the two images are:\n\n1. **",
924925
("cuda", 7): 'user\n\n\nWhat are the difference between these two images?\nassistant\nI apologize for the confusion in my previous response. Upon closer inspection, the differences between the two images are:\n\n1. **',
925926
("cuda", 8): 'user\n\n\nWhat are the difference between these two images?\nassistant\nI apologize for the confusion in my previous response. After re-examining the images, I can see that there are no',
926927
("rocm", (9, 4)): 'user\n\n\nWhat are the difference between these two images?\nassistant\nI apologize for the confusion in my previous response. Upon closer inspection, the differences between the two images are:\n\n1. **',
@@ -938,7 +939,7 @@ def test_llama_small_model_integration_interleaved_images_videos(self):
938939
decoded_output = processor.decode(output[1], skip_special_tokens=True)
939940
expected_outputs = Expectations(
940941
{
941-
("xpu", 3): "user\nFrame1: \nFrame2: \nFrame3: \nFrame4: \nFrame5: \nFrame6: \nFrame7: \nFrame8: \nWhat type of shot is the man performing?\nassistant\nThe man is performing a forehand shot. This is a common shot in tennis where the player swings the racket across their",
942+
("xpu", 3): "user\nFrame1: \nFrame2: \nFrame3: \nFrame4: \nFrame5: \nFrame6: \nFrame7: \nFrame8: \nWhat type of shot is the man performing?\nassistant\nThe man is performing a forehand shot. This is a common stroke in tennis where the player swings the racket across their",
942943
("cuda", 7): 'user\nFrame1: \nFrame2: \nFrame3: \nFrame4: \nFrame5: \nFrame6: \nFrame7: \nFrame8: \nWhat type of shot is the man performing?\nassistant\nThe man is performing a forehand shot. This is a common stroke in tennis where the player swings the racket across their',
943944
("cuda", 8): 'user\nFrame1: \nFrame2: \nFrame3: \nFrame4: \nFrame5: \nFrame6: \nFrame7: \nFrame8: \nWhat type of shot is the man performing?\nassistant\nThe man is performing a forehand shot. This is a common stroke in tennis where the player swings the racket across their',
944945
}

tests/models/llava/test_modeling_llava.py

Lines changed: 37 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -300,6 +300,7 @@ def test_small_model_integration_test(self):
300300

301301
output = model.generate(**inputs, max_new_tokens=20)
302302
expected_decoded_texts = Expectations({
303+
("xpu", 3): "\nUSER: What are the things I should be cautious about when I visit this place?\nASSISTANT: When visiting this place, there are a few things one should be cautious about. Firstly,",
303304
("cuda", None): "\nUSER: What are the things I should be cautious about when I visit this place?\nASSISTANT: When visiting this place, there are a few things one should be cautious about. Firstly,",
304305
("rocm", (9, 5)): "\nUSER: What are the things I should be cautious about when I visit this place?\nASSISTANT: When visiting this place, there are a few things one should be cautious about. First, the",
305306
}) # fmt: skip
@@ -328,17 +329,16 @@ def test_small_model_integration_test_llama_single(self):
328329

329330
EXPECTED_DECODED_TEXTS = Expectations(
330331
{
332+
("xpu", 3): 'USER: \nWhat are the things I should be cautious about when I visit this place? ASSISTANT: When visiting this place, which is a pier or dock extending over a body of water, there are a few things to be cautious about. First, be aware of the weather conditions, as sudden changes in weather can make the pier unsafe to walk on. Second, be mindful of the water depth and any potential hazards, such as submerged rocks or debris, that could cause accidents or injuries. Additionally, be cautious of the tides and currents, as they can change rapidly and pose a risk to swimmers or those who venture too close to the edge of the pier. Lastly, be respectful of the environment and other visitors, as the pier is a shared space where people can enjoy the view, relax, or engage in recreational activities.',
331333
("cuda", 7): 'USER: \nWhat are the things I should be cautious about when I visit this place? ASSISTANT: When visiting this place, which is a pier or dock extending over a body of water, there are a few things to be cautious about. First, be aware of the weather conditions, as sudden changes in weather can make the pier unsafe to walk on. Second, be mindful of the water depth and any potential hazards, such as submerged rocks or debris, that could cause accidents or injuries. Additionally, be cautious of the tides and currents, as they can change rapidly and pose a risk to swimmers or those who venture too close to the edge of the pier. Lastly, be respectful of the environment and other visitors, as the pier is a shared space where people can enjoy the view, relax, or engage in recreational activities.',
332334
("cuda", 8): 'USER: \nWhat are the things I should be cautious about when I visit this place? ASSISTANT: When visiting this place, which is a pier or dock extending over a body of water, there are a few things to be cautious about. First, be aware of the weather conditions, as sudden changes in weather can make the pier unsafe to walk on. Second, be mindful of the water depth and any potential hazards, such as submerged rocks or debris, that could cause accidents or injuries. Additionally, be cautious of the tides and currents, as they can change rapidly and pose a risk to swimmers or those who venture too close to the edge of the pier. Lastly, be respectful of the environment and other visitors, as the pier is a shared space where people can enjoy the view, relax, or engage in recreational activities.',
333335
("rocm", (9, 5)): 'USER: \nWhat are the things I should be cautious about when I visit this place? ASSISTANT: When visiting this place, which is a pier or dock overlooking a lake, you should be cautious about the following:\n\n1. Safety: Ensure that the pier or dock is stable and secure before stepping onto it. Avoid walking on the edge of the pier or dock, as it could be unstable or unsafe.\n\n2. Weather conditions: Be aware of the weather forecast before visiting the area. Strong winds, heavy rain, or storms can make the pier or dock unsafe to use.\n\n3. Wildlife: Be mindful of the wildlife in the area, such as birds or aquatic animals. Avoid disturbing their natural habitat or causing harm to the local ecosystem.\n\n4. Water safety: If you plan to go swimming or engage in water activities, be aware of the water conditions, such as currents, tides, or potential hazards like submerged objects.\n\n5. Personal belongings: Keep an eye on your personal belongings, such as bags or backpacks, to prevent theft or loss.\n\n6. Leave no trace: When visiting the area, make sure to clean up after yourself and leave no trace of your presence to preserve the natural environment.',
334336
}
335337
) # fmt: skip
336338
EXPECTED_DECODED_TEXT = EXPECTED_DECODED_TEXTS.get_expectation()
339+
decoded_text = processor.decode(output[0], skip_special_tokens=True)
337340

338-
self.assertEqual(
339-
processor.decode(output[0], skip_special_tokens=True),
340-
EXPECTED_DECODED_TEXT,
341-
)
341+
self.assertEqual(decoded_text, EXPECTED_DECODED_TEXT)
342342

343343
@slow
344344
@require_bitsandbytes
@@ -362,6 +362,13 @@ def test_small_model_integration_test_llama_batched(self):
362362

363363
expected_decoded_texts = Expectations(
364364
{
365+
("xpu", 3): [
366+
"USER: \nWhat are the things I should be cautious about when I visit this place? What should I bring "
367+
"with me? ASSISTANT: When visiting this place, which is a pier or dock extending over a body of water, "
368+
"you",
369+
"USER: \nWhat is this? ASSISTANT: The image features two cats lying down on a pink couch. One cat "
370+
"is located on",
371+
],
365372
("cuda", None): [
366373
"USER: \nWhat are the things I should be cautious about when I visit this place? What should I bring "
367374
"with me? ASSISTANT: When visiting this place, which is a pier or dock extending over a body of water, "
@@ -404,6 +411,10 @@ def test_small_model_integration_test_batch(self):
404411

405412
EXPECTED_DECODED_TEXTS = Expectations(
406413
{
414+
("xpu", 3): [
415+
'USER: \nWhat are the things I should be cautious about when I visit this place? What should I bring with me?\nASSISTANT: When visiting this place, there are a few things to be cautious about and items to bring along',
416+
'USER: \nWhat is this?\nASSISTANT: Cats',
417+
],
407418
("cuda", 7): [
408419
'USER: \nWhat are the things I should be cautious about when I visit this place? What should I bring with me?\nASSISTANT: When visiting this place, there are a few things to be cautious about and items to bring along',
409420
'USER: \nWhat is this?\nASSISTANT: Cats',
@@ -452,6 +463,13 @@ def test_small_model_integration_test_llama_batched_regression(self):
452463

453464
expected_decoded_texts = Expectations(
454465
{
466+
("xpu", 3): [
467+
"USER: \nWhat are the things I should be cautious about when I visit this place? What should I bring "
468+
"with me?\nASSISTANT: When visiting this place, which appears to be a dock or pier extending over a "
469+
"body of water",
470+
"USER: \nWhat is this?\nASSISTANT: Two cats lying on a bed!\nUSER: \nAnd this?\nASSISTANT: A cat "
471+
"sleeping on a bed.",
472+
],
455473
("cuda", None): [
456474
"USER: \nWhat are the things I should be cautious about when I visit this place? What should I bring "
457475
"with me?\nASSISTANT: When visiting this place, which appears to be a dock or pier extending over a "
@@ -501,6 +519,11 @@ def test_batched_generation(self):
501519

502520
EXPECTED_OUTPUTS = Expectations(
503521
{
522+
("xpu", 3): [
523+
"\n \nUSER: What's the difference of two images?\nASSISTANT: The difference between the two images is that one shows a dog standing on a grassy field, while",
524+
'\nUSER: Describe the image.\nASSISTANT: The image features a brown and white dog sitting on a sidewalk. The dog is holding a small',
525+
'\nUSER: Describe the image.\nASSISTANT: The image features a lone llama standing on a grassy hill. The llama is the'
526+
],
504527
("cuda", 7): [
505528
"\n \nUSER: What's the difference of two images?\nASSISTANT: The difference between the two images is that one of them has a dog standing on a field, while",
506529
"\nUSER: Describe the image.\nASSISTANT: The image features a brown and white dog sitting on a sidewalk. The dog is holding a small",
@@ -573,8 +596,16 @@ def test_generation_siglip_backbone(self):
573596
# Make sure that `generate` works
574597
output = model.generate(**inputs, max_new_tokens=30)
575598

576-
EXPECTED_DECODED_TEXT = "user\n\nWhat are these?\nassistant The image shows two cats, one on the left and one on the right. They appear to be resting or sleeping on a pink blanket. The cat"
577-
self.assertTrue(processor.batch_decode(output, skip_special_tokens=True)[0] == EXPECTED_DECODED_TEXT)
599+
EXPECTED_DECODED_TEXTS = Expectations(
600+
{
601+
("xpu", 3): "user\n\nWhat are these?\nassistant These are two cats, one with a green collar and the other with a black collar. They are lying on a pink blanket and appear to be sleeping",
602+
("cuda", None): "user\n\nWhat are these?\nassistant The image shows two cats, one on the left and one on the right. They appear to be resting or sleeping on a pink blanket. The cat",
603+
}
604+
) # fmt: skip
605+
EXPECTED_DECODED_TEXT = EXPECTED_DECODED_TEXTS.get_expectation()
606+
607+
decoded_text = processor.batch_decode(output, skip_special_tokens=True)[0]
608+
self.assertEqual(decoded_text, EXPECTED_DECODED_TEXT)
578609

579610
@slow
580611
def test_pixtral(self):

0 commit comments

Comments
 (0)