|
1 |
| -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. |
| 1 | +# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. |
2 | 2 | #
|
3 | 3 | # Redistribution and use in source and binary forms, with or without
|
4 | 4 | # modification, are permitted provided that the following conditions
|
@@ -619,3 +619,64 @@ def test_chat_completions_invalid_chat_tokenizer(
|
619 | 619 | assert any(
|
620 | 620 | error in response.json()["detail"].lower() for error in expected_errors
|
621 | 621 | )
|
| 622 | + |
| 623 | + |
class TestMultipleTokenizers:
    """Tests for serving multiple models, each with its own HF tokenizer.

    Uses a dedicated tiny-model repository and a single class-scoped Triton
    server shared across the frontend configurations under test.
    """

    @pytest.fixture(scope="class")
    def model_repository(self):
        # Custom model repository for these specific tests
        return str(Path(__file__).parent / "vllm_tiny_models")

    # Re-use a single Triton server for different frontend configurations
    @pytest.fixture(scope="class")
    def server(self, model_repository: str):
        server = setup_server(model_repository)
        yield server
        server.stop()

    @pytest.fixture(scope="class")
    def models(self):
        # Names must match the model directories in model_repository above.
        return ["tiny_llama", "phi-4"]

    def test_chat_completions_multiple_tokenizers(
        self,
        server: tritonserver.Server,
        models: List[str],
        messages: List[dict],
    ):
        """Each model resolves its own tokenizer and answers a chat request."""
        # FIX: the original set literal was missing a comma between the two
        # entries, so implicit string concatenation collapsed them into one
        # malformed "model:tokenizer" mapping. Each entry must be a separate
        # "<model_name>:<hf_tokenizer_id>" string.
        app = setup_fastapi_app(
            tokenizer={
                "tiny_llama:TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                "phi-4:microsoft/Phi-4-mini-instruct",
            },
            server=server,
            backend="vllm",
        )
        for model in models:
            with TestClient(app) as client:
                response = client.post(
                    "/v1/chat/completions",
                    json={"model": model, "messages": messages},
                )

                assert response.status_code == 200
                message = response.json()["choices"][0]["message"]
                # Non-empty assistant reply proves the per-model tokenizer
                # was applied successfully.
                assert message["content"].strip()
                assert message["role"] == "assistant"

    def test_chat_completions_unknown_tokenizers(
        self,
        server: tritonserver.Server,
        models: List[str],
        messages: List[dict],
    ):
        """With no tokenizer mapping configured, requests are rejected."""
        app = setup_fastapi_app(tokenizer="", server=server, backend="vllm")
        for model in models:
            with TestClient(app) as client:
                response = client.post(
                    "/v1/chat/completions",
                    json={"model": model, "messages": messages},
                )

                assert response.status_code == 400
                assert response.json()["detail"] == "Unknown tokenizer"
0 commit comments