diff --git a/.github/workflows/llm_integration.yml b/.github/workflows/llm_integration.yml
index 72ee854bf..3068d2c6c 100644
--- a/.github/workflows/llm_integration.yml
+++ b/.github/workflows/llm_integration.yml
@@ -166,6 +166,17 @@ jobs:
           python3 llm/client.py huggingface bigscience/bloom-3b
           rm -rf docker_env
           docker rm -f $(docker ps -aq)
+      - name: Test streaming t5-large
+        working-directory: tests/integration
+        run: |
+          rm -rf models
+          echo -en "CUDA_VISIBLE_DEVICES=1" > docker_env
+          python3 llm/prepare.py huggingface t5-large
+          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
+          serve
+          python3 llm/client.py huggingface t5-large
+          rm -rf docker_env
+          docker rm -f $(docker ps -aq)
       - name: On fail step
         if: ${{ failure() }}
         working-directory: tests/integration
diff --git a/tests/integration/llm/client.py b/tests/integration/llm/client.py
index 50a1dd90d..09acab5ac 100644
--- a/tests/integration/llm/client.py
+++ b/tests/integration/llm/client.py
@@ -106,6 +106,13 @@ def compute_model_name_hash(model_name):
         "worker": 1,
         "stream_output": True,
     },
+    "t5-large": {
+        "max_memory_per_gpu": [5.0],
+        "batch_size": [1],
+        "seq_length": [32],
+        "worker": 1,
+        "stream_output": True,
+    },
     "no-code/nomic-ai/gpt4all-j": {
         "max_memory_per_gpu": [10.0, 12.0],
         "batch_size": [1, 4],
@@ -456,7 +463,10 @@ def test_handler(model, model_spec):
                               model_name=spec.get("model_name", "test"))
     for i, batch_size in enumerate(spec["batch_size"]):
         for seq_length in spec["seq_length"]:
-            req = {"inputs": batch_generation(batch_size)}
+            if "t5" in model:
+                req = {"inputs": t5_batch_generation(batch_size)}
+            else:
+                req = {"inputs": batch_generation(batch_size)}
             params = {"max_new_tokens": seq_length}
             req["parameters"] = params
             logging.info(f"req {req}")
diff --git a/tests/integration/llm/prepare.py b/tests/integration/llm/prepare.py
index 312934959..341af4609 100644
--- a/tests/integration/llm/prepare.py
+++ b/tests/integration/llm/prepare.py
@@ -114,6 +114,12 @@
         "option.enable_streaming": True,
         "gpu.maxWorkers": 1,
     },
+    "t5-large": {
+        "option.model_id": "t5-large",
+        "option.tensor_parallel_degree": 1,
+        "option.device_map": "auto",
+        "option.enable_streaming": True,
+    },
 }
 
 ds_handler_list = {
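Note: the client.py hunk routes t5 models through `t5_batch_generation`, a helper this diff does not show (it is presumably added elsewhere in the PR). A minimal sketch of what such a helper could look like, assuming T5-style task-prefixed prompts; the prompt strings and the body are illustrative, not the actual test inputs:

```python
import math


def t5_batch_generation(batch_size):
    # Hypothetical sketch: unlike the open-ended prompts returned by
    # batch_generation, T5 checkpoints expect task-prefixed inputs,
    # so a t5-specific helper would return prompts of that form.
    input_sentences = [
        "translate English to German: The house is wonderful.",
        "summarize: Amazon SageMaker is a service to train and deploy machine learning models.",
    ]
    if batch_size > len(input_sentences):
        # Repeat the base prompts until the requested batch size is covered.
        input_sentences *= math.ceil(batch_size / len(input_sentences))
    return input_sentences[:batch_size]
```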
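With the t5-large spec above (batch_size [1], seq_length [32]), test_handler would send a request shaped like the following. The endpoint URL is an assumption based on DJL Serving's default prediction API, not something this diff shows:

```python
import requests

# Illustrative payload: one t5-style prompt, capped at 32 new tokens.
# The server streams tokens back because option.enable_streaming is set.
req = {
    "inputs": ["translate English to German: The house is wonderful."],
    "parameters": {"max_new_tokens": 32},
}
res = requests.post("http://127.0.0.1:8080/predictions/test", json=req)
print(res.text)
```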
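For context on the prepare.py entry: each key/value pair in that table is ultimately materialized as one line of the model's serving.properties file, which DJL Serving reads at load time. A rough sketch of that serialization, with `write_properties` as a hypothetical helper name:

```python
import os


def write_properties(model_dir, properties):
    # Hypothetical helper: write each configuration entry as a
    # "key=value" line of serving.properties in the model directory.
    os.makedirs(model_dir, exist_ok=True)
    with open(os.path.join(model_dir, "serving.properties"), "w") as f:
        for key, value in properties.items():
            f.write(f"{key}={value}\n")


write_properties(
    "models/test",
    {
        "option.model_id": "t5-large",
        "option.tensor_parallel_degree": 1,
        "option.device_map": "auto",
        "option.enable_streaming": True,
    },
)
```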