Skip to content

Commit

Permalink
add seq2seq streaming integ test (#724)
Browse files: browse the repository at this point in the history
  • Loading branch information
rohithkrn authored May 16, 2023
1 parent eb0d5f0 commit 737bb94
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 1 deletion.
11 changes: 11 additions & 0 deletions .github/workflows/llm_integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,17 @@ jobs:
python3 llm/client.py huggingface bigscience/bloom-3b
rm -rf docker_env
docker rm -f $(docker ps -aq)
- name: Test streaming t5-large
working-directory: tests/integration
run: |
rm -rf models
echo -en "CUDA_VISIBLE_DEVICES=1" > docker_env
python3 llm/prepare.py huggingface t5-large
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
serve
python3 llm/client.py huggingface t5-large
rm -rf docker_env
docker rm -f $(docker ps -aq)
- name: On fail step
if: ${{ failure() }}
working-directory: tests/integration
Expand Down
12 changes: 11 additions & 1 deletion tests/integration/llm/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,13 @@ def compute_model_name_hash(model_name):
"worker": 1,
"stream_output": True,
},
"t5-large": {
"max_memory_per_gpu": [5.0],
"batch_size": [1],
"seq_length": [32],
"worker": 1,
"stream_output": True,
},
"no-code/nomic-ai/gpt4all-j": {
"max_memory_per_gpu": [10.0, 12.0],
"batch_size": [1, 4],
Expand Down Expand Up @@ -456,7 +463,10 @@ def test_handler(model, model_spec):
model_name=spec.get("model_name", "test"))
for i, batch_size in enumerate(spec["batch_size"]):
for seq_length in spec["seq_length"]:
req = {"inputs": batch_generation(batch_size)}
if "t5" in model:
req = {"inputs": t5_batch_generation(batch_size)}
else:
req = {"inputs": batch_generation(batch_size)}
params = {"max_new_tokens": seq_length}
req["parameters"] = params
logging.info(f"req {req}")
Expand Down
6 changes: 6 additions & 0 deletions tests/integration/llm/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@
"option.enable_streaming": True,
"gpu.maxWorkers": 1,
},
"t5-large": {
"option.model_id": "t5-large",
"option.tensor_parallel_degree": 1,
"option.device_map": "auto",
"option.enable_streaming": True,
},
}

ds_handler_list = {
Expand Down

0 comments on commit 737bb94

Please sign in to comment.