From c7503f424a65ae115fd4df9b441d563ccc9641df Mon Sep 17 00:00:00 2001
From: Xin Yang
Date: Mon, 8 May 2023 16:34:50 -0700
Subject: [PATCH] [tgi] Add more models to TGI test pipeline

---
 .github/workflows/hf_tgi_integration.yml | 42 ++++++++++++++++++++----
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/hf_tgi_integration.yml b/.github/workflows/hf_tgi_integration.yml
index 01faa3e600..2d107b2bd3 100644
--- a/.github/workflows/hf_tgi_integration.yml
+++ b/.github/workflows/hf_tgi_integration.yml
@@ -57,15 +57,45 @@ jobs:
         REGISTRY: ${{ steps.login-ecr.outputs.registry }}
       run: |
         HF_MODEL_ID=bigscience/bloom-560m && \
-        SM_NUM_GPUS=2 && \
-        TGI_VERSION=0.5.0 && \
-        docker run --gpus all --shm-size 1g -itd --rm -p 8080:8080 \
+        SM_NUM_GPUS=4 && \
+        TGI_VERSION=$TGI_VERSION && \
+        docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \
+        -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID -e MODEL_ID=$HF_MODEL_ID ${REGISTRY}/djl-serving:tgi-${TGI_VERSION}
+        sleep 30
+        ret=$(curl http://localhost:8080/invocations -X POST \
+        -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \
+        -H 'Content-Type: application/json')
+        [[ $ret != "[{\"generated_text\":\"What is Deep Learning?"* ]] && exit 1
+        docker rm -f $(docker ps -aq)
+    - name: Test gpt-neox-20b
+      env:
+        REGISTRY: ${{ steps.login-ecr.outputs.registry }}
+      run: |
+        HF_MODEL_ID=EleutherAI/gpt-neox-20b && \
+        SM_NUM_GPUS=4 && \
+        TGI_VERSION=$TGI_VERSION && \
+        docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \
         -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID ${REGISTRY}/djl-serving:tgi-${TGI_VERSION}
-        sleep 90
+        sleep 120
+        ret=$(curl http://localhost:8080/invocations -X POST \
+        -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \
+        -H 'Content-Type: application/json')
+        [[ $ret != "[{\"generated_text\":\"What is Deep Learning?"* ]] && exit 1
+        docker rm -f $(docker ps -aq)
+    - name: Test flan-t5-xxl
+      env:
+        REGISTRY: ${{ steps.login-ecr.outputs.registry }}
+      run: |
+        HF_MODEL_ID=google/flan-t5-xxl && \
+        SM_NUM_GPUS=4 && \
+        TGI_VERSION=$TGI_VERSION && \
+        docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \
+        -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID -e MODEL_ID=$HF_MODEL_ID ${REGISTRY}/djl-serving:tgi-${TGI_VERSION}
+        sleep 180
         ret=$(curl http://localhost:8080/invocations -X POST \
-        -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17}}' \
+        -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \
         -H 'Content-Type: application/json')
-        [[ $ret != '[{"generated_text":"What is Deep Learning? Deep learning is a set of computer algorithms that learn a set of data, in a"}]' ]] && exit 1
+        [[ $ret != "[{\"generated_text\""* ]] && exit 1
         docker rm -f $(docker ps -aq)
     - name: On fail step
       if: ${{ failure() }}
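For reference, a minimal sketch of the smoke test each new step performs, run by hand outside CI. The model ID, container flags, sleep time, prompt, and prefix check are taken from the bloom-560m step above; the IMAGE variable is an assumption (the workflow pulls ${REGISTRY}/djl-serving:tgi-${TGI_VERSION} from ECR), and the container ID is captured here instead of force-removing every container as the workflow step does.

#!/usr/bin/env bash
# Local sketch of the TGI smoke test (assumptions noted in comments).
set -euo pipefail

HF_MODEL_ID=bigscience/bloom-560m
SM_NUM_GPUS=4
# Assumed: point IMAGE at the djl-serving TGI image under test.
IMAGE=${IMAGE:?set IMAGE to the djl-serving TGI image, e.g. <registry>/djl-serving:tgi-<version>}

# Start the serving container detached on port 8080.
cid=$(docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \
  -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID -e MODEL_ID=$HF_MODEL_ID \
  "$IMAGE")

# Wait for the model to load; the workflow waits 30-180s depending on model size.
sleep 30

# Query the endpoint and require the echoed prompt as a prefix of generated_text.
ret=$(curl -s http://localhost:8080/invocations -X POST \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \
  -H 'Content-Type: application/json')
if [[ $ret != "[{\"generated_text\":\"What is Deep Learning?"* ]]; then
  echo "Unexpected response: $ret" >&2
  docker rm -f "$cid"
  exit 1
fi

docker rm -f "$cid"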