From c7503f424a65ae115fd4df9b441d563ccc9641df Mon Sep 17 00:00:00 2001
From: Xin Yang
Date: Mon, 8 May 2023 16:34:50 -0700
Subject: [PATCH] [tgi] Add more models to TGI test pipeline

---
 .github/workflows/hf_tgi_integration.yml | 42 ++++++++++++++++++++----
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/hf_tgi_integration.yml b/.github/workflows/hf_tgi_integration.yml
index 01faa3e600..2d107b2bd3 100644
--- a/.github/workflows/hf_tgi_integration.yml
+++ b/.github/workflows/hf_tgi_integration.yml
@@ -57,15 +57,45 @@ jobs:
         REGISTRY: ${{ steps.login-ecr.outputs.registry }}
       run: |
         HF_MODEL_ID=bigscience/bloom-560m && \
-        SM_NUM_GPUS=2 && \
-        TGI_VERSION=0.5.0 && \
-        docker run --gpus all --shm-size 1g -itd --rm -p 8080:8080 \
+        SM_NUM_GPUS=4 && \
+        TGI_VERSION=$TGI_VERSION && \
+        docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \
+        -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID -e MODEL_ID=$HF_MODEL_ID ${REGISTRY}/djl-serving:tgi-${TGI_VERSION}
+        sleep 30
+        ret=$(curl http://localhost:8080/invocations -X POST \
+        -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \
+        -H 'Content-Type: application/json')
+        [[ $ret != "[{\"generated_text\":\"What is Deep Learning?"* ]] && exit 1
+        docker rm -f $(docker ps -aq)
+    - name: Test gpt-neox-20b
+      env:
+        REGISTRY: ${{ steps.login-ecr.outputs.registry }}
+      run: |
+        HF_MODEL_ID=EleutherAI/gpt-neox-20b && \
+        SM_NUM_GPUS=4 && \
+        TGI_VERSION=$TGI_VERSION && \
+        docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \
         -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID ${REGISTRY}/djl-serving:tgi-${TGI_VERSION}
-        sleep 90
+        sleep 120
+        ret=$(curl http://localhost:8080/invocations -X POST \
+        -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \
+        -H 'Content-Type: application/json')
+        [[ $ret != "[{\"generated_text\":\"What is Deep Learning?"* ]] && exit 1
+        docker rm -f $(docker ps -aq)
+    - name: Test flan-t5-xxl
+      env:
+        REGISTRY: ${{ steps.login-ecr.outputs.registry }}
+      run: |
+        HF_MODEL_ID=google/flan-t5-xxl && \
+        SM_NUM_GPUS=4 && \
+        TGI_VERSION=$TGI_VERSION && \
+        docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \
+        -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID -e MODEL_ID=$HF_MODEL_ID ${REGISTRY}/djl-serving:tgi-${TGI_VERSION}
+        sleep 180
         ret=$(curl http://localhost:8080/invocations -X POST \
-        -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17}}' \
+        -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \
         -H 'Content-Type: application/json')
-        [[ $ret != '[{"generated_text":"What is Deep Learning? Deep learning is a set of computer algorithms that learn a set of data, in a"}]' ]] && exit 1
+        [[ $ret != "[{\"generated_text\""* ]] && exit 1
         docker rm -f $(docker ps -aq)
     - name: On fail step
       if: ${{ failure() }}
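For reference, a minimal sketch of the smoke test each new step performs, run by hand outside CI. The model ID, container flags, sleep time, prompt, and prefix check are taken from the bloom-560m step above; the IMAGE variable is an assumption (the workflow pulls ${REGISTRY}/djl-serving:tgi-${TGI_VERSION} from ECR), and the container ID is captured here instead of force-removing every container as the workflow step does.

#!/usr/bin/env bash
# Local sketch of the TGI smoke test (assumptions noted in comments).
set -euo pipefail

HF_MODEL_ID=bigscience/bloom-560m
SM_NUM_GPUS=4
# Assumed: point IMAGE at the djl-serving TGI image under test.
IMAGE=${IMAGE:?set IMAGE to the djl-serving TGI image, e.g. <registry>/djl-serving:tgi-<version>}

# Start the serving container detached on port 8080.
cid=$(docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \
  -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID -e MODEL_ID=$HF_MODEL_ID \
  "$IMAGE")

# Wait for the model to load; the workflow waits 30-180s depending on model size.
sleep 30

# Query the endpoint and require the echoed prompt as a prefix of generated_text.
ret=$(curl -s http://localhost:8080/invocations -X POST \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \
  -H 'Content-Type: application/json')
if [[ $ret != "[{\"generated_text\":\"What is Deep Learning?"* ]]; then
  echo "Unexpected response: $ret" >&2
  docker rm -f "$cid"
  exit 1
fi

docker rm -f "$cid"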