# Update TGI image versions #1749

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Status: Open. The author wants to merge 12 commits into `main`.

Summary of the bumps across the diff: `text-generation-inference` ROCm images move from `2.3.1-rocm` (one compose file referenced `3.0.0-rocm`) to `2.4.1-rocm`, Intel CPU images move from `2.4.0-intel-cpu` to `2.4.1-intel-cpu` (one test script moves from `1.4` to `2.4.1`), and the Gaudi image moves from `tgi-gaudi:2.0.6` to `tgi-gaudi:2.3.1`.
4 changes: 2 additions & 2 deletions AgentQnA/docker_compose/amd/gpu/rocm/README.md
@@ -64,7 +64,7 @@ We remind you that when using a specific version of the code, you need to use th
- #### Optional. Pull TGI Docker Image (Do this if you want to use TGI)

```bash
-docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
```
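If the pull succeeds, the bumped tag should show up locally. A quick sanity check (a sketch, assuming a local Docker daemon):

```bash
# List local TGI images; the 2.4.1-rocm tag should appear after the pull
docker images ghcr.io/huggingface/text-generation-inference
```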

- #### Build Docker Images
@@ -110,7 +110,7 @@ We remind you that when using a specific version of the code, you need to use th

##### TGI-based application:

-- ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+- ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
- opea/agent:latest
- redis/redis-stack:7.2.0-v9
- ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
2 changes: 1 addition & 1 deletion AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml
@@ -2,7 +2,7 @@

services:
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:3.0.0-rocm
+image: ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
container_name: tgi-service
ports:
- "${TGI_SERVICE_PORT-8085}:80"
2 changes: 1 addition & 1 deletion AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
@@ -25,7 +25,7 @@ services:
https_proxy: ${https_proxy}
restart: unless-stopped
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
container_name: tgi-service
ports:
- ${LLM_SERVER_PORT:-3006}:80
2 changes: 1 addition & 1 deletion AvatarChatbot/docker_compose/intel/cpu/xeon/README.md
@@ -19,7 +19,7 @@ docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build

### 3. Build LLM Image

-Intel Xeon optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu (https://github.com/huggingface/text-generation-inference)
+Intel Xeon optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu (https://github.com/huggingface/text-generation-inference)
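For reference, the image named above can be pre-pulled so compose startup does not block on the download (same tag the test script pulls):

```bash
docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
```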

### 4. Build TTS Image

2 changes: 1 addition & 1 deletion AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
@@ -26,7 +26,7 @@ services:
https_proxy: ${https_proxy}
restart: unless-stopped
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
container_name: tgi-service
ports:
- "3006:80"
2 changes: 1 addition & 1 deletion AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md
@@ -19,7 +19,7 @@ docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy -

### 3. Build LLM Image

-Intel Gaudi optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/tgi-gaudi:2.0.6 (https://github.com/huggingface/tgi-gaudi)
+Intel Gaudi optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/tgi-gaudi:2.3.1 (https://github.com/huggingface/tgi-gaudi)
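As above, the Gaudi image can be pre-pulled (same tag the Gaudi test script pulls):

```bash
docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
```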

### 4. Build TTS Image

@@ -38,7 +38,7 @@ services:
- SYS_NICE
restart: unless-stopped
tgi-service:
-image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+image: ghcr.io/huggingface/tgi-gaudi:2.3.1
container_name: tgi-gaudi-server
ports:
- "3006:80"
2 changes: 1 addition & 1 deletion AvatarChatbot/tests/test_compose_on_gaudi.sh
@@ -36,7 +36,7 @@ function build_docker_images() {
service_list="avatarchatbot whisper-gaudi speecht5-gaudi wav2lip-gaudi animation"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
+docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1

docker images && sleep 1s
}
2 changes: 1 addition & 1 deletion AvatarChatbot/tests/test_compose_on_rocm.sh
@@ -34,7 +34,7 @@ function build_docker_images() {
service_list="avatarchatbot whisper asr speecht5 tts wav2lip animation"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm

docker images && sleep 3s
}
2 changes: 1 addition & 1 deletion AvatarChatbot/tests/test_compose_on_xeon.sh
@@ -36,7 +36,7 @@ function build_docker_images() {
service_list="avatarchatbot whisper speecht5 wav2lip animation"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu

docker images && sleep 1s
}
4 changes: 2 additions & 2 deletions ChatQnA/docker_compose/amd/gpu/rocm/README.md
@@ -165,7 +165,7 @@ eaf24161aca8 opea/nginx:latest "/docker-
05512bd29fee opea/dataprep:latest "sh -c 'python $( [ …" 37 seconds ago Up 36 seconds (healthy) 0.0.0.0:18103->5000/tcp, [::]:18103->5000/tcp chatqna-dataprep-service
49844d339d1d opea/retriever:latest "python opea_retriev…" 37 seconds ago Up 36 seconds 0.0.0.0:7000->7000/tcp, [::]:7000->7000/tcp chatqna-retriever
75b698fe7de0 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 37 seconds ago Up 36 seconds 0.0.0.0:18808->80/tcp, [::]:18808->80/tcp chatqna-tei-reranking-service
-342f01bfdbb2 ghcr.io/huggingface/text-generation-inference:2.3.1-rocm "python3 /workspace/…" 37 seconds ago Up 36 seconds 0.0.0.0:18008->8011/tcp, [::]:18008->8011/tcp chatqna-tgi-service
+342f01bfdbb2 ghcr.io/huggingface/text-generation-inference:2.4.1-rocm "python3 /workspace/…" 37 seconds ago Up 36 seconds 0.0.0.0:18008->8011/tcp, [::]:18008->8011/tcp chatqna-tgi-service
6081eb1c119d redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 37 seconds ago Up 36 seconds 0.0.0.0:6379->6379/tcp, [::]:6379->6379/tcp, 0.0.0.0:8001->8001/tcp, [::]:8001->8001/tcp chatqna-redis-vector-db
eded17420782 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 37 seconds ago Up 36 seconds 0.0.0.0:18090->80/tcp, [::]:18090->80/tcp chatqna-tei-embedding-service
```
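To confirm the running service picked up the bumped tag, the image of the TGI container can be printed directly (a sketch, assuming the container name from this compose file):

```bash
# Prints e.g. ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
docker ps --filter "name=chatqna-tgi-service" --format '{{.Image}}'
```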
@@ -181,7 +181,7 @@ e0ef1ea67640 opea/llm-faqgen:latest "bash ent
05512bd29fee opea/dataprep:latest "sh -c 'python $( [ …" 37 seconds ago Up 36 seconds (healthy) 0.0.0.0:18103->5000/tcp, [::]:18103->5000/tcp chatqna-dataprep-service
49844d339d1d opea/retriever:latest "python opea_retriev…" 37 seconds ago Up 36 seconds 0.0.0.0:7000->7000/tcp, [::]:7000->7000/tcp chatqna-retriever
75b698fe7de0 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 37 seconds ago Up 36 seconds 0.0.0.0:18808->80/tcp, [::]:18808->80/tcp chatqna-tei-reranking-service
-342f01bfdbb2 ghcr.io/huggingface/text-generation-inference:2.3.1-rocm "python3 /workspace/…" 37 seconds ago Up 36 seconds 0.0.0.0:18008->8011/tcp, [::]:18008->8011/tcp chatqna-tgi-service
+342f01bfdbb2 ghcr.io/huggingface/text-generation-inference:2.4.1-rocm "python3 /workspace/…" 37 seconds ago Up 36 seconds 0.0.0.0:18008->8011/tcp, [::]:18008->8011/tcp chatqna-tgi-service
6081eb1c119d redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 37 seconds ago Up 36 seconds 0.0.0.0:6379->6379/tcp, [::]:6379->6379/tcp, 0.0.0.0:8001->8001/tcp, [::]:8001->8001/tcp chatqna-redis-vector-db
eded17420782 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 37 seconds ago Up 36 seconds 0.0.0.0:18090->80/tcp, [::]:18090->80/tcp chatqna-tei-embedding-service
```
2 changes: 1 addition & 1 deletion ChatQnA/docker_compose/amd/gpu/rocm/compose.yaml
@@ -85,7 +85,7 @@ services:
command: --model-id ${CHATQNA_RERANK_MODEL_ID} --auto-truncate

chatqna-tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+image: ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
container_name: chatqna-tgi-service
ports:
- "${CHATQNA_TGI_SERVICE_PORT}:80"
2 changes: 1 addition & 1 deletion ChatQnA/docker_compose/amd/gpu/rocm/compose_faqgen.yaml
@@ -85,7 +85,7 @@ services:
command: --model-id ${CHATQNA_RERANK_MODEL_ID} --auto-truncate

chatqna-tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+image: ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
container_name: chatqna-tgi-service
ports:
- "${CHATQNA_TGI_SERVICE_PORT}:80"
@@ -81,7 +81,7 @@ services:
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
container_name: tgi-server
ports:
- ${LLM_ENDPOINT_PORT:-9009}:80
2 changes: 1 addition & 1 deletion ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml
@@ -81,7 +81,7 @@ services:
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
container_name: tgi-service
ports:
- "9009:80"
2 changes: 1 addition & 1 deletion ChatQnA/kubernetes/gmc/README.md
@@ -18,7 +18,7 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
- tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
- retriever: opea/retriever:latest
- tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
-- tgi-service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+- tgi-service: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
- chaqna-xeon-backend-server: opea/chatqna:latest

Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
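Once the pipeline is deployed, the images actually in use can be listed to verify the bump (a sketch, assuming kubectl access to the target namespace):

```bash
# One line per pod: pod name, then the images of its containers
kubectl get pods -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[*].image}{"\n"}{end}'
```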
2 changes: 1 addition & 1 deletion CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
@@ -4,7 +4,7 @@
services:

tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
container_name: tgi-server
profiles:
- codegen-xeon-tgi
2 changes: 1 addition & 1 deletion CodeGen/tests/test_compose_on_rocm.sh
@@ -29,7 +29,7 @@ function build_docker_images() {
service_list="codegen codegen-ui llm-textgen"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
docker images && sleep 1s
}

2 changes: 1 addition & 1 deletion CodeGen/tests/test_compose_on_xeon.sh
@@ -46,7 +46,7 @@ function build_docker_images() {

docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
docker images && sleep 1s
}

2 changes: 1 addition & 1 deletion CodeTrans/docker_compose/amd/gpu/rocm/README.md
@@ -150,7 +150,7 @@ eaf24161aca8 opea/nginx:latest "/docker-
2fce48a4c0f4 opea/codetrans-ui:latest "docker-entrypoint.s…" 37 seconds ago Up 5 seconds 0.0.0.0:18101->5173/tcp, [::]:18101->5173/tcp codetrans-ui-server
613c384979f4 opea/codetrans:latest "bash entrypoint.sh" 37 seconds ago Up 5 seconds 0.0.0.0:18102->8888/tcp, [::]:18102->8888/tcp codetrans-backend-server
e0ef1ea67640 opea/llm-textgen:latest "bash entrypoint.sh" 37 seconds ago Up 36 seconds 0.0.0.0:18011->9000/tcp, [::]:18011->9000/tcp codetrans-llm-server
-342f01bfdbb2 ghcr.io/huggingface/text-generation-inference:2.3.1-rocm "python3 /workspace/…" 37 seconds ago Up 36 seconds 0.0.0.0:18008->8011/tcp, [::]:18008->8011/tcp codetrans-tgi-service
+342f01bfdbb2 ghcr.io/huggingface/text-generation-inference:2.4.1-rocm "python3 /workspace/…" 37 seconds ago Up 36 seconds 0.0.0.0:18008->8011/tcp, [::]:18008->8011/tcp codetrans-tgi-service
```

If vLLM is used:
2 changes: 1 addition & 1 deletion CodeTrans/docker_compose/intel/cpu/xeon/compose_tgi.yaml
@@ -3,7 +3,7 @@

services:
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
container_name: codetrans-xeon-tgi-service
ports:
- "8008:80"
@@ -3,7 +3,7 @@

services:
tgi-service:
-image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+image: ghcr.io/huggingface/tgi-gaudi:2.3.1
container_name: codetrans-gaudi-tgi-service
ports:
- "8008:80"
2 changes: 1 addition & 1 deletion CodeTrans/tests/test_compose_tgi_on_gaudi.sh
@@ -29,7 +29,7 @@ function build_docker_images() {
service_list="codetrans codetrans-ui llm-textgen nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
+docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
docker images && sleep 1s
}

2 changes: 1 addition & 1 deletion CodeTrans/tests/test_compose_tgi_on_xeon.sh
@@ -29,7 +29,7 @@ function build_docker_images() {
service_list="codetrans codetrans-ui llm-textgen nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
docker images && sleep 1s
}

2 changes: 1 addition & 1 deletion DBQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@

services:
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
container_name: tgi-service
ports:
- "8008:80"
2 changes: 1 addition & 1 deletion DBQnA/tests/test_compose_on_xeon.sh
@@ -23,7 +23,7 @@ function build_docker_images() {
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log

-docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
docker images && sleep 1s
}

2 changes: 1 addition & 1 deletion DocSum/docker_compose/intel/cpu/xeon/compose_tgi.yaml
@@ -3,7 +3,7 @@

services:
tgi-server:
-image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
container_name: docsum-xeon-tgi-server
ports:
- ${LLM_ENDPOINT_PORT:-8008}:80
2 changes: 1 addition & 1 deletion DocSum/kubernetes/gmc/README.md
@@ -8,7 +8,7 @@ Install GMC in your Kubernetes cluster, if you have not already done so, by foll
The DocSum application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not it starts them and then proceeds to connect them. When the DocSum RAG pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular embedding, retriever, rerank, and llm.

The DocSum pipeline uses prebuilt images. The Xeon version uses the prebuilt image `llm-docsum-tgi:latest` which internally leverages the
-the image `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
+the image `ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
service tgi-gaudi-svc, which uses the image `ghcr.io/huggingface/tgi-gaudi:2.3.1`. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use `Intel/neural-chat-7b-v3-3`.
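Both TGI services read the model from the exported variable, so the example model named above would be set like this (a minimal sketch):

```bash
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
```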

[NOTE]
2 changes: 1 addition & 1 deletion DocSum/tests/test_compose_on_rocm.sh
@@ -30,7 +30,7 @@ function build_docker_images() {
service_list="docsum docsum-gradio-ui whisper llm-docsum"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
docker images && sleep 3s
}

2 changes: 1 addition & 1 deletion DocSum/tests/test_compose_tgi_on_xeon.sh
@@ -39,7 +39,7 @@ function build_docker_images() {
service_list="docsum docsum-gradio-ui whisper llm-docsum"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-docker pull ghcr.io/huggingface/text-generation-inference:1.4
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1
docker images && sleep 1s
}

2 changes: 1 addition & 1 deletion DocSum/tests/test_compose_vllm_on_rocm.sh
@@ -30,7 +30,7 @@ function build_docker_images() {
service_list="docsum docsum-gradio-ui whisper llm-docsum vllm-rocm"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
docker images && sleep 3s
}

2 changes: 1 addition & 1 deletion MultimodalQnA/docker_compose/amd/gpu/rocm/README.md
@@ -77,7 +77,7 @@ After launching your instance, you can connect to it using SSH (for Linux instan
- #### Optional. Pull TGI Docker Image (Do this if you want to use TGI)

```bash
-docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
```

- #### Build Docker Images
@@ -100,7 +100,7 @@ services:
timeout: 10s
retries: 60
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
container_name: tgi-service
ports:
- "9009:80"
@@ -156,7 +156,7 @@ services:
ipc: host
restart: always
tgi_service_codegen:
-image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
container_name: tgi_service_codegen
ports:
- "8028:80"
2 changes: 1 addition & 1 deletion ProductivitySuite/tests/test_compose_on_xeon.sh
@@ -23,7 +23,7 @@ function build_docker_images() {
docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
-docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
docker images && sleep 1s
}

2 changes: 1 addition & 1 deletion SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -91,7 +91,7 @@ services:
LOGFLAG: ${LOGFLAG}
restart: unless-stopped
tgi-service:
-image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
container_name: tgi-service
ports:
- "3006:80"
2 changes: 1 addition & 1 deletion SearchQnA/tests/test_compose_on_xeon.sh
@@ -36,7 +36,7 @@ function build_docker_images() {
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
-docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
docker images && sleep 1s
}
