diff --git a/examples/vision/classification/paddleclas/serving/README.md b/examples/vision/classification/paddleclas/serving/README.md
index 606770e17c..59e5d2c053 100644
--- a/examples/vision/classification/paddleclas/serving/README.md
+++ b/examples/vision/classification/paddleclas/serving/README.md
@@ -21,12 +21,12 @@ mv ResNet50_vd_infer/inference.pdiparams models/runtime/1/model.pdiparams
 
 # Pull the fastdeploy image
 # GPU image
-docker pull paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10
+docker pull paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10
 # CPU image
-docker pull paddlepaddle/fastdeploy:0.3.0-cpu-only-21.10
+docker pull paddlepaddle/fastdeploy:0.6.0-cpu-only-21.10
 
 # Run the container. The container name is fd_serving, and the current directory is mounted as /serving inside the container
-nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/serving paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10 bash
+nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/serving paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10 bash
 
 # Start the service (if the CUDA_VISIBLE_DEVICES environment variable is not set, the server gets scheduling rights to all GPU cards)
 CUDA_VISIBLE_DEVICES=0 fastdeployserver --model-repository=/serving/models --backend-config=python,shm-default-byte-size=10485760
diff --git a/examples/vision/detection/yolov5/serving/README.md b/examples/vision/detection/yolov5/serving/README.md
index 521f9c4196..4f95df4411 100644
--- a/examples/vision/detection/yolov5/serving/README.md
+++ b/examples/vision/detection/yolov5/serving/README.md
@@ -14,13 +14,13 @@ wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s.onnx
 mv yolov5s.onnx models/runtime/1/model.onnx
 
 # Pull the fastdeploy image
-docker pull paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10
+docker pull paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10
 
 # Run the container. The container name is fd_serving, and the current directory is mounted as /yolov5_serving inside the container
-nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/yolov5_serving paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10 bash
+nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/yolov5_serving paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10 bash
 
 # Start the service (if the CUDA_VISIBLE_DEVICES environment variable is not set, the server gets scheduling rights to all GPU cards)
-CUDA_VISIBLE_DEVICES=0 fastdeployserver --model-repository=models --backend-config=python,shm-default-byte-size=10485760
+CUDA_VISIBLE_DEVICES=0 fastdeployserver --model-repository=/yolov5_serving/models --backend-config=python,shm-default-byte-size=10485760
 ```
 
 >> **Note**: If "Address already in use" is reported, start the service with `--grpc-port` to specify a different port, and change the request port in yolov5_grpc_client.py accordingly
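Since `fastdeployserver` is built on Triton Inference Server, the service started above can be smoke-tested from the host. A minimal sketch, assuming Triton's default HTTP port (8000) and the `--net=host` flag used in the READMEs; adjust the port if `--grpc-port`/`--http-port` were overridden:

```bash
# Query readiness, then list the models in the repository via the
# Triton HTTP API (assumes the default HTTP port 8000).
curl -sf localhost:8000/v2/health/ready && echo "server is ready"
curl -sf -X POST localhost:8000/v2/repository/index
```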
diff --git a/examples/vision/detection/yolov5/serving/README_EN.md b/examples/vision/detection/yolov5/serving/README_EN.md
index db110efabd..cc85355d89 100644
--- a/examples/vision/detection/yolov5/serving/README_EN.md
+++ b/examples/vision/detection/yolov5/serving/README_EN.md
@@ -10,10 +10,10 @@ wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s.onnx
 mv yolov5s.onnx models/infer/1/
 
 # Pull fastdeploy image
-docker pull paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10
+docker pull paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10
 
 # Run the container. The container name is fd_serving, and the current directory is mounted as the container's /yolov5_serving directory
-nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/yolov5_serving paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10 bash
+nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/yolov5_serving paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10 bash
 
 # Start the service (if the CUDA_VISIBLE_DEVICES environment variable is not set, the server has scheduling privileges for all GPU cards)
 CUDA_VISIBLE_DEVICES=0 fastdeployserver --model-repository=models --backend-config=python,shm-default-byte-size=10485760
diff --git a/serving/Dockerfile b/serving/Dockerfile
index eb27ca4eb6..cca94ebe93 100644
--- a/serving/Dockerfile
+++ b/serving/Dockerfile
@@ -33,7 +33,7 @@ RUN apt-get update \
     && apt-get update && apt-get install -y --no-install-recommends datacenter-gpu-manager=1:2.2.9
 
 RUN apt-get update \
-    && apt-get install -y --no-install-recommends libre2-5 libb64-0d python3 python3-pip libarchive-dev \
+    && apt-get install -y --no-install-recommends libre2-5 libb64-0d python3 python3-pip libarchive-dev ffmpeg libsm6 libxext6 \
     && python3 -m pip install -U pip \
     && python3 -m pip install paddlepaddle-gpu paddlenlp faster_tokenizer
diff --git a/serving/Dockerfile_cpu b/serving/Dockerfile_cpu
index e671f03a17..9e6d8b0ef4 100644
--- a/serving/Dockerfile_cpu
+++ b/serving/Dockerfile_cpu
@@ -17,7 +17,7 @@ FROM paddlepaddle/fastdeploy:22.09-cpu-only-min
 ENV TZ=Asia/Shanghai \
     DEBIAN_FRONTEND=noninteractive
 
-RUN apt-get update && apt-get install -y --no-install-recommends apt-utils libgomp1 \
+RUN apt-get update && apt-get install -y --no-install-recommends apt-utils libgomp1 ffmpeg libsm6 libxext6 \
     && python3 -m pip install -U pip \
     && python3 -m pip install paddlepaddle paddlenlp faster_tokenizer
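The `ffmpeg libsm6 libxext6` packages added to both Dockerfiles are the system libraries that `opencv-python` typically needs at import time in slim images. A quick sanity-check sketch, assuming the CPU image built from `serving/Dockerfile_cpu` ships `opencv-python` (pulled in by the fastdeploy wheel):

```bash
# Hypothetical check: cv2 imports successfully only when its native
# dependencies (libSM, libXext, ffmpeg's shared libraries) are present.
docker run --rm paddlepaddle/fastdeploy:0.6.0-cpu-only-21.10 \
    python3 -c "import cv2; print(cv2.__version__)"
```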
diff --git a/serving/README_CN.md b/serving/README_CN.md
index b26b4599fb..377698873a 100644
--- a/serving/README_CN.md
+++ b/serving/README_CN.md
@@ -17,13 +17,13 @@ FastDeploy is based on [Triton Inference Server](https://github.com/triton-inference-se
 #### CPU Image
 The CPU image only supports serving deployment of Paddle/ONNX models on CPU; supported inference backends are OpenVINO, Paddle Inference, and ONNX Runtime
 ``` shell
-docker pull paddlepaddle/fastdeploy:0.3.0-cpu-only-21.10
+docker pull paddlepaddle/fastdeploy:0.6.0-cpu-only-21.10
 ```
 
 #### GPU Image
 The GPU image supports serving deployment of Paddle/ONNX models on GPU/CPU; supported inference backends are OpenVINO, TensorRT, Paddle Inference, and ONNX Runtime
 ```
-docker pull paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10
+docker pull paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10
 ```
 
 Users can also build the image themselves according to their needs, referring to the following documents:
@@ -33,4 +33,12 @@ docker pull paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10
 - [Model repository layout](docs/zh_CN/model_repository.md) (how to prepare the model repository)
 - [Deployment configuration](docs/zh_CN/model_configuration.md) (runtime configuration options)
 - [Serving deployment demos](docs/zh_CN/demo.md)
-  - [YOLOV5 detection task](../examples/vision/detection/yolov5/serving/README.md)
+
+### Model Examples
+
+| Task | Model |
+|---|---|
+| Classification | [PaddleClas](../examples/vision/classification/paddleclas/serving/README.md) |
+| Detection | [ultralytics/YOLOv5](../examples/vision/detection/yolov5/serving/README.md) |
+| NLP | [PaddleNLP/ERNIE-3.0](../examples/text/ernie-3.0/serving/README.md) |
+| Speech | [PaddleSpeech/PP-TTS](../examples/audio/pp-tts/serving/README.md) |
diff --git a/serving/README_EN.md b/serving/README_EN.md
index b7bb028f5f..30bc405c0f 100644
--- a/serving/README_EN.md
+++ b/serving/README_EN.md
@@ -20,7 +20,7 @@ FastDeploy builds an end-to-end serving deployment based on [Triton Inference Se
 CPU images only support Paddle/ONNX models for serving deployment on CPUs; supported inference backends include OpenVINO, Paddle Inference, and ONNX Runtime
 ```shell
-docker pull paddlepaddle/fastdeploy:0.3.0-cpu-only-21.10
+docker pull paddlepaddle/fastdeploy:0.6.0-cpu-only-21.10
 ```
 
 #### GPU Image
@@ -28,7 +28,7 @@ docker pull paddlepaddle/fastdeploy:0.3.0-cpu-only-21.10
 GPU images support Paddle/ONNX models for serving deployment on GPU and CPU; supported inference backends include OpenVINO, TensorRT, Paddle Inference, and ONNX Runtime
 ```
-docker pull paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10
+docker pull paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10
 ```
 
 Users can also compile the image by themselves according to their own needs, referring to the following documents:
@@ -41,3 +41,12 @@ Users can also compile the image by themselves according to their own needs, ref
 - [Serving Deployment Configuration for Runtime](docs/zh_CN/model_configuration.md)
 - [Serving Deployment Demo](docs/zh_CN/demo.md)
 - [YOLOV5 - Detection Task](../examples/vision/detection/yolov5/serving/README.md)
+
+### Model List
+
+| Task | Model |
+|---|---|
+| Classification | [PaddleClas](../examples/vision/classification/paddleclas/serving/README.md) |
+| Detection | [ultralytics/YOLOv5](../examples/vision/detection/yolov5/serving/README.md) |
+| NLP | [PaddleNLP/ERNIE-3.0](../examples/text/ernie-3.0/serving/README.md) |
+| Speech | [PaddleSpeech/PP-TTS](../examples/audio/pp-tts/serving/README.md) |
diff --git a/serving/docs/EN/compile-en.md b/serving/docs/EN/compile-en.md
index fd72d76734..32476c19a8 100644
--- a/serving/docs/EN/compile-en.md
+++ b/serving/docs/EN/compile-en.md
@@ -13,7 +13,7 @@ bash scripts/build.sh
 
 # Exit to the FastDeploy home directory and create the image
 cd ../
-docker build -t paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile .
+docker build -t paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile .
 ```
 
 ## CPU Image
@@ -26,7 +26,7 @@ bash scripts/build.sh OFF
 
 # Exit to the FastDeploy home directory and create the image
 cd ../
-docker build -t paddlepaddle/fastdeploy:0.3.0-cpu-only-21.10 -f serving/Dockerfile_cpu .
+docker build -t paddlepaddle/fastdeploy:0.6.0-cpu-only-21.10 -f serving/Dockerfile_cpu .
 ```
 
 ## IPU Image
@@ -38,5 +38,5 @@ bash scripts/build_fd_ipu.sh
 
 # Exit to the FastDeploy home directory and create the image
 cd ../
-docker build -t paddlepaddle/fastdeploy:0.3.0-ipu-only-21.10 -f serving/Dockerfile_ipu .
+docker build -t paddlepaddle/fastdeploy:0.6.0-ipu-only-21.10 -f serving/Dockerfile_ipu .
 ```
diff --git a/serving/docs/zh_CN/compile.md b/serving/docs/zh_CN/compile.md
index 21a9c2ab43..ebbb25da44 100644
--- a/serving/docs/zh_CN/compile.md
+++ b/serving/docs/zh_CN/compile.md
@@ -13,7 +13,7 @@ bash scripts/build.sh
 
 # Exit to the FastDeploy home directory and build the image
 cd ../
-docker build -t paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile .
+docker build -t paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile .
 ```
 
 ## Build the CPU Image
@@ -25,7 +25,7 @@ bash scripts/build.sh OFF
 
 # Exit to the FastDeploy home directory and build the image
 cd ../
-docker build -t paddlepaddle/fastdeploy:0.3.0-cpu-only-21.10 -f serving/Dockerfile_cpu .
+docker build -t paddlepaddle/fastdeploy:0.6.0-cpu-only-21.10 -f serving/Dockerfile_cpu .
 ```
 
 ## Build the IPU Image
@@ -37,5 +37,5 @@ bash scripts/build_fd_ipu.sh
 
 # Exit to the FastDeploy home directory and build the image
 cd ../
-docker build -t paddlepaddle/fastdeploy:0.3.0-ipu-only-21.10 -f serving/Dockerfile_ipu .
+docker build -t paddlepaddle/fastdeploy:0.6.0-ipu-only-21.10 -f serving/Dockerfile_ipu .
 ```
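The compile docs stop at `docker build`; a small verification sketch, assuming the tags used above and that the server binary is installed on the image's PATH:

```bash
# List the freshly built images, then confirm the server binary exists
# inside one of them (CPU tag used here as an example).
docker images paddlepaddle/fastdeploy
docker run --rm paddlepaddle/fastdeploy:0.6.0-cpu-only-21.10 which fastdeployserver
```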
diff --git a/serving/scripts/build.sh b/serving/scripts/build.sh
index e09af35d80..465e3f03af 100644
--- a/serving/scripts/build.sh
+++ b/serving/scripts/build.sh
@@ -30,7 +30,7 @@ if [ ! -d "./TensorRT-8.4.1.5/" ]; then
     rm -rf TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
 fi
 
-nvidia-docker run -it --rm --name build_fd_vison \
+nvidia-docker run -it --rm --name build_fd \
     -v`pwd`/..:/workspace/fastdeploy \
     nvcr.io/nvidia/tritonserver:21.10-py3-min \
     bash -c \
@@ -50,7 +50,8 @@ nvidia-docker run -it --rm --name build_fd_vison \
     export ENABLE_TEXT=ON;
     python setup.py build;
    python setup.py bdist_wheel;
-    cd ../;rm -rf build; mkdir -p build;cd build;
+    cd /workspace/fastdeploy;
+    rm -rf build; mkdir -p build;cd build;
     cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
     make -j`nproc`;
     make install;
@@ -65,7 +66,7 @@ else
 
 echo "start build FD CPU library"
 
-docker run -it --rm --name build_fd_vison \
+docker run -it --rm --name build_fd \
     -v`pwd`/..:/workspace/fastdeploy \
     paddlepaddle/fastdeploy:21.10-cpu-only-buildbase \
     bash -c \
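For context, the renamed `build_fd` container is the throwaway build container created by `scripts/build.sh`, which the compile docs above invoke as follows; a condensed recap of the documented flow, not part of the patch:

```bash
# From the FastDeploy repository root: build the runtime, then the image.
cd serving
bash scripts/build.sh          # GPU build (default)
# bash scripts/build.sh OFF    # CPU-only variant
cd ../
docker build -t paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile .
```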