diff --git a/examples/vision/classification/paddleclas/serving/README.md b/examples/vision/classification/paddleclas/serving/README.md
index 606770e17c..59e5d2c053 100644
--- a/examples/vision/classification/paddleclas/serving/README.md
+++ b/examples/vision/classification/paddleclas/serving/README.md
@@ -21,12 +21,12 @@ mv ResNet50_vd_infer/inference.pdiparams models/runtime/1/model.pdiparams
 
 # Pull the fastdeploy image
 # GPU image
-docker pull paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10
+docker pull paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10
 # CPU image
-docker pull paddlepaddle/fastdeploy:0.3.0-cpu-only-21.10
+docker pull paddlepaddle/fastdeploy:0.6.0-cpu-only-21.10
 
 # Run the container. The container name is fd_serving, and the current directory is mounted as /serving inside the container
-nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/serving paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10 bash
+nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/serving paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10 bash
 
 # Start the service (if the CUDA_VISIBLE_DEVICES environment variable is not set, the server gets scheduling rights to all GPU cards)
 CUDA_VISIBLE_DEVICES=0 fastdeployserver --model-repository=/serving/models --backend-config=python,shm-default-byte-size=10485760
diff --git a/examples/vision/detection/yolov5/serving/README.md b/examples/vision/detection/yolov5/serving/README.md
index 521f9c4196..4f95df4411 100644
--- a/examples/vision/detection/yolov5/serving/README.md
+++ b/examples/vision/detection/yolov5/serving/README.md
@@ -14,13 +14,13 @@ wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s.onnx
 mv yolov5s.onnx models/runtime/1/model.onnx
 
 # Pull the fastdeploy image
-docker pull paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10
+docker pull paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10
 
 # Run the container. The container name is fd_serving, and the current directory is mounted as /yolov5_serving inside the container
-nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/yolov5_serving paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10 bash
+nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/yolov5_serving paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10 bash
 
 # Start the service (if the CUDA_VISIBLE_DEVICES environment variable is not set, the server gets scheduling rights to all GPU cards)
-CUDA_VISIBLE_DEVICES=0 fastdeployserver --model-repository=models --backend-config=python,shm-default-byte-size=10485760
+CUDA_VISIBLE_DEVICES=0 fastdeployserver --model-repository=/yolov5_serving/models --backend-config=python,shm-default-byte-size=10485760
 ```
 
 >> **Note**: If "Address already in use" is reported, start the service with `--grpc-port` to specify a different port, and change the request port in yolov5_grpc_client.py accordingly
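Since `fastdeployserver` is built on Triton Inference Server, the service started above can be smoke-tested from the host. A minimal sketch, assuming Triton's default HTTP port (8000) and the `--net=host` flag used in the READMEs; adjust the port if `--grpc-port`/`--http-port` were overridden:

```bash
# Query readiness, then list the models in the repository via the
# Triton HTTP API (assumes the default HTTP port 8000).
curl -sf localhost:8000/v2/health/ready && echo "server is ready"
curl -sf -X POST localhost:8000/v2/repository/index
```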
diff --git a/examples/vision/detection/yolov5/serving/README_EN.md b/examples/vision/detection/yolov5/serving/README_EN.md
index db110efabd..cc85355d89 100644
--- a/examples/vision/detection/yolov5/serving/README_EN.md
+++ b/examples/vision/detection/yolov5/serving/README_EN.md
@@ -10,10 +10,10 @@ wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s.onnx
 mv yolov5s.onnx models/infer/1/
 
 # Pull fastdeploy image
-docker pull paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10
+docker pull paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10
 
 # Run the container. The container name is fd_serving, and the current directory is mounted as the container's /yolov5_serving directory
-nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/yolov5_serving paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10 bash
+nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/yolov5_serving paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10 bash
 
 # Start the service (if the CUDA_VISIBLE_DEVICES environment variable is not set, the server has scheduling privileges for all GPU cards)
 CUDA_VISIBLE_DEVICES=0 fastdeployserver --model-repository=models --backend-config=python,shm-default-byte-size=10485760
diff --git a/serving/Dockerfile b/serving/Dockerfile
index eb27ca4eb6..cca94ebe93 100644
--- a/serving/Dockerfile
+++ b/serving/Dockerfile
@@ -33,7 +33,7 @@ RUN apt-get update \
     && apt-get update && apt-get install -y --no-install-recommends datacenter-gpu-manager=1:2.2.9
 
 RUN apt-get update \
-    && apt-get install -y --no-install-recommends libre2-5 libb64-0d python3 python3-pip libarchive-dev \
+    && apt-get install -y --no-install-recommends libre2-5 libb64-0d python3 python3-pip libarchive-dev ffmpeg libsm6 libxext6 \
     && python3 -m pip install -U pip \
     && python3 -m pip install paddlepaddle-gpu paddlenlp faster_tokenizer
diff --git a/serving/Dockerfile_cpu b/serving/Dockerfile_cpu
index e671f03a17..9e6d8b0ef4 100644
--- a/serving/Dockerfile_cpu
+++ b/serving/Dockerfile_cpu
@@ -17,7 +17,7 @@ FROM paddlepaddle/fastdeploy:22.09-cpu-only-min
 ENV TZ=Asia/Shanghai \
     DEBIAN_FRONTEND=noninteractive
 
-RUN apt-get update && apt-get install -y --no-install-recommends apt-utils libgomp1 \
+RUN apt-get update && apt-get install -y --no-install-recommends apt-utils libgomp1 ffmpeg libsm6 libxext6 \
     && python3 -m pip install -U pip \
     && python3 -m pip install paddlepaddle paddlenlp faster_tokenizer
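The `ffmpeg libsm6 libxext6` packages added to both Dockerfiles are the system libraries that `opencv-python` typically needs at import time in slim images. A quick sanity-check sketch, assuming the CPU image built from `serving/Dockerfile_cpu` ships `opencv-python` (pulled in by the fastdeploy wheel):

```bash
# Hypothetical check: cv2 imports successfully only when its native
# dependencies (libSM, libXext, ffmpeg's shared libraries) are present.
docker run --rm paddlepaddle/fastdeploy:0.6.0-cpu-only-21.10 \
    python3 -c "import cv2; print(cv2.__version__)"
```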
diff --git a/serving/README_CN.md b/serving/README_CN.md
index b26b4599fb..377698873a 100644
--- a/serving/README_CN.md
+++ b/serving/README_CN.md
@@ -17,13 +17,13 @@ FastDeploy is based on [Triton Inference Server](https://github.com/triton-inference-se
 #### CPU Image
 The CPU image only supports serving deployment of Paddle/ONNX models on CPU; supported inference backends are OpenVINO, Paddle Inference, and ONNX Runtime
 ``` shell
-docker pull paddlepaddle/fastdeploy:0.3.0-cpu-only-21.10
+docker pull paddlepaddle/fastdeploy:0.6.0-cpu-only-21.10
 ```
 
 #### GPU Image
 The GPU image supports serving deployment of Paddle/ONNX models on GPU/CPU; supported inference backends are OpenVINO, TensorRT, Paddle Inference, and ONNX Runtime
 ```
-docker pull paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10
+docker pull paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10
 ```
 
 Users can also build the image themselves according to their needs, referring to the following documents:
@@ -33,4 +33,12 @@ docker pull paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10
 - [Model repository layout](docs/zh_CN/model_repository.md) (how to prepare the model repository)
 - [Deployment configuration](docs/zh_CN/model_configuration.md) (runtime configuration options)
 - [Serving deployment demos](docs/zh_CN/demo.md)
-  - [YOLOV5 detection task](../examples/vision/detection/yolov5/serving/README.md)
+
+### Model Examples
+
+| Task | Model |
+|---|---|
+| Classification | [PaddleClas](../examples/vision/classification/paddleclas/serving/README.md) |
+| Detection | [ultralytics/YOLOv5](../examples/vision/detection/yolov5/serving/README.md) |
+| NLP | [PaddleNLP/ERNIE-3.0](../examples/text/ernie-3.0/serving/README.md) |
+| Speech | [PaddleSpeech/PP-TTS](../examples/audio/pp-tts/serving/README.md) |
diff --git a/serving/README_EN.md b/serving/README_EN.md
index b7bb028f5f..30bc405c0f 100644
--- a/serving/README_EN.md
+++ b/serving/README_EN.md
@@ -20,7 +20,7 @@ FastDeploy builds an end-to-end serving deployment based on [Triton Inference Se
 CPU images only support Paddle/ONNX models for serving deployment on CPUs; supported inference backends include OpenVINO, Paddle Inference, and ONNX Runtime
 ```shell
-docker pull paddlepaddle/fastdeploy:0.3.0-cpu-only-21.10
+docker pull paddlepaddle/fastdeploy:0.6.0-cpu-only-21.10
 ```
 
 #### GPU Image
@@ -28,7 +28,7 @@ docker pull paddlepaddle/fastdeploy:0.3.0-cpu-only-21.10
 GPU images support Paddle/ONNX models for serving deployment on GPU and CPU; supported inference backends include OpenVINO, TensorRT, Paddle Inference, and ONNX Runtime
 ```
-docker pull paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10
+docker pull paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10
 ```
 
 Users can also compile the image by themselves according to their own needs, referring to the following documents:
@@ -41,3 +41,12 @@ Users can also compile the image by themselves according to their own needs, ref
 - [Serving Deployment Configuration for Runtime](docs/zh_CN/model_configuration.md)
 - [Serving Deployment Demo](docs/zh_CN/demo.md)
 - [YOLOV5 - Detection Task](../examples/vision/detection/yolov5/serving/README.md)
+
+### Model List
+
+| Task | Model |
+|---|---|
+| Classification | [PaddleClas](../examples/vision/classification/paddleclas/serving/README.md) |
+| Detection | [ultralytics/YOLOv5](../examples/vision/detection/yolov5/serving/README.md) |
+| NLP | [PaddleNLP/ERNIE-3.0](../examples/text/ernie-3.0/serving/README.md) |
+| Speech | [PaddleSpeech/PP-TTS](../examples/audio/pp-tts/serving/README.md) |
diff --git a/serving/docs/EN/compile-en.md b/serving/docs/EN/compile-en.md
index fd72d76734..32476c19a8 100644
--- a/serving/docs/EN/compile-en.md
+++ b/serving/docs/EN/compile-en.md
@@ -13,7 +13,7 @@ bash scripts/build.sh
 
 # Exit to the FastDeploy home directory and create the image
 cd ../
-docker build -t paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile .
+docker build -t paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile .
 ```
 
 ## CPU Image
@@ -26,7 +26,7 @@ bash scripts/build.sh OFF
 
 # Exit to the FastDeploy home directory and create the image
 cd ../
-docker build -t paddlepaddle/fastdeploy:0.3.0-cpu-only-21.10 -f serving/Dockerfile_cpu .
+docker build -t paddlepaddle/fastdeploy:0.6.0-cpu-only-21.10 -f serving/Dockerfile_cpu .
 ```
 
 ## IPU Image
@@ -38,5 +38,5 @@ bash scripts/build_fd_ipu.sh
 
 # Exit to the FastDeploy home directory and create the image
 cd ../
-docker build -t paddlepaddle/fastdeploy:0.3.0-ipu-only-21.10 -f serving/Dockerfile_ipu .
+docker build -t paddlepaddle/fastdeploy:0.6.0-ipu-only-21.10 -f serving/Dockerfile_ipu .
 ```
diff --git a/serving/docs/zh_CN/compile.md b/serving/docs/zh_CN/compile.md
index 21a9c2ab43..ebbb25da44 100644
--- a/serving/docs/zh_CN/compile.md
+++ b/serving/docs/zh_CN/compile.md
@@ -13,7 +13,7 @@ bash scripts/build.sh
 
 # Exit to the FastDeploy home directory and build the image
 cd ../
-docker build -t paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile .
+docker build -t paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile .
 ```
 
 ## Build the CPU Image
@@ -25,7 +25,7 @@ bash scripts/build.sh OFF
 
 # Exit to the FastDeploy home directory and build the image
 cd ../
-docker build -t paddlepaddle/fastdeploy:0.3.0-cpu-only-21.10 -f serving/Dockerfile_cpu .
+docker build -t paddlepaddle/fastdeploy:0.6.0-cpu-only-21.10 -f serving/Dockerfile_cpu .
 ```
 
 ## Build the IPU Image
@@ -37,5 +37,5 @@ bash scripts/build_fd_ipu.sh
 
 # Exit to the FastDeploy home directory and build the image
 cd ../
-docker build -t paddlepaddle/fastdeploy:0.3.0-ipu-only-21.10 -f serving/Dockerfile_ipu .
+docker build -t paddlepaddle/fastdeploy:0.6.0-ipu-only-21.10 -f serving/Dockerfile_ipu .
 ```
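The compile docs stop at `docker build`; a small verification sketch, assuming the tags used above and that the server binary is installed on the image's PATH:

```bash
# List the freshly built images, then confirm the server binary exists
# inside one of them (CPU tag used here as an example).
docker images paddlepaddle/fastdeploy
docker run --rm paddlepaddle/fastdeploy:0.6.0-cpu-only-21.10 which fastdeployserver
```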
diff --git a/serving/scripts/build.sh b/serving/scripts/build.sh
index e09af35d80..465e3f03af 100644
--- a/serving/scripts/build.sh
+++ b/serving/scripts/build.sh
@@ -30,7 +30,7 @@ if [ ! -d "./TensorRT-8.4.1.5/" ]; then
     rm -rf TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
 fi
 
-nvidia-docker run -it --rm --name build_fd_vison \
+nvidia-docker run -it --rm --name build_fd \
     -v`pwd`/..:/workspace/fastdeploy \
     nvcr.io/nvidia/tritonserver:21.10-py3-min \
     bash -c \
@@ -50,7 +50,8 @@ nvidia-docker run -it --rm --name build_fd_vison \
     export ENABLE_TEXT=ON;
     python setup.py build;
    python setup.py bdist_wheel;
-    cd ../;rm -rf build; mkdir -p build;cd build;
+    cd /workspace/fastdeploy;
+    rm -rf build; mkdir -p build;cd build;
     cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
     make -j`nproc`;
     make install;
@@ -65,7 +66,7 @@ else
 
 echo "start build FD CPU library"
 
-docker run -it --rm --name build_fd_vison \
+docker run -it --rm --name build_fd \
     -v`pwd`/..:/workspace/fastdeploy \
     paddlepaddle/fastdeploy:21.10-cpu-only-buildbase \
     bash -c \
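For context, the renamed `build_fd` container is the throwaway build container created by `scripts/build.sh`, which the compile docs above invoke as follows; a condensed recap of the documented flow, not part of the patch:

```bash
# From the FastDeploy repository root: build the runtime, then the image.
cd serving
bash scripts/build.sh          # GPU build (default)
# bash scripts/build.sh OFF    # CPU-only variant
cd ../
docker build -t paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile .
```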