diff --git a/Dockerfile.sdk b/Dockerfile.sdk
index ddd1374f6d..e79ef3b7e7 100644
--- a/Dockerfile.sdk
+++ b/Dockerfile.sdk
@@ -29,7 +29,7 @@
 #
 
 # Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:22.11-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:22.12-py3-min
 
 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
 ARG TRITON_COMMON_REPO_TAG=main
diff --git a/Dockerfile.win10.min b/Dockerfile.win10.min
index a55e1431b8..26812def15 100644
--- a/Dockerfile.win10.min
+++ b/Dockerfile.win10.min
@@ -100,27 +100,27 @@ ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows10.x86_64.cuda-11.8.cudnn8
 
 LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"
 
-ARG CUDNN_VERSION=8.7.0.80
+ARG CUDNN_VERSION=8.7.0.84
 ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda11-archive.zip
 LABEL CUDNN_VERSION="${CUDNN_VERSION}"
 
 ADD https://developer.download.nvidia.com/compute/cuda/${CUDA_VERSION}/network_installers/cuda_${CUDA_VERSION}_windows_network.exe cuda_${CUDA_VERSION}_windows_network.exe
-COPY ${CUDNN_ZIP} .
-COPY ${TENSORRT_ZIP} .
+COPY ${CUDNN_ZIP} /tmp/${CUDNN_ZIP}
+COPY ${TENSORRT_ZIP} /tmp/${TENSORRT_ZIP}
 
 LABEL CUDA_VERSION="${CUDA_VERSION}"
 
-RUN unzip %TENSORRT_ZIP%
-RUN move TensorRT-%TENSORRT_VERSION% TensorRT
+RUN unzip /tmp/%TENSORRT_ZIP%
+RUN move TensorRT-* TensorRT
 ENV TRT_VERSION ${TENSORRT_VERSION}
 
 RUN cuda_%CUDA_VERSION%_windows_network.exe -s %CUDA_PACKAGES%
-RUN unzip %CUDNN_ZIP%
-RUN move %CUDNN_DIR% cudnn
+RUN unzip /tmp/%CUDNN_ZIP%
+RUN move cudnn-* cudnn
 RUN copy cudnn\bin\cudnn*.dll "%CUDA_INSTALL_ROOT_WP%\bin\."
-RUN copy cudnn\lib\cudnn*.lib "%CUDA_INSTALL_ROOT_WP%\lib\x64\."
+RUN copy cudnn\lib\x64\cudnn*.lib "%CUDA_INSTALL_ROOT_WP%\lib\x64\."
 RUN copy cudnn\include\cudnn*.h "%CUDA_INSTALL_ROOT_WP%\include\."
 
 ENV CUDNN_VERSION ${CUDNN_VERSION}
diff --git a/README.md b/README.md
index 31d066b0a8..1f1ea9a53f 100644
--- a/README.md
+++ b/README.md
@@ -32,8 +32,8 @@
 **LATEST RELEASE: You are currently on the main branch which tracks
 under-development progress towards the next release.
 The current release is
-version [2.28.0](https://github.com/triton-inference-server/server/tree/r22.11)
-and corresponds to the 22.11 container release on
+version [2.29.0](https://github.com/triton-inference-server/server/tree/r22.12)
+and corresponds to the 22.12 container release on
 [NVIDIA GPU Cloud (NGC)](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver).**
 
 ----
@@ -84,16 +84,16 @@ Inference Server with the
 ```bash
 # Step 1: Create the example model repository
-git clone -b r22.11 https://github.com/triton-inference-server/server.git
+git clone -b r22.12 https://github.com/triton-inference-server/server.git
 cd server/docs/examples
 ./fetch_models.sh
 
 # Step 2: Launch triton from the NGC Triton container
-docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:22.11-py3 tritonserver --model-repository=/models
+docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:22.12-py3 tritonserver --model-repository=/models
 
 # Step 3: Sending an Inference Request
 # In a separate console, launch the image_client example from the NGC Triton SDK container
-docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:22.11-py3-sdk
+docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:22.12-py3-sdk
 /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg
 
 # Inference should return the following
diff --git a/TRITON_VERSION b/TRITON_VERSION
index c1b8c643a2..fd88889280 100644
--- a/TRITON_VERSION
+++ b/TRITON_VERSION
@@ -1 +1 @@
-2.29.0dev
+2.30.0dev
diff --git a/build.py b/build.py
index 6f0f249a12..9e65ea0405 100755
--- a/build.py
+++ b/build.py
@@ -67,9 +67,9 @@
 # incorrectly load the other version of the openvino libraries.
 #
 TRITON_VERSION_MAP = {
-    '2.29.0dev': (
-        '22.12dev',  # triton container
-        '22.11',  # upstream container
+    '2.30.0dev': (
+        '23.01dev',  # triton container
+        '22.12',  # upstream container
         '1.13.1',  # ORT
         '2022.1.0',  # ORT OpenVINO
         '2022.1.0',  # Standalone OpenVINO
diff --git a/deploy/aws/values.yaml b/deploy/aws/values.yaml
index 2a7de51a98..125dacc885 100644
--- a/deploy/aws/values.yaml
+++ b/deploy/aws/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:22.11-py3
+  imageName: nvcr.io/nvidia/tritonserver:22.12-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: s3://triton-inference-server-repository/model_repository
   numGpus: 1
diff --git a/deploy/fleetcommand/Chart.yaml b/deploy/fleetcommand/Chart.yaml
index 718b74d68b..d0e48380fa 100644
--- a/deploy/fleetcommand/Chart.yaml
+++ b/deploy/fleetcommand/Chart.yaml
@@ -26,7 +26,7 @@
 apiVersion: v1
 # appVersion is the Triton version; update when changing release
-appVersion: "2.28.0"
+appVersion: "2.29.0"
 description: Triton Inference Server (Fleet Command)
 name: triton-inference-server
 # version is the Chart version; update when changing anything in the chart
diff --git a/deploy/fleetcommand/values.yaml b/deploy/fleetcommand/values.yaml
index 437bd0ca56..7bd48166ca 100644
--- a/deploy/fleetcommand/values.yaml
+++ b/deploy/fleetcommand/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:22.11-py3
+  imageName: nvcr.io/nvidia/tritonserver:22.12-py3
   pullPolicy: IfNotPresent
   numGpus: 1
   serverCommand: tritonserver
@@ -46,13 +46,13 @@ image:
     # Model Control Mode (Optional, default: none)
     #
     # To set model control mode, uncomment and configure below
-    # See https://github.com/triton-inference-server/server/blob/r22.11/docs/model_management.md
+    # See https://github.com/triton-inference-server/server/blob/r22.12/docs/model_management.md
     # for more details
     #- --model-control-mode=explicit|poll|none
     #
     # Additional server args
     #
-    # see https://github.com/triton-inference-server/server/blob/r22.11/README.md
+    # see https://github.com/triton-inference-server/server/blob/r22.12/README.md
     # for more details
 
 service:
diff --git a/deploy/gcp/values.yaml b/deploy/gcp/values.yaml
index 6cae0a75e2..806d6aadae 100644
--- a/deploy/gcp/values.yaml
+++ b/deploy/gcp/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:22.11-py3
+  imageName: nvcr.io/nvidia/tritonserver:22.12-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: gs://triton-inference-server-repository/model_repository
   numGpus: 1
diff --git a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
index 5647eb988e..7719733e4f 100644
--- a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
+++ b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
@@ -33,7 +33,7 @@ metadata:
   namespace: default
 spec:
   containers:
-  - image: nvcr.io/nvidia/tritonserver:22.11-py3-sdk
+  - image: nvcr.io/nvidia/tritonserver:22.12-py3-sdk
     imagePullPolicy: Always
     name: nv-triton-client
     securityContext:
diff --git a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
index 8fb495ca47..e2b0d0ed9a 100644
--- a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
+++ b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
@@ -26,9 +26,9 @@
 
 export REGISTRY=gcr.io/$(gcloud config get-value project | tr ':' '/')
 export APP_NAME=tritonserver
-export MAJOR_VERSION=2.28
-export MINOR_VERSION=2.28.0
-export NGC_VERSION=22.11-py3
+export MAJOR_VERSION=2.29
+export MINOR_VERSION=2.29.0
+export NGC_VERSION=22.12-py3
 
 docker pull nvcr.io/nvidia/$APP_NAME:$NGC_VERSION
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
index 434fc6a1ed..ec6816ca14 100644
--- a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
@@ -25,7 +25,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 apiVersion: v1
-appVersion: "2.28"
+appVersion: "2.29"
 description: Triton Inference Server
 name: triton-inference-server
-version: 2.28.0
+version: 2.29.0
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
index 1d8b3aa1fb..1a0f90de19 100644
--- a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
@@ -31,14 +31,14 @@ maxReplicaCount: 3
 tritonProtocol: HTTP
 # HPA GPU utilization autoscaling target
 HPATargetAverageValue: 85
-modelRepositoryPath: gs://triton_sample_models/22_11
-publishedVersion: '2.28.0'
+modelRepositoryPath: gs://triton_sample_models/22_12
+publishedVersion: '2.29.0'
 gcpMarketplace: true
 
 image:
   registry: gcr.io
   repository: nvidia-ngc-public/tritonserver
-  tag: 22.11-py3
+  tag: 22.12-py3
   pullPolicy: IfNotPresent
 # modify the model repository here to match your GCP storage bucket
 numGpus: 1
diff --git a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
index de23289a95..75ca62f1e4 100644
--- a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.28.0'
+  publishedVersion: '2.29.0'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.
@@ -89,7 +89,7 @@ properties:
   modelRepositoryPath:
     type: string
     title: Bucket where models are stored. Please make sure the user/service account to create the GKE app has permission to this GCS bucket. Read Triton documentation on configs and formatting details, supporting TensorRT, TensorFlow, Pytorch, Onnx ... etc.
-    default: gs://triton_sample_models/22_11
+    default: gs://triton_sample_models/22_12
   image.ldPreloadPath:
     type: string
     title: Leave this empty by default. Triton allows users to create custom layers for backend such as TensorRT plugin or Tensorflow custom ops, the compiled shared library must be provided via LD_PRELOAD environment variable.
diff --git a/deploy/gke-marketplace-app/server-deployer/schema.yaml b/deploy/gke-marketplace-app/server-deployer/schema.yaml
index 6692d99d08..8c7a3b299a 100644
--- a/deploy/gke-marketplace-app/server-deployer/schema.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/schema.yaml
@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.28.0'
+  publishedVersion: '2.29.0'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.
@@ -89,7 +89,7 @@ properties:
   modelRepositoryPath:
     type: string
     title: Bucket where models are stored. Please make sure the user/service account to create the GKE app has permission to this GCS bucket. Read Triton documentation on configs and formatting details, supporting TensorRT, TensorFlow, Pytorch, Onnx ... etc.
-    default: gs://triton_sample_models/22_11
+    default: gs://triton_sample_models/22_12
   image.ldPreloadPath:
     type: string
     title: Leave this empty by default. Triton allows users to create custom layers for backend such as TensorRT plugin or Tensorflow custom ops, the compiled shared library must be provided via LD_PRELOAD environment variable.
diff --git a/docs/customization_guide/build.md b/docs/customization_guide/build.md
index 6dce808220..cc7c8361c4 100644
--- a/docs/customization_guide/build.md
+++ b/docs/customization_guide/build.md
@@ -173,7 +173,7 @@ $ ./build.py ... --repo-tag=common:<container tag> --repo-tag=core:<container tag>
 `<container tag>` will default to the branch name. For example, if you are building on the
-r22.11 branch, `<container tag>` will default to r22.11. If you are
+r22.12 branch, `<container tag>` will default to r22.12. If you are
 building on any other branch (including the *main* branch) then
 `<container tag>` will default to "main". Therefore, you typically do
 not need to provide `<container tag>` at all (nor the preceding
@@ -334,8 +334,8 @@ python build.py --cmake-dir=/build --build-dir=/tmp/citritonbuild
 If you are building on *main* branch then '<container tag>' will default to
 "main". If you are building on a release branch then '<container tag>'
 will default to the branch name. For example, if you
-are building on the r22.11 branch, '<container tag>' will default to
-r22.11. Therefore, you typically do not need to provide '<container tag>'
+are building on the r22.12 branch, '<container tag>' will default to
+r22.12. Therefore, you typically do not need to provide '<container tag>'
 at all (nor the preceding colon). You can use a different '<container tag>'
 for a component to instead use the corresponding branch/tag in the build.
 For example, if you have a branch called
diff --git a/docs/customization_guide/compose.md b/docs/customization_guide/compose.md
index 40e533e7a8..5f978e0c1e 100644
--- a/docs/customization_guide/compose.md
+++ b/docs/customization_guide/compose.md
@@ -44,8 +44,8 @@ from source to get more exact customization.
 The `compose.py` script can be found in the [server repository](https://github.com/triton-inference-server/server).
 Simply clone the repository and run `compose.py` to create a custom container.
 Note: Created container version will depend on the branch that was cloned.
-For example branch [r22.11](https://github.com/triton-inference-server/server/tree/r22.11)
-should be used to create a image based on the NGC 22.11 Triton release.
+For example branch [r22.12](https://github.com/triton-inference-server/server/tree/r22.12)
+should be used to create a image based on the NGC 22.12 Triton release.
 
 `compose.py` provides `--backend`, `--repoagent` options that allow you to
 specify which backends and repository agents to include in the custom image.
@@ -62,7 +62,7 @@ will provide a container `tritonserver` locally. You can access the container wi
 $ docker run -it tritonserver:latest
 ```
 
-Note: If `compose.py` is run on release versions `r22.11` and earlier,
+Note: If `compose.py` is run on release versions `r22.12` and earlier,
 the resulting container will have DCGM version 2.2.3 installed.
 This may result in different GPU statistic reporting behavior.
@@ -76,19 +76,19 @@ For example, running
 ```
 python3 compose.py --backend tensorflow1 --repoagent checksum
 ```
-on branch [r22.11](https://github.com/triton-inference-server/server/tree/r22.11) pulls:
-- `min` container `nvcr.io/nvidia/tritonserver:22.11-py3-min`
-- `full` container `nvcr.io/nvidia/tritonserver:22.11-py3`
+on branch [r22.12](https://github.com/triton-inference-server/server/tree/r22.12) pulls:
+- `min` container `nvcr.io/nvidia/tritonserver:22.12-py3-min`
+- `full` container `nvcr.io/nvidia/tritonserver:22.12-py3`
 
 Alternatively, users can specify the version of Triton container
 to pull from any branch by either:
 1. Adding flag `--container-version <container version>` to branch
 ```
-python3 compose.py --backend tensorflow1 --repoagent checksum --container-version 22.11
+python3 compose.py --backend tensorflow1 --repoagent checksum --container-version 22.12
 ```
 2. Specifying `--image min,<min container image> --image full,<full container image>`.
 The user is responsible for specifying compatible `min` and `full` containers.
 ```
-python3 compose.py --backend tensorflow1 --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:22.11-py3-min --image full,nvcr.io/nvidia/tritonserver:22.11-py3
+python3 compose.py --backend tensorflow1 --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:22.12-py3-min --image full,nvcr.io/nvidia/tritonserver:22.12-py3
 ```
 Method 1 and 2 will result in the same composed container. Furthermore, `--image` flag overrides the `--container-version` flag when both are specified.
diff --git a/docs/customization_guide/test.md b/docs/customization_guide/test.md
index 60876c9eae..ded7a2e36c 100644
--- a/docs/customization_guide/test.md
+++ b/docs/customization_guide/test.md
@@ -49,7 +49,7 @@ $ ./gen_qa_custom_ops
 ```
 
 This will create multiple model repositories in /tmp/<version>/qa_*
-(for example /tmp/22.11/qa_model_repository). The TensorRT models
+(for example /tmp/22.12/qa_model_repository). The TensorRT models
 will be created for the GPU on the system that CUDA considers device
 0 (zero). If you have multiple GPUs on your system see the
 documentation in the scripts for how to target a specific GPU.
diff --git a/docs/user_guide/custom_operations.md b/docs/user_guide/custom_operations.md
index cb53cc02a1..42d904464c 100644
--- a/docs/user_guide/custom_operations.md
+++ b/docs/user_guide/custom_operations.md
@@ -64,7 +64,7 @@ simple way to ensure you are using the correct version of TensorRT
 is to use the
 [NGC TensorRT container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorrt)
 corresponding to the Triton container. For example, if you are using
-the 22.11 version of Triton, use the 22.11 version of the TensorRT
+the 22.12 version of Triton, use the 22.12 version of the TensorRT
 container.
 
 ## TensorFlow
@@ -123,7 +123,7 @@ simple way to ensure you are using the correct version of TensorFlow
 is to use the
 [NGC TensorFlow container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorflow)
 corresponding to the Triton container. For example, if you are using
-the 22.11 version of Triton, use the 22.11 version of the TensorFlow
+the 22.12 version of Triton, use the 22.12 version of the TensorFlow
 container.
 
 ## PyTorch
@@ -167,7 +167,7 @@ simple way to ensure you are using the correct version of PyTorch
 is to use the
 [NGC PyTorch container](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch)
 corresponding to the Triton container.
 For example, if you are using
-the 22.11 version of Triton, use the 22.11 version of the PyTorch
+the 22.12 version of Triton, use the 22.12 version of the PyTorch
 container.
 
 ## ONNX
diff --git a/docs/user_guide/performance_tuning.md b/docs/user_guide/performance_tuning.md
index 9113bd64ad..eda2613055 100644
--- a/docs/user_guide/performance_tuning.md
+++ b/docs/user_guide/performance_tuning.md
@@ -150,10 +150,10 @@ These containers can be started interactively instead, but for the sake of demon
 
 ```bash
 # Start server container in the background
-docker run -d --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:22.11-py3
+docker run -d --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:22.12-py3
 
 # Start client container in the background
-docker run -d --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:22.11-py3-sdk
+docker run -d --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:22.12-py3-sdk
 ```
 
 > **Note**
diff --git a/qa/common/gen_qa_custom_ops b/qa/common/gen_qa_custom_ops
index e013a2dee7..4f55c67c75 100755
--- a/qa/common/gen_qa_custom_ops
+++ b/qa/common/gen_qa_custom_ops
@@ -37,7 +37,7 @@
 ##
 ############################################################################
 
-TRITON_VERSION=${TRITON_VERSION:=22.11}
+TRITON_VERSION=${TRITON_VERSION:=22.12}
 NVIDIA_UPSTREAM_VERSION=${NVIDIA_UPSTREAM_VERSION:=$TRITON_VERSION}
 TENSORFLOW_IMAGE=${TENSORFLOW_IMAGE:=nvcr.io/nvidia/tensorflow:$NVIDIA_UPSTREAM_VERSION-tf2-py3}
 PYTORCH_IMAGE=${PYTORCH_IMAGE:=nvcr.io/nvidia/pytorch:$NVIDIA_UPSTREAM_VERSION-py3}
diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository
index 9cdeac74ef..5995b31fef 100755
--- a/qa/common/gen_qa_model_repository
+++ b/qa/common/gen_qa_model_repository
@@ -48,7 +48,7 @@
 ##
 ############################################################################
 
-TRITON_VERSION=${TRITON_VERSION:=22.11}
+TRITON_VERSION=${TRITON_VERSION:=22.12}
 
 # ONNX. Use ONNX_OPSET 0 to use the default for ONNX version
 ONNX_VERSION=1.10.1
diff --git a/qa/common/gen_xavier_trt_models b/qa/common/gen_xavier_trt_models
index 3a68045aa0..c7d943861d 100755
--- a/qa/common/gen_xavier_trt_models
+++ b/qa/common/gen_xavier_trt_models
@@ -50,7 +50,7 @@
 ##
 ############################################################################
 
-TRITON_VERSION=${TRITON_VERSION:=22.11}
+TRITON_VERSION=${TRITON_VERSION:=22.12}
 CUDA_DEVICE=${RUNNER_ID:=0}
 HOST_BUILD_DIR=${HOST_BUILD_DIR:=/tmp}
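
The hunks above make up the whole release bump: the pinned `22.11`/`r22.11` container tags, the `2.28.0` chart versions, the cuDNN `8.7.0.80` pin, the `22_11` GCS model-repository suffix, and the `2.29.0dev` development version all move forward together. A bump like this is easy to leave half-done, so a repo-wide scan for the retired identifiers is a cheap sanity check before posting the change. The snippet below is a hypothetical helper, not part of the patch; it uses only `git grep` and the version strings visible in the diff above.

```bash
#!/usr/bin/env bash
# Hypothetical post-bump check (not part of this patch): list any files
# that still reference the identifiers retired by the hunks above.
set -euo pipefail

old_ids=("22.11" "2.28.0" "2.29.0dev" "22_11" "8.7.0.80")

for id in "${old_ids[@]}"; do
  echo "== remaining references to ${id} =="
  # --fixed-strings keeps the dots literal; git grep exits non-zero when
  # nothing matches, which is the desired outcome here.
  git grep --fixed-strings --line-number "${id}" || echo "(none)"
done
```

Any hits that are intentional (for example, historical release notes) can be ignored; the point is to catch files that should have moved to 22.12 but did not.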