From b72fe10d14d7b7a1a8b835874433ba5e522a1a3f Mon Sep 17 00:00:00 2001
From: Misha Chornyi
Date: Tue, 6 Sep 2022 15:12:13 -0700
Subject: [PATCH] Update README and versions for 22.09 branch

---
 Dockerfile.sdk                                |  2 +-
 README.md                                     | 10 +++++-----
 build.py                                      | 10 +++++-----
 deploy/aws/values.yaml                        |  2 +-
 deploy/fleetcommand/Chart.yaml                |  2 +-
 deploy/fleetcommand/values.yaml               |  6 +++---
 deploy/gcp/values.yaml                        |  2 +-
 .../perf-analyzer-script/triton_client.yaml   |  2 +-
 .../server-deployer/build_and_push.sh         |  6 +++---
 .../server-deployer/chart/triton/Chart.yaml   |  4 ++--
 .../server-deployer/chart/triton/values.yaml  |  6 +++---
 .../server-deployer/data-test/schema.yaml     |  4 ++--
 .../server-deployer/schema.yaml               |  4 ++--
 docs/customization_guide/build.md             |  6 +++---
 docs/customization_guide/compose.md           | 16 ++++++++--------
 docs/customization_guide/test.md              |  2 +-
 docs/user_guide/custom_operations.md          |  6 +++---
 docs/user_guide/performance_tuning.md         |  4 ++--
 qa/common/gen_qa_custom_ops                   |  4 ++--
 qa/common/gen_qa_model_repository             |  2 +-
 qa/common/gen_xavier_trt_models               |  2 +-
 21 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/Dockerfile.sdk b/Dockerfile.sdk
index b97a5ac953..4496229d45 100644
--- a/Dockerfile.sdk
+++ b/Dockerfile.sdk
@@ -29,7 +29,7 @@
 #
 # Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:22.08-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:22.09-py3-min

 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
 ARG TRITON_COMMON_REPO_TAG=main
diff --git a/README.md b/README.md
index 07693f58c3..def3223b79 100644
--- a/README.md
+++ b/README.md
@@ -32,8 +32,8 @@
 **LATEST RELEASE: You are currently on the main branch which tracks
 under-development progress towards the next release. The current release is
-version [2.24.0](https://github.com/triton-inference-server/server/tree/r22.07)
-and corresponds to the 22.07 container release on
+version [2.26.0](https://github.com/triton-inference-server/server/tree/r22.09)
+and corresponds to the 22.09 container release on
 [NVIDIA GPU Cloud (NGC)](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver).**

 ----
@@ -84,16 +84,16 @@ Inference Server with the

 ```bash
 # Step 1: Create the example model repository
-git clone -b r22.07 https://github.com/triton-inference-server/server.git
+git clone -b r22.09 https://github.com/triton-inference-server/server.git
 cd server/docs/examples
 ./fetch_models.sh

 # Step 2: Launch triton from the NGC Triton container
-docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:22.07-py3 tritonserver --model-repository=/models
+docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:22.09-py3 tritonserver --model-repository=/models

 # Step 3: Sending an Inference Request
 # In a separate console, launch the image_client example from the NGC Triton SDK container
-docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:22.07-py3-sdk
+docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:22.09-py3-sdk
 /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg

 # Inference should return the following
diff --git a/build.py b/build.py
index de8479e652..6b63a037ca 100755
--- a/build.py
+++ b/build.py
@@ -69,8 +69,8 @@
 TRITON_VERSION_MAP = {
     '2.27.0dev': (
         '22.10dev',  # triton container
-        '22.08',  # upstream container
-        '1.12.0',  # ORT
+        '22.09',  # upstream container
+        '1.12.1',  # ORT
         '2022.1.0',  # ORT OpenVINO
         '2022.1.0',  # Standalone OpenVINO
         '2.2.9',  # DCGM version
@@ -1101,9 +1101,9 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu,
 COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublasLt.so /usr/local/cuda/lib64/stubs/libcublasLt.so.11

 RUN mkdir -p /usr/local/cuda/targets/{cuda_arch}-linux/lib
-COPY --from=min_container /usr/local/cuda-11.7/targets/{cuda_arch}-linux/lib/libcudart.so.11.0 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
-COPY --from=min_container /usr/local/cuda-11.7/targets/{cuda_arch}-linux/lib/libcupti.so.11.7 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
-COPY --from=min_container /usr/local/cuda-11.7/targets/{cuda_arch}-linux/lib/libnvToolsExt.so.1 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
+COPY --from=min_container /usr/local/cuda-11.8/targets/{cuda_arch}-linux/lib/libcudart.so.11.0 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
+COPY --from=min_container /usr/local/cuda-11.8/targets/{cuda_arch}-linux/lib/libcupti.so.11.8 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
+COPY --from=min_container /usr/local/cuda-11.8/targets/{cuda_arch}-linux/lib/libnvToolsExt.so.1 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.

 COPY --from=min_container /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.8 /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.8
diff --git a/deploy/aws/values.yaml b/deploy/aws/values.yaml
index 3469f908d3..183c47a34d 100644
--- a/deploy/aws/values.yaml
+++ b/deploy/aws/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1

 image:
-  imageName: nvcr.io/nvidia/tritonserver:22.08-py3
+  imageName: nvcr.io/nvidia/tritonserver:22.09-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: s3://triton-inference-server-repository/model_repository
   numGpus: 1
diff --git a/deploy/fleetcommand/Chart.yaml b/deploy/fleetcommand/Chart.yaml
index 68056c66b6..b87b157300 100644
--- a/deploy/fleetcommand/Chart.yaml
+++ b/deploy/fleetcommand/Chart.yaml
@@ -26,7 +26,7 @@
 apiVersion: v1
 # appVersion is the Triton version; update when changing release
-appVersion: "2.25.0"
+appVersion: "2.26.0"
 description: Triton Inference Server (Fleet Command)
 name: triton-inference-server
 # version is the Chart version; update when changing anything in the chart
diff --git a/deploy/fleetcommand/values.yaml b/deploy/fleetcommand/values.yaml
index 5915e25d13..1dd8bcc09f 100644
--- a/deploy/fleetcommand/values.yaml
+++ b/deploy/fleetcommand/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1

 image:
-  imageName: nvcr.io/nvidia/tritonserver:22.08-py3
+  imageName: nvcr.io/nvidia/tritonserver:22.09-py3
   pullPolicy: IfNotPresent
   numGpus: 1
   serverCommand: tritonserver
@@ -46,13 +46,13 @@ image:
   # Model Control Mode (Optional, default: none)
   #
   # To set model control mode, uncomment and configure below
-  # See https://github.com/triton-inference-server/server/blob/r22.08/docs/model_management.md
+  # See https://github.com/triton-inference-server/server/blob/r22.09/docs/model_management.md
   # for more details
   #- --model-control-mode=explicit|poll|none
   #
   # Additional server args
   #
-  # see https://github.com/triton-inference-server/server/blob/r22.08/README.md
+  # see https://github.com/triton-inference-server/server/blob/r22.09/README.md
   # for more details

 service:
diff --git a/deploy/gcp/values.yaml b/deploy/gcp/values.yaml
index 3a5743a40b..877d783be4 100644
--- a/deploy/gcp/values.yaml
+++ b/deploy/gcp/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1

 image:
-  imageName: nvcr.io/nvidia/tritonserver:22.08-py3
+  imageName: nvcr.io/nvidia/tritonserver:22.09-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: gs://triton-inference-server-repository/model_repository
   numGpus: 1
diff --git a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
index 5e8e38614f..4dcb27f0ce 100644
--- a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
+++ b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
@@ -33,7 +33,7 @@ metadata:
   namespace: default
 spec:
   containers:
-  - image: nvcr.io/nvidia/tritonserver:22.08-py3-sdk
+  - image: nvcr.io/nvidia/tritonserver:22.09-py3-sdk
     imagePullPolicy: Always
     name: nv-triton-client
     securityContext:
diff --git a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
index 1156d8c0f3..abe664f0b2 100644
--- a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
+++ b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
@@ -26,9 +26,9 @@
 export REGISTRY=gcr.io/$(gcloud config get-value project | tr ':' '/')
 export APP_NAME=tritonserver
-export MAJOR_VERSION=2.25
-export MINOR_VERSION=2.25.0
-export NGC_VERSION=22.08-py3
+export MAJOR_VERSION=2.26
+export MINOR_VERSION=2.26.0
+export NGC_VERSION=22.09-py3

 docker pull nvcr.io/nvidia/$APP_NAME:$NGC_VERSION
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
index 75f2a4e478..1e16390a2b 100644
--- a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
@@ -25,7 +25,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 apiVersion: v1
-appVersion: "2.25"
+appVersion: "2.26"
 description: Triton Inference Server
 name: triton-inference-server
-version: 2.25.0
+version: 2.26.0
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
index bc64a98e19..d61f086c23 100644
--- a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
@@ -31,14 +31,14 @@ maxReplicaCount: 3
 tritonProtocol: HTTP
 # HPA GPU utilization autoscaling target
 HPATargetAverageValue: 85
-modelRepositoryPath: gs://triton_sample_models/22_08
-publishedVersion: '2.25.0'
+modelRepositoryPath: gs://triton_sample_models/22_09
+publishedVersion: '2.26.0'
 gcpMarketplace: true

 image:
   registry: gcr.io
   repository: nvidia-ngc-public/tritonserver
-  tag: 22.08-py3
+  tag: 22.09-py3
   pullPolicy: IfNotPresent
 # modify the model repository here to match your GCP storage bucket
 numGpus: 1
diff --git a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
index 41172e7803..7112ef1ece 100644
--- a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.25.0'
+  publishedVersion: '2.26.0'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.
@@ -89,7 +89,7 @@ properties:
   modelRepositoryPath:
     type: string
     title: Bucket where models are stored. Please make sure the user/service account to create the GKE app has permission to this GCS bucket. Read Triton documentation on configs and formatting details, supporting TensorRT, TensorFlow, Pytorch, Onnx ... etc.
-    default: gs://triton_sample_models/22_08
+    default: gs://triton_sample_models/22_09
   image.ldPreloadPath:
     type: string
     title: Leave this empty by default. Triton allows users to create custom layers for backend such as TensorRT plugin or Tensorflow custom ops, the compiled shared library must be provided via LD_PRELOAD environment variable.
diff --git a/deploy/gke-marketplace-app/server-deployer/schema.yaml b/deploy/gke-marketplace-app/server-deployer/schema.yaml
index ff7110dae5..cebbc16a48 100644
--- a/deploy/gke-marketplace-app/server-deployer/schema.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/schema.yaml
@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.25.0'
+  publishedVersion: '2.26.0'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.
@@ -89,7 +89,7 @@ properties:
  modelRepositoryPath:
    type: string
    title: Bucket where models are stored. Please make sure the user/service account to create the GKE app has permission to this GCS bucket. Read Triton documentation on configs and formatting details, supporting TensorRT, TensorFlow, Pytorch, Onnx ... etc.
-    default: gs://triton_sample_models/22_08
+    default: gs://triton_sample_models/22_09
  image.ldPreloadPath:
    type: string
    title: Leave this empty by default. Triton allows users to create custom layers for backend such as TensorRT plugin or Tensorflow custom ops, the compiled shared library must be provided via LD_PRELOAD environment variable.
diff --git a/docs/customization_guide/build.md b/docs/customization_guide/build.md
index 81cd401429..cf9ac119e4 100644
--- a/docs/customization_guide/build.md
+++ b/docs/customization_guide/build.md
@@ -173,7 +173,7 @@ $ ./build.py ... --repo-tag=common:<container tag> --repo-tag=core:<container tag>
 If you are building on a release branch then `<container tag>`
 will default to the branch name. For example, if you are building on the
-r22.08 branch, `<container tag>` will default to r22.08. If you are
+r22.09 branch, `<container tag>` will default to r22.09. If you are
 building on any other branch (including the *main* branch) then
 `<container tag>` will default to "main". Therefore, you typically do
 not need to provide `<container tag>` at all (nor the preceding
@@ -334,8 +334,8 @@ python build.py --cmake-dir=/build --build-dir=/tmp/citritonbuild
 If you are building on *main* branch then '<container tag>' will
 default to "main". If you are building on a release branch then
 '<container tag>' will default to the branch name. For example, if you
-are building on the r22.08 branch, '<container tag>' will default to
-r22.08. Therefore, you typically do not need to provide '<container
+are building on the r22.09 branch, '<container tag>' will default to
+r22.09. Therefore, you typically do not need to provide '<container
 tag>' at all (nor the preceding colon). You can use a different
 '<container tag>' for a component to instead use the corresponding
 branch/tag in the build. For example, if you have a branch called
diff --git a/docs/customization_guide/compose.md b/docs/customization_guide/compose.md
index 6a91ee0f76..1c890c9612 100644
--- a/docs/customization_guide/compose.md
+++ b/docs/customization_guide/compose.md
@@ -44,8 +44,8 @@ from source to get more exact customization.
 The `compose.py` script can be found in the [server repository](https://github.com/triton-inference-server/server).
 Simply clone the repository and run `compose.py` to create a custom container.
 Note: Created container version will depend on the branch that was cloned.
-For example branch [r22.08](https://github.com/triton-inference-server/server/tree/r22.08)
-should be used to create a image based on the NGC 22.08 Triton release.
+For example branch [r22.09](https://github.com/triton-inference-server/server/tree/r22.09)
+should be used to create an image based on the NGC 22.09 Triton release.

 `compose.py` provides `--backend`, `--repoagent` options that allow you to
 specify which backends and repository agents to include in the custom image.
@@ -62,7 +62,7 @@ will provide a container `tritonserver` locally. You can access the container wi
 $ docker run -it tritonserver:latest
 ```

-Note: If `compose.py` is run on release versions `r22.08` and earlier,
+Note: If `compose.py` is run on release versions `r22.09` and earlier,
 the resulting container will have DCGM version 2.2.3 installed. This may
 result in different GPU statistic reporting behavior.
@@ -76,19 +76,19 @@ For example, running
 ```
 python3 compose.py --backend tensorflow1 --repoagent checksum
 ```
-on branch [r22.08](https://github.com/triton-inference-server/server/tree/r22.08) pulls:
-- `min` container `nvcr.io/nvidia/tritonserver:22.08-py3-min`
-- `full` container `nvcr.io/nvidia/tritonserver:22.08-py3`
+on branch [r22.09](https://github.com/triton-inference-server/server/tree/r22.09) pulls:
+- `min` container `nvcr.io/nvidia/tritonserver:22.09-py3-min`
+- `full` container `nvcr.io/nvidia/tritonserver:22.09-py3`

 Alternatively, users can specify the version of Triton container to pull from any branch by either:
 1. Adding flag `--container-version ` to branch
 ```
-python3 compose.py --backend tensorflow1 --repoagent checksum --container-version 22.08
+python3 compose.py --backend tensorflow1 --repoagent checksum --container-version 22.09
 ```
 2. Specifying `--image min, --image full,`. The user is responsible for specifying compatible `min` and `full` containers.
 ```
-python3 compose.py --backend tensorflow1 --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:22.08-py3-min --image full,nvcr.io/nvidia/tritonserver:22.08-py3
+python3 compose.py --backend tensorflow1 --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:22.09-py3-min --image full,nvcr.io/nvidia/tritonserver:22.09-py3
 ```
 Method 1 and 2 will result in the same composed container. Furthermore,
 `--image` flag overrides the `--container-version` flag when both are specified.
diff --git a/docs/customization_guide/test.md b/docs/customization_guide/test.md
index 7c464e87e2..e54a2c2c1e 100644
--- a/docs/customization_guide/test.md
+++ b/docs/customization_guide/test.md
@@ -49,7 +49,7 @@ $ ./gen_qa_custom_ops
 ```

 This will create multiple model repositories in /tmp//qa_*
-(for example /tmp/22.08/qa_model_repository). The TensorRT models
+(for example /tmp/22.09/qa_model_repository). The TensorRT models
 will be created for the GPU on the system that CUDA considers device 0
 (zero). If you have multiple GPUs on your system see the documentation
 in the scripts for how to target a specific GPU.
diff --git a/docs/user_guide/custom_operations.md b/docs/user_guide/custom_operations.md
index cc6c0f75fe..f188c77960 100644
--- a/docs/user_guide/custom_operations.md
+++ b/docs/user_guide/custom_operations.md
@@ -64,7 +64,7 @@
 simple way to ensure you are using the correct version of TensorRT is
 to use the [NGC TensorRT container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorrt)
 corresponding to the Triton container. For example, if you are using
-the 22.08 version of Triton, use the 22.08 version of the TensorRT
+the 22.09 version of Triton, use the 22.09 version of the TensorRT
 container.

 ## TensorFlow
@@ -108,7 +108,7 @@
 simple way to ensure you are using the correct version of TensorFlow is
 to use the [NGC TensorFlow container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorflow)
 corresponding to the Triton container. For example, if you are using
-the 22.08 version of Triton, use the 22.08 version of the TensorFlow
+the 22.09 version of Triton, use the 22.09 version of the TensorFlow
 container.

 ## PyTorch
@@ -152,7 +152,7 @@
 simple way to ensure you are using the correct version of PyTorch is
 to use the [NGC PyTorch container](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch)
 corresponding to the Triton container. For example, if you are using
-the 22.08 version of Triton, use the 22.08 version of the PyTorch
+the 22.09 version of Triton, use the 22.09 version of the PyTorch
 container.

 ## ONNX
diff --git a/docs/user_guide/performance_tuning.md b/docs/user_guide/performance_tuning.md
index 014c988a64..35ac2a214d 100644
--- a/docs/user_guide/performance_tuning.md
+++ b/docs/user_guide/performance_tuning.md
@@ -150,10 +150,10 @@ These containers can be started interactively instead, but for the sake of demon

 ```bash
 # Start server container in the background
-docker run -d --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:22.08-py3
+docker run -d --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:22.09-py3

 # Start client container in the background
-docker run -d --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:22.08-py3-sdk
+docker run -d --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:22.09-py3-sdk
 ```

 > **Note**
diff --git a/qa/common/gen_qa_custom_ops b/qa/common/gen_qa_custom_ops
index ace5f92240..a933e125a7 100755
--- a/qa/common/gen_qa_custom_ops
+++ b/qa/common/gen_qa_custom_ops
@@ -37,7 +37,7 @@
 ##
 ############################################################################

-TRITON_VERSION=${TRITON_VERSION:=22.08}
+TRITON_VERSION=${TRITON_VERSION:=22.09}
 TENSORFLOW_IMAGE=${TENSORFLOW_IMAGE:=nvcr.io/nvidia/tensorflow:$TRITON_VERSION-tf2-py3}
 PYTORCH_IMAGE=${PYTORCH_IMAGE:=nvcr.io/nvidia/pytorch:$TRITON_VERSION-py3}
@@ -116,7 +116,7 @@ cat >$HOST_SRCDIR/$PYTSCRIPT <
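
A release bump like this one replaces the same handful of version strings across many files, so a quick way to review it is to grep the working tree for leftovers of the previous release. The snippet below is a minimal sketch and not part of the patch itself; it assumes it is run from the repository root and that 22.08 / 2.25 are the strings being replaced by 22.09 / 2.26, so adjust the values for other releases.

```bash
#!/bin/bash
# Illustrative check for stale version strings after a release bump.
# The version values and the file list are assumptions for this example.
OLD_CONTAINER="22.08"   # previous NGC container tag
OLD_RELEASE="2.25"      # previous Triton release prefix
NEW_CONTAINER="22.09"   # new NGC container tag

# 1. List any remaining references to the old versions
#    (grep exits non-zero when nothing is found).
grep -rnIF --exclude-dir=.git -e "$OLD_CONTAINER" -e "$OLD_RELEASE" . \
  || echo "No stale $OLD_CONTAINER / $OLD_RELEASE references found."

# 2. Spot-check that the new container tag landed in files this patch touches.
grep -n "tritonserver:$NEW_CONTAINER" Dockerfile.sdk README.md \
  deploy/aws/values.yaml docs/user_guide/performance_tuning.md
```

Note that the README hunks in this patch replace 22.07 / 2.24.0 rather than 22.08 / 2.25.0, so a check like this would also need to include those older strings to cover files that were not updated in the previous cycle.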