diff --git a/.github/actions/deploy-ubuntu/action.yml b/.github/actions/deploy-ubuntu/action.yml
index ce2cb1e142..14b944e8f2 100644
--- a/.github/actions/deploy-ubuntu/action.yml
+++ b/.github/actions/deploy-ubuntu/action.yml
@@ -41,18 +41,18 @@ runs:
export ARCH=arm64
export ARCH_CUDA=sbsa
export PREFIX=aarch64-linux-gnu
- export CUDA=cuda-repo-rhel8-12-6-local-12.6.0_560.28.03-1.aarch64.rpm
- export CUDNN=cuda-12-9.3.0.75-1.aarch64
- export NCCL=2.22.3-1+cuda12.5.aarch64
- export NVCOMP=nvcomp-linux-sbsa-4.0.0-cuda12.5
+ export CUDA=cuda-repo-rhel8-12-6-local-12.6.2_560.35.03-1.aarch64.rpm
+ export CUDNN=cuda-12-9.5.1.17-1.aarch64
+ export NCCL=2.23.4-1+cuda12.6.aarch64
+ export NVCOMP=nvcomp-linux-sbsa-4.0.1-cuda12.x
export USERLAND_BUILDME="buildme --aarch64"
elif [[ "$CI_DEPLOY_PLATFORM" == "linux-ppc64le" ]]; then
export ARCH=ppc64el
export ARCH_CUDA=ppc64le
export PREFIX=powerpc64le-linux-gnu
- export CUDA=cuda-repo-rhel8-12-6-local-12.6.0_560.28.03-1.ppc64le.rpm
- export CUDNN=cuda-12-9.3.0.75-1.ppc64le
- export NCCL=2.22.3-1+cuda12.5.ppc64le
+ export CUDA=cuda-repo-rhel8-12-6-local-12.6.2_560.35.03-1.ppc64le.rpm
+ export CUDNN=cuda-12-9.5.1.17-1.ppc64le
+ export NCCL=2.23.4-1+cuda12.6.ppc64le
elif [[ "$CI_DEPLOY_PLATFORM" == "linux-riscv64" ]]; then
export ARCH=riscv64
export PREFIX=riscv64-linux-gnu
@@ -63,10 +63,10 @@ runs:
export ARCH=amd64
export ARCH_CUDA=x86_64
export PREFIX=x86_64-linux-gnu
- export CUDA=cuda-repo-rhel8-12-6-local-12.6.0_560.28.03-1.x86_64.rpm
- export CUDNN=cuda-12-9.3.0.75-1.x86_64
- export NCCL=2.22.3-1+cuda12.5.x86_64
- export NVCOMP=nvcomp-linux-x86_64-4.0.0-cuda12.5
+ export CUDA=cuda-repo-rhel8-12-6-local-12.6.2_560.35.03-1.x86_64.rpm
+ export CUDNN=cuda-12-9.5.1.17-1.x86_64
+ export NCCL=2.23.4-1+cuda12.6.x86_64
+ export NVCOMP=nvcomp-linux-x86_64-4.0.1-cuda12.x
fi
echo "ARCH=$ARCH" >> $GITHUB_ENV
echo "PREFIX=$PREFIX" >> $GITHUB_ENV
@@ -165,7 +165,7 @@ runs:
if [[ -n ${ARCH_CUDA:-} ]] && [[ -n ${CI_DEPLOY_NEED_CUDA:-} ]]; then
echo Installing CUDA, cuDNN, nvCOMP, etc
- curl -LO https://developer.download.nvidia.com/compute/cuda/12.6.0/local_installers/$CUDA
+ curl -LO https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/$CUDA
curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel8/$ARCH_CUDA/libcudnn9-$CUDNN.rpm
curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel8/$ARCH_CUDA/libcudnn9-devel-$CUDNN.rpm
curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel8/$ARCH_CUDA/libnccl-$NCCL.rpm
@@ -183,7 +183,7 @@ runs:
for f in /usr/local/cuda/lib64/libcudnn*so.9.*; do $SUDO ln -sf $f ${f:0:${#f}-4}; $SUDO ln -sf $f ${f:0:${#f}-6}; done
if [[ -n ${NVCOMP:-} ]]; then
- curl -LO https://developer.download.nvidia.com/compute/nvcomp/4.0.0/local_installers/$NVCOMP.tar.gz
+ curl -LO https://developer.download.nvidia.com/compute/nvcomp/4.0.1/local_installers/$NVCOMP.tar.gz
$SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/lib64/ --strip-components=1 lib/ || $SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/lib64/ --strip-components=2 nvcomp/lib/
$SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/include/ --strip-components=1 include/ || $SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/include/ --strip-components=2 nvcomp/include/
rm -f $NVCOMP.tar.gz
@@ -213,16 +213,16 @@ runs:
if [[ "$CI_DEPLOY_PLATFORM" == "linux-arm64" ]] && [[ "$CI_DEPLOY_MODULE" == "tensorrt" ]]; then
echo Installing TensorRT
# python3 -m gdown 1LZRCv4ZAGiDQAu4pvADJIGntq4cGl5tU
- curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/tars/TensorRT-10.3.0.26.Ubuntu-22.04.aarch64-gnu.cuda-12.5.tar.gz
- $SUDO tar -hxvf TensorRT-10.3.0.26.Ubuntu-22.04.aarch64-gnu.cuda-12.5.tar.gz -C /usr/local/
+ curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/tars/TensorRT-10.5.0.18.Ubuntu-24.04.aarch64-gnu.cuda-12.6.tar.gz
+ $SUDO tar -hxvf TensorRT-10.5.0.18.Ubuntu-24.04.aarch64-gnu.cuda-12.6.tar.gz -C /usr/local/
$SUDO ln -sf /usr/local/TensorRT* /usr/local/tensorrt
fi
if [[ "$CI_DEPLOY_PLATFORM" == "linux-x86_64" ]] && [[ "$CI_DEPLOY_MODULE" == "tensorrt" ]]; then
echo Installing TensorRT
# python3 -m gdown 1dVhD-DEYY42QbZe1GXl-vxe3k6KqWGsL
- curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/tars/TensorRT-10.3.0.26.Linux.x86_64-gnu.cuda-12.5.tar.gz
- $SUDO tar -hxvf TensorRT-10.3.0.26.Linux.x86_64-gnu.cuda-12.5.tar.gz -C /usr/local/
+ curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/tars/TensorRT-10.5.0.18.Linux.x86_64-gnu.cuda-12.6.tar.gz
+ $SUDO tar -hxvf TensorRT-10.5.0.18.Linux.x86_64-gnu.cuda-12.6.tar.gz -C /usr/local/
$SUDO ln -sf /usr/local/TensorRT* /usr/local/tensorrt
fi
diff --git a/.github/actions/deploy-windows/action.yml b/.github/actions/deploy-windows/action.yml
index 3783ecc388..1869894efa 100644
--- a/.github/actions/deploy-windows/action.yml
+++ b/.github/actions/deploy-windows/action.yml
@@ -99,22 +99,22 @@ runs:
if "%CI_DEPLOY_PLATFORM%"=="windows-x86_64" if not "%CI_DEPLOY_NEED_CUDA%"=="" (
echo Installing CUDA, cuDNN, nvCOMP, etc
curl -LO https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_522.06_windows.exe
- curl -LO https://developer.download.nvidia.com/compute/cuda/12.6.0/local_installers/cuda_12.6.0_560.76_windows.exe
- curl -LO https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.3.0.75_cuda12-archive.zip
+ curl -LO https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/cuda_12.6.2_560.94_windows.exe
+ curl -LO https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.5.1.17_cuda12-archive.zip
curl -LO http://www.winimage.com/zLibDll/zlib123dllx64.zip
- curl -LO https://developer.download.nvidia.com/compute/nvcomp/4.0.0/local_installers/nvcomp-windows-x86_64-4.0.0-cuda12.5.zip
+ curl -LO https://developer.download.nvidia.com/compute/nvcomp/4.0.1/local_installers/nvcomp-windows-x86_64-4.0.1-cuda12.x.zip
cuda_11.8.0_522.06_windows.exe -s
bash -c "rm -Rf 'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8'"
bash -c "mv 'C:/Program Files/NVIDIA Corporation/NvToolsExt' 'C:/Program Files/NVIDIA Corporation/NvToolsExt_old'"
- cuda_12.6.0_560.76_windows.exe -s
+ cuda_12.6.2_560.94_windows.exe -s
bash -c "mv 'C:/Program Files/NVIDIA Corporation/NvToolsExt_old' 'C:/Program Files/NVIDIA Corporation/NvToolsExt'"
bash -c "ls 'C:/Program Files/NVIDIA Corporation/NvToolsExt'"
- unzip cudnn-windows-x86_64-9.3.0.75_cuda12-archive.zip
+ unzip cudnn-windows-x86_64-9.5.1.17_cuda12-archive.zip
unzip zlib123dllx64.zip
- unzip nvcomp-windows-x86_64-4.0.0-cuda12.5.zip
- move cudnn-windows-x86_64-9.3.0.75_cuda12-archive\bin\*.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin"
- move cudnn-windows-x86_64-9.3.0.75_cuda12-archive\include\*.h "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include"
- move cudnn-windows-x86_64-9.3.0.75_cuda12-archive\lib\x64\*.lib "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\lib\x64"
+ unzip nvcomp-windows-x86_64-4.0.1-cuda12.x.zip
+ move cudnn-windows-x86_64-9.5.1.17_cuda12-archive\bin\*.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin"
+ move cudnn-windows-x86_64-9.5.1.17_cuda12-archive\include\*.h "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include"
+ move cudnn-windows-x86_64-9.5.1.17_cuda12-archive\lib\x64\*.lib "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\lib\x64"
move dll_x64\zlibwapi.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin"
move nvcomp\include\* "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include"
move nvcomp\include\device "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include"
@@ -144,9 +144,9 @@ runs:
if "%CI_DEPLOY_MODULE%"=="tensorrt" (
echo Installing TensorRT
rem python -m gdown 1GfmJ1BKbacLpUU-0i_mGu0sjrAS0Xzzi
- curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/zip/TensorRT-10.3.0.26.Windows.win10.cuda-12.5.zip
- unzip TensorRT-10.3.0.26.Windows.win10.cuda-12.5.zip
- move TensorRT-10.3.0.26 "%ProgramFiles%\NVIDIA GPU Computing Toolkit\TensorRT"
+ curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/zip/TensorRT-10.5.0.18.Windows.win10.cuda-12.6.zip
+ unzip TensorRT-10.5.0.18.Windows.win10.cuda-12.6.zip
+ move TensorRT-10.5.0.18 "%ProgramFiles%\NVIDIA GPU Computing Toolkit\TensorRT"
)
if "%CI_DEPLOY_MODULE%"=="mkl" (
@@ -221,7 +221,7 @@ runs:
set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6"
set "CUDA_PATH_V12_6=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6"
set "PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\libnvvp;%PATH%"
- echo CUDA Version 12.6.0>"%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\version.txt"
+ echo CUDA Version 12.6.2>"%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\version.txt"
)
set "CCACHE_DIR=%USERPROFILE%\ccache"
set "PATH=C:\hostedtoolcache\windows\Python\3.9.13\x64;C:\msys64\%MSYSTEM%\bin;C:\msys64\usr\bin;%ProgramFiles%\apache-maven-3.6.3\bin;%PATH%"
diff --git a/.github/workflows/tritonserver.yml b/.github/workflows/tritonserver.yml
index d04f7b44a6..9c1cfa0c28 100644
--- a/.github/workflows/tritonserver.yml
+++ b/.github/workflows/tritonserver.yml
@@ -19,6 +19,6 @@ env:
jobs:
linux-x86_64:
runs-on: ubuntu-20.04
- container: nvcr.io/nvidia/tritonserver:24.07-tf2-python-py3
+ container: nvcr.io/nvidia/tritonserver:24.09-tf2-python-py3
steps:
- uses: bytedeco/javacpp-presets/.github/actions/deploy-ubuntu@actions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5a91468477..78f7f723aa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,7 +9,7 @@
* Build FFmpeg with zimg to enable zscale filter ([pull #1481](https://github.com/bytedeco/javacpp-presets/pull/1481))
* Enable PulseAudio support for FFmpeg on Linux ([pull #1472](https://github.com/bytedeco/javacpp-presets/pull/1472))
* Virtualize `btCollisionWorld`, `btOverlapFilterCallback`, `btOverlapCallback` from Bullet Physics SDK ([pull #1475](https://github.com/bytedeco/javacpp-presets/pull/1475))
- * Upgrade presets for OpenCV 4.10.0, FFmpeg 7.1, Spinnaker 4.0.0.116 ([pull #1524](https://github.com/bytedeco/javacpp-presets/pull/1524)), MKL 2024.2, DNNL 3.6, OpenBLAS 0.3.28, CMINPACK 1.3.11, GSL 2.8, CPython 3.13.0, NumPy 2.1.2, SciPy 1.14.1, LLVM 19.1.2, LibRaw 0.21.2 ([pull #1520](https://github.com/bytedeco/javacpp-presets/pull/1520)), Leptonica 1.85.0, Tesseract 5.4.1, libffi 3.4.6, CUDA 12.6.0, cuDNN 9.3.0, NCCL 2.22.3, nvCOMP 4.0.0, OpenCL 3.0.16, NVIDIA Video Codec SDK 12.2.72, PyTorch 2.5.0 ([pull #1466](https://github.com/bytedeco/javacpp-presets/pull/1466)), SentencePiece 0.2.0, TensorFlow Lite 2.18.0, TensorRT 10.3.0.26, Triton Inference Server 2.48.0, ONNX 1.17.0, ONNX Runtime 1.19.2, TVM 0.18.0, and their dependencies
+ * Upgrade presets for OpenCV 4.10.0, FFmpeg 7.1, Spinnaker 4.0.0.116 ([pull #1524](https://github.com/bytedeco/javacpp-presets/pull/1524)), MKL 2024.2, DNNL 3.6, OpenBLAS 0.3.28, CMINPACK 1.3.11, GSL 2.8, CPython 3.13.0, NumPy 2.1.2, SciPy 1.14.1, LLVM 19.1.2, LibRaw 0.21.2 ([pull #1520](https://github.com/bytedeco/javacpp-presets/pull/1520)), Leptonica 1.85.0, Tesseract 5.4.1, libffi 3.4.6, CUDA 12.6.2, cuDNN 9.5.1, NCCL 2.23.4, nvCOMP 4.0.1, OpenCL 3.0.16, NVIDIA Video Codec SDK 12.2.72, PyTorch 2.5.0 ([pull #1466](https://github.com/bytedeco/javacpp-presets/pull/1466)), SentencePiece 0.2.0, TensorFlow Lite 2.18.0, TensorRT 10.5.0.18, Triton Inference Server 2.50.0, ONNX 1.17.0, ONNX Runtime 1.19.2, TVM 0.18.0, and their dependencies
### January 29, 2024 version 1.5.10
* Introduce `macosx-arm64` builds for PyTorch ([pull #1463](https://github.com/bytedeco/javacpp-presets/pull/1463))
diff --git a/README.md b/README.md
index a1e3e768d4..8f477741dc 100644
--- a/README.md
+++ b/README.md
@@ -217,8 +217,8 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip
* Caffe 1.0 https://github.com/BVLC/caffe
* OpenPose 1.7.0 https://github.com/CMU-Perceptual-Computing-Lab/openpose
* CUDA 12.6.x https://developer.nvidia.com/cuda-downloads
- * cuDNN 9.3.x https://developer.nvidia.com/cudnn
- * NCCL 2.22.x https://developer.nvidia.com/nccl
+ * cuDNN 9.5.x https://developer.nvidia.com/cudnn
+ * NCCL 2.23.x https://developer.nvidia.com/nccl
* nvCOMP 4.0.x https://developer.nvidia.com/nvcomp
* NVIDIA Video Codec SDK 12.2.x https://developer.nvidia.com/nvidia-video-codec-sdk
* OpenCL 3.0.x https://github.com/KhronosGroup/OpenCL-ICD-Loader
@@ -227,8 +227,8 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip
* SentencePiece 0.2.0 https://github.com/google/sentencepiece
* TensorFlow 1.15.x https://github.com/tensorflow/tensorflow
* TensorFlow Lite 2.18.x https://github.com/tensorflow/tensorflow
- * TensorRT 10.3.x https://developer.nvidia.com/tensorrt
- * Triton Inference Server 2.48.x https://developer.nvidia.com/nvidia-triton-inference-server
+ * TensorRT 10.5.x https://developer.nvidia.com/tensorrt
+ * Triton Inference Server 2.50.x https://developer.nvidia.com/nvidia-triton-inference-server
* The Arcade Learning Environment 0.8.x https://github.com/mgbellemare/Arcade-Learning-Environment
* DepthAI 2.24.x https://github.com/luxonis/depthai-core
* ONNX 1.17.x https://github.com/onnx/onnx
diff --git a/cuda/README.md b/cuda/README.md
index a969df66ec..598d815f7b 100644
--- a/cuda/README.md
+++ b/cuda/README.md
@@ -25,10 +25,10 @@ Introduction
------------
This directory contains the JavaCPP Presets module for:
- * CUDA 12.6.0 https://developer.nvidia.com/cuda-zone
- * cuDNN 9.3.0 https://developer.nvidia.com/cudnn
- * NCCL 2.22.3 https://developer.nvidia.com/nccl
- * nvCOMP 4.0.0 https://developer.nvidia.com/nvcomp
+ * CUDA 12.6.2 https://developer.nvidia.com/cuda-zone
+ * cuDNN 9.5.1 https://developer.nvidia.com/cudnn
+ * NCCL 2.23.4 https://developer.nvidia.com/nccl
+ * nvCOMP 4.0.1 https://developer.nvidia.com/nvcomp
Please refer to the parent README.md file for more detailed information about the JavaCPP Presets.
@@ -67,14 +67,14 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic
org.bytedeco
cuda-platform
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/cuda/platform/pom.xml b/cuda/platform/pom.xml
index f374c4fc33..b84a14f18a 100644
--- a/cuda/platform/pom.xml
+++ b/cuda/platform/pom.xml
@@ -12,7 +12,7 @@
org.bytedeco
cuda-platform
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
JavaCPP Presets Platform for CUDA
diff --git a/cuda/platform/redist/pom.xml b/cuda/platform/redist/pom.xml
index 7885ff8e03..056aa0ed30 100644
--- a/cuda/platform/redist/pom.xml
+++ b/cuda/platform/redist/pom.xml
@@ -12,7 +12,7 @@
org.bytedeco
cuda-platform-redist
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
JavaCPP Presets Platform Redist for CUDA
diff --git a/cuda/pom.xml b/cuda/pom.xml
index d0bb5940d5..48401ddffe 100644
--- a/cuda/pom.xml
+++ b/cuda/pom.xml
@@ -11,7 +11,7 @@
org.bytedeco
cuda
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
JavaCPP Presets for CUDA
diff --git a/cuda/samples/pom.xml b/cuda/samples/pom.xml
index 0edae05bfd..8a9cc92880 100644
--- a/cuda/samples/pom.xml
+++ b/cuda/samples/pom.xml
@@ -12,14 +12,14 @@
org.bytedeco
cuda-platform
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/cublas/cublasLtMatmulHeuristicResult_t.java b/cuda/src/gen/java/org/bytedeco/cuda/cublas/cublasLtMatmulHeuristicResult_t.java
index 6be405416e..d672e808b2 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/cublas/cublasLtMatmulHeuristicResult_t.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/cublas/cublasLtMatmulHeuristicResult_t.java
@@ -13,7 +13,7 @@
import static org.bytedeco.cuda.global.cublas.*;
-/** Results structure used by cublasLtMatmulGetAlgo.
+/** Results structure used by cublasLtMatmulAlgoGetHeuristic
*
* Holds returned configured algo descriptor and its runtime properties.
*/
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_ActivityMarkerData.java b/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_ActivityMarkerData.java
index 7531cf23df..b0e5b26c98 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_ActivityMarkerData.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_ActivityMarkerData.java
@@ -16,6 +16,8 @@
/**
* \brief The activity record providing detailed information for a marker.
*
+ * User must enable CUPTI_ACTIVITY_KIND_MARKER as well
+ * to get records for marker data.
* The marker data contains color, payload, and category.
* (CUPTI_ACTIVITY_KIND_MARKER_DATA).
*/
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_CallbackData.java b/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_CallbackData.java
index 0e79317c8d..533a9e0407 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_CallbackData.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_CallbackData.java
@@ -13,6 +13,7 @@
import static org.bytedeco.cuda.global.cupti.*;
+
/**
* \brief Data passed into a runtime or driver API callback function.
*
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cublas.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cublas.java
index 04a7bfcb98..8faa8693ef 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/cublas.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cublas.java
@@ -103,8 +103,8 @@ public class cublas extends org.bytedeco.cuda.presets.cublas {
public static final int CUBLAS_VER_MAJOR = 12;
public static final int CUBLAS_VER_MINOR = 6;
-public static final int CUBLAS_VER_PATCH = 0;
-public static final int CUBLAS_VER_BUILD = 22;
+public static final int CUBLAS_VER_PATCH = 3;
+public static final int CUBLAS_VER_BUILD = 3;
public static final int CUBLAS_VERSION = (CUBLAS_VER_MAJOR * 10000 + CUBLAS_VER_MINOR * 100 + CUBLAS_VER_PATCH);
/* CUBLAS status type returns */
@@ -12713,7 +12713,602 @@ public static native void cublasZtrmm(@Cast("char") byte side,
CUBLASLT_MATMUL_TILE_128x96 = 33,
CUBLASLT_MATMUL_TILE_32x256 = 34,
CUBLASLT_MATMUL_TILE_256x32 = 35,
- CUBLASLT_MATMUL_TILE_END = 36;
+ CUBLASLT_MATMUL_TILE_8x128 = 36,
+ CUBLASLT_MATMUL_TILE_8x192 = 37,
+ CUBLASLT_MATMUL_TILE_8x256 = 38,
+ CUBLASLT_MATMUL_TILE_8x320 = 39,
+ CUBLASLT_MATMUL_TILE_8x384 = 40,
+ CUBLASLT_MATMUL_TILE_8x448 = 41,
+ CUBLASLT_MATMUL_TILE_8x512 = 42,
+ CUBLASLT_MATMUL_TILE_8x576 = 43,
+ CUBLASLT_MATMUL_TILE_8x640 = 44,
+ CUBLASLT_MATMUL_TILE_8x704 = 45,
+ CUBLASLT_MATMUL_TILE_8x768 = 46,
+ CUBLASLT_MATMUL_TILE_16x64 = 47,
+ CUBLASLT_MATMUL_TILE_16x128 = 48,
+ CUBLASLT_MATMUL_TILE_16x192 = 49,
+ CUBLASLT_MATMUL_TILE_16x256 = 50,
+ CUBLASLT_MATMUL_TILE_16x320 = 51,
+ CUBLASLT_MATMUL_TILE_16x384 = 52,
+ CUBLASLT_MATMUL_TILE_16x448 = 53,
+ CUBLASLT_MATMUL_TILE_16x512 = 54,
+ CUBLASLT_MATMUL_TILE_16x576 = 55,
+ CUBLASLT_MATMUL_TILE_16x640 = 56,
+ CUBLASLT_MATMUL_TILE_16x704 = 57,
+ CUBLASLT_MATMUL_TILE_16x768 = 58,
+ CUBLASLT_MATMUL_TILE_24x64 = 59,
+ CUBLASLT_MATMUL_TILE_24x128 = 60,
+ CUBLASLT_MATMUL_TILE_24x192 = 61,
+ CUBLASLT_MATMUL_TILE_24x256 = 62,
+ CUBLASLT_MATMUL_TILE_24x320 = 63,
+ CUBLASLT_MATMUL_TILE_24x384 = 64,
+ CUBLASLT_MATMUL_TILE_24x448 = 65,
+ CUBLASLT_MATMUL_TILE_24x512 = 66,
+ CUBLASLT_MATMUL_TILE_24x576 = 67,
+ CUBLASLT_MATMUL_TILE_24x640 = 68,
+ CUBLASLT_MATMUL_TILE_24x704 = 69,
+ CUBLASLT_MATMUL_TILE_24x768 = 70,
+ CUBLASLT_MATMUL_TILE_32x192 = 71,
+ CUBLASLT_MATMUL_TILE_32x320 = 72,
+ CUBLASLT_MATMUL_TILE_32x384 = 73,
+ CUBLASLT_MATMUL_TILE_32x448 = 74,
+ CUBLASLT_MATMUL_TILE_32x512 = 75,
+ CUBLASLT_MATMUL_TILE_32x576 = 76,
+ CUBLASLT_MATMUL_TILE_32x640 = 77,
+ CUBLASLT_MATMUL_TILE_32x704 = 78,
+ CUBLASLT_MATMUL_TILE_32x768 = 79,
+ CUBLASLT_MATMUL_TILE_40x64 = 80,
+ CUBLASLT_MATMUL_TILE_40x128 = 81,
+ CUBLASLT_MATMUL_TILE_40x192 = 82,
+ CUBLASLT_MATMUL_TILE_40x256 = 83,
+ CUBLASLT_MATMUL_TILE_40x320 = 84,
+ CUBLASLT_MATMUL_TILE_40x384 = 85,
+ CUBLASLT_MATMUL_TILE_40x448 = 86,
+ CUBLASLT_MATMUL_TILE_40x512 = 87,
+ CUBLASLT_MATMUL_TILE_40x576 = 88,
+ CUBLASLT_MATMUL_TILE_40x640 = 89,
+ CUBLASLT_MATMUL_TILE_40x704 = 90,
+ CUBLASLT_MATMUL_TILE_40x768 = 91,
+ CUBLASLT_MATMUL_TILE_48x64 = 92,
+ CUBLASLT_MATMUL_TILE_48x128 = 93,
+ CUBLASLT_MATMUL_TILE_48x192 = 94,
+ CUBLASLT_MATMUL_TILE_48x256 = 95,
+ CUBLASLT_MATMUL_TILE_48x320 = 96,
+ CUBLASLT_MATMUL_TILE_48x384 = 97,
+ CUBLASLT_MATMUL_TILE_48x448 = 98,
+ CUBLASLT_MATMUL_TILE_48x512 = 99,
+ CUBLASLT_MATMUL_TILE_48x576 = 100,
+ CUBLASLT_MATMUL_TILE_48x640 = 101,
+ CUBLASLT_MATMUL_TILE_48x704 = 102,
+ CUBLASLT_MATMUL_TILE_48x768 = 103,
+ CUBLASLT_MATMUL_TILE_56x64 = 104,
+ CUBLASLT_MATMUL_TILE_56x128 = 105,
+ CUBLASLT_MATMUL_TILE_56x192 = 106,
+ CUBLASLT_MATMUL_TILE_56x256 = 107,
+ CUBLASLT_MATMUL_TILE_56x320 = 108,
+ CUBLASLT_MATMUL_TILE_56x384 = 109,
+ CUBLASLT_MATMUL_TILE_56x448 = 110,
+ CUBLASLT_MATMUL_TILE_56x512 = 111,
+ CUBLASLT_MATMUL_TILE_56x576 = 112,
+ CUBLASLT_MATMUL_TILE_56x640 = 113,
+ CUBLASLT_MATMUL_TILE_56x704 = 114,
+ CUBLASLT_MATMUL_TILE_56x768 = 115,
+ CUBLASLT_MATMUL_TILE_64x192 = 116,
+ CUBLASLT_MATMUL_TILE_64x320 = 117,
+ CUBLASLT_MATMUL_TILE_64x384 = 118,
+ CUBLASLT_MATMUL_TILE_64x448 = 119,
+ CUBLASLT_MATMUL_TILE_64x576 = 120,
+ CUBLASLT_MATMUL_TILE_64x640 = 121,
+ CUBLASLT_MATMUL_TILE_64x704 = 122,
+ CUBLASLT_MATMUL_TILE_64x768 = 123,
+ CUBLASLT_MATMUL_TILE_72x64 = 124,
+ CUBLASLT_MATMUL_TILE_72x128 = 125,
+ CUBLASLT_MATMUL_TILE_72x192 = 126,
+ CUBLASLT_MATMUL_TILE_72x256 = 127,
+ CUBLASLT_MATMUL_TILE_72x320 = 128,
+ CUBLASLT_MATMUL_TILE_72x384 = 129,
+ CUBLASLT_MATMUL_TILE_72x448 = 130,
+ CUBLASLT_MATMUL_TILE_72x512 = 131,
+ CUBLASLT_MATMUL_TILE_72x576 = 132,
+ CUBLASLT_MATMUL_TILE_72x640 = 133,
+ CUBLASLT_MATMUL_TILE_80x64 = 134,
+ CUBLASLT_MATMUL_TILE_80x128 = 135,
+ CUBLASLT_MATMUL_TILE_80x192 = 136,
+ CUBLASLT_MATMUL_TILE_80x256 = 137,
+ CUBLASLT_MATMUL_TILE_80x320 = 138,
+ CUBLASLT_MATMUL_TILE_80x384 = 139,
+ CUBLASLT_MATMUL_TILE_80x448 = 140,
+ CUBLASLT_MATMUL_TILE_80x512 = 141,
+ CUBLASLT_MATMUL_TILE_80x576 = 142,
+ CUBLASLT_MATMUL_TILE_88x64 = 143,
+ CUBLASLT_MATMUL_TILE_88x128 = 144,
+ CUBLASLT_MATMUL_TILE_88x192 = 145,
+ CUBLASLT_MATMUL_TILE_88x256 = 146,
+ CUBLASLT_MATMUL_TILE_88x320 = 147,
+ CUBLASLT_MATMUL_TILE_88x384 = 148,
+ CUBLASLT_MATMUL_TILE_88x448 = 149,
+ CUBLASLT_MATMUL_TILE_88x512 = 150,
+ CUBLASLT_MATMUL_TILE_96x192 = 151,
+ CUBLASLT_MATMUL_TILE_96x256 = 152,
+ CUBLASLT_MATMUL_TILE_96x320 = 153,
+ CUBLASLT_MATMUL_TILE_96x384 = 154,
+ CUBLASLT_MATMUL_TILE_96x448 = 155,
+ CUBLASLT_MATMUL_TILE_96x512 = 156,
+ CUBLASLT_MATMUL_TILE_104x64 = 157,
+ CUBLASLT_MATMUL_TILE_104x128 = 158,
+ CUBLASLT_MATMUL_TILE_104x192 = 159,
+ CUBLASLT_MATMUL_TILE_104x256 = 160,
+ CUBLASLT_MATMUL_TILE_104x320 = 161,
+ CUBLASLT_MATMUL_TILE_104x384 = 162,
+ CUBLASLT_MATMUL_TILE_104x448 = 163,
+ CUBLASLT_MATMUL_TILE_112x64 = 164,
+ CUBLASLT_MATMUL_TILE_112x128 = 165,
+ CUBLASLT_MATMUL_TILE_112x192 = 166,
+ CUBLASLT_MATMUL_TILE_112x256 = 167,
+ CUBLASLT_MATMUL_TILE_112x320 = 168,
+ CUBLASLT_MATMUL_TILE_112x384 = 169,
+ CUBLASLT_MATMUL_TILE_120x64 = 170,
+ CUBLASLT_MATMUL_TILE_120x128 = 171,
+ CUBLASLT_MATMUL_TILE_120x192 = 172,
+ CUBLASLT_MATMUL_TILE_120x256 = 173,
+ CUBLASLT_MATMUL_TILE_120x320 = 174,
+ CUBLASLT_MATMUL_TILE_120x384 = 175,
+ CUBLASLT_MATMUL_TILE_128x320 = 176,
+ CUBLASLT_MATMUL_TILE_128x384 = 177,
+ CUBLASLT_MATMUL_TILE_136x64 = 178,
+ CUBLASLT_MATMUL_TILE_136x128 = 179,
+ CUBLASLT_MATMUL_TILE_136x192 = 180,
+ CUBLASLT_MATMUL_TILE_136x256 = 181,
+ CUBLASLT_MATMUL_TILE_136x320 = 182,
+ CUBLASLT_MATMUL_TILE_144x64 = 183,
+ CUBLASLT_MATMUL_TILE_144x128 = 184,
+ CUBLASLT_MATMUL_TILE_144x192 = 185,
+ CUBLASLT_MATMUL_TILE_144x256 = 186,
+ CUBLASLT_MATMUL_TILE_144x320 = 187,
+ CUBLASLT_MATMUL_TILE_152x64 = 188,
+ CUBLASLT_MATMUL_TILE_152x128 = 189,
+ CUBLASLT_MATMUL_TILE_152x192 = 190,
+ CUBLASLT_MATMUL_TILE_152x256 = 191,
+ CUBLASLT_MATMUL_TILE_152x320 = 192,
+ CUBLASLT_MATMUL_TILE_160x64 = 193,
+ CUBLASLT_MATMUL_TILE_160x192 = 194,
+ CUBLASLT_MATMUL_TILE_160x256 = 195,
+ CUBLASLT_MATMUL_TILE_168x64 = 196,
+ CUBLASLT_MATMUL_TILE_168x128 = 197,
+ CUBLASLT_MATMUL_TILE_168x192 = 198,
+ CUBLASLT_MATMUL_TILE_168x256 = 199,
+ CUBLASLT_MATMUL_TILE_176x64 = 200,
+ CUBLASLT_MATMUL_TILE_176x128 = 201,
+ CUBLASLT_MATMUL_TILE_176x192 = 202,
+ CUBLASLT_MATMUL_TILE_176x256 = 203,
+ CUBLASLT_MATMUL_TILE_184x64 = 204,
+ CUBLASLT_MATMUL_TILE_184x128 = 205,
+ CUBLASLT_MATMUL_TILE_184x192 = 206,
+ CUBLASLT_MATMUL_TILE_184x256 = 207,
+ CUBLASLT_MATMUL_TILE_192x64 = 208,
+ CUBLASLT_MATMUL_TILE_192x192 = 209,
+ CUBLASLT_MATMUL_TILE_192x256 = 210,
+ CUBLASLT_MATMUL_TILE_200x64 = 211,
+ CUBLASLT_MATMUL_TILE_200x128 = 212,
+ CUBLASLT_MATMUL_TILE_200x192 = 213,
+ CUBLASLT_MATMUL_TILE_208x64 = 214,
+ CUBLASLT_MATMUL_TILE_208x128 = 215,
+ CUBLASLT_MATMUL_TILE_208x192 = 216,
+ CUBLASLT_MATMUL_TILE_216x64 = 217,
+ CUBLASLT_MATMUL_TILE_216x128 = 218,
+ CUBLASLT_MATMUL_TILE_216x192 = 219,
+ CUBLASLT_MATMUL_TILE_224x64 = 220,
+ CUBLASLT_MATMUL_TILE_224x128 = 221,
+ CUBLASLT_MATMUL_TILE_224x192 = 222,
+ CUBLASLT_MATMUL_TILE_232x64 = 223,
+ CUBLASLT_MATMUL_TILE_232x128 = 224,
+ CUBLASLT_MATMUL_TILE_232x192 = 225,
+ CUBLASLT_MATMUL_TILE_240x64 = 226,
+ CUBLASLT_MATMUL_TILE_240x128 = 227,
+ CUBLASLT_MATMUL_TILE_240x192 = 228,
+ CUBLASLT_MATMUL_TILE_248x64 = 229,
+ CUBLASLT_MATMUL_TILE_248x128 = 230,
+ CUBLASLT_MATMUL_TILE_248x192 = 231,
+ CUBLASLT_MATMUL_TILE_256x192 = 232,
+ CUBLASLT_MATMUL_TILE_264x64 = 233,
+ CUBLASLT_MATMUL_TILE_264x128 = 234,
+ CUBLASLT_MATMUL_TILE_272x64 = 235,
+ CUBLASLT_MATMUL_TILE_272x128 = 236,
+ CUBLASLT_MATMUL_TILE_280x64 = 237,
+ CUBLASLT_MATMUL_TILE_280x128 = 238,
+ CUBLASLT_MATMUL_TILE_288x64 = 239,
+ CUBLASLT_MATMUL_TILE_288x128 = 240,
+ CUBLASLT_MATMUL_TILE_296x64 = 241,
+ CUBLASLT_MATMUL_TILE_296x128 = 242,
+ CUBLASLT_MATMUL_TILE_304x64 = 243,
+ CUBLASLT_MATMUL_TILE_304x128 = 244,
+ CUBLASLT_MATMUL_TILE_312x64 = 245,
+ CUBLASLT_MATMUL_TILE_312x128 = 246,
+ CUBLASLT_MATMUL_TILE_320x64 = 247,
+ CUBLASLT_MATMUL_TILE_320x128 = 248,
+ CUBLASLT_MATMUL_TILE_328x64 = 249,
+ CUBLASLT_MATMUL_TILE_328x128 = 250,
+ CUBLASLT_MATMUL_TILE_336x64 = 251,
+ CUBLASLT_MATMUL_TILE_336x128 = 252,
+ CUBLASLT_MATMUL_TILE_344x64 = 253,
+ CUBLASLT_MATMUL_TILE_344x128 = 254,
+ CUBLASLT_MATMUL_TILE_352x64 = 255,
+ CUBLASLT_MATMUL_TILE_352x128 = 256,
+ CUBLASLT_MATMUL_TILE_360x64 = 257,
+ CUBLASLT_MATMUL_TILE_360x128 = 258,
+ CUBLASLT_MATMUL_TILE_368x64 = 259,
+ CUBLASLT_MATMUL_TILE_368x128 = 260,
+ CUBLASLT_MATMUL_TILE_376x64 = 261,
+ CUBLASLT_MATMUL_TILE_376x128 = 262,
+ CUBLASLT_MATMUL_TILE_384x64 = 263,
+ CUBLASLT_MATMUL_TILE_384x128 = 264,
+ CUBLASLT_MATMUL_TILE_392x64 = 265,
+ CUBLASLT_MATMUL_TILE_400x64 = 266,
+ CUBLASLT_MATMUL_TILE_408x64 = 267,
+ CUBLASLT_MATMUL_TILE_416x64 = 268,
+ CUBLASLT_MATMUL_TILE_424x64 = 269,
+ CUBLASLT_MATMUL_TILE_432x64 = 270,
+ CUBLASLT_MATMUL_TILE_440x64 = 271,
+ CUBLASLT_MATMUL_TILE_448x64 = 272,
+ CUBLASLT_MATMUL_TILE_456x64 = 273,
+ CUBLASLT_MATMUL_TILE_464x64 = 274,
+ CUBLASLT_MATMUL_TILE_472x64 = 275,
+ CUBLASLT_MATMUL_TILE_480x64 = 276,
+ CUBLASLT_MATMUL_TILE_488x64 = 277,
+ CUBLASLT_MATMUL_TILE_496x64 = 278,
+ CUBLASLT_MATMUL_TILE_504x64 = 279,
+ CUBLASLT_MATMUL_TILE_520x64 = 280,
+ CUBLASLT_MATMUL_TILE_528x64 = 281,
+ CUBLASLT_MATMUL_TILE_536x64 = 282,
+ CUBLASLT_MATMUL_TILE_544x64 = 283,
+ CUBLASLT_MATMUL_TILE_552x64 = 284,
+ CUBLASLT_MATMUL_TILE_560x64 = 285,
+ CUBLASLT_MATMUL_TILE_568x64 = 286,
+ CUBLASLT_MATMUL_TILE_576x64 = 287,
+ CUBLASLT_MATMUL_TILE_584x64 = 288,
+ CUBLASLT_MATMUL_TILE_592x64 = 289,
+ CUBLASLT_MATMUL_TILE_600x64 = 290,
+ CUBLASLT_MATMUL_TILE_608x64 = 291,
+ CUBLASLT_MATMUL_TILE_616x64 = 292,
+ CUBLASLT_MATMUL_TILE_624x64 = 293,
+ CUBLASLT_MATMUL_TILE_632x64 = 294,
+ CUBLASLT_MATMUL_TILE_640x64 = 295,
+ CUBLASLT_MATMUL_TILE_648x64 = 296,
+ CUBLASLT_MATMUL_TILE_656x64 = 297,
+ CUBLASLT_MATMUL_TILE_664x64 = 298,
+ CUBLASLT_MATMUL_TILE_672x64 = 299,
+ CUBLASLT_MATMUL_TILE_680x64 = 300,
+ CUBLASLT_MATMUL_TILE_688x64 = 301,
+ CUBLASLT_MATMUL_TILE_696x64 = 302,
+ CUBLASLT_MATMUL_TILE_704x64 = 303,
+ CUBLASLT_MATMUL_TILE_712x64 = 304,
+ CUBLASLT_MATMUL_TILE_720x64 = 305,
+ CUBLASLT_MATMUL_TILE_728x64 = 306,
+ CUBLASLT_MATMUL_TILE_736x64 = 307,
+ CUBLASLT_MATMUL_TILE_744x64 = 308,
+ CUBLASLT_MATMUL_TILE_752x64 = 309,
+ CUBLASLT_MATMUL_TILE_760x64 = 310,
+ CUBLASLT_MATMUL_TILE_768x64 = 311,
+ CUBLASLT_MATMUL_TILE_64x16 = 312,
+ CUBLASLT_MATMUL_TILE_64x24 = 313,
+ CUBLASLT_MATMUL_TILE_64x40 = 314,
+ CUBLASLT_MATMUL_TILE_64x48 = 315,
+ CUBLASLT_MATMUL_TILE_64x56 = 316,
+ CUBLASLT_MATMUL_TILE_64x72 = 317,
+ CUBLASLT_MATMUL_TILE_64x80 = 318,
+ CUBLASLT_MATMUL_TILE_64x88 = 319,
+ CUBLASLT_MATMUL_TILE_64x104 = 320,
+ CUBLASLT_MATMUL_TILE_64x112 = 321,
+ CUBLASLT_MATMUL_TILE_64x120 = 322,
+ CUBLASLT_MATMUL_TILE_64x136 = 323,
+ CUBLASLT_MATMUL_TILE_64x144 = 324,
+ CUBLASLT_MATMUL_TILE_64x152 = 325,
+ CUBLASLT_MATMUL_TILE_64x160 = 326,
+ CUBLASLT_MATMUL_TILE_64x168 = 327,
+ CUBLASLT_MATMUL_TILE_64x176 = 328,
+ CUBLASLT_MATMUL_TILE_64x184 = 329,
+ CUBLASLT_MATMUL_TILE_64x200 = 330,
+ CUBLASLT_MATMUL_TILE_64x208 = 331,
+ CUBLASLT_MATMUL_TILE_64x216 = 332,
+ CUBLASLT_MATMUL_TILE_64x224 = 333,
+ CUBLASLT_MATMUL_TILE_64x232 = 334,
+ CUBLASLT_MATMUL_TILE_64x240 = 335,
+ CUBLASLT_MATMUL_TILE_64x248 = 336,
+ CUBLASLT_MATMUL_TILE_64x264 = 337,
+ CUBLASLT_MATMUL_TILE_64x272 = 338,
+ CUBLASLT_MATMUL_TILE_64x280 = 339,
+ CUBLASLT_MATMUL_TILE_64x288 = 340,
+ CUBLASLT_MATMUL_TILE_64x296 = 341,
+ CUBLASLT_MATMUL_TILE_64x304 = 342,
+ CUBLASLT_MATMUL_TILE_64x312 = 343,
+ CUBLASLT_MATMUL_TILE_64x328 = 344,
+ CUBLASLT_MATMUL_TILE_64x336 = 345,
+ CUBLASLT_MATMUL_TILE_64x344 = 346,
+ CUBLASLT_MATMUL_TILE_64x352 = 347,
+ CUBLASLT_MATMUL_TILE_64x360 = 348,
+ CUBLASLT_MATMUL_TILE_64x368 = 349,
+ CUBLASLT_MATMUL_TILE_64x376 = 350,
+ CUBLASLT_MATMUL_TILE_64x392 = 351,
+ CUBLASLT_MATMUL_TILE_64x400 = 352,
+ CUBLASLT_MATMUL_TILE_64x408 = 353,
+ CUBLASLT_MATMUL_TILE_64x416 = 354,
+ CUBLASLT_MATMUL_TILE_64x424 = 355,
+ CUBLASLT_MATMUL_TILE_64x432 = 356,
+ CUBLASLT_MATMUL_TILE_64x440 = 357,
+ CUBLASLT_MATMUL_TILE_64x456 = 358,
+ CUBLASLT_MATMUL_TILE_64x464 = 359,
+ CUBLASLT_MATMUL_TILE_64x472 = 360,
+ CUBLASLT_MATMUL_TILE_64x480 = 361,
+ CUBLASLT_MATMUL_TILE_64x488 = 362,
+ CUBLASLT_MATMUL_TILE_64x496 = 363,
+ CUBLASLT_MATMUL_TILE_64x504 = 364,
+ CUBLASLT_MATMUL_TILE_64x520 = 365,
+ CUBLASLT_MATMUL_TILE_64x528 = 366,
+ CUBLASLT_MATMUL_TILE_64x536 = 367,
+ CUBLASLT_MATMUL_TILE_64x544 = 368,
+ CUBLASLT_MATMUL_TILE_64x552 = 369,
+ CUBLASLT_MATMUL_TILE_64x560 = 370,
+ CUBLASLT_MATMUL_TILE_64x568 = 371,
+ CUBLASLT_MATMUL_TILE_64x584 = 372,
+ CUBLASLT_MATMUL_TILE_64x592 = 373,
+ CUBLASLT_MATMUL_TILE_64x600 = 374,
+ CUBLASLT_MATMUL_TILE_64x608 = 375,
+ CUBLASLT_MATMUL_TILE_64x616 = 376,
+ CUBLASLT_MATMUL_TILE_64x624 = 377,
+ CUBLASLT_MATMUL_TILE_64x632 = 378,
+ CUBLASLT_MATMUL_TILE_64x648 = 379,
+ CUBLASLT_MATMUL_TILE_64x656 = 380,
+ CUBLASLT_MATMUL_TILE_64x664 = 381,
+ CUBLASLT_MATMUL_TILE_64x672 = 382,
+ CUBLASLT_MATMUL_TILE_64x680 = 383,
+ CUBLASLT_MATMUL_TILE_64x688 = 384,
+ CUBLASLT_MATMUL_TILE_64x696 = 385,
+ CUBLASLT_MATMUL_TILE_64x712 = 386,
+ CUBLASLT_MATMUL_TILE_64x720 = 387,
+ CUBLASLT_MATMUL_TILE_64x728 = 388,
+ CUBLASLT_MATMUL_TILE_64x736 = 389,
+ CUBLASLT_MATMUL_TILE_64x744 = 390,
+ CUBLASLT_MATMUL_TILE_64x752 = 391,
+ CUBLASLT_MATMUL_TILE_64x760 = 392,
+ CUBLASLT_MATMUL_TILE_128x8 = 393,
+ CUBLASLT_MATMUL_TILE_128x16 = 394,
+ CUBLASLT_MATMUL_TILE_128x24 = 395,
+ CUBLASLT_MATMUL_TILE_128x40 = 396,
+ CUBLASLT_MATMUL_TILE_128x48 = 397,
+ CUBLASLT_MATMUL_TILE_128x56 = 398,
+ CUBLASLT_MATMUL_TILE_128x72 = 399,
+ CUBLASLT_MATMUL_TILE_128x80 = 400,
+ CUBLASLT_MATMUL_TILE_128x88 = 401,
+ CUBLASLT_MATMUL_TILE_128x104 = 402,
+ CUBLASLT_MATMUL_TILE_128x112 = 403,
+ CUBLASLT_MATMUL_TILE_128x120 = 404,
+ CUBLASLT_MATMUL_TILE_128x136 = 405,
+ CUBLASLT_MATMUL_TILE_128x144 = 406,
+ CUBLASLT_MATMUL_TILE_128x152 = 407,
+ CUBLASLT_MATMUL_TILE_128x168 = 408,
+ CUBLASLT_MATMUL_TILE_128x176 = 409,
+ CUBLASLT_MATMUL_TILE_128x184 = 410,
+ CUBLASLT_MATMUL_TILE_128x200 = 411,
+ CUBLASLT_MATMUL_TILE_128x208 = 412,
+ CUBLASLT_MATMUL_TILE_128x216 = 413,
+ CUBLASLT_MATMUL_TILE_128x224 = 414,
+ CUBLASLT_MATMUL_TILE_128x232 = 415,
+ CUBLASLT_MATMUL_TILE_128x240 = 416,
+ CUBLASLT_MATMUL_TILE_128x248 = 417,
+ CUBLASLT_MATMUL_TILE_128x264 = 418,
+ CUBLASLT_MATMUL_TILE_128x272 = 419,
+ CUBLASLT_MATMUL_TILE_128x280 = 420,
+ CUBLASLT_MATMUL_TILE_128x288 = 421,
+ CUBLASLT_MATMUL_TILE_128x296 = 422,
+ CUBLASLT_MATMUL_TILE_128x304 = 423,
+ CUBLASLT_MATMUL_TILE_128x312 = 424,
+ CUBLASLT_MATMUL_TILE_128x328 = 425,
+ CUBLASLT_MATMUL_TILE_128x336 = 426,
+ CUBLASLT_MATMUL_TILE_128x344 = 427,
+ CUBLASLT_MATMUL_TILE_128x352 = 428,
+ CUBLASLT_MATMUL_TILE_128x360 = 429,
+ CUBLASLT_MATMUL_TILE_128x368 = 430,
+ CUBLASLT_MATMUL_TILE_128x376 = 431,
+ CUBLASLT_MATMUL_TILE_128x392 = 432,
+ CUBLASLT_MATMUL_TILE_128x400 = 433,
+ CUBLASLT_MATMUL_TILE_128x408 = 434,
+ CUBLASLT_MATMUL_TILE_128x416 = 435,
+ CUBLASLT_MATMUL_TILE_128x424 = 436,
+ CUBLASLT_MATMUL_TILE_128x432 = 437,
+ CUBLASLT_MATMUL_TILE_128x440 = 438,
+ CUBLASLT_MATMUL_TILE_128x448 = 439,
+ CUBLASLT_MATMUL_TILE_128x456 = 440,
+ CUBLASLT_MATMUL_TILE_128x464 = 441,
+ CUBLASLT_MATMUL_TILE_128x472 = 442,
+ CUBLASLT_MATMUL_TILE_128x480 = 443,
+ CUBLASLT_MATMUL_TILE_128x488 = 444,
+ CUBLASLT_MATMUL_TILE_128x496 = 445,
+ CUBLASLT_MATMUL_TILE_128x504 = 446,
+ CUBLASLT_MATMUL_TILE_128x512 = 447,
+ CUBLASLT_MATMUL_TILE_192x8 = 448,
+ CUBLASLT_MATMUL_TILE_192x16 = 449,
+ CUBLASLT_MATMUL_TILE_192x24 = 450,
+ CUBLASLT_MATMUL_TILE_192x32 = 451,
+ CUBLASLT_MATMUL_TILE_192x40 = 452,
+ CUBLASLT_MATMUL_TILE_192x48 = 453,
+ CUBLASLT_MATMUL_TILE_192x56 = 454,
+ CUBLASLT_MATMUL_TILE_192x72 = 455,
+ CUBLASLT_MATMUL_TILE_192x80 = 456,
+ CUBLASLT_MATMUL_TILE_192x88 = 457,
+ CUBLASLT_MATMUL_TILE_192x96 = 458,
+ CUBLASLT_MATMUL_TILE_192x104 = 459,
+ CUBLASLT_MATMUL_TILE_192x112 = 460,
+ CUBLASLT_MATMUL_TILE_192x120 = 461,
+ CUBLASLT_MATMUL_TILE_192x136 = 462,
+ CUBLASLT_MATMUL_TILE_192x144 = 463,
+ CUBLASLT_MATMUL_TILE_192x152 = 464,
+ CUBLASLT_MATMUL_TILE_192x160 = 465,
+ CUBLASLT_MATMUL_TILE_192x168 = 466,
+ CUBLASLT_MATMUL_TILE_192x176 = 467,
+ CUBLASLT_MATMUL_TILE_192x184 = 468,
+ CUBLASLT_MATMUL_TILE_192x200 = 469,
+ CUBLASLT_MATMUL_TILE_192x208 = 470,
+ CUBLASLT_MATMUL_TILE_192x216 = 471,
+ CUBLASLT_MATMUL_TILE_192x224 = 472,
+ CUBLASLT_MATMUL_TILE_192x232 = 473,
+ CUBLASLT_MATMUL_TILE_192x240 = 474,
+ CUBLASLT_MATMUL_TILE_192x248 = 475,
+ CUBLASLT_MATMUL_TILE_192x264 = 476,
+ CUBLASLT_MATMUL_TILE_192x272 = 477,
+ CUBLASLT_MATMUL_TILE_192x280 = 478,
+ CUBLASLT_MATMUL_TILE_192x288 = 479,
+ CUBLASLT_MATMUL_TILE_192x296 = 480,
+ CUBLASLT_MATMUL_TILE_192x304 = 481,
+ CUBLASLT_MATMUL_TILE_192x312 = 482,
+ CUBLASLT_MATMUL_TILE_192x320 = 483,
+ CUBLASLT_MATMUL_TILE_192x328 = 484,
+ CUBLASLT_MATMUL_TILE_192x336 = 485,
+ CUBLASLT_MATMUL_TILE_256x8 = 486,
+ CUBLASLT_MATMUL_TILE_256x16 = 487,
+ CUBLASLT_MATMUL_TILE_256x24 = 488,
+ CUBLASLT_MATMUL_TILE_256x40 = 489,
+ CUBLASLT_MATMUL_TILE_256x48 = 490,
+ CUBLASLT_MATMUL_TILE_256x56 = 491,
+ CUBLASLT_MATMUL_TILE_256x72 = 492,
+ CUBLASLT_MATMUL_TILE_256x80 = 493,
+ CUBLASLT_MATMUL_TILE_256x88 = 494,
+ CUBLASLT_MATMUL_TILE_256x96 = 495,
+ CUBLASLT_MATMUL_TILE_256x104 = 496,
+ CUBLASLT_MATMUL_TILE_256x112 = 497,
+ CUBLASLT_MATMUL_TILE_256x120 = 498,
+ CUBLASLT_MATMUL_TILE_256x136 = 499,
+ CUBLASLT_MATMUL_TILE_256x144 = 500,
+ CUBLASLT_MATMUL_TILE_256x152 = 501,
+ CUBLASLT_MATMUL_TILE_256x160 = 502,
+ CUBLASLT_MATMUL_TILE_256x168 = 503,
+ CUBLASLT_MATMUL_TILE_256x176 = 504,
+ CUBLASLT_MATMUL_TILE_256x184 = 505,
+ CUBLASLT_MATMUL_TILE_256x200 = 506,
+ CUBLASLT_MATMUL_TILE_256x208 = 507,
+ CUBLASLT_MATMUL_TILE_256x216 = 508,
+ CUBLASLT_MATMUL_TILE_256x224 = 509,
+ CUBLASLT_MATMUL_TILE_256x232 = 510,
+ CUBLASLT_MATMUL_TILE_256x240 = 511,
+ CUBLASLT_MATMUL_TILE_256x248 = 512,
+ CUBLASLT_MATMUL_TILE_256x256 = 513,
+ CUBLASLT_MATMUL_TILE_320x8 = 514,
+ CUBLASLT_MATMUL_TILE_320x16 = 515,
+ CUBLASLT_MATMUL_TILE_320x24 = 516,
+ CUBLASLT_MATMUL_TILE_320x32 = 517,
+ CUBLASLT_MATMUL_TILE_320x40 = 518,
+ CUBLASLT_MATMUL_TILE_320x48 = 519,
+ CUBLASLT_MATMUL_TILE_320x56 = 520,
+ CUBLASLT_MATMUL_TILE_320x72 = 521,
+ CUBLASLT_MATMUL_TILE_320x80 = 522,
+ CUBLASLT_MATMUL_TILE_320x88 = 523,
+ CUBLASLT_MATMUL_TILE_320x96 = 524,
+ CUBLASLT_MATMUL_TILE_320x104 = 525,
+ CUBLASLT_MATMUL_TILE_320x112 = 526,
+ CUBLASLT_MATMUL_TILE_320x120 = 527,
+ CUBLASLT_MATMUL_TILE_320x136 = 528,
+ CUBLASLT_MATMUL_TILE_320x144 = 529,
+ CUBLASLT_MATMUL_TILE_320x152 = 530,
+ CUBLASLT_MATMUL_TILE_320x160 = 531,
+ CUBLASLT_MATMUL_TILE_320x168 = 532,
+ CUBLASLT_MATMUL_TILE_320x176 = 533,
+ CUBLASLT_MATMUL_TILE_320x184 = 534,
+ CUBLASLT_MATMUL_TILE_320x192 = 535,
+ CUBLASLT_MATMUL_TILE_320x200 = 536,
+ CUBLASLT_MATMUL_TILE_384x8 = 537,
+ CUBLASLT_MATMUL_TILE_384x16 = 538,
+ CUBLASLT_MATMUL_TILE_384x24 = 539,
+ CUBLASLT_MATMUL_TILE_384x32 = 540,
+ CUBLASLT_MATMUL_TILE_384x40 = 541,
+ CUBLASLT_MATMUL_TILE_384x48 = 542,
+ CUBLASLT_MATMUL_TILE_384x56 = 543,
+ CUBLASLT_MATMUL_TILE_384x72 = 544,
+ CUBLASLT_MATMUL_TILE_384x80 = 545,
+ CUBLASLT_MATMUL_TILE_384x88 = 546,
+ CUBLASLT_MATMUL_TILE_384x96 = 547,
+ CUBLASLT_MATMUL_TILE_384x104 = 548,
+ CUBLASLT_MATMUL_TILE_384x112 = 549,
+ CUBLASLT_MATMUL_TILE_384x120 = 550,
+ CUBLASLT_MATMUL_TILE_384x136 = 551,
+ CUBLASLT_MATMUL_TILE_384x144 = 552,
+ CUBLASLT_MATMUL_TILE_384x152 = 553,
+ CUBLASLT_MATMUL_TILE_384x160 = 554,
+ CUBLASLT_MATMUL_TILE_384x168 = 555,
+ CUBLASLT_MATMUL_TILE_448x8 = 556,
+ CUBLASLT_MATMUL_TILE_448x16 = 557,
+ CUBLASLT_MATMUL_TILE_448x24 = 558,
+ CUBLASLT_MATMUL_TILE_448x32 = 559,
+ CUBLASLT_MATMUL_TILE_448x40 = 560,
+ CUBLASLT_MATMUL_TILE_448x48 = 561,
+ CUBLASLT_MATMUL_TILE_448x56 = 562,
+ CUBLASLT_MATMUL_TILE_448x72 = 563,
+ CUBLASLT_MATMUL_TILE_448x80 = 564,
+ CUBLASLT_MATMUL_TILE_448x88 = 565,
+ CUBLASLT_MATMUL_TILE_448x96 = 566,
+ CUBLASLT_MATMUL_TILE_448x104 = 567,
+ CUBLASLT_MATMUL_TILE_448x112 = 568,
+ CUBLASLT_MATMUL_TILE_448x120 = 569,
+ CUBLASLT_MATMUL_TILE_448x128 = 570,
+ CUBLASLT_MATMUL_TILE_448x136 = 571,
+ CUBLASLT_MATMUL_TILE_448x144 = 572,
+ CUBLASLT_MATMUL_TILE_512x8 = 573,
+ CUBLASLT_MATMUL_TILE_512x16 = 574,
+ CUBLASLT_MATMUL_TILE_512x24 = 575,
+ CUBLASLT_MATMUL_TILE_512x32 = 576,
+ CUBLASLT_MATMUL_TILE_512x40 = 577,
+ CUBLASLT_MATMUL_TILE_512x48 = 578,
+ CUBLASLT_MATMUL_TILE_512x56 = 579,
+ CUBLASLT_MATMUL_TILE_512x72 = 580,
+ CUBLASLT_MATMUL_TILE_512x80 = 581,
+ CUBLASLT_MATMUL_TILE_512x88 = 582,
+ CUBLASLT_MATMUL_TILE_512x96 = 583,
+ CUBLASLT_MATMUL_TILE_512x104 = 584,
+ CUBLASLT_MATMUL_TILE_512x112 = 585,
+ CUBLASLT_MATMUL_TILE_512x120 = 586,
+ CUBLASLT_MATMUL_TILE_512x128 = 587,
+ CUBLASLT_MATMUL_TILE_576x8 = 588,
+ CUBLASLT_MATMUL_TILE_576x16 = 589,
+ CUBLASLT_MATMUL_TILE_576x24 = 590,
+ CUBLASLT_MATMUL_TILE_576x32 = 591,
+ CUBLASLT_MATMUL_TILE_576x40 = 592,
+ CUBLASLT_MATMUL_TILE_576x48 = 593,
+ CUBLASLT_MATMUL_TILE_576x56 = 594,
+ CUBLASLT_MATMUL_TILE_576x72 = 595,
+ CUBLASLT_MATMUL_TILE_576x80 = 596,
+ CUBLASLT_MATMUL_TILE_576x88 = 597,
+ CUBLASLT_MATMUL_TILE_576x96 = 598,
+ CUBLASLT_MATMUL_TILE_576x104 = 599,
+ CUBLASLT_MATMUL_TILE_576x112 = 600,
+ CUBLASLT_MATMUL_TILE_640x8 = 601,
+ CUBLASLT_MATMUL_TILE_640x16 = 602,
+ CUBLASLT_MATMUL_TILE_640x24 = 603,
+ CUBLASLT_MATMUL_TILE_640x32 = 604,
+ CUBLASLT_MATMUL_TILE_640x40 = 605,
+ CUBLASLT_MATMUL_TILE_640x48 = 606,
+ CUBLASLT_MATMUL_TILE_640x56 = 607,
+ CUBLASLT_MATMUL_TILE_640x72 = 608,
+ CUBLASLT_MATMUL_TILE_640x80 = 609,
+ CUBLASLT_MATMUL_TILE_640x88 = 610,
+ CUBLASLT_MATMUL_TILE_640x96 = 611,
+ CUBLASLT_MATMUL_TILE_704x8 = 612,
+ CUBLASLT_MATMUL_TILE_704x16 = 613,
+ CUBLASLT_MATMUL_TILE_704x24 = 614,
+ CUBLASLT_MATMUL_TILE_704x32 = 615,
+ CUBLASLT_MATMUL_TILE_704x40 = 616,
+ CUBLASLT_MATMUL_TILE_704x48 = 617,
+ CUBLASLT_MATMUL_TILE_704x56 = 618,
+ CUBLASLT_MATMUL_TILE_704x72 = 619,
+ CUBLASLT_MATMUL_TILE_704x80 = 620,
+ CUBLASLT_MATMUL_TILE_704x88 = 621,
+ CUBLASLT_MATMUL_TILE_768x8 = 622,
+ CUBLASLT_MATMUL_TILE_768x16 = 623,
+ CUBLASLT_MATMUL_TILE_768x24 = 624,
+ CUBLASLT_MATMUL_TILE_768x32 = 625,
+ CUBLASLT_MATMUL_TILE_768x40 = 626,
+ CUBLASLT_MATMUL_TILE_768x48 = 627,
+ CUBLASLT_MATMUL_TILE_768x56 = 628,
+ CUBLASLT_MATMUL_TILE_768x72 = 629,
+ CUBLASLT_MATMUL_TILE_768x80 = 630,
+ CUBLASLT_MATMUL_TILE_END = 631;
/** Size and number of stages in which elements are read into shared memory
*
@@ -13763,7 +14358,19 @@ public static native void cublasZtrmm(@Cast("char") byte side,
CUBLASLT_SEARCH_RESERVED_04 = 4,
/** reserved for future use
*/
- CUBLASLT_SEARCH_RESERVED_05 = 5;
+ CUBLASLT_SEARCH_RESERVED_05 = 5,
+ /** reserved for future use
+ */
+ CUBLASLT_SEARCH_RESERVED_06 = 6,
+ /** reserved for future use
+ */
+ CUBLASLT_SEARCH_RESERVED_07 = 7,
+ /** reserved for future use
+ */
+ CUBLASLT_SEARCH_RESERVED_08 = 8,
+ /** reserved for future use
+ */
+ CUBLASLT_SEARCH_RESERVED_09 = 9;
/** Algo search preference to fine tune the heuristic function. */
/** enum cublasLtMatmulPreferenceAttributes_t */
@@ -14431,7 +15038,7 @@ public static native void cublasZtrmm(@Cast("char") byte side,
// #include "driver_types.h"
// #include "cuComplex.h" /* import complex data type */
-// #include "cublas_api.h"
+// #include "cublas_v2.h"
// #if defined(__cplusplus)
// Targeting ../cublas/cublasXtContext.java
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cudnn.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cudnn.java
index cca6078361..9ef4c848b7 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/cudnn.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cudnn.java
@@ -146,8 +146,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
// #define CUDNN_VERSION_H_
public static final int CUDNN_MAJOR = 9;
-public static final int CUDNN_MINOR = 3;
-public static final int CUDNN_PATCHLEVEL = 0;
+public static final int CUDNN_MINOR = 5;
+public static final int CUDNN_PATCHLEVEL = 1;
public static final int CUDNN_VERSION = (CUDNN_MAJOR * 10000 + CUDNN_MINOR * 100 + CUDNN_PATCHLEVEL);
@@ -228,8 +228,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
/* These version numbers are autogenerated, do not edit manually. */
public static final int CUDNN_GRAPH_MAJOR = 9;
-public static final int CUDNN_GRAPH_MINOR = 3;
-public static final int CUDNN_GRAPH_PATCH = 0;
+public static final int CUDNN_GRAPH_MINOR = 5;
+public static final int CUDNN_GRAPH_PATCH = 1;
// #if (CUDNN_GRAPH_MAJOR != CUDNN_MAJOR) || (CUDNN_GRAPH_MINOR != CUDNN_MINOR) || (CUDNN_GRAPH_PATCH != CUDNN_PATCHLEVEL)
// #error Version mismatch in cuDNN GRAPH!!!
@@ -301,6 +301,7 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
CUDNN_STATUS_BAD_PARAM_SHAPE_MISMATCH = 2008,
CUDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES = 2009,
CUDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE = 2010,
+ CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH = 2011,
CUDNN_STATUS_NOT_SUPPORTED = 3000,
CUDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN = 3001,
@@ -315,6 +316,7 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
CUDNN_STATUS_NOT_SUPPORTED_SHARED_MEMORY_INSUFFICIENT = 3010,
CUDNN_STATUS_NOT_SUPPORTED_PADDING = 3011,
CUDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM = 3012,
+ CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API = 3013,
CUDNN_STATUS_INTERNAL_ERROR = 4000,
CUDNN_STATUS_INTERNAL_ERROR_COMPILATION_FAILED = 4001,
@@ -639,6 +641,7 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS = 403,
CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS = 404,
CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION = 405,
+ CUDNN_ATTR_EXECUTION_PLAN_KERNEL_CACHE = 406,
CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID = 500,
CUDNN_ATTR_INTERMEDIATE_INFO_SIZE = 501,
@@ -704,6 +707,7 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
CUDNN_ATTR_OPERATIONGRAPH_HANDLE = 800,
CUDNN_ATTR_OPERATIONGRAPH_OPS = 801,
CUDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT = 802,
+ CUDNN_ATTR_OPERATIONGRAPH_IS_DYNAMIC_SHAPE_ENABLED = 803,
CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT = 900,
CUDNN_ATTR_TENSOR_DATA_TYPE = 901,
@@ -806,6 +810,11 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
CUDNN_ATTR_OPERATION_SIGNAL_XDESC = 1903,
CUDNN_ATTR_OPERATION_SIGNAL_YDESC = 1904,
+ CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_CONTAINER_DESC = 1950,
+ CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_YDESC = 1951,
+ CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_SEQUENCE_DESC = 1952,
+ CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_PAGE_TABLE_DESC = 1953,
+
CUDNN_ATTR_OPERATION_NORM_FWD_MODE = 2000,
CUDNN_ATTR_OPERATION_NORM_FWD_PHASE = 2001,
CUDNN_ATTR_OPERATION_NORM_FWD_XDESC = 2002,
@@ -847,7 +856,10 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
CUDNN_ATTR_OPERATION_RNG_YDESC = 2310,
CUDNN_ATTR_OPERATION_RNG_SEED = 2311,
CUDNN_ATTR_OPERATION_RNG_DESC = 2312,
- CUDNN_ATTR_OPERATION_RNG_OFFSET_DESC = 2313;
+ CUDNN_ATTR_OPERATION_RNG_OFFSET_DESC = 2313,
+
+ CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH = 2400,
+ CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED = 2401;
/** enum cudnnBackendAttributeType_t */
public static final int
@@ -917,7 +929,9 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR = 30,
CUDNN_BACKEND_OPERATION_RESHAPE_DESCRIPTOR = 31,
CUDNN_BACKEND_RNG_DESCRIPTOR = 32,
- CUDNN_BACKEND_OPERATION_RNG_DESCRIPTOR = 33;
+ CUDNN_BACKEND_OPERATION_RNG_DESCRIPTOR = 33,
+ CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR = 34,
+ CUDNN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR = 35;
/** enum cudnnBackendNumericalNote_t */
public static final int
@@ -938,7 +952,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION = 0,
CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER = 1,
CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER = 2,
- CUDNN_BEHAVIOR_NOTE_TYPE_COUNT = 3;
+ CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API = 3,
+ CUDNN_BEHAVIOR_NOTE_TYPE_COUNT = 4;
/** enum cudnnBackendKnobType_t */
public static final int
@@ -1057,6 +1072,16 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
public static native @Cast("cudnnStatus_t") int cudnnBackendExecute(cudnnContext handle, cudnnBackendDescriptor_t executionPlan, cudnnBackendDescriptor_t variantPack);
+public static native @Cast("cudnnStatus_t") int cudnnBackendPopulateCudaGraph(cudnnContext handle,
+ cudnnBackendDescriptor_t executionPlan,
+ cudnnBackendDescriptor_t variantPack,
+ CUgraph_st graph);
+
+public static native @Cast("cudnnStatus_t") int cudnnBackendUpdateCudaGraph(cudnnContext handle,
+ cudnnBackendDescriptor_t executionPlan,
+ cudnnBackendDescriptor_t variantPack,
+ CUgraph_st graph);
+
// #if defined(__cplusplus)
// #endif
@@ -1128,8 +1153,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
/* These version numbers are autogenerated, do not edit manually. */
public static final int CUDNN_OPS_MAJOR = 9;
-public static final int CUDNN_OPS_MINOR = 3;
-public static final int CUDNN_OPS_PATCH = 0;
+public static final int CUDNN_OPS_MINOR = 5;
+public static final int CUDNN_OPS_PATCH = 1;
// #if (CUDNN_OPS_MAJOR != CUDNN_MAJOR) || (CUDNN_OPS_MINOR != CUDNN_MINOR) || (CUDNN_OPS_PATCH != CUDNN_PATCHLEVEL)
// #error Version mismatch in cuDNN OPS INFER!!!
@@ -2303,7 +2328,7 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
@Cast("size_t") long reserveSpaceSizeInBytes);
/* Performs backward pass of Batch Normalization layer. Returns x gradient,
-* bnScale gradient and bnBias gradient */
+ * bnScale gradient and bnBias gradient */
public static native @Cast("cudnnStatus_t") @Deprecated int cudnnBatchNormalizationBackward(cudnnContext handle,
@Cast("cudnnBatchNormMode_t") int mode,
@Const Pointer alphaDataDiff,
@@ -2557,8 +2582,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
/* These version numbers are autogenerated, do not edit manually. */
public static final int CUDNN_ADV_MAJOR = 9;
-public static final int CUDNN_ADV_MINOR = 3;
-public static final int CUDNN_ADV_PATCH = 0;
+public static final int CUDNN_ADV_MINOR = 5;
+public static final int CUDNN_ADV_PATCH = 1;
// #if (CUDNN_ADV_MAJOR != CUDNN_MAJOR) || (CUDNN_ADV_MINOR != CUDNN_MINOR) || (CUDNN_ADV_PATCH != CUDNN_PATCHLEVEL)
// #error Version mismatch in cuDNN ADV INFER!!!
@@ -3374,8 +3399,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
Pointer reserveSpace);
/*
-* CTC (Connectionist Temporal Classification) loss descriptor create/destory/set/get functions
-*/
+ * CTC (Connectionist Temporal Classification) loss descriptor create/destory/set/get functions
+ */
/* Input normalization mode for loss function */
/** enum cudnnLossNormalizationMode_t */
public static final int
@@ -3655,8 +3680,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
/* These version numbers are autogenerated, do not edit manually. */
public static final int CUDNN_CNN_MAJOR = 9;
-public static final int CUDNN_CNN_MINOR = 3;
-public static final int CUDNN_CNN_PATCH = 0;
+public static final int CUDNN_CNN_MINOR = 5;
+public static final int CUDNN_CNN_PATCH = 1;
// #if (CUDNN_CNN_MAJOR != CUDNN_MAJOR) || (CUDNN_CNN_MINOR != CUDNN_MINOR) || (CUDNN_CNN_PATCH != CUDNN_PATCHLEVEL)
// #error Version mismatch in cuDNN CNN INFER!!!
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cufft.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cufft.java
index 61b09ebbe2..14c0058238 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/cufft.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cufft.java
@@ -90,11 +90,11 @@ public class cufft extends org.bytedeco.cuda.presets.cufft {
// #endif
public static final int CUFFT_VER_MAJOR = 11;
-public static final int CUFFT_VER_MINOR = 2;
-public static final int CUFFT_VER_PATCH = 6;
-public static final int CUFFT_VER_BUILD = 28;
+public static final int CUFFT_VER_MINOR = 3;
+public static final int CUFFT_VER_PATCH = 0;
+public static final int CUFFT_VER_BUILD = 4;
-public static final int CUFFT_VERSION = 11206;
+public static final int CUFFT_VERSION = 11300;
// CUFFT API function return values
/** enum cufftResult */
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cupti.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cupti.java
index 5e744d4351..8bea75dc1d 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/cupti.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cupti.java
@@ -777,8 +777,9 @@ public class cupti extends org.bytedeco.cuda.presets.cupti {
CUPTI_ACTIVITY_KIND_MARKER = 12,
/**
- * Extended, optional, data about a marker. The corresponding
- * activity record structure is \ref CUpti_ActivityMarkerData.
+ * Extended, optional, data about a marker. User must enable
+ * CUPTI_ACTIVITY_KIND_MARKER as well to get records for marker data.
+ * The corresponding activity record structure is \ref CUpti_ActivityMarkerData.
*/
CUPTI_ACTIVITY_KIND_MARKER_DATA = 13,
@@ -4105,7 +4106,7 @@ public class cupti extends org.bytedeco.cuda.presets.cupti {
/**
* Domain containing callback points for various states.
*/
- CUPTI_CB_DOMAIN_STATE = 6,
+ CUPTI_CB_DOMAIN_STATE = 6,
CUPTI_CB_DOMAIN_SIZE = 7,
@@ -4295,6 +4296,7 @@ public class cupti extends org.bytedeco.cuda.presets.cupti {
// Targeting ../cupti/CUpti_StateData.java
+
/**
* \brief An ID for a driver API, runtime API, resource or
* synchronization callback.
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/curand.java b/cuda/src/gen/java/org/bytedeco/cuda/global/curand.java
index 93bccbcccf..7d16bdb85a 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/curand.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/curand.java
@@ -92,7 +92,7 @@ public class curand extends org.bytedeco.cuda.presets.curand {
public static final int CURAND_VER_MAJOR = 10;
public static final int CURAND_VER_MINOR = 3;
public static final int CURAND_VER_PATCH = 7;
-public static final int CURAND_VER_BUILD = 37;
+public static final int CURAND_VER_BUILD = 77;
public static final int CURAND_VERSION = (CURAND_VER_MAJOR * 1000 +
CURAND_VER_MINOR * 100 +
CURAND_VER_PATCH);
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cusolver.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cusolver.java
index d23eac2aea..a6b20b65e1 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/cusolver.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cusolver.java
@@ -89,9 +89,9 @@ public class cusolver extends org.bytedeco.cuda.presets.cusolver {
// #endif
public static final int CUSOLVER_VER_MAJOR = 11;
- public static final int CUSOLVER_VER_MINOR = 6;
- public static final int CUSOLVER_VER_PATCH = 4;
- public static final int CUSOLVER_VER_BUILD = 38;
+ public static final int CUSOLVER_VER_MINOR = 7;
+ public static final int CUSOLVER_VER_PATCH = 1;
+ public static final int CUSOLVER_VER_BUILD = 2;
public static final int CUSOLVER_VERSION =
(CUSOLVER_VER_MAJOR * 1000 + CUSOLVER_VER_MINOR * 100 + CUSOLVER_VER_PATCH);
@@ -370,7 +370,7 @@ public class cusolver extends org.bytedeco.cuda.presets.cusolver {
// #include
// #include "cuComplex.h" /* import complex data type */
-// #include "cublas_api.h"
+// #include "cublas_v2.h"
// #include "cusolver_common.h"
/*******************************************************************************/
@@ -12217,6 +12217,78 @@ public class cusolver extends org.bytedeco.cuda.presets.cusolver {
@Cast("size_t") long workspaceInBytesOnHost,
int[] info);
+ /* 64-bit API for batched SYEVD */
+ public static native @Cast("cusolverStatus_t") int cusolverDnXsyevBatched_bufferSize(
+ cusolverDnContext handle,
+ cusolverDnParams params,
+ @Cast("cusolverEigMode_t") int jobz,
+ @Cast("cublasFillMode_t") int uplo,
+ @Cast("int64_t") long n,
+ @Cast("cudaDataType") int dataTypeA,
+ @Const Pointer A,
+ @Cast("int64_t") long lda,
+ @Cast("cudaDataType") int dataTypeW,
+ @Const Pointer W,
+ @Cast("cudaDataType") int computeType,
+ @Cast("size_t*") SizeTPointer workspaceInBytesOnDevice,
+ @Cast("size_t*") SizeTPointer workspaceInBytesOnHost,
+ @Cast("int64_t") long batchSize);
+
+ public static native @Cast("cusolverStatus_t") int cusolverDnXsyevBatched(
+ cusolverDnContext handle,
+ cusolverDnParams params,
+ @Cast("cusolverEigMode_t") int jobz,
+ @Cast("cublasFillMode_t") int uplo,
+ @Cast("int64_t") long n,
+ @Cast("cudaDataType") int dataTypeA,
+ Pointer A,
+ @Cast("int64_t") long lda,
+ @Cast("cudaDataType") int dataTypeW,
+ Pointer W,
+ @Cast("cudaDataType") int computeType,
+ Pointer bufferOnDevice,
+ @Cast("size_t") long workspaceInBytesOnDevice,
+ Pointer bufferOnHost,
+ @Cast("size_t") long workspaceInBytesOnHost,
+ IntPointer info,
+ @Cast("int64_t") long batchSize);
+ public static native @Cast("cusolverStatus_t") int cusolverDnXsyevBatched(
+ cusolverDnContext handle,
+ cusolverDnParams params,
+ @Cast("cusolverEigMode_t") int jobz,
+ @Cast("cublasFillMode_t") int uplo,
+ @Cast("int64_t") long n,
+ @Cast("cudaDataType") int dataTypeA,
+ Pointer A,
+ @Cast("int64_t") long lda,
+ @Cast("cudaDataType") int dataTypeW,
+ Pointer W,
+ @Cast("cudaDataType") int computeType,
+ Pointer bufferOnDevice,
+ @Cast("size_t") long workspaceInBytesOnDevice,
+ Pointer bufferOnHost,
+ @Cast("size_t") long workspaceInBytesOnHost,
+ IntBuffer info,
+ @Cast("int64_t") long batchSize);
+ public static native @Cast("cusolverStatus_t") int cusolverDnXsyevBatched(
+ cusolverDnContext handle,
+ cusolverDnParams params,
+ @Cast("cusolverEigMode_t") int jobz,
+ @Cast("cublasFillMode_t") int uplo,
+ @Cast("int64_t") long n,
+ @Cast("cudaDataType") int dataTypeA,
+ Pointer A,
+ @Cast("int64_t") long lda,
+ @Cast("cudaDataType") int dataTypeW,
+ Pointer W,
+ @Cast("cudaDataType") int computeType,
+ Pointer bufferOnDevice,
+ @Cast("size_t") long workspaceInBytesOnDevice,
+ Pointer bufferOnHost,
+ @Cast("size_t") long workspaceInBytesOnHost,
+ int[] info,
+ @Cast("int64_t") long batchSize);
+
/* 64-bit API for SYEVDX */
public static native @Cast("cusolverStatus_t") int cusolverDnXsyevdx_bufferSize(
cusolverDnContext handle,
@@ -12349,6 +12421,98 @@ public class cusolver extends org.bytedeco.cuda.presets.cusolver {
@Cast("size_t") long workspaceInBytesOnHost,
int[] info);
+ /* 64-bit API for GEEV */
+ public static native @Cast("cusolverStatus_t") int cusolverDnXgeev_bufferSize(
+ cusolverDnContext handle,
+ cusolverDnParams params,
+ @Cast("cusolverEigMode_t") int jobvl,
+ @Cast("cusolverEigMode_t") int jobvr,
+ @Cast("int64_t") long n,
+ @Cast("cudaDataType") int dataTypeA,
+ @Const Pointer A,
+ @Cast("int64_t") long lda,
+ @Cast("cudaDataType") int dataTypeW,
+ @Const Pointer W,
+ @Cast("cudaDataType") int dataTypeVL,
+ @Const Pointer VL,
+ @Cast("int64_t") long ldvl,
+ @Cast("cudaDataType") int dataTypeVR,
+ @Const Pointer VR,
+ @Cast("int64_t") long ldvr,
+ @Cast("cudaDataType") int computeType,
+ @Cast("size_t*") SizeTPointer workspaceInBytesOnDevice,
+ @Cast("size_t*") SizeTPointer workspaceInBytesOnHost);
+
+ public static native @Cast("cusolverStatus_t") int cusolverDnXgeev(
+ cusolverDnContext handle,
+ cusolverDnParams params,
+ @Cast("cusolverEigMode_t") int jobvl,
+ @Cast("cusolverEigMode_t") int jobvr,
+ @Cast("int64_t") long n,
+ @Cast("cudaDataType") int dataTypeA,
+ Pointer A,
+ @Cast("int64_t") long lda,
+ @Cast("cudaDataType") int dataTypeW,
+ Pointer W,
+ @Cast("cudaDataType") int dataTypeVL,
+ Pointer VL,
+ @Cast("int64_t") long ldvl,
+ @Cast("cudaDataType") int dataTypeVR,
+ Pointer VR,
+ @Cast("int64_t") long ldvr,
+ @Cast("cudaDataType") int computeType,
+ Pointer bufferOnDevice,
+ @Cast("size_t") long workspaceInBytesOnDevice,
+ Pointer bufferOnHost,
+ @Cast("size_t") long workspaceInBytesOnHost,
+ IntPointer info);
+ public static native @Cast("cusolverStatus_t") int cusolverDnXgeev(
+ cusolverDnContext handle,
+ cusolverDnParams params,
+ @Cast("cusolverEigMode_t") int jobvl,
+ @Cast("cusolverEigMode_t") int jobvr,
+ @Cast("int64_t") long n,
+ @Cast("cudaDataType") int dataTypeA,
+ Pointer A,
+ @Cast("int64_t") long lda,
+ @Cast("cudaDataType") int dataTypeW,
+ Pointer W,
+ @Cast("cudaDataType") int dataTypeVL,
+ Pointer VL,
+ @Cast("int64_t") long ldvl,
+ @Cast("cudaDataType") int dataTypeVR,
+ Pointer VR,
+ @Cast("int64_t") long ldvr,
+ @Cast("cudaDataType") int computeType,
+ Pointer bufferOnDevice,
+ @Cast("size_t") long workspaceInBytesOnDevice,
+ Pointer bufferOnHost,
+ @Cast("size_t") long workspaceInBytesOnHost,
+ IntBuffer info);
+ public static native @Cast("cusolverStatus_t") int cusolverDnXgeev(
+ cusolverDnContext handle,
+ cusolverDnParams params,
+ @Cast("cusolverEigMode_t") int jobvl,
+ @Cast("cusolverEigMode_t") int jobvr,
+ @Cast("int64_t") long n,
+ @Cast("cudaDataType") int dataTypeA,
+ Pointer A,
+ @Cast("int64_t") long lda,
+ @Cast("cudaDataType") int dataTypeW,
+ Pointer W,
+ @Cast("cudaDataType") int dataTypeVL,
+ Pointer VL,
+ @Cast("int64_t") long ldvl,
+ @Cast("cudaDataType") int dataTypeVR,
+ Pointer VR,
+ @Cast("int64_t") long ldvr,
+ @Cast("cudaDataType") int computeType,
+ Pointer bufferOnDevice,
+ @Cast("size_t") long workspaceInBytesOnDevice,
+ Pointer bufferOnHost,
+ @Cast("size_t") long workspaceInBytesOnHost,
+ int[] info);
+
/* 64-bit API for GESVD */
public static native @Cast("cusolverStatus_t") int cusolverDnXgesvd_bufferSize(
cusolverDnContext handle,
@@ -14227,7 +14391,7 @@ public class cusolver extends org.bytedeco.cuda.presets.cusolver {
// #define CUSOLVERSP_H_
// #include "cusparse.h"
-// #include "cublas_api.h"
+// #include "cublas_v2.h"
// #include "cusolver_common.h"
// #if defined(__cplusplus)
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cusparse.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cusparse.java
index 616e2af6e5..48ab3b11f1 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/cusparse.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cusparse.java
@@ -84,8 +84,8 @@ public class cusparse extends org.bytedeco.cuda.presets.cusparse {
public static final int CUSPARSE_VER_MAJOR = 12;
public static final int CUSPARSE_VER_MINOR = 5;
-public static final int CUSPARSE_VER_PATCH = 2;
-public static final int CUSPARSE_VER_BUILD = 23;
+public static final int CUSPARSE_VER_PATCH = 4;
+public static final int CUSPARSE_VER_BUILD = 2;
public static final int CUSPARSE_VERSION = (CUSPARSE_VER_MAJOR * 1000 +
CUSPARSE_VER_MINOR * 100 +
CUSPARSE_VER_PATCH);
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/nccl.java b/cuda/src/gen/java/org/bytedeco/cuda/global/nccl.java
index fa5d6f366d..734ac977e3 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/nccl.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/nccl.java
@@ -33,11 +33,11 @@ public class nccl extends org.bytedeco.cuda.presets.nccl {
// #endif
public static final int NCCL_MAJOR = 2;
-public static final int NCCL_MINOR = 22;
-public static final int NCCL_PATCH = 3;
+public static final int NCCL_MINOR = 23;
+public static final int NCCL_PATCH = 4;
public static final String NCCL_SUFFIX = "";
-public static final int NCCL_VERSION_CODE = 22203;
+public static final int NCCL_VERSION_CODE = 22304;
// #define NCCL_VERSION(X,Y,Z) (((X) <= 2 && (Y) <= 8) ? (X) * 1000 + (Y) * 100 + (Z) : (X) * 10000 + (Y) * 100 + (Z))
// #ifdef __cplusplus
@@ -194,6 +194,15 @@ public class nccl extends org.bytedeco.cuda.presets.nccl {
public static native @Cast("ncclResult_t") int pncclCommSplit(ncclComm comm, int color, int key, @ByPtrPtr ncclComm newcomm, ncclConfig_t config);
public static native @Cast("ncclResult_t") int pncclCommSplit(ncclComm comm, int color, int key, @Cast("ncclComm**") PointerPointer newcomm, ncclConfig_t config);
+/* Creates a new communicator (multi thread/process version), similar to ncclCommInitRankConfig.
+ * Allows to use more than one ncclUniqueId (up to one per rank), indicated by nId, to accelerate the init operation.
+ * The number of ncclUniqueIds and their order must be the same for every rank.
+ */
+public static native @Cast("ncclResult_t") int ncclCommInitRankScalable(@ByPtrPtr ncclComm newcomm, int nranks, int myrank, int nId, ncclUniqueId commIds, ncclConfig_t config);
+public static native @Cast("ncclResult_t") int ncclCommInitRankScalable(@Cast("ncclComm**") PointerPointer newcomm, int nranks, int myrank, int nId, ncclUniqueId commIds, ncclConfig_t config);
+public static native @Cast("ncclResult_t") int pncclCommInitRankScalable(@ByPtrPtr ncclComm newcomm, int nranks, int myrank, int nId, ncclUniqueId commIds, ncclConfig_t config);
+public static native @Cast("ncclResult_t") int pncclCommInitRankScalable(@Cast("ncclComm**") PointerPointer newcomm, int nranks, int myrank, int nId, ncclUniqueId commIds, ncclConfig_t config);
+
/* Returns a string for each error code. */
public static native @Cast("const char*") BytePointer ncclGetErrorString(@Cast("ncclResult_t") int result);
public static native @Cast("const char*") BytePointer pncclGetErrorString(@Cast("ncclResult_t") int result);
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/nppc.java b/cuda/src/gen/java/org/bytedeco/cuda/global/nppc.java
index b69843630e..116d7f9aef 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/nppc.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/nppc.java
@@ -88,7 +88,7 @@ public class nppc extends org.bytedeco.cuda.presets.nppc {
/**
* Build version
*/
-public static final int NPP_VER_BUILD = 23;
+public static final int NPP_VER_BUILD = 54;
/**
* Full version
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/nvjpeg.java b/cuda/src/gen/java/org/bytedeco/cuda/global/nvjpeg.java
index 0a17fa078e..5a3a89a2e6 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/nvjpeg.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/nvjpeg.java
@@ -88,7 +88,7 @@ public class nvjpeg extends org.bytedeco.cuda.presets.nvjpeg {
public static final int NVJPEG_VER_MAJOR = 12;
public static final int NVJPEG_VER_MINOR = 3;
public static final int NVJPEG_VER_PATCH = 3;
-public static final int NVJPEG_VER_BUILD = 23;
+public static final int NVJPEG_VER_BUILD = 54;
/* nvJPEG status enums, returned by nvJPEG API */
/** enum nvjpegStatus_t */
diff --git a/nvcodec/README.md b/nvcodec/README.md
index fdf7a1ba99..f961919dfe 100644
--- a/nvcodec/README.md
+++ b/nvcodec/README.md
@@ -62,7 +62,7 @@ You can find more encoder and decoder samples in the [`samples`](samples) subdir
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/nvcodec/platform/pom.xml b/nvcodec/platform/pom.xml
index b3f7b54c5e..b3dccbc6be 100644
--- a/nvcodec/platform/pom.xml
+++ b/nvcodec/platform/pom.xml
@@ -23,7 +23,7 @@
org.bytedeco
cuda-platform
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
${project.groupId}
diff --git a/nvcodec/pom.xml b/nvcodec/pom.xml
index 78f8677ad0..c0869ff538 100644
--- a/nvcodec/pom.xml
+++ b/nvcodec/pom.xml
@@ -18,7 +18,7 @@
org.bytedeco
cuda
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
org.bytedeco
@@ -44,7 +44,7 @@
org.bytedeco
cuda
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
diff --git a/nvcodec/samples/pom.xml b/nvcodec/samples/pom.xml
index 8766cdee1b..e8687e9d59 100644
--- a/nvcodec/samples/pom.xml
+++ b/nvcodec/samples/pom.xml
@@ -23,7 +23,7 @@
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/nvcodec/samples/simple/pom.xml b/nvcodec/samples/simple/pom.xml
index 37581e84db..b3c98a37f9 100644
--- a/nvcodec/samples/simple/pom.xml
+++ b/nvcodec/samples/simple/pom.xml
@@ -19,7 +19,7 @@
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/onnxruntime/README.md b/onnxruntime/README.md
index 2167fdab1e..816a796f31 100644
--- a/onnxruntime/README.md
+++ b/onnxruntime/README.md
@@ -60,7 +60,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/onnxruntime/samples/pom.xml b/onnxruntime/samples/pom.xml
index 77b521a4f6..06a60167c4 100644
--- a/onnxruntime/samples/pom.xml
+++ b/onnxruntime/samples/pom.xml
@@ -26,7 +26,7 @@
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/opencv/README.md b/opencv/README.md
index 72fcafdf05..5c17dba09e 100644
--- a/opencv/README.md
+++ b/opencv/README.md
@@ -63,7 +63,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/opencv/samples/pom.xml b/opencv/samples/pom.xml
index edfb74e1c5..587f109d2c 100644
--- a/opencv/samples/pom.xml
+++ b/opencv/samples/pom.xml
@@ -26,7 +26,7 @@
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/platform/pom.xml b/platform/pom.xml
index 02a54b050b..830ba19b12 100644
--- a/platform/pom.xml
+++ b/platform/pom.xml
@@ -272,7 +272,7 @@
org.bytedeco
cuda-platform
- 12.6-9.3-${project.version}
+ 12.6-9.5-${project.version}
org.bytedeco
@@ -312,12 +312,12 @@
org.bytedeco
tensorrt-platform
- 10.3-${project.version}
+ 10.5-${project.version}
org.bytedeco
tritonserver-platform
- 2.48.0-${project.version}
+ 2.50.0-${project.version}
diff --git a/pytorch/README.md b/pytorch/README.md
index 220d9da081..5ecaba34d7 100644
--- a/pytorch/README.md
+++ b/pytorch/README.md
@@ -62,7 +62,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/pytorch/pom.xml b/pytorch/pom.xml
index 5f697e12b5..575b759fa7 100644
--- a/pytorch/pom.xml
+++ b/pytorch/pom.xml
@@ -27,7 +27,7 @@
org.bytedeco
cuda
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
true
@@ -52,7 +52,7 @@
org.bytedeco
cuda-platform
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
org.bytedeco
diff --git a/pytorch/samples/pom.xml b/pytorch/samples/pom.xml
index 1ee5668173..97d4fb103a 100644
--- a/pytorch/samples/pom.xml
+++ b/pytorch/samples/pom.xml
@@ -26,7 +26,7 @@
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/tensorrt/README.md b/tensorrt/README.md
index 99eb8cbe7c..f255089e23 100644
--- a/tensorrt/README.md
+++ b/tensorrt/README.md
@@ -17,7 +17,7 @@ Introduction
------------
This directory contains the JavaCPP Presets module for:
- * TensorRT 10.3.0.26 https://developer.nvidia.com/tensorrt
+ * TensorRT 10.5.0.18 https://developer.nvidia.com/tensorrt
Please refer to the parent README.md file for more detailed information about the JavaCPP Presets.
@@ -54,19 +54,19 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic
org.bytedeco
tensorrt-platform
- 10.3-1.5.11-SNAPSHOT
+ 10.5-1.5.11-SNAPSHOT
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
org.bytedeco
tensorrt-platform-redist
- 10.3-1.5.11-SNAPSHOT
+ 10.5-1.5.11-SNAPSHOT
diff --git a/tensorrt/platform/pom.xml b/tensorrt/platform/pom.xml
index 8b458847cd..afc6e0dc55 100644
--- a/tensorrt/platform/pom.xml
+++ b/tensorrt/platform/pom.xml
@@ -12,7 +12,7 @@
org.bytedeco
tensorrt-platform
- 10.3-${project.parent.version}
+ 10.5-${project.parent.version}
JavaCPP Presets Platform for TensorRT
@@ -23,7 +23,7 @@
org.bytedeco
cuda-platform
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
${project.groupId}
diff --git a/tensorrt/platform/redist/pom.xml b/tensorrt/platform/redist/pom.xml
index e0cfebbc47..cabae40357 100644
--- a/tensorrt/platform/redist/pom.xml
+++ b/tensorrt/platform/redist/pom.xml
@@ -12,7 +12,7 @@
org.bytedeco
tensorrt-platform-redist
- 10.3-${project.parent.version}
+ 10.5-${project.parent.version}
JavaCPP Presets Platform Redist for TensorRT
diff --git a/tensorrt/pom.xml b/tensorrt/pom.xml
index 7a827cd556..da69643f07 100644
--- a/tensorrt/pom.xml
+++ b/tensorrt/pom.xml
@@ -11,14 +11,14 @@
org.bytedeco
tensorrt
- 10.3-${project.parent.version}
+ 10.5-${project.parent.version}
JavaCPP Presets for TensorRT
org.bytedeco
cuda
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
org.bytedeco
@@ -44,7 +44,7 @@
org.bytedeco
cuda
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
diff --git a/tensorrt/samples/pom.xml b/tensorrt/samples/pom.xml
index 1e2d76f0a7..f7094fb21a 100644
--- a/tensorrt/samples/pom.xml
+++ b/tensorrt/samples/pom.xml
@@ -12,19 +12,19 @@
org.bytedeco
tensorrt-platform
- 10.3-1.5.11-SNAPSHOT
+ 10.5-1.5.11-SNAPSHOT
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
org.bytedeco
tensorrt-platform-redist
- 10.3-1.5.11-SNAPSHOT
+ 10.5-1.5.11-SNAPSHOT
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/global/nvinfer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/global/nvinfer.java
index 86b1490234..874a32baa8 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/global/nvinfer.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/global/nvinfer.java
@@ -51,11 +51,11 @@ public class nvinfer extends org.bytedeco.tensorrt.presets.nvinfer {
/** TensorRT major version. */
public static final int NV_TENSORRT_MAJOR = 10;
/** TensorRT minor version. */
-public static final int NV_TENSORRT_MINOR = 3;
+public static final int NV_TENSORRT_MINOR = 5;
/** TensorRT patch version. */
public static final int NV_TENSORRT_PATCH = 0;
/** TensorRT build number. */
-public static final int NV_TENSORRT_BUILD = 26;
+public static final int NV_TENSORRT_BUILD = 18;
/** TensorRT LWS major version. */
public static final int NV_TENSORRT_LWS_MAJOR = 0;
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java
index 21d802657a..f27e1a3125 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java
@@ -47,22 +47,27 @@ public class IBuilder extends INoCopy {
/**
* \brief Determine whether the platform has fast native fp16.
+ *
+ * @deprecated Deprecated in TensorRT 10.5. Please query data type support from CUDA directly.
* */
//!
//!
- public native @Cast("bool") @NoException(true) boolean platformHasFastFp16();
+ //!
+ public native @Cast("bool") @Deprecated @NoException(true) boolean platformHasFastFp16();
/**
* \brief Determine whether the platform has fast native int8.
+ *
+ * @deprecated Deprecated in TensorRT 10.5. Please query data type support from CUDA directly.
* */
//!
//!
//!
- public native @Cast("bool") @NoException(true) boolean platformHasFastInt8();
+ public native @Cast("bool") @Deprecated @NoException(true) boolean platformHasFastInt8();
/**
* \brief Get the maximum batch size DLA can support.
@@ -214,12 +219,15 @@ public class IBuilder extends INoCopy {
* */
+ //!
//!
//!
public native @NoException(true) void reset();
/**
* \brief Determine whether the platform has TF32 support.
+ *
+ * @deprecated Deprecated in TensorRT 10.5. Please query data type support from CUDA directly.
* */
@@ -230,7 +238,7 @@ public class IBuilder extends INoCopy {
//!
//!
//!
- public native @Cast("bool") @NoException(true) boolean platformHasTf32();
+ public native @Cast("bool") @Deprecated @NoException(true) boolean platformHasTf32();
/**
* \brief Builds and serializes a network for the given INetworkDefinition and IBuilderConfig.
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java
index ff6df4d439..6a0da55a28 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java
@@ -1076,5 +1076,35 @@ public class IBuilderConfig extends INoCopy {
*
* @see IBuilderConfig::setRuntimePlatform()
* */
+
+
+ //!
+ //!
+ //!
+ //!
public native @NoException(true) RuntimePlatform getRuntimePlatform();
+
+ /**
+ * \brief Set the maximum number of tactics to time when there is a choice of tactics.
+ *
+ * This function controls the number of tactics timed when there are multiple tactics to choose from.
+ *
+ * @see getMaxNbTactics()
+ * */
+
+
+ //!
+ //!
+ //!
+ //!
+ public native @NoException(true) void setMaxNbTactics(int maxNbTactics);
+
+ /**
+ * \brief Query the maximum number of tactics timed when there is a choice.
+ *
+ * By default the value is -1, indicating TensorRT can determine the number of tactics based on its own heuristic.
+ *
+ * @see setMaxNbTactics()
+ * */
+ public native @NoException(true) int getMaxNbTactics();
}
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java
index c13fb3e733..cf28b124bc 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java
@@ -24,8 +24,8 @@
*
* \brief A convolution layer in a network definition.
*
- * This layer performs a correlation operation between 3-dimensional filter with a 4-dimensional tensor to produce
- * another 4-dimensional tensor.
+ * This layer performs a correlation operation between 3 or 4 dimensional filter with a 4 or 5 dimensional tensor to
+ * produce another 4 or 5 dimensional tensor.
*
* An optional bias argument is supported, which adds a per-channel constant to each value in the output.
*
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java
index 7f67dfa1f9..5e010fb8b4 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java
@@ -37,7 +37,6 @@
* * GatherMode::kDEFAULT: s = q + r - 1 - nbElementwiseDims
* * GatherMode::kND: s = q + r - indices.d[q-1] - 1 - nbElementwiseDims
* * GatherMode::kELEMENT: s = q = r.
- * The output can be a shape tensor only if the mode is GatherMode::kDEFAULT.
*
* The dimensions of the output likewise depends on the mode:
*
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditional.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditional.java
index b45379209e..496b7a76da 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditional.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditional.java
@@ -64,6 +64,7 @@ public class IIfConditional extends INoCopy {
//!
//!
//!
+ //!
public native @NoException(true) IConditionLayer setCondition(@ByRef ITensor condition);
/**
@@ -75,6 +76,8 @@ public class IIfConditional extends INoCopy {
* Each output layer of an IIfConditional represents a single output of either the true-subgraph or the
* false-subgraph of an IIfConditional, depending on which subgraph was executed.
*
+ * The shapes of the two tensors must be equal unless the condition is a build-time constant.
+ *
* @see IIfConditionalOutputLayer
* */
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditionalOutputLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditionalOutputLayer.java
index a3f4925524..4de57a9ba9 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditionalOutputLayer.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditionalOutputLayer.java
@@ -24,7 +24,9 @@
*
* \brief This layer represents an output of an IIfConditional.
*
- * An IIfConditionalOutputLayer has exactly one output.
+ * An IIfConditionalOutputLayer has two inputs and one output.
+ *
+ * @see IIfConditional::addOutput
* */
@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class)
public class IIfConditionalOutputLayer extends IIfConditionalBoundaryLayer {
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java
index be4006dce2..d082d0d05c 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java
@@ -100,7 +100,7 @@ public class ILoopOutputLayer extends ILoopBoundaryLayer {
/** The indices in the kCONCATENATE or kREVERSE cases are as follows:
/**
/** - 0: Contribution to the output tensor. The contribution must come from inside the loop.
- /** - 1: The concatenation length scalar value, must come from outside the loop, as a 0D Int32 or Int64 shape tensor.
+ /** - 1: The concatenation length scalar value, must come from outside the loop, as a 0D shape tensor of type Int32 or Int64.
/**
/** If this function is called with the value 1, then the function getNbInputs() changes
/** from returning 1 to 2.
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java
index 02cc0f8a0c..e6e24b0ec8 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java
@@ -1037,7 +1037,7 @@ public class INetworkDefinition extends INoCopy {
*
* @see IParametricReLULayer
*
- * \warning Int32 tensors are not valid input tensors.
+ * \warning Tensors of type Int32, Int64, Bool, or UInt8 are not allowed as inputs.
*
* @return The new parametric ReLU layer, or nullptr if it could not be created.
* */
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IOneHotLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IOneHotLayer.java
index f337837446..5d8fd66a95 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IOneHotLayer.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IOneHotLayer.java
@@ -28,8 +28,8 @@
* Output, and an axis attribute.
* * Indices is an Int32 tensor that determines which locations in Output to set as on_value.
* * Values is a two-element (rank=1) tensor that consists of [off_value, on_value]
- * * Depth is an Int32 shape tensor of rank 0, which contains the depth (number of classes) of the one-hot encoding.
- * The depth tensor must be a build-time constant, and its value should be positive.
+ * * Depth is a 0D tensor of type Int32 or Int64, which contains the depth (number of classes) of the one-hot encoding.
+ * The depth tensor must be a positive build-time constant.
* * Output is a tensor with rank = rank(indices)+1, where the added dimension contains the one-hot encoding.
* The data types of Output is equal to the Values data type.
* * Axis is a scalar specifying to which dimension of the output one-hot encoding is added.
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java
index 4afc4105e5..a29c09124f 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java
@@ -209,7 +209,7 @@ public class IResizeLayer extends ILayer {
* The indices in the dynamic case are as follows:
*
* - 0: Execution tensor to be resized.
- * - 1: The output dimensions, as a 1D Int32 shape tensor.
+ * - 1: The output dimensions, as a 1D tensor of type Int32 or Int64.
*
* If this function is called with the value 1, then the function getNbInputs() changes
* from returning 1 to 2.
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java
index 53f4045e1a..53422e0d04 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java
@@ -26,7 +26,7 @@
* This layer shuffles data by applying in sequence: a transpose operation, a reshape operation
* and a second transpose operation. The dimension types of the output are those of the reshape dimension.
*
- * The layer has an optional second input. If present, it must be a 1D Int32 shape tensor,
+ * The layer has an optional second input. If present, it must be a 1D tensor of type Int32 or Int64,
* and the reshape dimensions are taken from it.
*
* \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
@@ -138,7 +138,7 @@ public class IShuffleLayer extends ILayer {
/** The indices in the dynamic case are as follows:
/**
/** - 0: Data or Shape tensor to be shuffled.
- /** - 1: The dimensions for the reshape operation, as a 1D Int32 shape tensor.
+ /** - 1: The dimensions for the reshape operation, as a 1D tensor of type Int32 or Int64.
/**
/** If this function is called with the value 1, then the function getNbInputs() changes
/** from returning 1 to 2.
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java
index c270f9a7c9..e67903077e 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java
@@ -34,7 +34,7 @@
*
* The slice layer selects for each dimension a start location from within the input tensor, and
* copies elements to the output tensor using the specified stride across the input tensor.
- * Start, size, and stride tensors must be 1D Int32 shape tensors if not specified via Dims.
+ * Start, size, and stride tensors must be 1D tensors of type Int32 or Int64 if not specified via Dims.
*
* An example of using slice on a tensor:
* input = {{0, 2, 4}, {1, 3, 5}}
@@ -72,10 +72,12 @@
* The following constraints must be satisfied to execute this layer on DLA:
* * start, size, and stride are build time constants, either as static Dims or as constant input tensors.
* * axes, if provided, are build time constants, either as static Dims or as a constant input tensor.
- * * sampleMode is kSTRICT_BOUNDS.
+ * * sampleMode is kDEFAULT, kWRAP, or kFILL.
* * Strides are 1 for all dimensions.
- * * Slicing is not performed on the first dimension
- * * The input tensor has four dimensions
+ * * Slicing is not performed on the first dimension.
+ * * The input tensor has four dimensions.
+ * * For kFILL sliceMode, the fill value input is a scalar output of an IConstantLayer with value 0 that is not
+ * consumed by any other layer.
*
* \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
* */
@@ -233,15 +235,15 @@ public class ISliceLayer extends ILayer {
* The indices are as follows:
*
* - 0: Tensor to be sliced.
- * - 1: The start tensor to begin slicing, as a 1D Int32 shape tensor.
- * - 2: The size tensor of the resulting slice, as a 1D Int32 shape tensor.
- * - 3: The stride of the slicing operation, as a 1D Int32 shape tensor.
+ * - 1: The start tensor to begin slicing, as a 1D tensor of type Int32 or Int64.
+ * - 2: The size tensor of the resulting slice, as a 1D tensor of type Int32 or Int64.
+ * - 3: The stride of the slicing operation, as a 1D tensor of type Int32 or Int64.
* - 4: Value for the kFILL slice mode. The fill value data type should either be the same
* or be implicitly convertible to the input data type.
* Implicit data type conversion is supported among kFLOAT, kHALF, kINT8, and kFP8 data types.
* This input is disallowed for other modes.
* - 5: The axes tensor indicating the corresponding axes that start, size, and stride
- * should apply to, as a 1D Int32 shape tensor. Negative values for axes
+ * should apply to, as a 1D tensor of type Int32 or Int64. Negative values for axes
* indicate indexing from the back of the input tensor. Values must be unique and be
* within the interval of [-rank(input), rank(input)-1].
*
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java
index e3ae1a877f..b8aa86cdfe 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java
@@ -102,4 +102,6 @@ public class VBuilderConfig extends VRoot {
public native @NoException(true) void setRuntimePlatform(RuntimePlatform runtimePlatform);
public native @NoException(true) void setRuntimePlatform(@Cast("nvinfer1::RuntimePlatform") int runtimePlatform);
public native @NoException(true) RuntimePlatform getRuntimePlatform();
+ public native @NoException(true) void setMaxNbTactics(int maxTactics);
+ public native @NoException(true) int getMaxNbTactics();
}
diff --git a/tensorrt/src/main/java/org/bytedeco/tensorrt/presets/nvinfer.java b/tensorrt/src/main/java/org/bytedeco/tensorrt/presets/nvinfer.java
index 7d1fa17238..631ed304ab 100644
--- a/tensorrt/src/main/java/org/bytedeco/tensorrt/presets/nvinfer.java
+++ b/tensorrt/src/main/java/org/bytedeco/tensorrt/presets/nvinfer.java
@@ -51,7 +51,7 @@
"NvInferLegacyDims.h", "NvInferRuntime.h", "NvInfer.h", "NvInferImpl.h"/*, "NvUtils.h"*/},
exclude = "NvInferRuntimeBase.h",
link = "nvinfer@.10",
- preload = "nvinfer_builder_resource@.10.3.0"
+ preload = "nvinfer_builder_resource@.10.5.0"
),
@Platform(
value = "linux-arm64",
diff --git a/tritonserver/README.md b/tritonserver/README.md
index f3cc656be1..3c88de0308 100644
--- a/tritonserver/README.md
+++ b/tritonserver/README.md
@@ -23,7 +23,7 @@ Introduction
------------
This directory contains the JavaCPP Presets module for:
- * Triton Inference Server 2.48.0 https://github.com/triton-inference-server/server
+ * Triton Inference Server 2.50.0 https://github.com/triton-inference-server/server
Please refer to the parent README.md file for more detailed information about the JavaCPP Presets.
@@ -51,9 +51,9 @@ This sample intends to show how to call the Java-mapped C API of Triton to execu
1. Get the source code of Triton Inference Server to prepare the model repository:
```bash
- $ wget https://github.com/triton-inference-server/server/archive/refs/tags/v2.48.0.tar.gz
- $ tar zxvf v2.48.0.tar.gz
- $ cd server-2.48.0/docs/examples/model_repository
+ $ wget https://github.com/triton-inference-server/server/archive/refs/tags/v2.50.0.tar.gz
+ $ tar zxvf v2.50.0.tar.gz
+ $ cd server-2.50.0/docs/examples/model_repository
$ mkdir models
$ cd models; cp -a ../simple .
```
@@ -61,7 +61,7 @@ Now, this `models` directory will be our model repository.
2. Start the Docker container to run the sample (assuming we are under the `models` directory created above):
```bash
- $ docker run -it --gpus=all -v $(pwd):/workspace nvcr.io/nvidia/tritonserver:24.07-py3 bash
+ $ docker run -it --gpus=all -v $(pwd):/workspace nvcr.io/nvidia/tritonserver:24.09-py3 bash
$ apt update
$ apt install -y openjdk-11-jdk
$ wget https://archive.apache.org/dist/maven/maven-3/3.8.4/binaries/apache-maven-3.8.4-bin.tar.gz
diff --git a/tritonserver/cppbuild.sh b/tritonserver/cppbuild.sh
index f438e39cad..611be9089e 100755
--- a/tritonserver/cppbuild.sh
+++ b/tritonserver/cppbuild.sh
@@ -11,9 +11,9 @@ INCLUDE_DEVELOPER_TOOLS_SERVER=${INCLUDE_DEVELOPER_TOOLS_SERVER:=1}
if [[ ! -f "/opt/tritonserver/include/triton/developer_tools/generic_server_wrapper.h" ]] && [[ ! -f "/opt/tritonserver/lib/libtritondevelopertoolsserver.so" ]] && [[ ${INCLUDE_DEVELOPER_TOOLS_SERVER} -ne 0 ]]; then
TOOLS_BRANCH=${TOOLS_BRANCH:="https://github.com/triton-inference-server/developer_tools.git"}
- TOOLS_BRANCH_TAG=${TOOLS_BRANCH_TAG:="r24.07"}
+ TOOLS_BRANCH_TAG=${TOOLS_BRANCH_TAG:="r24.09"}
TRITON_CORE_REPO=${TRITON_CORE_REPO:="https://github.com/triton-inference-server/core.git"}
- TRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG="r24.07"}
+ TRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG="r24.09"}
TRITON_HOME="/opt/tritonserver"
BUILD_HOME="$PWD"/tritonbuild
mkdir -p ${BUILD_HOME} && cd ${BUILD_HOME}
diff --git a/tritonserver/platform/pom.xml b/tritonserver/platform/pom.xml
index d83b464263..82851eac3d 100644
--- a/tritonserver/platform/pom.xml
+++ b/tritonserver/platform/pom.xml
@@ -12,7 +12,7 @@
org.bytedeco
tritonserver-platform
- 2.48.0-${project.parent.version}
+ 2.50.0-${project.parent.version}
JavaCPP Presets Platform for Triton Inference Server
diff --git a/tritonserver/platform/redist/pom.xml b/tritonserver/platform/redist/pom.xml
index 8e138ae38b..638848272a 100644
--- a/tritonserver/platform/redist/pom.xml
+++ b/tritonserver/platform/redist/pom.xml
@@ -12,7 +12,7 @@
org.bytedeco
tritonserver-platform-redist
- 2.48.0-${project.parent.version}
+ 2.50.0-${project.parent.version}
JavaCPP Presets Platform Redist for Triton Inference Server
diff --git a/tritonserver/pom.xml b/tritonserver/pom.xml
index 95a6320848..5c6b326cf9 100644
--- a/tritonserver/pom.xml
+++ b/tritonserver/pom.xml
@@ -11,7 +11,7 @@
org.bytedeco
tritonserver
- 2.48.0-${project.parent.version}
+ 2.50.0-${project.parent.version}
JavaCPP Presets for Triton Inference Server
diff --git a/tritonserver/samples/simple/pom.xml b/tritonserver/samples/simple/pom.xml
index 13e9fa7fc4..817cab07f5 100644
--- a/tritonserver/samples/simple/pom.xml
+++ b/tritonserver/samples/simple/pom.xml
@@ -12,7 +12,7 @@
org.bytedeco
tritonserver-platform
- 2.48.0-1.5.11-SNAPSHOT
+ 2.50.0-1.5.11-SNAPSHOT
shaded
diff --git a/tritonserver/samples/simplecpp/pom.xml b/tritonserver/samples/simplecpp/pom.xml
index 5a5ebb7d4e..fa631c00b0 100644
--- a/tritonserver/samples/simplecpp/pom.xml
+++ b/tritonserver/samples/simplecpp/pom.xml
@@ -12,7 +12,7 @@
org.bytedeco
tritonserver-platform
- 2.48.0-1.5.11-SNAPSHOT
+ 2.50.0-1.5.11-SNAPSHOT
shaded
diff --git a/tritonserver/samples/unsupported/pom.xml b/tritonserver/samples/unsupported/pom.xml
index 7145be0246..b3a891ddcd 100644
--- a/tritonserver/samples/unsupported/pom.xml
+++ b/tritonserver/samples/unsupported/pom.xml
@@ -13,17 +13,17 @@
org.bytedeco
cuda-platform
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
org.bytedeco
tensorrt-platform
- 10.3-1.5.11-SNAPSHOT
+ 10.5-1.5.11-SNAPSHOT
org.bytedeco
tritonserver-platform
- 2.48.0-1.5.11-SNAPSHOT
+ 2.50.0-1.5.11-SNAPSHOT
shaded
diff --git a/tvm/README.md b/tvm/README.md
index d40568e7c1..2ae70e5630 100644
--- a/tvm/README.md
+++ b/tvm/README.md
@@ -63,7 +63,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/tvm/samples/pom.xml b/tvm/samples/pom.xml
index 727d14f5a3..1299336d26 100644
--- a/tvm/samples/pom.xml
+++ b/tvm/samples/pom.xml
@@ -26,7 +26,7 @@
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT