diff --git a/.github/actions/deploy-ubuntu/action.yml b/.github/actions/deploy-ubuntu/action.yml
index ce2cb1e142..14b944e8f2 100644
--- a/.github/actions/deploy-ubuntu/action.yml
+++ b/.github/actions/deploy-ubuntu/action.yml
@@ -41,18 +41,18 @@ runs:
export ARCH=arm64
export ARCH_CUDA=sbsa
export PREFIX=aarch64-linux-gnu
- export CUDA=cuda-repo-rhel8-12-6-local-12.6.0_560.28.03-1.aarch64.rpm
- export CUDNN=cuda-12-9.3.0.75-1.aarch64
- export NCCL=2.22.3-1+cuda12.5.aarch64
- export NVCOMP=nvcomp-linux-sbsa-4.0.0-cuda12.5
+ export CUDA=cuda-repo-rhel8-12-6-local-12.6.2_560.35.03-1.aarch64.rpm
+ export CUDNN=cuda-12-9.5.1.17-1.aarch64
+ export NCCL=2.23.4-1+cuda12.6.aarch64
+ export NVCOMP=nvcomp-linux-sbsa-4.0.1-cuda12.x
export USERLAND_BUILDME="buildme --aarch64"
elif [[ "$CI_DEPLOY_PLATFORM" == "linux-ppc64le" ]]; then
export ARCH=ppc64el
export ARCH_CUDA=ppc64le
export PREFIX=powerpc64le-linux-gnu
- export CUDA=cuda-repo-rhel8-12-6-local-12.6.0_560.28.03-1.ppc64le.rpm
- export CUDNN=cuda-12-9.3.0.75-1.ppc64le
- export NCCL=2.22.3-1+cuda12.5.ppc64le
+ export CUDA=cuda-repo-rhel8-12-6-local-12.6.2_560.35.03-1.ppc64le.rpm
+ export CUDNN=cuda-12-9.5.1.17-1.ppc64le
+ export NCCL=2.23.4-1+cuda12.6.ppc64le
elif [[ "$CI_DEPLOY_PLATFORM" == "linux-riscv64" ]]; then
export ARCH=riscv64
export PREFIX=riscv64-linux-gnu
@@ -63,10 +63,10 @@ runs:
export ARCH=amd64
export ARCH_CUDA=x86_64
export PREFIX=x86_64-linux-gnu
- export CUDA=cuda-repo-rhel8-12-6-local-12.6.0_560.28.03-1.x86_64.rpm
- export CUDNN=cuda-12-9.3.0.75-1.x86_64
- export NCCL=2.22.3-1+cuda12.5.x86_64
- export NVCOMP=nvcomp-linux-x86_64-4.0.0-cuda12.5
+ export CUDA=cuda-repo-rhel8-12-6-local-12.6.2_560.35.03-1.x86_64.rpm
+ export CUDNN=cuda-12-9.5.1.17-1.x86_64
+ export NCCL=2.23.4-1+cuda12.6.x86_64
+ export NVCOMP=nvcomp-linux-x86_64-4.0.1-cuda12.x
fi
echo "ARCH=$ARCH" >> $GITHUB_ENV
echo "PREFIX=$PREFIX" >> $GITHUB_ENV
@@ -165,7 +165,7 @@ runs:
if [[ -n ${ARCH_CUDA:-} ]] && [[ -n ${CI_DEPLOY_NEED_CUDA:-} ]]; then
echo Installing CUDA, cuDNN, nvCOMP, etc
- curl -LO https://developer.download.nvidia.com/compute/cuda/12.6.0/local_installers/$CUDA
+ curl -LO https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/$CUDA
curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel8/$ARCH_CUDA/libcudnn9-$CUDNN.rpm
curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel8/$ARCH_CUDA/libcudnn9-devel-$CUDNN.rpm
curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel8/$ARCH_CUDA/libnccl-$NCCL.rpm
@@ -183,7 +183,7 @@ runs:
for f in /usr/local/cuda/lib64/libcudnn*so.9.*; do $SUDO ln -sf $f ${f:0:${#f}-4}; $SUDO ln -sf $f ${f:0:${#f}-6}; done
if [[ -n ${NVCOMP:-} ]]; then
- curl -LO https://developer.download.nvidia.com/compute/nvcomp/4.0.0/local_installers/$NVCOMP.tar.gz
+ curl -LO https://developer.download.nvidia.com/compute/nvcomp/4.0.1/local_installers/$NVCOMP.tar.gz
$SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/lib64/ --strip-components=1 lib/ || $SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/lib64/ --strip-components=2 nvcomp/lib/
$SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/include/ --strip-components=1 include/ || $SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/include/ --strip-components=2 nvcomp/include/
rm -f $NVCOMP.tar.gz
@@ -213,16 +213,16 @@ runs:
if [[ "$CI_DEPLOY_PLATFORM" == "linux-arm64" ]] && [[ "$CI_DEPLOY_MODULE" == "tensorrt" ]]; then
echo Installing TensorRT
# python3 -m gdown 1LZRCv4ZAGiDQAu4pvADJIGntq4cGl5tU
- curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/tars/TensorRT-10.3.0.26.Ubuntu-22.04.aarch64-gnu.cuda-12.5.tar.gz
- $SUDO tar -hxvf TensorRT-10.3.0.26.Ubuntu-22.04.aarch64-gnu.cuda-12.5.tar.gz -C /usr/local/
+ curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/tars/TensorRT-10.5.0.18.Ubuntu-24.04.aarch64-gnu.cuda-12.6.tar.gz
+ $SUDO tar -hxvf TensorRT-10.5.0.18.Ubuntu-24.04.aarch64-gnu.cuda-12.6.tar.gz -C /usr/local/
$SUDO ln -sf /usr/local/TensorRT* /usr/local/tensorrt
fi
if [[ "$CI_DEPLOY_PLATFORM" == "linux-x86_64" ]] && [[ "$CI_DEPLOY_MODULE" == "tensorrt" ]]; then
echo Installing TensorRT
# python3 -m gdown 1dVhD-DEYY42QbZe1GXl-vxe3k6KqWGsL
- curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/tars/TensorRT-10.3.0.26.Linux.x86_64-gnu.cuda-12.5.tar.gz
- $SUDO tar -hxvf TensorRT-10.3.0.26.Linux.x86_64-gnu.cuda-12.5.tar.gz -C /usr/local/
+ curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/tars/TensorRT-10.5.0.18.Linux.x86_64-gnu.cuda-12.6.tar.gz
+ $SUDO tar -hxvf TensorRT-10.5.0.18.Linux.x86_64-gnu.cuda-12.6.tar.gz -C /usr/local/
$SUDO ln -sf /usr/local/TensorRT* /usr/local/tensorrt
fi
diff --git a/.github/actions/deploy-windows/action.yml b/.github/actions/deploy-windows/action.yml
index 3783ecc388..1869894efa 100644
--- a/.github/actions/deploy-windows/action.yml
+++ b/.github/actions/deploy-windows/action.yml
@@ -99,22 +99,22 @@ runs:
if "%CI_DEPLOY_PLATFORM%"=="windows-x86_64" if not "%CI_DEPLOY_NEED_CUDA%"=="" (
echo Installing CUDA, cuDNN, nvCOMP, etc
curl -LO https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_522.06_windows.exe
- curl -LO https://developer.download.nvidia.com/compute/cuda/12.6.0/local_installers/cuda_12.6.0_560.76_windows.exe
- curl -LO https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.3.0.75_cuda12-archive.zip
+ curl -LO https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/cuda_12.6.2_560.94_windows.exe
+ curl -LO https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.5.1.17_cuda12-archive.zip
curl -LO http://www.winimage.com/zLibDll/zlib123dllx64.zip
- curl -LO https://developer.download.nvidia.com/compute/nvcomp/4.0.0/local_installers/nvcomp-windows-x86_64-4.0.0-cuda12.5.zip
+ curl -LO https://developer.download.nvidia.com/compute/nvcomp/4.0.1/local_installers/nvcomp-windows-x86_64-4.0.1-cuda12.x.zip
cuda_11.8.0_522.06_windows.exe -s
bash -c "rm -Rf 'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8'"
bash -c "mv 'C:/Program Files/NVIDIA Corporation/NvToolsExt' 'C:/Program Files/NVIDIA Corporation/NvToolsExt_old'"
- cuda_12.6.0_560.76_windows.exe -s
+ cuda_12.6.2_560.94_windows.exe -s
bash -c "mv 'C:/Program Files/NVIDIA Corporation/NvToolsExt_old' 'C:/Program Files/NVIDIA Corporation/NvToolsExt'"
bash -c "ls 'C:/Program Files/NVIDIA Corporation/NvToolsExt'"
- unzip cudnn-windows-x86_64-9.3.0.75_cuda12-archive.zip
+ unzip cudnn-windows-x86_64-9.5.1.17_cuda12-archive.zip
unzip zlib123dllx64.zip
- unzip nvcomp-windows-x86_64-4.0.0-cuda12.5.zip
- move cudnn-windows-x86_64-9.3.0.75_cuda12-archive\bin\*.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin"
- move cudnn-windows-x86_64-9.3.0.75_cuda12-archive\include\*.h "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include"
- move cudnn-windows-x86_64-9.3.0.75_cuda12-archive\lib\x64\*.lib "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\lib\x64"
+ unzip nvcomp-windows-x86_64-4.0.1-cuda12.x.zip
+ move cudnn-windows-x86_64-9.5.1.17_cuda12-archive\bin\*.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin"
+ move cudnn-windows-x86_64-9.5.1.17_cuda12-archive\include\*.h "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include"
+ move cudnn-windows-x86_64-9.5.1.17_cuda12-archive\lib\x64\*.lib "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\lib\x64"
move dll_x64\zlibwapi.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin"
move nvcomp\include\* "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include"
move nvcomp\include\device "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include"
@@ -144,9 +144,9 @@ runs:
if "%CI_DEPLOY_MODULE%"=="tensorrt" (
echo Installing TensorRT
rem python -m gdown 1GfmJ1BKbacLpUU-0i_mGu0sjrAS0Xzzi
- curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/zip/TensorRT-10.3.0.26.Windows.win10.cuda-12.5.zip
- unzip TensorRT-10.3.0.26.Windows.win10.cuda-12.5.zip
- move TensorRT-10.3.0.26 "%ProgramFiles%\NVIDIA GPU Computing Toolkit\TensorRT"
+ curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/zip/TensorRT-10.5.0.18.Windows.win10.cuda-12.6.zip
+ unzip TensorRT-10.5.0.18.Windows.win10.cuda-12.6.zip
+ move TensorRT-10.5.0.18 "%ProgramFiles%\NVIDIA GPU Computing Toolkit\TensorRT"
)
if "%CI_DEPLOY_MODULE%"=="mkl" (
@@ -221,7 +221,7 @@ runs:
set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6"
set "CUDA_PATH_V12_6=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6"
set "PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\libnvvp;%PATH%"
- echo CUDA Version 12.6.0>"%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\version.txt"
+ echo CUDA Version 12.6.2>"%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\version.txt"
)
set "CCACHE_DIR=%USERPROFILE%\ccache"
set "PATH=C:\hostedtoolcache\windows\Python\3.9.13\x64;C:\msys64\%MSYSTEM%\bin;C:\msys64\usr\bin;%ProgramFiles%\apache-maven-3.6.3\bin;%PATH%"
diff --git a/.github/workflows/tritonserver.yml b/.github/workflows/tritonserver.yml
index d04f7b44a6..9c1cfa0c28 100644
--- a/.github/workflows/tritonserver.yml
+++ b/.github/workflows/tritonserver.yml
@@ -19,6 +19,6 @@ env:
jobs:
linux-x86_64:
runs-on: ubuntu-20.04
- container: nvcr.io/nvidia/tritonserver:24.07-tf2-python-py3
+ container: nvcr.io/nvidia/tritonserver:24.09-tf2-python-py3
steps:
- uses: bytedeco/javacpp-presets/.github/actions/deploy-ubuntu@actions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5a91468477..78f7f723aa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,7 +9,7 @@
* Build FFmpeg with zimg to enable zscale filter ([pull #1481](https://github.com/bytedeco/javacpp-presets/pull/1481))
* Enable PulseAudio support for FFmpeg on Linux ([pull #1472](https://github.com/bytedeco/javacpp-presets/pull/1472))
* Virtualize `btCollisionWorld`, `btOverlapFilterCallback`, `btOverlapCallback` from Bullet Physics SDK ([pull #1475](https://github.com/bytedeco/javacpp-presets/pull/1475))
- * Upgrade presets for OpenCV 4.10.0, FFmpeg 7.1, Spinnaker 4.0.0.116 ([pull #1524](https://github.com/bytedeco/javacpp-presets/pull/1524)), MKL 2024.2, DNNL 3.6, OpenBLAS 0.3.28, CMINPACK 1.3.11, GSL 2.8, CPython 3.13.0, NumPy 2.1.2, SciPy 1.14.1, LLVM 19.1.2, LibRaw 0.21.2 ([pull #1520](https://github.com/bytedeco/javacpp-presets/pull/1520)), Leptonica 1.85.0, Tesseract 5.4.1, libffi 3.4.6, CUDA 12.6.0, cuDNN 9.3.0, NCCL 2.22.3, nvCOMP 4.0.0, OpenCL 3.0.16, NVIDIA Video Codec SDK 12.2.72, PyTorch 2.5.0 ([pull #1466](https://github.com/bytedeco/javacpp-presets/pull/1466)), SentencePiece 0.2.0, TensorFlow Lite 2.18.0, TensorRT 10.3.0.26, Triton Inference Server 2.48.0, ONNX 1.17.0, ONNX Runtime 1.19.2, TVM 0.18.0, and their dependencies
+ * Upgrade presets for OpenCV 4.10.0, FFmpeg 7.1, Spinnaker 4.0.0.116 ([pull #1524](https://github.com/bytedeco/javacpp-presets/pull/1524)), MKL 2024.2, DNNL 3.6, OpenBLAS 0.3.28, CMINPACK 1.3.11, GSL 2.8, CPython 3.13.0, NumPy 2.1.2, SciPy 1.14.1, LLVM 19.1.2, LibRaw 0.21.2 ([pull #1520](https://github.com/bytedeco/javacpp-presets/pull/1520)), Leptonica 1.85.0, Tesseract 5.4.1, libffi 3.4.6, CUDA 12.6.2, cuDNN 9.5.1, NCCL 2.23.4, nvCOMP 4.0.1, OpenCL 3.0.16, NVIDIA Video Codec SDK 12.2.72, PyTorch 2.5.0 ([pull #1466](https://github.com/bytedeco/javacpp-presets/pull/1466)), SentencePiece 0.2.0, TensorFlow Lite 2.18.0, TensorRT 10.5.0.18, Triton Inference Server 2.50.0, ONNX 1.17.0, ONNX Runtime 1.19.2, TVM 0.18.0, and their dependencies
### January 29, 2024 version 1.5.10
* Introduce `macosx-arm64` builds for PyTorch ([pull #1463](https://github.com/bytedeco/javacpp-presets/pull/1463))
diff --git a/README.md b/README.md
index a1e3e768d4..8f477741dc 100644
--- a/README.md
+++ b/README.md
@@ -217,8 +217,8 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip
* Caffe 1.0 https://github.com/BVLC/caffe
* OpenPose 1.7.0 https://github.com/CMU-Perceptual-Computing-Lab/openpose
* CUDA 12.6.x https://developer.nvidia.com/cuda-downloads
- * cuDNN 9.3.x https://developer.nvidia.com/cudnn
- * NCCL 2.22.x https://developer.nvidia.com/nccl
+ * cuDNN 9.5.x https://developer.nvidia.com/cudnn
+ * NCCL 2.23.x https://developer.nvidia.com/nccl
* nvCOMP 4.0.x https://developer.nvidia.com/nvcomp
* NVIDIA Video Codec SDK 12.2.x https://developer.nvidia.com/nvidia-video-codec-sdk
* OpenCL 3.0.x https://github.com/KhronosGroup/OpenCL-ICD-Loader
@@ -227,8 +227,8 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip
* SentencePiece 0.2.0 https://github.com/google/sentencepiece
* TensorFlow 1.15.x https://github.com/tensorflow/tensorflow
* TensorFlow Lite 2.18.x https://github.com/tensorflow/tensorflow
- * TensorRT 10.3.x https://developer.nvidia.com/tensorrt
- * Triton Inference Server 2.48.x https://developer.nvidia.com/nvidia-triton-inference-server
+ * TensorRT 10.5.x https://developer.nvidia.com/tensorrt
+ * Triton Inference Server 2.50.x https://developer.nvidia.com/nvidia-triton-inference-server
* The Arcade Learning Environment 0.8.x https://github.com/mgbellemare/Arcade-Learning-Environment
* DepthAI 2.24.x https://github.com/luxonis/depthai-core
* ONNX 1.17.x https://github.com/onnx/onnx
diff --git a/cuda/README.md b/cuda/README.md
index a969df66ec..598d815f7b 100644
--- a/cuda/README.md
+++ b/cuda/README.md
@@ -25,10 +25,10 @@ Introduction
------------
This directory contains the JavaCPP Presets module for:
- * CUDA 12.6.0 https://developer.nvidia.com/cuda-zone
- * cuDNN 9.3.0 https://developer.nvidia.com/cudnn
- * NCCL 2.22.3 https://developer.nvidia.com/nccl
- * nvCOMP 4.0.0 https://developer.nvidia.com/nvcomp
+ * CUDA 12.6.2 https://developer.nvidia.com/cuda-zone
+ * cuDNN 9.5.1 https://developer.nvidia.com/cudnn
+ * NCCL 2.23.4 https://developer.nvidia.com/nccl
+ * nvCOMP 4.0.1 https://developer.nvidia.com/nvcomp
Please refer to the parent README.md file for more detailed information about the JavaCPP Presets.
@@ -67,14 +67,14 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic
org.bytedeco
cuda-platform
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/cuda/platform/pom.xml b/cuda/platform/pom.xml
index f374c4fc33..b84a14f18a 100644
--- a/cuda/platform/pom.xml
+++ b/cuda/platform/pom.xml
@@ -12,7 +12,7 @@
org.bytedeco
cuda-platform
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
JavaCPP Presets Platform for CUDA
diff --git a/cuda/platform/redist/pom.xml b/cuda/platform/redist/pom.xml
index 7885ff8e03..056aa0ed30 100644
--- a/cuda/platform/redist/pom.xml
+++ b/cuda/platform/redist/pom.xml
@@ -12,7 +12,7 @@
org.bytedeco
cuda-platform-redist
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
JavaCPP Presets Platform Redist for CUDA
diff --git a/cuda/pom.xml b/cuda/pom.xml
index d0bb5940d5..48401ddffe 100644
--- a/cuda/pom.xml
+++ b/cuda/pom.xml
@@ -11,7 +11,7 @@
org.bytedeco
cuda
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
JavaCPP Presets for CUDA
diff --git a/cuda/samples/pom.xml b/cuda/samples/pom.xml
index 0edae05bfd..8a9cc92880 100644
--- a/cuda/samples/pom.xml
+++ b/cuda/samples/pom.xml
@@ -12,14 +12,14 @@
org.bytedeco
cuda-platform
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/cublas/cublasLtMatmulHeuristicResult_t.java b/cuda/src/gen/java/org/bytedeco/cuda/cublas/cublasLtMatmulHeuristicResult_t.java
index 6be405416e..d672e808b2 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/cublas/cublasLtMatmulHeuristicResult_t.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/cublas/cublasLtMatmulHeuristicResult_t.java
@@ -13,7 +13,7 @@
import static org.bytedeco.cuda.global.cublas.*;
-/** Results structure used by cublasLtMatmulGetAlgo.
+/** Results structure used by cublasLtMatmulAlgoGetHeuristic
*
* Holds returned configured algo descriptor and its runtime properties.
*/
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_ActivityMarkerData.java b/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_ActivityMarkerData.java
index 7531cf23df..b0e5b26c98 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_ActivityMarkerData.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_ActivityMarkerData.java
@@ -16,6 +16,8 @@
/**
* \brief The activity record providing detailed information for a marker.
*
+ * User must enable CUPTI_ACTIVITY_KIND_MARKER as well
+ * to get records for marker data.
* The marker data contains color, payload, and category.
* (CUPTI_ACTIVITY_KIND_MARKER_DATA).
*/
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_CallbackData.java b/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_CallbackData.java
index 0e79317c8d..533a9e0407 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_CallbackData.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_CallbackData.java
@@ -13,6 +13,7 @@
import static org.bytedeco.cuda.global.cupti.*;
+
/**
* \brief Data passed into a runtime or driver API callback function.
*
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cublas.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cublas.java
index 04a7bfcb98..8faa8693ef 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/cublas.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cublas.java
@@ -103,8 +103,8 @@ public class cublas extends org.bytedeco.cuda.presets.cublas {
public static final int CUBLAS_VER_MAJOR = 12;
public static final int CUBLAS_VER_MINOR = 6;
-public static final int CUBLAS_VER_PATCH = 0;
-public static final int CUBLAS_VER_BUILD = 22;
+public static final int CUBLAS_VER_PATCH = 3;
+public static final int CUBLAS_VER_BUILD = 3;
public static final int CUBLAS_VERSION = (CUBLAS_VER_MAJOR * 10000 + CUBLAS_VER_MINOR * 100 + CUBLAS_VER_PATCH);
/* CUBLAS status type returns */
@@ -12713,7 +12713,602 @@ public static native void cublasZtrmm(@Cast("char") byte side,
CUBLASLT_MATMUL_TILE_128x96 = 33,
CUBLASLT_MATMUL_TILE_32x256 = 34,
CUBLASLT_MATMUL_TILE_256x32 = 35,
- CUBLASLT_MATMUL_TILE_END = 36;
+ CUBLASLT_MATMUL_TILE_8x128 = 36,
+ CUBLASLT_MATMUL_TILE_8x192 = 37,
+ CUBLASLT_MATMUL_TILE_8x256 = 38,
+ CUBLASLT_MATMUL_TILE_8x320 = 39,
+ CUBLASLT_MATMUL_TILE_8x384 = 40,
+ CUBLASLT_MATMUL_TILE_8x448 = 41,
+ CUBLASLT_MATMUL_TILE_8x512 = 42,
+ CUBLASLT_MATMUL_TILE_8x576 = 43,
+ CUBLASLT_MATMUL_TILE_8x640 = 44,
+ CUBLASLT_MATMUL_TILE_8x704 = 45,
+ CUBLASLT_MATMUL_TILE_8x768 = 46,
+ CUBLASLT_MATMUL_TILE_16x64 = 47,
+ CUBLASLT_MATMUL_TILE_16x128 = 48,
+ CUBLASLT_MATMUL_TILE_16x192 = 49,
+ CUBLASLT_MATMUL_TILE_16x256 = 50,
+ CUBLASLT_MATMUL_TILE_16x320 = 51,
+ CUBLASLT_MATMUL_TILE_16x384 = 52,
+ CUBLASLT_MATMUL_TILE_16x448 = 53,
+ CUBLASLT_MATMUL_TILE_16x512 = 54,
+ CUBLASLT_MATMUL_TILE_16x576 = 55,
+ CUBLASLT_MATMUL_TILE_16x640 = 56,
+ CUBLASLT_MATMUL_TILE_16x704 = 57,
+ CUBLASLT_MATMUL_TILE_16x768 = 58,
+ CUBLASLT_MATMUL_TILE_24x64 = 59,
+ CUBLASLT_MATMUL_TILE_24x128 = 60,
+ CUBLASLT_MATMUL_TILE_24x192 = 61,
+ CUBLASLT_MATMUL_TILE_24x256 = 62,
+ CUBLASLT_MATMUL_TILE_24x320 = 63,
+ CUBLASLT_MATMUL_TILE_24x384 = 64,
+ CUBLASLT_MATMUL_TILE_24x448 = 65,
+ CUBLASLT_MATMUL_TILE_24x512 = 66,
+ CUBLASLT_MATMUL_TILE_24x576 = 67,
+ CUBLASLT_MATMUL_TILE_24x640 = 68,
+ CUBLASLT_MATMUL_TILE_24x704 = 69,
+ CUBLASLT_MATMUL_TILE_24x768 = 70,
+ CUBLASLT_MATMUL_TILE_32x192 = 71,
+ CUBLASLT_MATMUL_TILE_32x320 = 72,
+ CUBLASLT_MATMUL_TILE_32x384 = 73,
+ CUBLASLT_MATMUL_TILE_32x448 = 74,
+ CUBLASLT_MATMUL_TILE_32x512 = 75,
+ CUBLASLT_MATMUL_TILE_32x576 = 76,
+ CUBLASLT_MATMUL_TILE_32x640 = 77,
+ CUBLASLT_MATMUL_TILE_32x704 = 78,
+ CUBLASLT_MATMUL_TILE_32x768 = 79,
+ CUBLASLT_MATMUL_TILE_40x64 = 80,
+ CUBLASLT_MATMUL_TILE_40x128 = 81,
+ CUBLASLT_MATMUL_TILE_40x192 = 82,
+ CUBLASLT_MATMUL_TILE_40x256 = 83,
+ CUBLASLT_MATMUL_TILE_40x320 = 84,
+ CUBLASLT_MATMUL_TILE_40x384 = 85,
+ CUBLASLT_MATMUL_TILE_40x448 = 86,
+ CUBLASLT_MATMUL_TILE_40x512 = 87,
+ CUBLASLT_MATMUL_TILE_40x576 = 88,
+ CUBLASLT_MATMUL_TILE_40x640 = 89,
+ CUBLASLT_MATMUL_TILE_40x704 = 90,
+ CUBLASLT_MATMUL_TILE_40x768 = 91,
+ CUBLASLT_MATMUL_TILE_48x64 = 92,
+ CUBLASLT_MATMUL_TILE_48x128 = 93,
+ CUBLASLT_MATMUL_TILE_48x192 = 94,
+ CUBLASLT_MATMUL_TILE_48x256 = 95,
+ CUBLASLT_MATMUL_TILE_48x320 = 96,
+ CUBLASLT_MATMUL_TILE_48x384 = 97,
+ CUBLASLT_MATMUL_TILE_48x448 = 98,
+ CUBLASLT_MATMUL_TILE_48x512 = 99,
+ CUBLASLT_MATMUL_TILE_48x576 = 100,
+ CUBLASLT_MATMUL_TILE_48x640 = 101,
+ CUBLASLT_MATMUL_TILE_48x704 = 102,
+ CUBLASLT_MATMUL_TILE_48x768 = 103,
+ CUBLASLT_MATMUL_TILE_56x64 = 104,
+ CUBLASLT_MATMUL_TILE_56x128 = 105,
+ CUBLASLT_MATMUL_TILE_56x192 = 106,
+ CUBLASLT_MATMUL_TILE_56x256 = 107,
+ CUBLASLT_MATMUL_TILE_56x320 = 108,
+ CUBLASLT_MATMUL_TILE_56x384 = 109,
+ CUBLASLT_MATMUL_TILE_56x448 = 110,
+ CUBLASLT_MATMUL_TILE_56x512 = 111,
+ CUBLASLT_MATMUL_TILE_56x576 = 112,
+ CUBLASLT_MATMUL_TILE_56x640 = 113,
+ CUBLASLT_MATMUL_TILE_56x704 = 114,
+ CUBLASLT_MATMUL_TILE_56x768 = 115,
+ CUBLASLT_MATMUL_TILE_64x192 = 116,
+ CUBLASLT_MATMUL_TILE_64x320 = 117,
+ CUBLASLT_MATMUL_TILE_64x384 = 118,
+ CUBLASLT_MATMUL_TILE_64x448 = 119,
+ CUBLASLT_MATMUL_TILE_64x576 = 120,
+ CUBLASLT_MATMUL_TILE_64x640 = 121,
+ CUBLASLT_MATMUL_TILE_64x704 = 122,
+ CUBLASLT_MATMUL_TILE_64x768 = 123,
+ CUBLASLT_MATMUL_TILE_72x64 = 124,
+ CUBLASLT_MATMUL_TILE_72x128 = 125,
+ CUBLASLT_MATMUL_TILE_72x192 = 126,
+ CUBLASLT_MATMUL_TILE_72x256 = 127,
+ CUBLASLT_MATMUL_TILE_72x320 = 128,
+ CUBLASLT_MATMUL_TILE_72x384 = 129,
+ CUBLASLT_MATMUL_TILE_72x448 = 130,
+ CUBLASLT_MATMUL_TILE_72x512 = 131,
+ CUBLASLT_MATMUL_TILE_72x576 = 132,
+ CUBLASLT_MATMUL_TILE_72x640 = 133,
+ CUBLASLT_MATMUL_TILE_80x64 = 134,
+ CUBLASLT_MATMUL_TILE_80x128 = 135,
+ CUBLASLT_MATMUL_TILE_80x192 = 136,
+ CUBLASLT_MATMUL_TILE_80x256 = 137,
+ CUBLASLT_MATMUL_TILE_80x320 = 138,
+ CUBLASLT_MATMUL_TILE_80x384 = 139,
+ CUBLASLT_MATMUL_TILE_80x448 = 140,
+ CUBLASLT_MATMUL_TILE_80x512 = 141,
+ CUBLASLT_MATMUL_TILE_80x576 = 142,
+ CUBLASLT_MATMUL_TILE_88x64 = 143,
+ CUBLASLT_MATMUL_TILE_88x128 = 144,
+ CUBLASLT_MATMUL_TILE_88x192 = 145,
+ CUBLASLT_MATMUL_TILE_88x256 = 146,
+ CUBLASLT_MATMUL_TILE_88x320 = 147,
+ CUBLASLT_MATMUL_TILE_88x384 = 148,
+ CUBLASLT_MATMUL_TILE_88x448 = 149,
+ CUBLASLT_MATMUL_TILE_88x512 = 150,
+ CUBLASLT_MATMUL_TILE_96x192 = 151,
+ CUBLASLT_MATMUL_TILE_96x256 = 152,
+ CUBLASLT_MATMUL_TILE_96x320 = 153,
+ CUBLASLT_MATMUL_TILE_96x384 = 154,
+ CUBLASLT_MATMUL_TILE_96x448 = 155,
+ CUBLASLT_MATMUL_TILE_96x512 = 156,
+ CUBLASLT_MATMUL_TILE_104x64 = 157,
+ CUBLASLT_MATMUL_TILE_104x128 = 158,
+ CUBLASLT_MATMUL_TILE_104x192 = 159,
+ CUBLASLT_MATMUL_TILE_104x256 = 160,
+ CUBLASLT_MATMUL_TILE_104x320 = 161,
+ CUBLASLT_MATMUL_TILE_104x384 = 162,
+ CUBLASLT_MATMUL_TILE_104x448 = 163,
+ CUBLASLT_MATMUL_TILE_112x64 = 164,
+ CUBLASLT_MATMUL_TILE_112x128 = 165,
+ CUBLASLT_MATMUL_TILE_112x192 = 166,
+ CUBLASLT_MATMUL_TILE_112x256 = 167,
+ CUBLASLT_MATMUL_TILE_112x320 = 168,
+ CUBLASLT_MATMUL_TILE_112x384 = 169,
+ CUBLASLT_MATMUL_TILE_120x64 = 170,
+ CUBLASLT_MATMUL_TILE_120x128 = 171,
+ CUBLASLT_MATMUL_TILE_120x192 = 172,
+ CUBLASLT_MATMUL_TILE_120x256 = 173,
+ CUBLASLT_MATMUL_TILE_120x320 = 174,
+ CUBLASLT_MATMUL_TILE_120x384 = 175,
+ CUBLASLT_MATMUL_TILE_128x320 = 176,
+ CUBLASLT_MATMUL_TILE_128x384 = 177,
+ CUBLASLT_MATMUL_TILE_136x64 = 178,
+ CUBLASLT_MATMUL_TILE_136x128 = 179,
+ CUBLASLT_MATMUL_TILE_136x192 = 180,
+ CUBLASLT_MATMUL_TILE_136x256 = 181,
+ CUBLASLT_MATMUL_TILE_136x320 = 182,
+ CUBLASLT_MATMUL_TILE_144x64 = 183,
+ CUBLASLT_MATMUL_TILE_144x128 = 184,
+ CUBLASLT_MATMUL_TILE_144x192 = 185,
+ CUBLASLT_MATMUL_TILE_144x256 = 186,
+ CUBLASLT_MATMUL_TILE_144x320 = 187,
+ CUBLASLT_MATMUL_TILE_152x64 = 188,
+ CUBLASLT_MATMUL_TILE_152x128 = 189,
+ CUBLASLT_MATMUL_TILE_152x192 = 190,
+ CUBLASLT_MATMUL_TILE_152x256 = 191,
+ CUBLASLT_MATMUL_TILE_152x320 = 192,
+ CUBLASLT_MATMUL_TILE_160x64 = 193,
+ CUBLASLT_MATMUL_TILE_160x192 = 194,
+ CUBLASLT_MATMUL_TILE_160x256 = 195,
+ CUBLASLT_MATMUL_TILE_168x64 = 196,
+ CUBLASLT_MATMUL_TILE_168x128 = 197,
+ CUBLASLT_MATMUL_TILE_168x192 = 198,
+ CUBLASLT_MATMUL_TILE_168x256 = 199,
+ CUBLASLT_MATMUL_TILE_176x64 = 200,
+ CUBLASLT_MATMUL_TILE_176x128 = 201,
+ CUBLASLT_MATMUL_TILE_176x192 = 202,
+ CUBLASLT_MATMUL_TILE_176x256 = 203,
+ CUBLASLT_MATMUL_TILE_184x64 = 204,
+ CUBLASLT_MATMUL_TILE_184x128 = 205,
+ CUBLASLT_MATMUL_TILE_184x192 = 206,
+ CUBLASLT_MATMUL_TILE_184x256 = 207,
+ CUBLASLT_MATMUL_TILE_192x64 = 208,
+ CUBLASLT_MATMUL_TILE_192x192 = 209,
+ CUBLASLT_MATMUL_TILE_192x256 = 210,
+ CUBLASLT_MATMUL_TILE_200x64 = 211,
+ CUBLASLT_MATMUL_TILE_200x128 = 212,
+ CUBLASLT_MATMUL_TILE_200x192 = 213,
+ CUBLASLT_MATMUL_TILE_208x64 = 214,
+ CUBLASLT_MATMUL_TILE_208x128 = 215,
+ CUBLASLT_MATMUL_TILE_208x192 = 216,
+ CUBLASLT_MATMUL_TILE_216x64 = 217,
+ CUBLASLT_MATMUL_TILE_216x128 = 218,
+ CUBLASLT_MATMUL_TILE_216x192 = 219,
+ CUBLASLT_MATMUL_TILE_224x64 = 220,
+ CUBLASLT_MATMUL_TILE_224x128 = 221,
+ CUBLASLT_MATMUL_TILE_224x192 = 222,
+ CUBLASLT_MATMUL_TILE_232x64 = 223,
+ CUBLASLT_MATMUL_TILE_232x128 = 224,
+ CUBLASLT_MATMUL_TILE_232x192 = 225,
+ CUBLASLT_MATMUL_TILE_240x64 = 226,
+ CUBLASLT_MATMUL_TILE_240x128 = 227,
+ CUBLASLT_MATMUL_TILE_240x192 = 228,
+ CUBLASLT_MATMUL_TILE_248x64 = 229,
+ CUBLASLT_MATMUL_TILE_248x128 = 230,
+ CUBLASLT_MATMUL_TILE_248x192 = 231,
+ CUBLASLT_MATMUL_TILE_256x192 = 232,
+ CUBLASLT_MATMUL_TILE_264x64 = 233,
+ CUBLASLT_MATMUL_TILE_264x128 = 234,
+ CUBLASLT_MATMUL_TILE_272x64 = 235,
+ CUBLASLT_MATMUL_TILE_272x128 = 236,
+ CUBLASLT_MATMUL_TILE_280x64 = 237,
+ CUBLASLT_MATMUL_TILE_280x128 = 238,
+ CUBLASLT_MATMUL_TILE_288x64 = 239,
+ CUBLASLT_MATMUL_TILE_288x128 = 240,
+ CUBLASLT_MATMUL_TILE_296x64 = 241,
+ CUBLASLT_MATMUL_TILE_296x128 = 242,
+ CUBLASLT_MATMUL_TILE_304x64 = 243,
+ CUBLASLT_MATMUL_TILE_304x128 = 244,
+ CUBLASLT_MATMUL_TILE_312x64 = 245,
+ CUBLASLT_MATMUL_TILE_312x128 = 246,
+ CUBLASLT_MATMUL_TILE_320x64 = 247,
+ CUBLASLT_MATMUL_TILE_320x128 = 248,
+ CUBLASLT_MATMUL_TILE_328x64 = 249,
+ CUBLASLT_MATMUL_TILE_328x128 = 250,
+ CUBLASLT_MATMUL_TILE_336x64 = 251,
+ CUBLASLT_MATMUL_TILE_336x128 = 252,
+ CUBLASLT_MATMUL_TILE_344x64 = 253,
+ CUBLASLT_MATMUL_TILE_344x128 = 254,
+ CUBLASLT_MATMUL_TILE_352x64 = 255,
+ CUBLASLT_MATMUL_TILE_352x128 = 256,
+ CUBLASLT_MATMUL_TILE_360x64 = 257,
+ CUBLASLT_MATMUL_TILE_360x128 = 258,
+ CUBLASLT_MATMUL_TILE_368x64 = 259,
+ CUBLASLT_MATMUL_TILE_368x128 = 260,
+ CUBLASLT_MATMUL_TILE_376x64 = 261,
+ CUBLASLT_MATMUL_TILE_376x128 = 262,
+ CUBLASLT_MATMUL_TILE_384x64 = 263,
+ CUBLASLT_MATMUL_TILE_384x128 = 264,
+ CUBLASLT_MATMUL_TILE_392x64 = 265,
+ CUBLASLT_MATMUL_TILE_400x64 = 266,
+ CUBLASLT_MATMUL_TILE_408x64 = 267,
+ CUBLASLT_MATMUL_TILE_416x64 = 268,
+ CUBLASLT_MATMUL_TILE_424x64 = 269,
+ CUBLASLT_MATMUL_TILE_432x64 = 270,
+ CUBLASLT_MATMUL_TILE_440x64 = 271,
+ CUBLASLT_MATMUL_TILE_448x64 = 272,
+ CUBLASLT_MATMUL_TILE_456x64 = 273,
+ CUBLASLT_MATMUL_TILE_464x64 = 274,
+ CUBLASLT_MATMUL_TILE_472x64 = 275,
+ CUBLASLT_MATMUL_TILE_480x64 = 276,
+ CUBLASLT_MATMUL_TILE_488x64 = 277,
+ CUBLASLT_MATMUL_TILE_496x64 = 278,
+ CUBLASLT_MATMUL_TILE_504x64 = 279,
+ CUBLASLT_MATMUL_TILE_520x64 = 280,
+ CUBLASLT_MATMUL_TILE_528x64 = 281,
+ CUBLASLT_MATMUL_TILE_536x64 = 282,
+ CUBLASLT_MATMUL_TILE_544x64 = 283,
+ CUBLASLT_MATMUL_TILE_552x64 = 284,
+ CUBLASLT_MATMUL_TILE_560x64 = 285,
+ CUBLASLT_MATMUL_TILE_568x64 = 286,
+ CUBLASLT_MATMUL_TILE_576x64 = 287,
+ CUBLASLT_MATMUL_TILE_584x64 = 288,
+ CUBLASLT_MATMUL_TILE_592x64 = 289,
+ CUBLASLT_MATMUL_TILE_600x64 = 290,
+ CUBLASLT_MATMUL_TILE_608x64 = 291,
+ CUBLASLT_MATMUL_TILE_616x64 = 292,
+ CUBLASLT_MATMUL_TILE_624x64 = 293,
+ CUBLASLT_MATMUL_TILE_632x64 = 294,
+ CUBLASLT_MATMUL_TILE_640x64 = 295,
+ CUBLASLT_MATMUL_TILE_648x64 = 296,
+ CUBLASLT_MATMUL_TILE_656x64 = 297,
+ CUBLASLT_MATMUL_TILE_664x64 = 298,
+ CUBLASLT_MATMUL_TILE_672x64 = 299,
+ CUBLASLT_MATMUL_TILE_680x64 = 300,
+ CUBLASLT_MATMUL_TILE_688x64 = 301,
+ CUBLASLT_MATMUL_TILE_696x64 = 302,
+ CUBLASLT_MATMUL_TILE_704x64 = 303,
+ CUBLASLT_MATMUL_TILE_712x64 = 304,
+ CUBLASLT_MATMUL_TILE_720x64 = 305,
+ CUBLASLT_MATMUL_TILE_728x64 = 306,
+ CUBLASLT_MATMUL_TILE_736x64 = 307,
+ CUBLASLT_MATMUL_TILE_744x64 = 308,
+ CUBLASLT_MATMUL_TILE_752x64 = 309,
+ CUBLASLT_MATMUL_TILE_760x64 = 310,
+ CUBLASLT_MATMUL_TILE_768x64 = 311,
+ CUBLASLT_MATMUL_TILE_64x16 = 312,
+ CUBLASLT_MATMUL_TILE_64x24 = 313,
+ CUBLASLT_MATMUL_TILE_64x40 = 314,
+ CUBLASLT_MATMUL_TILE_64x48 = 315,
+ CUBLASLT_MATMUL_TILE_64x56 = 316,
+ CUBLASLT_MATMUL_TILE_64x72 = 317,
+ CUBLASLT_MATMUL_TILE_64x80 = 318,
+ CUBLASLT_MATMUL_TILE_64x88 = 319,
+ CUBLASLT_MATMUL_TILE_64x104 = 320,
+ CUBLASLT_MATMUL_TILE_64x112 = 321,
+ CUBLASLT_MATMUL_TILE_64x120 = 322,
+ CUBLASLT_MATMUL_TILE_64x136 = 323,
+ CUBLASLT_MATMUL_TILE_64x144 = 324,
+ CUBLASLT_MATMUL_TILE_64x152 = 325,
+ CUBLASLT_MATMUL_TILE_64x160 = 326,
+ CUBLASLT_MATMUL_TILE_64x168 = 327,
+ CUBLASLT_MATMUL_TILE_64x176 = 328,
+ CUBLASLT_MATMUL_TILE_64x184 = 329,
+ CUBLASLT_MATMUL_TILE_64x200 = 330,
+ CUBLASLT_MATMUL_TILE_64x208 = 331,
+ CUBLASLT_MATMUL_TILE_64x216 = 332,
+ CUBLASLT_MATMUL_TILE_64x224 = 333,
+ CUBLASLT_MATMUL_TILE_64x232 = 334,
+ CUBLASLT_MATMUL_TILE_64x240 = 335,
+ CUBLASLT_MATMUL_TILE_64x248 = 336,
+ CUBLASLT_MATMUL_TILE_64x264 = 337,
+ CUBLASLT_MATMUL_TILE_64x272 = 338,
+ CUBLASLT_MATMUL_TILE_64x280 = 339,
+ CUBLASLT_MATMUL_TILE_64x288 = 340,
+ CUBLASLT_MATMUL_TILE_64x296 = 341,
+ CUBLASLT_MATMUL_TILE_64x304 = 342,
+ CUBLASLT_MATMUL_TILE_64x312 = 343,
+ CUBLASLT_MATMUL_TILE_64x328 = 344,
+ CUBLASLT_MATMUL_TILE_64x336 = 345,
+ CUBLASLT_MATMUL_TILE_64x344 = 346,
+ CUBLASLT_MATMUL_TILE_64x352 = 347,
+ CUBLASLT_MATMUL_TILE_64x360 = 348,
+ CUBLASLT_MATMUL_TILE_64x368 = 349,
+ CUBLASLT_MATMUL_TILE_64x376 = 350,
+ CUBLASLT_MATMUL_TILE_64x392 = 351,
+ CUBLASLT_MATMUL_TILE_64x400 = 352,
+ CUBLASLT_MATMUL_TILE_64x408 = 353,
+ CUBLASLT_MATMUL_TILE_64x416 = 354,
+ CUBLASLT_MATMUL_TILE_64x424 = 355,
+ CUBLASLT_MATMUL_TILE_64x432 = 356,
+ CUBLASLT_MATMUL_TILE_64x440 = 357,
+ CUBLASLT_MATMUL_TILE_64x456 = 358,
+ CUBLASLT_MATMUL_TILE_64x464 = 359,
+ CUBLASLT_MATMUL_TILE_64x472 = 360,
+ CUBLASLT_MATMUL_TILE_64x480 = 361,
+ CUBLASLT_MATMUL_TILE_64x488 = 362,
+ CUBLASLT_MATMUL_TILE_64x496 = 363,
+ CUBLASLT_MATMUL_TILE_64x504 = 364,
+ CUBLASLT_MATMUL_TILE_64x520 = 365,
+ CUBLASLT_MATMUL_TILE_64x528 = 366,
+ CUBLASLT_MATMUL_TILE_64x536 = 367,
+ CUBLASLT_MATMUL_TILE_64x544 = 368,
+ CUBLASLT_MATMUL_TILE_64x552 = 369,
+ CUBLASLT_MATMUL_TILE_64x560 = 370,
+ CUBLASLT_MATMUL_TILE_64x568 = 371,
+ CUBLASLT_MATMUL_TILE_64x584 = 372,
+ CUBLASLT_MATMUL_TILE_64x592 = 373,
+ CUBLASLT_MATMUL_TILE_64x600 = 374,
+ CUBLASLT_MATMUL_TILE_64x608 = 375,
+ CUBLASLT_MATMUL_TILE_64x616 = 376,
+ CUBLASLT_MATMUL_TILE_64x624 = 377,
+ CUBLASLT_MATMUL_TILE_64x632 = 378,
+ CUBLASLT_MATMUL_TILE_64x648 = 379,
+ CUBLASLT_MATMUL_TILE_64x656 = 380,
+ CUBLASLT_MATMUL_TILE_64x664 = 381,
+ CUBLASLT_MATMUL_TILE_64x672 = 382,
+ CUBLASLT_MATMUL_TILE_64x680 = 383,
+ CUBLASLT_MATMUL_TILE_64x688 = 384,
+ CUBLASLT_MATMUL_TILE_64x696 = 385,
+ CUBLASLT_MATMUL_TILE_64x712 = 386,
+ CUBLASLT_MATMUL_TILE_64x720 = 387,
+ CUBLASLT_MATMUL_TILE_64x728 = 388,
+ CUBLASLT_MATMUL_TILE_64x736 = 389,
+ CUBLASLT_MATMUL_TILE_64x744 = 390,
+ CUBLASLT_MATMUL_TILE_64x752 = 391,
+ CUBLASLT_MATMUL_TILE_64x760 = 392,
+ CUBLASLT_MATMUL_TILE_128x8 = 393,
+ CUBLASLT_MATMUL_TILE_128x16 = 394,
+ CUBLASLT_MATMUL_TILE_128x24 = 395,
+ CUBLASLT_MATMUL_TILE_128x40 = 396,
+ CUBLASLT_MATMUL_TILE_128x48 = 397,
+ CUBLASLT_MATMUL_TILE_128x56 = 398,
+ CUBLASLT_MATMUL_TILE_128x72 = 399,
+ CUBLASLT_MATMUL_TILE_128x80 = 400,
+ CUBLASLT_MATMUL_TILE_128x88 = 401,
+ CUBLASLT_MATMUL_TILE_128x104 = 402,
+ CUBLASLT_MATMUL_TILE_128x112 = 403,
+ CUBLASLT_MATMUL_TILE_128x120 = 404,
+ CUBLASLT_MATMUL_TILE_128x136 = 405,
+ CUBLASLT_MATMUL_TILE_128x144 = 406,
+ CUBLASLT_MATMUL_TILE_128x152 = 407,
+ CUBLASLT_MATMUL_TILE_128x168 = 408,
+ CUBLASLT_MATMUL_TILE_128x176 = 409,
+ CUBLASLT_MATMUL_TILE_128x184 = 410,
+ CUBLASLT_MATMUL_TILE_128x200 = 411,
+ CUBLASLT_MATMUL_TILE_128x208 = 412,
+ CUBLASLT_MATMUL_TILE_128x216 = 413,
+ CUBLASLT_MATMUL_TILE_128x224 = 414,
+ CUBLASLT_MATMUL_TILE_128x232 = 415,
+ CUBLASLT_MATMUL_TILE_128x240 = 416,
+ CUBLASLT_MATMUL_TILE_128x248 = 417,
+ CUBLASLT_MATMUL_TILE_128x264 = 418,
+ CUBLASLT_MATMUL_TILE_128x272 = 419,
+ CUBLASLT_MATMUL_TILE_128x280 = 420,
+ CUBLASLT_MATMUL_TILE_128x288 = 421,
+ CUBLASLT_MATMUL_TILE_128x296 = 422,
+ CUBLASLT_MATMUL_TILE_128x304 = 423,
+ CUBLASLT_MATMUL_TILE_128x312 = 424,
+ CUBLASLT_MATMUL_TILE_128x328 = 425,
+ CUBLASLT_MATMUL_TILE_128x336 = 426,
+ CUBLASLT_MATMUL_TILE_128x344 = 427,
+ CUBLASLT_MATMUL_TILE_128x352 = 428,
+ CUBLASLT_MATMUL_TILE_128x360 = 429,
+ CUBLASLT_MATMUL_TILE_128x368 = 430,
+ CUBLASLT_MATMUL_TILE_128x376 = 431,
+ CUBLASLT_MATMUL_TILE_128x392 = 432,
+ CUBLASLT_MATMUL_TILE_128x400 = 433,
+ CUBLASLT_MATMUL_TILE_128x408 = 434,
+ CUBLASLT_MATMUL_TILE_128x416 = 435,
+ CUBLASLT_MATMUL_TILE_128x424 = 436,
+ CUBLASLT_MATMUL_TILE_128x432 = 437,
+ CUBLASLT_MATMUL_TILE_128x440 = 438,
+ CUBLASLT_MATMUL_TILE_128x448 = 439,
+ CUBLASLT_MATMUL_TILE_128x456 = 440,
+ CUBLASLT_MATMUL_TILE_128x464 = 441,
+ CUBLASLT_MATMUL_TILE_128x472 = 442,
+ CUBLASLT_MATMUL_TILE_128x480 = 443,
+ CUBLASLT_MATMUL_TILE_128x488 = 444,
+ CUBLASLT_MATMUL_TILE_128x496 = 445,
+ CUBLASLT_MATMUL_TILE_128x504 = 446,
+ CUBLASLT_MATMUL_TILE_128x512 = 447,
+ CUBLASLT_MATMUL_TILE_192x8 = 448,
+ CUBLASLT_MATMUL_TILE_192x16 = 449,
+ CUBLASLT_MATMUL_TILE_192x24 = 450,
+ CUBLASLT_MATMUL_TILE_192x32 = 451,
+ CUBLASLT_MATMUL_TILE_192x40 = 452,
+ CUBLASLT_MATMUL_TILE_192x48 = 453,
+ CUBLASLT_MATMUL_TILE_192x56 = 454,
+ CUBLASLT_MATMUL_TILE_192x72 = 455,
+ CUBLASLT_MATMUL_TILE_192x80 = 456,
+ CUBLASLT_MATMUL_TILE_192x88 = 457,
+ CUBLASLT_MATMUL_TILE_192x96 = 458,
+ CUBLASLT_MATMUL_TILE_192x104 = 459,
+ CUBLASLT_MATMUL_TILE_192x112 = 460,
+ CUBLASLT_MATMUL_TILE_192x120 = 461,
+ CUBLASLT_MATMUL_TILE_192x136 = 462,
+ CUBLASLT_MATMUL_TILE_192x144 = 463,
+ CUBLASLT_MATMUL_TILE_192x152 = 464,
+ CUBLASLT_MATMUL_TILE_192x160 = 465,
+ CUBLASLT_MATMUL_TILE_192x168 = 466,
+ CUBLASLT_MATMUL_TILE_192x176 = 467,
+ CUBLASLT_MATMUL_TILE_192x184 = 468,
+ CUBLASLT_MATMUL_TILE_192x200 = 469,
+ CUBLASLT_MATMUL_TILE_192x208 = 470,
+ CUBLASLT_MATMUL_TILE_192x216 = 471,
+ CUBLASLT_MATMUL_TILE_192x224 = 472,
+ CUBLASLT_MATMUL_TILE_192x232 = 473,
+ CUBLASLT_MATMUL_TILE_192x240 = 474,
+ CUBLASLT_MATMUL_TILE_192x248 = 475,
+ CUBLASLT_MATMUL_TILE_192x264 = 476,
+ CUBLASLT_MATMUL_TILE_192x272 = 477,
+ CUBLASLT_MATMUL_TILE_192x280 = 478,
+ CUBLASLT_MATMUL_TILE_192x288 = 479,
+ CUBLASLT_MATMUL_TILE_192x296 = 480,
+ CUBLASLT_MATMUL_TILE_192x304 = 481,
+ CUBLASLT_MATMUL_TILE_192x312 = 482,
+ CUBLASLT_MATMUL_TILE_192x320 = 483,
+ CUBLASLT_MATMUL_TILE_192x328 = 484,
+ CUBLASLT_MATMUL_TILE_192x336 = 485,
+ CUBLASLT_MATMUL_TILE_256x8 = 486,
+ CUBLASLT_MATMUL_TILE_256x16 = 487,
+ CUBLASLT_MATMUL_TILE_256x24 = 488,
+ CUBLASLT_MATMUL_TILE_256x40 = 489,
+ CUBLASLT_MATMUL_TILE_256x48 = 490,
+ CUBLASLT_MATMUL_TILE_256x56 = 491,
+ CUBLASLT_MATMUL_TILE_256x72 = 492,
+ CUBLASLT_MATMUL_TILE_256x80 = 493,
+ CUBLASLT_MATMUL_TILE_256x88 = 494,
+ CUBLASLT_MATMUL_TILE_256x96 = 495,
+ CUBLASLT_MATMUL_TILE_256x104 = 496,
+ CUBLASLT_MATMUL_TILE_256x112 = 497,
+ CUBLASLT_MATMUL_TILE_256x120 = 498,
+ CUBLASLT_MATMUL_TILE_256x136 = 499,
+ CUBLASLT_MATMUL_TILE_256x144 = 500,
+ CUBLASLT_MATMUL_TILE_256x152 = 501,
+ CUBLASLT_MATMUL_TILE_256x160 = 502,
+ CUBLASLT_MATMUL_TILE_256x168 = 503,
+ CUBLASLT_MATMUL_TILE_256x176 = 504,
+ CUBLASLT_MATMUL_TILE_256x184 = 505,
+ CUBLASLT_MATMUL_TILE_256x200 = 506,
+ CUBLASLT_MATMUL_TILE_256x208 = 507,
+ CUBLASLT_MATMUL_TILE_256x216 = 508,
+ CUBLASLT_MATMUL_TILE_256x224 = 509,
+ CUBLASLT_MATMUL_TILE_256x232 = 510,
+ CUBLASLT_MATMUL_TILE_256x240 = 511,
+ CUBLASLT_MATMUL_TILE_256x248 = 512,
+ CUBLASLT_MATMUL_TILE_256x256 = 513,
+ CUBLASLT_MATMUL_TILE_320x8 = 514,
+ CUBLASLT_MATMUL_TILE_320x16 = 515,
+ CUBLASLT_MATMUL_TILE_320x24 = 516,
+ CUBLASLT_MATMUL_TILE_320x32 = 517,
+ CUBLASLT_MATMUL_TILE_320x40 = 518,
+ CUBLASLT_MATMUL_TILE_320x48 = 519,
+ CUBLASLT_MATMUL_TILE_320x56 = 520,
+ CUBLASLT_MATMUL_TILE_320x72 = 521,
+ CUBLASLT_MATMUL_TILE_320x80 = 522,
+ CUBLASLT_MATMUL_TILE_320x88 = 523,
+ CUBLASLT_MATMUL_TILE_320x96 = 524,
+ CUBLASLT_MATMUL_TILE_320x104 = 525,
+ CUBLASLT_MATMUL_TILE_320x112 = 526,
+ CUBLASLT_MATMUL_TILE_320x120 = 527,
+ CUBLASLT_MATMUL_TILE_320x136 = 528,
+ CUBLASLT_MATMUL_TILE_320x144 = 529,
+ CUBLASLT_MATMUL_TILE_320x152 = 530,
+ CUBLASLT_MATMUL_TILE_320x160 = 531,
+ CUBLASLT_MATMUL_TILE_320x168 = 532,
+ CUBLASLT_MATMUL_TILE_320x176 = 533,
+ CUBLASLT_MATMUL_TILE_320x184 = 534,
+ CUBLASLT_MATMUL_TILE_320x192 = 535,
+ CUBLASLT_MATMUL_TILE_320x200 = 536,
+ CUBLASLT_MATMUL_TILE_384x8 = 537,
+ CUBLASLT_MATMUL_TILE_384x16 = 538,
+ CUBLASLT_MATMUL_TILE_384x24 = 539,
+ CUBLASLT_MATMUL_TILE_384x32 = 540,
+ CUBLASLT_MATMUL_TILE_384x40 = 541,
+ CUBLASLT_MATMUL_TILE_384x48 = 542,
+ CUBLASLT_MATMUL_TILE_384x56 = 543,
+ CUBLASLT_MATMUL_TILE_384x72 = 544,
+ CUBLASLT_MATMUL_TILE_384x80 = 545,
+ CUBLASLT_MATMUL_TILE_384x88 = 546,
+ CUBLASLT_MATMUL_TILE_384x96 = 547,
+ CUBLASLT_MATMUL_TILE_384x104 = 548,
+ CUBLASLT_MATMUL_TILE_384x112 = 549,
+ CUBLASLT_MATMUL_TILE_384x120 = 550,
+ CUBLASLT_MATMUL_TILE_384x136 = 551,
+ CUBLASLT_MATMUL_TILE_384x144 = 552,
+ CUBLASLT_MATMUL_TILE_384x152 = 553,
+ CUBLASLT_MATMUL_TILE_384x160 = 554,
+ CUBLASLT_MATMUL_TILE_384x168 = 555,
+ CUBLASLT_MATMUL_TILE_448x8 = 556,
+ CUBLASLT_MATMUL_TILE_448x16 = 557,
+ CUBLASLT_MATMUL_TILE_448x24 = 558,
+ CUBLASLT_MATMUL_TILE_448x32 = 559,
+ CUBLASLT_MATMUL_TILE_448x40 = 560,
+ CUBLASLT_MATMUL_TILE_448x48 = 561,
+ CUBLASLT_MATMUL_TILE_448x56 = 562,
+ CUBLASLT_MATMUL_TILE_448x72 = 563,
+ CUBLASLT_MATMUL_TILE_448x80 = 564,
+ CUBLASLT_MATMUL_TILE_448x88 = 565,
+ CUBLASLT_MATMUL_TILE_448x96 = 566,
+ CUBLASLT_MATMUL_TILE_448x104 = 567,
+ CUBLASLT_MATMUL_TILE_448x112 = 568,
+ CUBLASLT_MATMUL_TILE_448x120 = 569,
+ CUBLASLT_MATMUL_TILE_448x128 = 570,
+ CUBLASLT_MATMUL_TILE_448x136 = 571,
+ CUBLASLT_MATMUL_TILE_448x144 = 572,
+ CUBLASLT_MATMUL_TILE_512x8 = 573,
+ CUBLASLT_MATMUL_TILE_512x16 = 574,
+ CUBLASLT_MATMUL_TILE_512x24 = 575,
+ CUBLASLT_MATMUL_TILE_512x32 = 576,
+ CUBLASLT_MATMUL_TILE_512x40 = 577,
+ CUBLASLT_MATMUL_TILE_512x48 = 578,
+ CUBLASLT_MATMUL_TILE_512x56 = 579,
+ CUBLASLT_MATMUL_TILE_512x72 = 580,
+ CUBLASLT_MATMUL_TILE_512x80 = 581,
+ CUBLASLT_MATMUL_TILE_512x88 = 582,
+ CUBLASLT_MATMUL_TILE_512x96 = 583,
+ CUBLASLT_MATMUL_TILE_512x104 = 584,
+ CUBLASLT_MATMUL_TILE_512x112 = 585,
+ CUBLASLT_MATMUL_TILE_512x120 = 586,
+ CUBLASLT_MATMUL_TILE_512x128 = 587,
+ CUBLASLT_MATMUL_TILE_576x8 = 588,
+ CUBLASLT_MATMUL_TILE_576x16 = 589,
+ CUBLASLT_MATMUL_TILE_576x24 = 590,
+ CUBLASLT_MATMUL_TILE_576x32 = 591,
+ CUBLASLT_MATMUL_TILE_576x40 = 592,
+ CUBLASLT_MATMUL_TILE_576x48 = 593,
+ CUBLASLT_MATMUL_TILE_576x56 = 594,
+ CUBLASLT_MATMUL_TILE_576x72 = 595,
+ CUBLASLT_MATMUL_TILE_576x80 = 596,
+ CUBLASLT_MATMUL_TILE_576x88 = 597,
+ CUBLASLT_MATMUL_TILE_576x96 = 598,
+ CUBLASLT_MATMUL_TILE_576x104 = 599,
+ CUBLASLT_MATMUL_TILE_576x112 = 600,
+ CUBLASLT_MATMUL_TILE_640x8 = 601,
+ CUBLASLT_MATMUL_TILE_640x16 = 602,
+ CUBLASLT_MATMUL_TILE_640x24 = 603,
+ CUBLASLT_MATMUL_TILE_640x32 = 604,
+ CUBLASLT_MATMUL_TILE_640x40 = 605,
+ CUBLASLT_MATMUL_TILE_640x48 = 606,
+ CUBLASLT_MATMUL_TILE_640x56 = 607,
+ CUBLASLT_MATMUL_TILE_640x72 = 608,
+ CUBLASLT_MATMUL_TILE_640x80 = 609,
+ CUBLASLT_MATMUL_TILE_640x88 = 610,
+ CUBLASLT_MATMUL_TILE_640x96 = 611,
+ CUBLASLT_MATMUL_TILE_704x8 = 612,
+ CUBLASLT_MATMUL_TILE_704x16 = 613,
+ CUBLASLT_MATMUL_TILE_704x24 = 614,
+ CUBLASLT_MATMUL_TILE_704x32 = 615,
+ CUBLASLT_MATMUL_TILE_704x40 = 616,
+ CUBLASLT_MATMUL_TILE_704x48 = 617,
+ CUBLASLT_MATMUL_TILE_704x56 = 618,
+ CUBLASLT_MATMUL_TILE_704x72 = 619,
+ CUBLASLT_MATMUL_TILE_704x80 = 620,
+ CUBLASLT_MATMUL_TILE_704x88 = 621,
+ CUBLASLT_MATMUL_TILE_768x8 = 622,
+ CUBLASLT_MATMUL_TILE_768x16 = 623,
+ CUBLASLT_MATMUL_TILE_768x24 = 624,
+ CUBLASLT_MATMUL_TILE_768x32 = 625,
+ CUBLASLT_MATMUL_TILE_768x40 = 626,
+ CUBLASLT_MATMUL_TILE_768x48 = 627,
+ CUBLASLT_MATMUL_TILE_768x56 = 628,
+ CUBLASLT_MATMUL_TILE_768x72 = 629,
+ CUBLASLT_MATMUL_TILE_768x80 = 630,
+ CUBLASLT_MATMUL_TILE_END = 631;
/** Size and number of stages in which elements are read into shared memory
*
@@ -13763,7 +14358,19 @@ public static native void cublasZtrmm(@Cast("char") byte side,
CUBLASLT_SEARCH_RESERVED_04 = 4,
/** reserved for future use
*/
- CUBLASLT_SEARCH_RESERVED_05 = 5;
+ CUBLASLT_SEARCH_RESERVED_05 = 5,
+ /** reserved for future use
+ */
+ CUBLASLT_SEARCH_RESERVED_06 = 6,
+ /** reserved for future use
+ */
+ CUBLASLT_SEARCH_RESERVED_07 = 7,
+ /** reserved for future use
+ */
+ CUBLASLT_SEARCH_RESERVED_08 = 8,
+ /** reserved for future use
+ */
+ CUBLASLT_SEARCH_RESERVED_09 = 9;
/** Algo search preference to fine tune the heuristic function. */
/** enum cublasLtMatmulPreferenceAttributes_t */
@@ -14431,7 +15038,7 @@ public static native void cublasZtrmm(@Cast("char") byte side,
// #include "driver_types.h"
// #include "cuComplex.h" /* import complex data type */
-// #include "cublas_api.h"
+// #include "cublas_v2.h"
// #if defined(__cplusplus)
// Targeting ../cublas/cublasXtContext.java
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cudnn.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cudnn.java
index cca6078361..9ef4c848b7 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/cudnn.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cudnn.java
@@ -146,8 +146,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
// #define CUDNN_VERSION_H_
public static final int CUDNN_MAJOR = 9;
-public static final int CUDNN_MINOR = 3;
-public static final int CUDNN_PATCHLEVEL = 0;
+public static final int CUDNN_MINOR = 5;
+public static final int CUDNN_PATCHLEVEL = 1;
public static final int CUDNN_VERSION = (CUDNN_MAJOR * 10000 + CUDNN_MINOR * 100 + CUDNN_PATCHLEVEL);
@@ -228,8 +228,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
/* These version numbers are autogenerated, do not edit manually. */
public static final int CUDNN_GRAPH_MAJOR = 9;
-public static final int CUDNN_GRAPH_MINOR = 3;
-public static final int CUDNN_GRAPH_PATCH = 0;
+public static final int CUDNN_GRAPH_MINOR = 5;
+public static final int CUDNN_GRAPH_PATCH = 1;
// #if (CUDNN_GRAPH_MAJOR != CUDNN_MAJOR) || (CUDNN_GRAPH_MINOR != CUDNN_MINOR) || (CUDNN_GRAPH_PATCH != CUDNN_PATCHLEVEL)
// #error Version mismatch in cuDNN GRAPH!!!
@@ -301,6 +301,7 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
CUDNN_STATUS_BAD_PARAM_SHAPE_MISMATCH = 2008,
CUDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES = 2009,
CUDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE = 2010,
+ CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH = 2011,
CUDNN_STATUS_NOT_SUPPORTED = 3000,
CUDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN = 3001,
@@ -315,6 +316,7 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
CUDNN_STATUS_NOT_SUPPORTED_SHARED_MEMORY_INSUFFICIENT = 3010,
CUDNN_STATUS_NOT_SUPPORTED_PADDING = 3011,
CUDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM = 3012,
+ CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API = 3013,
CUDNN_STATUS_INTERNAL_ERROR = 4000,
CUDNN_STATUS_INTERNAL_ERROR_COMPILATION_FAILED = 4001,
@@ -639,6 +641,7 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS = 403,
CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS = 404,
CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION = 405,
+ CUDNN_ATTR_EXECUTION_PLAN_KERNEL_CACHE = 406,
CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID = 500,
CUDNN_ATTR_INTERMEDIATE_INFO_SIZE = 501,
@@ -704,6 +707,7 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
CUDNN_ATTR_OPERATIONGRAPH_HANDLE = 800,
CUDNN_ATTR_OPERATIONGRAPH_OPS = 801,
CUDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT = 802,
+ CUDNN_ATTR_OPERATIONGRAPH_IS_DYNAMIC_SHAPE_ENABLED = 803,
CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT = 900,
CUDNN_ATTR_TENSOR_DATA_TYPE = 901,
@@ -806,6 +810,11 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
CUDNN_ATTR_OPERATION_SIGNAL_XDESC = 1903,
CUDNN_ATTR_OPERATION_SIGNAL_YDESC = 1904,
+ CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_CONTAINER_DESC = 1950,
+ CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_YDESC = 1951,
+ CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_SEQUENCE_DESC = 1952,
+ CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_PAGE_TABLE_DESC = 1953,
+
CUDNN_ATTR_OPERATION_NORM_FWD_MODE = 2000,
CUDNN_ATTR_OPERATION_NORM_FWD_PHASE = 2001,
CUDNN_ATTR_OPERATION_NORM_FWD_XDESC = 2002,
@@ -847,7 +856,10 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
CUDNN_ATTR_OPERATION_RNG_YDESC = 2310,
CUDNN_ATTR_OPERATION_RNG_SEED = 2311,
CUDNN_ATTR_OPERATION_RNG_DESC = 2312,
- CUDNN_ATTR_OPERATION_RNG_OFFSET_DESC = 2313;
+ CUDNN_ATTR_OPERATION_RNG_OFFSET_DESC = 2313,
+
+ CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH = 2400,
+ CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED = 2401;
/** enum cudnnBackendAttributeType_t */
public static final int
@@ -917,7 +929,9 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR = 30,
CUDNN_BACKEND_OPERATION_RESHAPE_DESCRIPTOR = 31,
CUDNN_BACKEND_RNG_DESCRIPTOR = 32,
- CUDNN_BACKEND_OPERATION_RNG_DESCRIPTOR = 33;
+ CUDNN_BACKEND_OPERATION_RNG_DESCRIPTOR = 33,
+ CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR = 34,
+ CUDNN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR = 35;
/** enum cudnnBackendNumericalNote_t */
public static final int
@@ -938,7 +952,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION = 0,
CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER = 1,
CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER = 2,
- CUDNN_BEHAVIOR_NOTE_TYPE_COUNT = 3;
+ CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API = 3,
+ CUDNN_BEHAVIOR_NOTE_TYPE_COUNT = 4;
/** enum cudnnBackendKnobType_t */
public static final int
@@ -1057,6 +1072,16 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
public static native @Cast("cudnnStatus_t") int cudnnBackendExecute(cudnnContext handle, cudnnBackendDescriptor_t executionPlan, cudnnBackendDescriptor_t variantPack);
+public static native @Cast("cudnnStatus_t") int cudnnBackendPopulateCudaGraph(cudnnContext handle,
+ cudnnBackendDescriptor_t executionPlan,
+ cudnnBackendDescriptor_t variantPack,
+ CUgraph_st graph);
+
+public static native @Cast("cudnnStatus_t") int cudnnBackendUpdateCudaGraph(cudnnContext handle,
+ cudnnBackendDescriptor_t executionPlan,
+ cudnnBackendDescriptor_t variantPack,
+ CUgraph_st graph);
+
// #if defined(__cplusplus)
// #endif
@@ -1128,8 +1153,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
/* These version numbers are autogenerated, do not edit manually. */
public static final int CUDNN_OPS_MAJOR = 9;
-public static final int CUDNN_OPS_MINOR = 3;
-public static final int CUDNN_OPS_PATCH = 0;
+public static final int CUDNN_OPS_MINOR = 5;
+public static final int CUDNN_OPS_PATCH = 1;
// #if (CUDNN_OPS_MAJOR != CUDNN_MAJOR) || (CUDNN_OPS_MINOR != CUDNN_MINOR) || (CUDNN_OPS_PATCH != CUDNN_PATCHLEVEL)
// #error Version mismatch in cuDNN OPS INFER!!!
@@ -2303,7 +2328,7 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
@Cast("size_t") long reserveSpaceSizeInBytes);
/* Performs backward pass of Batch Normalization layer. Returns x gradient,
-* bnScale gradient and bnBias gradient */
+ * bnScale gradient and bnBias gradient */
public static native @Cast("cudnnStatus_t") @Deprecated int cudnnBatchNormalizationBackward(cudnnContext handle,
@Cast("cudnnBatchNormMode_t") int mode,
@Const Pointer alphaDataDiff,
@@ -2557,8 +2582,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
/* These version numbers are autogenerated, do not edit manually. */
public static final int CUDNN_ADV_MAJOR = 9;
-public static final int CUDNN_ADV_MINOR = 3;
-public static final int CUDNN_ADV_PATCH = 0;
+public static final int CUDNN_ADV_MINOR = 5;
+public static final int CUDNN_ADV_PATCH = 1;
// #if (CUDNN_ADV_MAJOR != CUDNN_MAJOR) || (CUDNN_ADV_MINOR != CUDNN_MINOR) || (CUDNN_ADV_PATCH != CUDNN_PATCHLEVEL)
// #error Version mismatch in cuDNN ADV INFER!!!
@@ -3374,8 +3399,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
Pointer reserveSpace);
/*
-* CTC (Connectionist Temporal Classification) loss descriptor create/destory/set/get functions
-*/
+ * CTC (Connectionist Temporal Classification) loss descriptor create/destory/set/get functions
+ */
/* Input normalization mode for loss function */
/** enum cudnnLossNormalizationMode_t */
public static final int
@@ -3655,8 +3680,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn {
/* These version numbers are autogenerated, do not edit manually. */
public static final int CUDNN_CNN_MAJOR = 9;
-public static final int CUDNN_CNN_MINOR = 3;
-public static final int CUDNN_CNN_PATCH = 0;
+public static final int CUDNN_CNN_MINOR = 5;
+public static final int CUDNN_CNN_PATCH = 1;
// #if (CUDNN_CNN_MAJOR != CUDNN_MAJOR) || (CUDNN_CNN_MINOR != CUDNN_MINOR) || (CUDNN_CNN_PATCH != CUDNN_PATCHLEVEL)
// #error Version mismatch in cuDNN CNN INFER!!!
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cufft.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cufft.java
index 61b09ebbe2..14c0058238 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/cufft.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cufft.java
@@ -90,11 +90,11 @@ public class cufft extends org.bytedeco.cuda.presets.cufft {
// #endif
public static final int CUFFT_VER_MAJOR = 11;
-public static final int CUFFT_VER_MINOR = 2;
-public static final int CUFFT_VER_PATCH = 6;
-public static final int CUFFT_VER_BUILD = 28;
+public static final int CUFFT_VER_MINOR = 3;
+public static final int CUFFT_VER_PATCH = 0;
+public static final int CUFFT_VER_BUILD = 4;
-public static final int CUFFT_VERSION = 11206;
+public static final int CUFFT_VERSION = 11300;
// CUFFT API function return values
/** enum cufftResult */
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cupti.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cupti.java
index 5e744d4351..8bea75dc1d 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/cupti.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cupti.java
@@ -777,8 +777,9 @@ public class cupti extends org.bytedeco.cuda.presets.cupti {
CUPTI_ACTIVITY_KIND_MARKER = 12,
/**
- * Extended, optional, data about a marker. The corresponding
- * activity record structure is \ref CUpti_ActivityMarkerData.
+ * Extended, optional, data about a marker. User must enable
+ * CUPTI_ACTIVITY_KIND_MARKER as well to get records for marker data.
+ * The corresponding activity record structure is \ref CUpti_ActivityMarkerData.
*/
CUPTI_ACTIVITY_KIND_MARKER_DATA = 13,
@@ -4105,7 +4106,7 @@ public class cupti extends org.bytedeco.cuda.presets.cupti {
/**
* Domain containing callback points for various states.
*/
- CUPTI_CB_DOMAIN_STATE = 6,
+ CUPTI_CB_DOMAIN_STATE = 6,
CUPTI_CB_DOMAIN_SIZE = 7,
@@ -4295,6 +4296,7 @@ public class cupti extends org.bytedeco.cuda.presets.cupti {
// Targeting ../cupti/CUpti_StateData.java
+
/**
* \brief An ID for a driver API, runtime API, resource or
* synchronization callback.
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/curand.java b/cuda/src/gen/java/org/bytedeco/cuda/global/curand.java
index 93bccbcccf..7d16bdb85a 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/curand.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/curand.java
@@ -92,7 +92,7 @@ public class curand extends org.bytedeco.cuda.presets.curand {
public static final int CURAND_VER_MAJOR = 10;
public static final int CURAND_VER_MINOR = 3;
public static final int CURAND_VER_PATCH = 7;
-public static final int CURAND_VER_BUILD = 37;
+public static final int CURAND_VER_BUILD = 77;
public static final int CURAND_VERSION = (CURAND_VER_MAJOR * 1000 +
CURAND_VER_MINOR * 100 +
CURAND_VER_PATCH);
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cusolver.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cusolver.java
index d23eac2aea..a6b20b65e1 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/cusolver.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cusolver.java
@@ -89,9 +89,9 @@ public class cusolver extends org.bytedeco.cuda.presets.cusolver {
// #endif
public static final int CUSOLVER_VER_MAJOR = 11;
- public static final int CUSOLVER_VER_MINOR = 6;
- public static final int CUSOLVER_VER_PATCH = 4;
- public static final int CUSOLVER_VER_BUILD = 38;
+ public static final int CUSOLVER_VER_MINOR = 7;
+ public static final int CUSOLVER_VER_PATCH = 1;
+ public static final int CUSOLVER_VER_BUILD = 2;
public static final int CUSOLVER_VERSION =
(CUSOLVER_VER_MAJOR * 1000 + CUSOLVER_VER_MINOR * 100 + CUSOLVER_VER_PATCH);
@@ -370,7 +370,7 @@ public class cusolver extends org.bytedeco.cuda.presets.cusolver {
// #include
// #include "cuComplex.h" /* import complex data type */
-// #include "cublas_api.h"
+// #include "cublas_v2.h"
// #include "cusolver_common.h"
/*******************************************************************************/
@@ -12217,6 +12217,78 @@ public class cusolver extends org.bytedeco.cuda.presets.cusolver {
@Cast("size_t") long workspaceInBytesOnHost,
int[] info);
+ /* 64-bit API for batched SYEVD */
+ public static native @Cast("cusolverStatus_t") int cusolverDnXsyevBatched_bufferSize(
+ cusolverDnContext handle,
+ cusolverDnParams params,
+ @Cast("cusolverEigMode_t") int jobz,
+ @Cast("cublasFillMode_t") int uplo,
+ @Cast("int64_t") long n,
+ @Cast("cudaDataType") int dataTypeA,
+ @Const Pointer A,
+ @Cast("int64_t") long lda,
+ @Cast("cudaDataType") int dataTypeW,
+ @Const Pointer W,
+ @Cast("cudaDataType") int computeType,
+ @Cast("size_t*") SizeTPointer workspaceInBytesOnDevice,
+ @Cast("size_t*") SizeTPointer workspaceInBytesOnHost,
+ @Cast("int64_t") long batchSize);
+
+ public static native @Cast("cusolverStatus_t") int cusolverDnXsyevBatched(
+ cusolverDnContext handle,
+ cusolverDnParams params,
+ @Cast("cusolverEigMode_t") int jobz,
+ @Cast("cublasFillMode_t") int uplo,
+ @Cast("int64_t") long n,
+ @Cast("cudaDataType") int dataTypeA,
+ Pointer A,
+ @Cast("int64_t") long lda,
+ @Cast("cudaDataType") int dataTypeW,
+ Pointer W,
+ @Cast("cudaDataType") int computeType,
+ Pointer bufferOnDevice,
+ @Cast("size_t") long workspaceInBytesOnDevice,
+ Pointer bufferOnHost,
+ @Cast("size_t") long workspaceInBytesOnHost,
+ IntPointer info,
+ @Cast("int64_t") long batchSize);
+ public static native @Cast("cusolverStatus_t") int cusolverDnXsyevBatched(
+ cusolverDnContext handle,
+ cusolverDnParams params,
+ @Cast("cusolverEigMode_t") int jobz,
+ @Cast("cublasFillMode_t") int uplo,
+ @Cast("int64_t") long n,
+ @Cast("cudaDataType") int dataTypeA,
+ Pointer A,
+ @Cast("int64_t") long lda,
+ @Cast("cudaDataType") int dataTypeW,
+ Pointer W,
+ @Cast("cudaDataType") int computeType,
+ Pointer bufferOnDevice,
+ @Cast("size_t") long workspaceInBytesOnDevice,
+ Pointer bufferOnHost,
+ @Cast("size_t") long workspaceInBytesOnHost,
+ IntBuffer info,
+ @Cast("int64_t") long batchSize);
+ public static native @Cast("cusolverStatus_t") int cusolverDnXsyevBatched(
+ cusolverDnContext handle,
+ cusolverDnParams params,
+ @Cast("cusolverEigMode_t") int jobz,
+ @Cast("cublasFillMode_t") int uplo,
+ @Cast("int64_t") long n,
+ @Cast("cudaDataType") int dataTypeA,
+ Pointer A,
+ @Cast("int64_t") long lda,
+ @Cast("cudaDataType") int dataTypeW,
+ Pointer W,
+ @Cast("cudaDataType") int computeType,
+ Pointer bufferOnDevice,
+ @Cast("size_t") long workspaceInBytesOnDevice,
+ Pointer bufferOnHost,
+ @Cast("size_t") long workspaceInBytesOnHost,
+ int[] info,
+ @Cast("int64_t") long batchSize);
+
/* 64-bit API for SYEVDX */
public static native @Cast("cusolverStatus_t") int cusolverDnXsyevdx_bufferSize(
cusolverDnContext handle,
@@ -12349,6 +12421,98 @@ public class cusolver extends org.bytedeco.cuda.presets.cusolver {
@Cast("size_t") long workspaceInBytesOnHost,
int[] info);
+ /* 64-bit API for GEEV */
+ public static native @Cast("cusolverStatus_t") int cusolverDnXgeev_bufferSize(
+ cusolverDnContext handle,
+ cusolverDnParams params,
+ @Cast("cusolverEigMode_t") int jobvl,
+ @Cast("cusolverEigMode_t") int jobvr,
+ @Cast("int64_t") long n,
+ @Cast("cudaDataType") int dataTypeA,
+ @Const Pointer A,
+ @Cast("int64_t") long lda,
+ @Cast("cudaDataType") int dataTypeW,
+ @Const Pointer W,
+ @Cast("cudaDataType") int dataTypeVL,
+ @Const Pointer VL,
+ @Cast("int64_t") long ldvl,
+ @Cast("cudaDataType") int dataTypeVR,
+ @Const Pointer VR,
+ @Cast("int64_t") long ldvr,
+ @Cast("cudaDataType") int computeType,
+ @Cast("size_t*") SizeTPointer workspaceInBytesOnDevice,
+ @Cast("size_t*") SizeTPointer workspaceInBytesOnHost);
+
+ public static native @Cast("cusolverStatus_t") int cusolverDnXgeev(
+ cusolverDnContext handle,
+ cusolverDnParams params,
+ @Cast("cusolverEigMode_t") int jobvl,
+ @Cast("cusolverEigMode_t") int jobvr,
+ @Cast("int64_t") long n,
+ @Cast("cudaDataType") int dataTypeA,
+ Pointer A,
+ @Cast("int64_t") long lda,
+ @Cast("cudaDataType") int dataTypeW,
+ Pointer W,
+ @Cast("cudaDataType") int dataTypeVL,
+ Pointer VL,
+ @Cast("int64_t") long ldvl,
+ @Cast("cudaDataType") int dataTypeVR,
+ Pointer VR,
+ @Cast("int64_t") long ldvr,
+ @Cast("cudaDataType") int computeType,
+ Pointer bufferOnDevice,
+ @Cast("size_t") long workspaceInBytesOnDevice,
+ Pointer bufferOnHost,
+ @Cast("size_t") long workspaceInBytesOnHost,
+ IntPointer info);
+ public static native @Cast("cusolverStatus_t") int cusolverDnXgeev(
+ cusolverDnContext handle,
+ cusolverDnParams params,
+ @Cast("cusolverEigMode_t") int jobvl,
+ @Cast("cusolverEigMode_t") int jobvr,
+ @Cast("int64_t") long n,
+ @Cast("cudaDataType") int dataTypeA,
+ Pointer A,
+ @Cast("int64_t") long lda,
+ @Cast("cudaDataType") int dataTypeW,
+ Pointer W,
+ @Cast("cudaDataType") int dataTypeVL,
+ Pointer VL,
+ @Cast("int64_t") long ldvl,
+ @Cast("cudaDataType") int dataTypeVR,
+ Pointer VR,
+ @Cast("int64_t") long ldvr,
+ @Cast("cudaDataType") int computeType,
+ Pointer bufferOnDevice,
+ @Cast("size_t") long workspaceInBytesOnDevice,
+ Pointer bufferOnHost,
+ @Cast("size_t") long workspaceInBytesOnHost,
+ IntBuffer info);
+ public static native @Cast("cusolverStatus_t") int cusolverDnXgeev(
+ cusolverDnContext handle,
+ cusolverDnParams params,
+ @Cast("cusolverEigMode_t") int jobvl,
+ @Cast("cusolverEigMode_t") int jobvr,
+ @Cast("int64_t") long n,
+ @Cast("cudaDataType") int dataTypeA,
+ Pointer A,
+ @Cast("int64_t") long lda,
+ @Cast("cudaDataType") int dataTypeW,
+ Pointer W,
+ @Cast("cudaDataType") int dataTypeVL,
+ Pointer VL,
+ @Cast("int64_t") long ldvl,
+ @Cast("cudaDataType") int dataTypeVR,
+ Pointer VR,
+ @Cast("int64_t") long ldvr,
+ @Cast("cudaDataType") int computeType,
+ Pointer bufferOnDevice,
+ @Cast("size_t") long workspaceInBytesOnDevice,
+ Pointer bufferOnHost,
+ @Cast("size_t") long workspaceInBytesOnHost,
+ int[] info);
+
/* 64-bit API for GESVD */
public static native @Cast("cusolverStatus_t") int cusolverDnXgesvd_bufferSize(
cusolverDnContext handle,
@@ -14227,7 +14391,7 @@ public class cusolver extends org.bytedeco.cuda.presets.cusolver {
// #define CUSOLVERSP_H_
// #include "cusparse.h"
-// #include "cublas_api.h"
+// #include "cublas_v2.h"
// #include "cusolver_common.h"
// #if defined(__cplusplus)
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cusparse.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cusparse.java
index 616e2af6e5..48ab3b11f1 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/cusparse.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cusparse.java
@@ -84,8 +84,8 @@ public class cusparse extends org.bytedeco.cuda.presets.cusparse {
public static final int CUSPARSE_VER_MAJOR = 12;
public static final int CUSPARSE_VER_MINOR = 5;
-public static final int CUSPARSE_VER_PATCH = 2;
-public static final int CUSPARSE_VER_BUILD = 23;
+public static final int CUSPARSE_VER_PATCH = 4;
+public static final int CUSPARSE_VER_BUILD = 2;
public static final int CUSPARSE_VERSION = (CUSPARSE_VER_MAJOR * 1000 +
CUSPARSE_VER_MINOR * 100 +
CUSPARSE_VER_PATCH);
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/nccl.java b/cuda/src/gen/java/org/bytedeco/cuda/global/nccl.java
index fa5d6f366d..734ac977e3 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/nccl.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/nccl.java
@@ -33,11 +33,11 @@ public class nccl extends org.bytedeco.cuda.presets.nccl {
// #endif
public static final int NCCL_MAJOR = 2;
-public static final int NCCL_MINOR = 22;
-public static final int NCCL_PATCH = 3;
+public static final int NCCL_MINOR = 23;
+public static final int NCCL_PATCH = 4;
public static final String NCCL_SUFFIX = "";
-public static final int NCCL_VERSION_CODE = 22203;
+public static final int NCCL_VERSION_CODE = 22304;
// #define NCCL_VERSION(X,Y,Z) (((X) <= 2 && (Y) <= 8) ? (X) * 1000 + (Y) * 100 + (Z) : (X) * 10000 + (Y) * 100 + (Z))
// #ifdef __cplusplus
@@ -194,6 +194,15 @@ public class nccl extends org.bytedeco.cuda.presets.nccl {
public static native @Cast("ncclResult_t") int pncclCommSplit(ncclComm comm, int color, int key, @ByPtrPtr ncclComm newcomm, ncclConfig_t config);
public static native @Cast("ncclResult_t") int pncclCommSplit(ncclComm comm, int color, int key, @Cast("ncclComm**") PointerPointer newcomm, ncclConfig_t config);
+/* Creates a new communicator (multi thread/process version), similar to ncclCommInitRankConfig.
+ * Allows to use more than one ncclUniqueId (up to one per rank), indicated by nId, to accelerate the init operation.
+ * The number of ncclUniqueIds and their order must be the same for every rank.
+ */
+public static native @Cast("ncclResult_t") int ncclCommInitRankScalable(@ByPtrPtr ncclComm newcomm, int nranks, int myrank, int nId, ncclUniqueId commIds, ncclConfig_t config);
+public static native @Cast("ncclResult_t") int ncclCommInitRankScalable(@Cast("ncclComm**") PointerPointer newcomm, int nranks, int myrank, int nId, ncclUniqueId commIds, ncclConfig_t config);
+public static native @Cast("ncclResult_t") int pncclCommInitRankScalable(@ByPtrPtr ncclComm newcomm, int nranks, int myrank, int nId, ncclUniqueId commIds, ncclConfig_t config);
+public static native @Cast("ncclResult_t") int pncclCommInitRankScalable(@Cast("ncclComm**") PointerPointer newcomm, int nranks, int myrank, int nId, ncclUniqueId commIds, ncclConfig_t config);
+
/* Returns a string for each error code. */
public static native @Cast("const char*") BytePointer ncclGetErrorString(@Cast("ncclResult_t") int result);
public static native @Cast("const char*") BytePointer pncclGetErrorString(@Cast("ncclResult_t") int result);
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/nppc.java b/cuda/src/gen/java/org/bytedeco/cuda/global/nppc.java
index b69843630e..116d7f9aef 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/nppc.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/nppc.java
@@ -88,7 +88,7 @@ public class nppc extends org.bytedeco.cuda.presets.nppc {
/**
* Build version
*/
-public static final int NPP_VER_BUILD = 23;
+public static final int NPP_VER_BUILD = 54;
/**
* Full version
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/nvjpeg.java b/cuda/src/gen/java/org/bytedeco/cuda/global/nvjpeg.java
index 0a17fa078e..5a3a89a2e6 100644
--- a/cuda/src/gen/java/org/bytedeco/cuda/global/nvjpeg.java
+++ b/cuda/src/gen/java/org/bytedeco/cuda/global/nvjpeg.java
@@ -88,7 +88,7 @@ public class nvjpeg extends org.bytedeco.cuda.presets.nvjpeg {
public static final int NVJPEG_VER_MAJOR = 12;
public static final int NVJPEG_VER_MINOR = 3;
public static final int NVJPEG_VER_PATCH = 3;
-public static final int NVJPEG_VER_BUILD = 23;
+public static final int NVJPEG_VER_BUILD = 54;
/* nvJPEG status enums, returned by nvJPEG API */
/** enum nvjpegStatus_t */
diff --git a/nvcodec/README.md b/nvcodec/README.md
index fdf7a1ba99..f961919dfe 100644
--- a/nvcodec/README.md
+++ b/nvcodec/README.md
@@ -62,7 +62,7 @@ You can find more encoder and decoder samples in the [`samples`](samples) subdir
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/nvcodec/platform/pom.xml b/nvcodec/platform/pom.xml
index b3f7b54c5e..b3dccbc6be 100644
--- a/nvcodec/platform/pom.xml
+++ b/nvcodec/platform/pom.xml
@@ -23,7 +23,7 @@
org.bytedeco
cuda-platform
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
${project.groupId}
diff --git a/nvcodec/pom.xml b/nvcodec/pom.xml
index 78f8677ad0..c0869ff538 100644
--- a/nvcodec/pom.xml
+++ b/nvcodec/pom.xml
@@ -18,7 +18,7 @@
org.bytedeco
cuda
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
org.bytedeco
@@ -44,7 +44,7 @@
org.bytedeco
cuda
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
diff --git a/nvcodec/samples/pom.xml b/nvcodec/samples/pom.xml
index 8766cdee1b..e8687e9d59 100644
--- a/nvcodec/samples/pom.xml
+++ b/nvcodec/samples/pom.xml
@@ -23,7 +23,7 @@
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/nvcodec/samples/simple/pom.xml b/nvcodec/samples/simple/pom.xml
index 37581e84db..b3c98a37f9 100644
--- a/nvcodec/samples/simple/pom.xml
+++ b/nvcodec/samples/simple/pom.xml
@@ -19,7 +19,7 @@
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/onnxruntime/README.md b/onnxruntime/README.md
index 2167fdab1e..816a796f31 100644
--- a/onnxruntime/README.md
+++ b/onnxruntime/README.md
@@ -60,7 +60,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/onnxruntime/samples/pom.xml b/onnxruntime/samples/pom.xml
index 77b521a4f6..06a60167c4 100644
--- a/onnxruntime/samples/pom.xml
+++ b/onnxruntime/samples/pom.xml
@@ -26,7 +26,7 @@
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/opencv/README.md b/opencv/README.md
index 72fcafdf05..5c17dba09e 100644
--- a/opencv/README.md
+++ b/opencv/README.md
@@ -63,7 +63,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/opencv/samples/pom.xml b/opencv/samples/pom.xml
index edfb74e1c5..587f109d2c 100644
--- a/opencv/samples/pom.xml
+++ b/opencv/samples/pom.xml
@@ -26,7 +26,7 @@
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/platform/pom.xml b/platform/pom.xml
index 02a54b050b..830ba19b12 100644
--- a/platform/pom.xml
+++ b/platform/pom.xml
@@ -272,7 +272,7 @@
org.bytedeco
cuda-platform
- 12.6-9.3-${project.version}
+ 12.6-9.5-${project.version}
org.bytedeco
@@ -312,12 +312,12 @@
org.bytedeco
tensorrt-platform
- 10.3-${project.version}
+ 10.5-${project.version}
org.bytedeco
tritonserver-platform
- 2.48.0-${project.version}
+ 2.50.0-${project.version}
diff --git a/pytorch/README.md b/pytorch/README.md
index 220d9da081..5ecaba34d7 100644
--- a/pytorch/README.md
+++ b/pytorch/README.md
@@ -62,7 +62,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/pytorch/pom.xml b/pytorch/pom.xml
index 5f697e12b5..575b759fa7 100644
--- a/pytorch/pom.xml
+++ b/pytorch/pom.xml
@@ -27,7 +27,7 @@
org.bytedeco
cuda
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
true
@@ -52,7 +52,7 @@
org.bytedeco
cuda-platform
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
org.bytedeco
diff --git a/pytorch/samples/pom.xml b/pytorch/samples/pom.xml
index 1ee5668173..97d4fb103a 100644
--- a/pytorch/samples/pom.xml
+++ b/pytorch/samples/pom.xml
@@ -26,7 +26,7 @@
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/tensorrt/README.md b/tensorrt/README.md
index 99eb8cbe7c..f255089e23 100644
--- a/tensorrt/README.md
+++ b/tensorrt/README.md
@@ -17,7 +17,7 @@ Introduction
------------
This directory contains the JavaCPP Presets module for:
- * TensorRT 10.3.0.26 https://developer.nvidia.com/tensorrt
+ * TensorRT 10.5.0.18 https://developer.nvidia.com/tensorrt
Please refer to the parent README.md file for more detailed information about the JavaCPP Presets.
@@ -54,19 +54,19 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic
org.bytedeco
tensorrt-platform
- 10.3-1.5.11-SNAPSHOT
+ 10.5-1.5.11-SNAPSHOT
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
org.bytedeco
tensorrt-platform-redist
- 10.3-1.5.11-SNAPSHOT
+ 10.5-1.5.11-SNAPSHOT
diff --git a/tensorrt/platform/pom.xml b/tensorrt/platform/pom.xml
index 8b458847cd..afc6e0dc55 100644
--- a/tensorrt/platform/pom.xml
+++ b/tensorrt/platform/pom.xml
@@ -12,7 +12,7 @@
org.bytedeco
tensorrt-platform
- 10.3-${project.parent.version}
+ 10.5-${project.parent.version}
JavaCPP Presets Platform for TensorRT
@@ -23,7 +23,7 @@
org.bytedeco
cuda-platform
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
${project.groupId}
diff --git a/tensorrt/platform/redist/pom.xml b/tensorrt/platform/redist/pom.xml
index e0cfebbc47..cabae40357 100644
--- a/tensorrt/platform/redist/pom.xml
+++ b/tensorrt/platform/redist/pom.xml
@@ -12,7 +12,7 @@
org.bytedeco
tensorrt-platform-redist
- 10.3-${project.parent.version}
+ 10.5-${project.parent.version}
JavaCPP Presets Platform Redist for TensorRT
diff --git a/tensorrt/pom.xml b/tensorrt/pom.xml
index 7a827cd556..da69643f07 100644
--- a/tensorrt/pom.xml
+++ b/tensorrt/pom.xml
@@ -11,14 +11,14 @@
org.bytedeco
tensorrt
- 10.3-${project.parent.version}
+ 10.5-${project.parent.version}
JavaCPP Presets for TensorRT
org.bytedeco
cuda
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
org.bytedeco
@@ -44,7 +44,7 @@
org.bytedeco
cuda
- 12.6-9.3-${project.parent.version}
+ 12.6-9.5-${project.parent.version}
diff --git a/tensorrt/samples/pom.xml b/tensorrt/samples/pom.xml
index 1e2d76f0a7..f7094fb21a 100644
--- a/tensorrt/samples/pom.xml
+++ b/tensorrt/samples/pom.xml
@@ -12,19 +12,19 @@
org.bytedeco
tensorrt-platform
- 10.3-1.5.11-SNAPSHOT
+ 10.5-1.5.11-SNAPSHOT
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
org.bytedeco
tensorrt-platform-redist
- 10.3-1.5.11-SNAPSHOT
+ 10.5-1.5.11-SNAPSHOT
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/global/nvinfer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/global/nvinfer.java
index 86b1490234..874a32baa8 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/global/nvinfer.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/global/nvinfer.java
@@ -51,11 +51,11 @@ public class nvinfer extends org.bytedeco.tensorrt.presets.nvinfer {
/** TensorRT major version. */
public static final int NV_TENSORRT_MAJOR = 10;
/** TensorRT minor version. */
-public static final int NV_TENSORRT_MINOR = 3;
+public static final int NV_TENSORRT_MINOR = 5;
/** TensorRT patch version. */
public static final int NV_TENSORRT_PATCH = 0;
/** TensorRT build number. */
-public static final int NV_TENSORRT_BUILD = 26;
+public static final int NV_TENSORRT_BUILD = 18;
/** TensorRT LWS major version. */
public static final int NV_TENSORRT_LWS_MAJOR = 0;
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java
index 21d802657a..f27e1a3125 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java
@@ -47,22 +47,27 @@ public class IBuilder extends INoCopy {
/**
* \brief Determine whether the platform has fast native fp16.
+ *
+ * @deprecated Deprecated in TensorRT 10.5. Please query data type support from CUDA directly.
* */
//!
//!
- public native @Cast("bool") @NoException(true) boolean platformHasFastFp16();
+ //!
+ public native @Cast("bool") @Deprecated @NoException(true) boolean platformHasFastFp16();
/**
* \brief Determine whether the platform has fast native int8.
+ *
+ * @deprecated Deprecated in TensorRT 10.5. Please query data type support from CUDA directly.
* */
//!
//!
//!
- public native @Cast("bool") @NoException(true) boolean platformHasFastInt8();
+ public native @Cast("bool") @Deprecated @NoException(true) boolean platformHasFastInt8();
/**
* \brief Get the maximum batch size DLA can support.
@@ -214,12 +219,15 @@ public class IBuilder extends INoCopy {
* */
+ //!
//!
//!
public native @NoException(true) void reset();
/**
* \brief Determine whether the platform has TF32 support.
+ *
+ * @deprecated Deprecated in TensorRT 10.5. Please query data type support from CUDA directly.
* */
@@ -230,7 +238,7 @@ public class IBuilder extends INoCopy {
//!
//!
//!
- public native @Cast("bool") @NoException(true) boolean platformHasTf32();
+ public native @Cast("bool") @Deprecated @NoException(true) boolean platformHasTf32();
/**
* \brief Builds and serializes a network for the given INetworkDefinition and IBuilderConfig.
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java
index ff6df4d439..6a0da55a28 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java
@@ -1076,5 +1076,35 @@ public class IBuilderConfig extends INoCopy {
*
* @see IBuilderConfig::setRuntimePlatform()
* */
+
+
+ //!
+ //!
+ //!
+ //!
public native @NoException(true) RuntimePlatform getRuntimePlatform();
+
+ /**
+ * \brief Set the maximum number of tactics to time when there is a choice of tactics.
+ *
+ * This function controls the number of tactics timed when there are multiple tactics to choose from.
+ *
+ * @see getMaxNbTactics()
+ * */
+
+
+ //!
+ //!
+ //!
+ //!
+ public native @NoException(true) void setMaxNbTactics(int maxNbTactics);
+
+ /**
+ * \brief Query the maximum number of tactics timed when there is a choice.
+ *
+ * By default the value is -1, indicating TensorRT can determine the number of tactics based on its own heuristic.
+ *
+ * @see setMaxNbTactics()
+ * */
+ public native @NoException(true) int getMaxNbTactics();
}
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java
index c13fb3e733..cf28b124bc 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java
@@ -24,8 +24,8 @@
*
* \brief A convolution layer in a network definition.
*
- * This layer performs a correlation operation between 3-dimensional filter with a 4-dimensional tensor to produce
- * another 4-dimensional tensor.
+ * This layer performs a correlation operation between 3 or 4 dimensional filter with a 4 or 5 dimensional tensor to
+ * produce another 4 or 5 dimensional tensor.
*
* An optional bias argument is supported, which adds a per-channel constant to each value in the output.
*
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java
index 7f67dfa1f9..5e010fb8b4 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java
@@ -37,7 +37,6 @@
* * GatherMode::kDEFAULT: s = q + r - 1 - nbElementwiseDims
* * GatherMode::kND: s = q + r - indices.d[q-1] - 1 - nbElementwiseDims
* * GatherMode::kELEMENT: s = q = r.
- * The output can be a shape tensor only if the mode is GatherMode::kDEFAULT.
*
* The dimensions of the output likewise depends on the mode:
*
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditional.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditional.java
index b45379209e..496b7a76da 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditional.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditional.java
@@ -64,6 +64,7 @@ public class IIfConditional extends INoCopy {
//!
//!
//!
+ //!
public native @NoException(true) IConditionLayer setCondition(@ByRef ITensor condition);
/**
@@ -75,6 +76,8 @@ public class IIfConditional extends INoCopy {
* Each output layer of an IIfConditional represents a single output of either the true-subgraph or the
* false-subgraph of an IIfConditional, depending on which subgraph was executed.
*
+ * The shapes of the two tensors must be equal unless the condition is a build-time constant.
+ *
* @see IIfConditionalOutputLayer
* */
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditionalOutputLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditionalOutputLayer.java
index a3f4925524..4de57a9ba9 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditionalOutputLayer.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditionalOutputLayer.java
@@ -24,7 +24,9 @@
*
* \brief This layer represents an output of an IIfConditional.
*
- * An IIfConditionalOutputLayer has exactly one output.
+ * An IIfConditionalOutputLayer has two inputs and one output.
+ *
+ * @see IIfConditional::addOutput
* */
@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class)
public class IIfConditionalOutputLayer extends IIfConditionalBoundaryLayer {
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java
index be4006dce2..d082d0d05c 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java
@@ -100,7 +100,7 @@ public class ILoopOutputLayer extends ILoopBoundaryLayer {
/** The indices in the kCONCATENATE or kREVERSE cases are as follows:
/**
/** - 0: Contribution to the output tensor. The contribution must come from inside the loop.
- /** - 1: The concatenation length scalar value, must come from outside the loop, as a 0D Int32 or Int64 shape tensor.
+ /** - 1: The concatenation length scalar value, must come from outside the loop, as a 0D shape tensor of type Int32 or Int64.
/**
/** If this function is called with the value 1, then the function getNbInputs() changes
/** from returning 1 to 2.
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java
index 02cc0f8a0c..e6e24b0ec8 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java
@@ -1037,7 +1037,7 @@ public class INetworkDefinition extends INoCopy {
*
* @see IParametricReLULayer
*
- * \warning Int32 tensors are not valid input tensors.
+ * \warning Tensors of type Int32, Int64, Bool, or UInt8 are not allowed as inputs.
*
* @return The new parametric ReLU layer, or nullptr if it could not be created.
* */
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IOneHotLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IOneHotLayer.java
index f337837446..5d8fd66a95 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IOneHotLayer.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IOneHotLayer.java
@@ -28,8 +28,8 @@
* Output, and an axis attribute.
* * Indices is an Int32 tensor that determines which locations in Output to set as on_value.
* * Values is a two-element (rank=1) tensor that consists of [off_value, on_value]
- * * Depth is an Int32 shape tensor of rank 0, which contains the depth (number of classes) of the one-hot encoding.
- * The depth tensor must be a build-time constant, and its value should be positive.
+ * * Depth is a 0D tensor of type Int32 or Int64, which contains the depth (number of classes) of the one-hot encoding.
+ * The depth tensor must be a positive build-time constant.
* * Output is a tensor with rank = rank(indices)+1, where the added dimension contains the one-hot encoding.
* The data types of Output is equal to the Values data type.
* * Axis is a scalar specifying to which dimension of the output one-hot encoding is added.
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java
index 4afc4105e5..a29c09124f 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java
@@ -209,7 +209,7 @@ public class IResizeLayer extends ILayer {
* The indices in the dynamic case are as follows:
*
* - 0: Execution tensor to be resized.
- * - 1: The output dimensions, as a 1D Int32 shape tensor.
+ * - 1: The output dimensions, as a 1D tensor of type Int32 or Int64.
*
* If this function is called with the value 1, then the function getNbInputs() changes
* from returning 1 to 2.
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java
index 53f4045e1a..53422e0d04 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java
@@ -26,7 +26,7 @@
* This layer shuffles data by applying in sequence: a transpose operation, a reshape operation
* and a second transpose operation. The dimension types of the output are those of the reshape dimension.
*
- * The layer has an optional second input. If present, it must be a 1D Int32 shape tensor,
+ * The layer has an optional second input. If present, it must be a 1D tensor of type Int32 or Int64,
* and the reshape dimensions are taken from it.
*
* \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
@@ -138,7 +138,7 @@ public class IShuffleLayer extends ILayer {
/** The indices in the dynamic case are as follows:
/**
/** - 0: Data or Shape tensor to be shuffled.
- /** - 1: The dimensions for the reshape operation, as a 1D Int32 shape tensor.
+ /** - 1: The dimensions for the reshape operation, as a 1D tensor of type Int32 or Int64.
/**
/** If this function is called with the value 1, then the function getNbInputs() changes
/** from returning 1 to 2.
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java
index c270f9a7c9..e67903077e 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java
@@ -34,7 +34,7 @@
*
* The slice layer selects for each dimension a start location from within the input tensor, and
* copies elements to the output tensor using the specified stride across the input tensor.
- * Start, size, and stride tensors must be 1D Int32 shape tensors if not specified via Dims.
+ * Start, size, and stride tensors must be 1D tensors of type Int32 or Int64 if not specified via Dims.
*
* An example of using slice on a tensor:
* input = {{0, 2, 4}, {1, 3, 5}}
@@ -72,10 +72,12 @@
* The following constraints must be satisfied to execute this layer on DLA:
* * start, size, and stride are build time constants, either as static Dims or as constant input tensors.
* * axes, if provided, are build time constants, either as static Dims or as a constant input tensor.
- * * sampleMode is kSTRICT_BOUNDS.
+ * * sampleMode is kDEFAULT, kWRAP, or kFILL.
* * Strides are 1 for all dimensions.
- * * Slicing is not performed on the first dimension
- * * The input tensor has four dimensions
+ * * Slicing is not performed on the first dimension.
+ * * The input tensor has four dimensions.
+ * * For kFILL sliceMode, the fill value input is a scalar output of an IConstantLayer with value 0 that is not
+ * consumed by any other layer.
*
* \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
* */
@@ -233,15 +235,15 @@ public class ISliceLayer extends ILayer {
* The indices are as follows:
*
* - 0: Tensor to be sliced.
- * - 1: The start tensor to begin slicing, as a 1D Int32 shape tensor.
- * - 2: The size tensor of the resulting slice, as a 1D Int32 shape tensor.
- * - 3: The stride of the slicing operation, as a 1D Int32 shape tensor.
+ * - 1: The start tensor to begin slicing, as a 1D tensor of type Int32 or Int64.
+ * - 2: The size tensor of the resulting slice, as a 1D tensor of type Int32 or Int64.
+ * - 3: The stride of the slicing operation, as a 1D tensor of type Int32 or Int64.
* - 4: Value for the kFILL slice mode. The fill value data type should either be the same
* or be implicitly convertible to the input data type.
* Implicit data type conversion is supported among kFLOAT, kHALF, kINT8, and kFP8 data types.
* This input is disallowed for other modes.
* - 5: The axes tensor indicating the corresponding axes that start, size, and stride
- * should apply to, as a 1D Int32 shape tensor. Negative values for axes
+ * should apply to, as a 1D tensor of type Int32 or Int64. Negative values for axes
* indicate indexing from the back of the input tensor. Values must be unique and be
* within the interval of [-rank(input), rank(input)-1].
*
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java
index e3ae1a877f..b8aa86cdfe 100644
--- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java
+++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java
@@ -102,4 +102,6 @@ public class VBuilderConfig extends VRoot {
public native @NoException(true) void setRuntimePlatform(RuntimePlatform runtimePlatform);
public native @NoException(true) void setRuntimePlatform(@Cast("nvinfer1::RuntimePlatform") int runtimePlatform);
public native @NoException(true) RuntimePlatform getRuntimePlatform();
+ public native @NoException(true) void setMaxNbTactics(int maxTactics);
+ public native @NoException(true) int getMaxNbTactics();
}
diff --git a/tensorrt/src/main/java/org/bytedeco/tensorrt/presets/nvinfer.java b/tensorrt/src/main/java/org/bytedeco/tensorrt/presets/nvinfer.java
index 7d1fa17238..631ed304ab 100644
--- a/tensorrt/src/main/java/org/bytedeco/tensorrt/presets/nvinfer.java
+++ b/tensorrt/src/main/java/org/bytedeco/tensorrt/presets/nvinfer.java
@@ -51,7 +51,7 @@
"NvInferLegacyDims.h", "NvInferRuntime.h", "NvInfer.h", "NvInferImpl.h"/*, "NvUtils.h"*/},
exclude = "NvInferRuntimeBase.h",
link = "nvinfer@.10",
- preload = "nvinfer_builder_resource@.10.3.0"
+ preload = "nvinfer_builder_resource@.10.5.0"
),
@Platform(
value = "linux-arm64",
diff --git a/tritonserver/README.md b/tritonserver/README.md
index f3cc656be1..3c88de0308 100644
--- a/tritonserver/README.md
+++ b/tritonserver/README.md
@@ -23,7 +23,7 @@ Introduction
------------
This directory contains the JavaCPP Presets module for:
- * Triton Inference Server 2.48.0 https://github.com/triton-inference-server/server
+ * Triton Inference Server 2.50.0 https://github.com/triton-inference-server/server
Please refer to the parent README.md file for more detailed information about the JavaCPP Presets.
@@ -51,9 +51,9 @@ This sample intends to show how to call the Java-mapped C API of Triton to execu
1. Get the source code of Triton Inference Server to prepare the model repository:
```bash
- $ wget https://github.com/triton-inference-server/server/archive/refs/tags/v2.48.0.tar.gz
- $ tar zxvf v2.48.0.tar.gz
- $ cd server-2.48.0/docs/examples/model_repository
+ $ wget https://github.com/triton-inference-server/server/archive/refs/tags/v2.50.0.tar.gz
+ $ tar zxvf v2.50.0.tar.gz
+ $ cd server-2.50.0/docs/examples/model_repository
$ mkdir models
$ cd models; cp -a ../simple .
```
@@ -61,7 +61,7 @@ Now, this `models` directory will be our model repository.
2. Start the Docker container to run the sample (assuming we are under the `models` directory created above):
```bash
- $ docker run -it --gpus=all -v $(pwd):/workspace nvcr.io/nvidia/tritonserver:24.07-py3 bash
+ $ docker run -it --gpus=all -v $(pwd):/workspace nvcr.io/nvidia/tritonserver:24.09-py3 bash
$ apt update
$ apt install -y openjdk-11-jdk
$ wget https://archive.apache.org/dist/maven/maven-3/3.8.4/binaries/apache-maven-3.8.4-bin.tar.gz
diff --git a/tritonserver/cppbuild.sh b/tritonserver/cppbuild.sh
index f438e39cad..611be9089e 100755
--- a/tritonserver/cppbuild.sh
+++ b/tritonserver/cppbuild.sh
@@ -11,9 +11,9 @@ INCLUDE_DEVELOPER_TOOLS_SERVER=${INCLUDE_DEVELOPER_TOOLS_SERVER:=1}
if [[ ! -f "/opt/tritonserver/include/triton/developer_tools/generic_server_wrapper.h" ]] && [[ ! -f "/opt/tritonserver/lib/libtritondevelopertoolsserver.so" ]] && [[ ${INCLUDE_DEVELOPER_TOOLS_SERVER} -ne 0 ]]; then
TOOLS_BRANCH=${TOOLS_BRANCH:="https://github.com/triton-inference-server/developer_tools.git"}
- TOOLS_BRANCH_TAG=${TOOLS_BRANCH_TAG:="r24.07"}
+ TOOLS_BRANCH_TAG=${TOOLS_BRANCH_TAG:="r24.09"}
TRITON_CORE_REPO=${TRITON_CORE_REPO:="https://github.com/triton-inference-server/core.git"}
- TRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG="r24.07"}
+ TRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG="r24.09"}
TRITON_HOME="/opt/tritonserver"
BUILD_HOME="$PWD"/tritonbuild
mkdir -p ${BUILD_HOME} && cd ${BUILD_HOME}
diff --git a/tritonserver/platform/pom.xml b/tritonserver/platform/pom.xml
index d83b464263..82851eac3d 100644
--- a/tritonserver/platform/pom.xml
+++ b/tritonserver/platform/pom.xml
@@ -12,7 +12,7 @@
org.bytedeco
tritonserver-platform
- 2.48.0-${project.parent.version}
+ 2.50.0-${project.parent.version}
JavaCPP Presets Platform for Triton Inference Server
diff --git a/tritonserver/platform/redist/pom.xml b/tritonserver/platform/redist/pom.xml
index 8e138ae38b..638848272a 100644
--- a/tritonserver/platform/redist/pom.xml
+++ b/tritonserver/platform/redist/pom.xml
@@ -12,7 +12,7 @@
org.bytedeco
tritonserver-platform-redist
- 2.48.0-${project.parent.version}
+ 2.50.0-${project.parent.version}
JavaCPP Presets Platform Redist for Triton Inference Server
diff --git a/tritonserver/pom.xml b/tritonserver/pom.xml
index 95a6320848..5c6b326cf9 100644
--- a/tritonserver/pom.xml
+++ b/tritonserver/pom.xml
@@ -11,7 +11,7 @@
org.bytedeco
tritonserver
- 2.48.0-${project.parent.version}
+ 2.50.0-${project.parent.version}
JavaCPP Presets for Triton Inference Server
diff --git a/tritonserver/samples/simple/pom.xml b/tritonserver/samples/simple/pom.xml
index 13e9fa7fc4..817cab07f5 100644
--- a/tritonserver/samples/simple/pom.xml
+++ b/tritonserver/samples/simple/pom.xml
@@ -12,7 +12,7 @@
org.bytedeco
tritonserver-platform
- 2.48.0-1.5.11-SNAPSHOT
+ 2.50.0-1.5.11-SNAPSHOT
shaded
diff --git a/tritonserver/samples/simplecpp/pom.xml b/tritonserver/samples/simplecpp/pom.xml
index 5a5ebb7d4e..fa631c00b0 100644
--- a/tritonserver/samples/simplecpp/pom.xml
+++ b/tritonserver/samples/simplecpp/pom.xml
@@ -12,7 +12,7 @@
org.bytedeco
tritonserver-platform
- 2.48.0-1.5.11-SNAPSHOT
+ 2.50.0-1.5.11-SNAPSHOT
shaded
diff --git a/tritonserver/samples/unsupported/pom.xml b/tritonserver/samples/unsupported/pom.xml
index 7145be0246..b3a891ddcd 100644
--- a/tritonserver/samples/unsupported/pom.xml
+++ b/tritonserver/samples/unsupported/pom.xml
@@ -13,17 +13,17 @@
org.bytedeco
cuda-platform
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
org.bytedeco
tensorrt-platform
- 10.3-1.5.11-SNAPSHOT
+ 10.5-1.5.11-SNAPSHOT
org.bytedeco
tritonserver-platform
- 2.48.0-1.5.11-SNAPSHOT
+ 2.50.0-1.5.11-SNAPSHOT
shaded
diff --git a/tvm/README.md b/tvm/README.md
index d40568e7c1..2ae70e5630 100644
--- a/tvm/README.md
+++ b/tvm/README.md
@@ -63,7 +63,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT
diff --git a/tvm/samples/pom.xml b/tvm/samples/pom.xml
index 727d14f5a3..1299336d26 100644
--- a/tvm/samples/pom.xml
+++ b/tvm/samples/pom.xml
@@ -26,7 +26,7 @@
org.bytedeco
cuda-platform-redist
- 12.6-9.3-1.5.11-SNAPSHOT
+ 12.6-9.5-1.5.11-SNAPSHOT