diff --git a/.github/actions/deploy-ubuntu/action.yml b/.github/actions/deploy-ubuntu/action.yml index ce2cb1e142..14b944e8f2 100644 --- a/.github/actions/deploy-ubuntu/action.yml +++ b/.github/actions/deploy-ubuntu/action.yml @@ -41,18 +41,18 @@ runs: export ARCH=arm64 export ARCH_CUDA=sbsa export PREFIX=aarch64-linux-gnu - export CUDA=cuda-repo-rhel8-12-6-local-12.6.0_560.28.03-1.aarch64.rpm - export CUDNN=cuda-12-9.3.0.75-1.aarch64 - export NCCL=2.22.3-1+cuda12.5.aarch64 - export NVCOMP=nvcomp-linux-sbsa-4.0.0-cuda12.5 + export CUDA=cuda-repo-rhel8-12-6-local-12.6.2_560.35.03-1.aarch64.rpm + export CUDNN=cuda-12-9.5.1.17-1.aarch64 + export NCCL=2.23.4-1+cuda12.6.aarch64 + export NVCOMP=nvcomp-linux-sbsa-4.0.1-cuda12.x export USERLAND_BUILDME="buildme --aarch64" elif [[ "$CI_DEPLOY_PLATFORM" == "linux-ppc64le" ]]; then export ARCH=ppc64el export ARCH_CUDA=ppc64le export PREFIX=powerpc64le-linux-gnu - export CUDA=cuda-repo-rhel8-12-6-local-12.6.0_560.28.03-1.ppc64le.rpm - export CUDNN=cuda-12-9.3.0.75-1.ppc64le - export NCCL=2.22.3-1+cuda12.5.ppc64le + export CUDA=cuda-repo-rhel8-12-6-local-12.6.2_560.35.03-1.ppc64le.rpm + export CUDNN=cuda-12-9.5.1.17-1.ppc64le + export NCCL=2.23.4-1+cuda12.6.ppc64le elif [[ "$CI_DEPLOY_PLATFORM" == "linux-riscv64" ]]; then export ARCH=riscv64 export PREFIX=riscv64-linux-gnu @@ -63,10 +63,10 @@ runs: export ARCH=amd64 export ARCH_CUDA=x86_64 export PREFIX=x86_64-linux-gnu - export CUDA=cuda-repo-rhel8-12-6-local-12.6.0_560.28.03-1.x86_64.rpm - export CUDNN=cuda-12-9.3.0.75-1.x86_64 - export NCCL=2.22.3-1+cuda12.5.x86_64 - export NVCOMP=nvcomp-linux-x86_64-4.0.0-cuda12.5 + export CUDA=cuda-repo-rhel8-12-6-local-12.6.2_560.35.03-1.x86_64.rpm + export CUDNN=cuda-12-9.5.1.17-1.x86_64 + export NCCL=2.23.4-1+cuda12.6.x86_64 + export NVCOMP=nvcomp-linux-x86_64-4.0.1-cuda12.x fi echo "ARCH=$ARCH" >> $GITHUB_ENV echo "PREFIX=$PREFIX" >> $GITHUB_ENV @@ -165,7 +165,7 @@ runs: if [[ -n ${ARCH_CUDA:-} ]] && [[ -n ${CI_DEPLOY_NEED_CUDA:-} ]]; 
then echo Installing CUDA, cuDNN, nvCOMP, etc - curl -LO https://developer.download.nvidia.com/compute/cuda/12.6.0/local_installers/$CUDA + curl -LO https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/$CUDA curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel8/$ARCH_CUDA/libcudnn9-$CUDNN.rpm curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel8/$ARCH_CUDA/libcudnn9-devel-$CUDNN.rpm curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel8/$ARCH_CUDA/libnccl-$NCCL.rpm @@ -183,7 +183,7 @@ runs: for f in /usr/local/cuda/lib64/libcudnn*so.9.*; do $SUDO ln -sf $f ${f:0:${#f}-4}; $SUDO ln -sf $f ${f:0:${#f}-6}; done if [[ -n ${NVCOMP:-} ]]; then - curl -LO https://developer.download.nvidia.com/compute/nvcomp/4.0.0/local_installers/$NVCOMP.tar.gz + curl -LO https://developer.download.nvidia.com/compute/nvcomp/4.0.1/local_installers/$NVCOMP.tar.gz $SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/lib64/ --strip-components=1 lib/ || $SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/lib64/ --strip-components=2 nvcomp/lib/ $SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/include/ --strip-components=1 include/ || $SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/include/ --strip-components=2 nvcomp/include/ rm -f $NVCOMP.tar.gz @@ -213,16 +213,16 @@ runs: if [[ "$CI_DEPLOY_PLATFORM" == "linux-arm64" ]] && [[ "$CI_DEPLOY_MODULE" == "tensorrt" ]]; then echo Installing TensorRT # python3 -m gdown 1LZRCv4ZAGiDQAu4pvADJIGntq4cGl5tU - curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/tars/TensorRT-10.3.0.26.Ubuntu-22.04.aarch64-gnu.cuda-12.5.tar.gz - $SUDO tar -hxvf TensorRT-10.3.0.26.Ubuntu-22.04.aarch64-gnu.cuda-12.5.tar.gz -C /usr/local/ + curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/tars/TensorRT-10.5.0.18.Ubuntu-24.04.aarch64-gnu.cuda-12.6.tar.gz + $SUDO tar -hxvf TensorRT-10.5.0.18.Ubuntu-24.04.aarch64-gnu.cuda-12.6.tar.gz -C 
/usr/local/ $SUDO ln -sf /usr/local/TensorRT* /usr/local/tensorrt fi if [[ "$CI_DEPLOY_PLATFORM" == "linux-x86_64" ]] && [[ "$CI_DEPLOY_MODULE" == "tensorrt" ]]; then echo Installing TensorRT # python3 -m gdown 1dVhD-DEYY42QbZe1GXl-vxe3k6KqWGsL - curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/tars/TensorRT-10.3.0.26.Linux.x86_64-gnu.cuda-12.5.tar.gz - $SUDO tar -hxvf TensorRT-10.3.0.26.Linux.x86_64-gnu.cuda-12.5.tar.gz -C /usr/local/ + curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/tars/TensorRT-10.5.0.18.Linux.x86_64-gnu.cuda-12.6.tar.gz + $SUDO tar -hxvf TensorRT-10.5.0.18.Linux.x86_64-gnu.cuda-12.6.tar.gz -C /usr/local/ $SUDO ln -sf /usr/local/TensorRT* /usr/local/tensorrt fi diff --git a/.github/actions/deploy-windows/action.yml b/.github/actions/deploy-windows/action.yml index 3783ecc388..1869894efa 100644 --- a/.github/actions/deploy-windows/action.yml +++ b/.github/actions/deploy-windows/action.yml @@ -99,22 +99,22 @@ runs: if "%CI_DEPLOY_PLATFORM%"=="windows-x86_64" if not "%CI_DEPLOY_NEED_CUDA%"=="" ( echo Installing CUDA, cuDNN, nvCOMP, etc curl -LO https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_522.06_windows.exe - curl -LO https://developer.download.nvidia.com/compute/cuda/12.6.0/local_installers/cuda_12.6.0_560.76_windows.exe - curl -LO https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.3.0.75_cuda12-archive.zip + curl -LO https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/cuda_12.6.2_560.94_windows.exe + curl -LO https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.5.1.17_cuda12-archive.zip curl -LO http://www.winimage.com/zLibDll/zlib123dllx64.zip - curl -LO https://developer.download.nvidia.com/compute/nvcomp/4.0.0/local_installers/nvcomp-windows-x86_64-4.0.0-cuda12.5.zip + curl -LO 
https://developer.download.nvidia.com/compute/nvcomp/4.0.1/local_installers/nvcomp-windows-x86_64-4.0.1-cuda12.x.zip cuda_11.8.0_522.06_windows.exe -s bash -c "rm -Rf 'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8'" bash -c "mv 'C:/Program Files/NVIDIA Corporation/NvToolsExt' 'C:/Program Files/NVIDIA Corporation/NvToolsExt_old'" - cuda_12.6.0_560.76_windows.exe -s + cuda_12.6.2_560.94_windows.exe -s bash -c "mv 'C:/Program Files/NVIDIA Corporation/NvToolsExt_old' 'C:/Program Files/NVIDIA Corporation/NvToolsExt'" bash -c "ls 'C:/Program Files/NVIDIA Corporation/NvToolsExt'" - unzip cudnn-windows-x86_64-9.3.0.75_cuda12-archive.zip + unzip cudnn-windows-x86_64-9.5.1.17_cuda12-archive.zip unzip zlib123dllx64.zip - unzip nvcomp-windows-x86_64-4.0.0-cuda12.5.zip - move cudnn-windows-x86_64-9.3.0.75_cuda12-archive\bin\*.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin" - move cudnn-windows-x86_64-9.3.0.75_cuda12-archive\include\*.h "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include" - move cudnn-windows-x86_64-9.3.0.75_cuda12-archive\lib\x64\*.lib "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\lib\x64" + unzip nvcomp-windows-x86_64-4.0.1-cuda12.x.zip + move cudnn-windows-x86_64-9.5.1.17_cuda12-archive\bin\*.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin" + move cudnn-windows-x86_64-9.5.1.17_cuda12-archive\include\*.h "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include" + move cudnn-windows-x86_64-9.5.1.17_cuda12-archive\lib\x64\*.lib "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\lib\x64" move dll_x64\zlibwapi.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin" move nvcomp\include\* "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include" move nvcomp\include\device "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include" @@ -144,9 +144,9 @@ runs: if "%CI_DEPLOY_MODULE%"=="tensorrt" ( echo Installing TensorRT rem python -m gdown 
1GfmJ1BKbacLpUU-0i_mGu0sjrAS0Xzzi - curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/zip/TensorRT-10.3.0.26.Windows.win10.cuda-12.5.zip - unzip TensorRT-10.3.0.26.Windows.win10.cuda-12.5.zip - move TensorRT-10.3.0.26 "%ProgramFiles%\NVIDIA GPU Computing Toolkit\TensorRT" + curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/zip/TensorRT-10.5.0.18.Windows.win10.cuda-12.6.zip + unzip TensorRT-10.5.0.18.Windows.win10.cuda-12.6.zip + move TensorRT-10.5.0.18 "%ProgramFiles%\NVIDIA GPU Computing Toolkit\TensorRT" ) if "%CI_DEPLOY_MODULE%"=="mkl" ( @@ -221,7 +221,7 @@ runs: set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6" set "CUDA_PATH_V12_6=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6" set "PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\libnvvp;%PATH%" - echo CUDA Version 12.6.0>"%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\version.txt" + echo CUDA Version 12.6.2>"%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\version.txt" ) set "CCACHE_DIR=%USERPROFILE%\ccache" set "PATH=C:\hostedtoolcache\windows\Python\3.9.13\x64;C:\msys64\%MSYSTEM%\bin;C:\msys64\usr\bin;%ProgramFiles%\apache-maven-3.6.3\bin;%PATH%" diff --git a/.github/workflows/tritonserver.yml b/.github/workflows/tritonserver.yml index d04f7b44a6..9c1cfa0c28 100644 --- a/.github/workflows/tritonserver.yml +++ b/.github/workflows/tritonserver.yml @@ -19,6 +19,6 @@ env: jobs: linux-x86_64: runs-on: ubuntu-20.04 - container: nvcr.io/nvidia/tritonserver:24.07-tf2-python-py3 + container: nvcr.io/nvidia/tritonserver:24.09-tf2-python-py3 steps: - uses: bytedeco/javacpp-presets/.github/actions/deploy-ubuntu@actions diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a91468477..78f7f723aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ * Build FFmpeg with zimg to enable zscale filter ([pull 
#1481](https://github.com/bytedeco/javacpp-presets/pull/1481)) * Enable PulseAudio support for FFmpeg on Linux ([pull #1472](https://github.com/bytedeco/javacpp-presets/pull/1472)) * Virtualize `btCollisionWorld`, `btOverlapFilterCallback`, `btOverlapCallback` from Bullet Physics SDK ([pull #1475](https://github.com/bytedeco/javacpp-presets/pull/1475)) - * Upgrade presets for OpenCV 4.10.0, FFmpeg 7.1, Spinnaker 4.0.0.116 ([pull #1524](https://github.com/bytedeco/javacpp-presets/pull/1524)), MKL 2024.2, DNNL 3.6, OpenBLAS 0.3.28, CMINPACK 1.3.11, GSL 2.8, CPython 3.13.0, NumPy 2.1.2, SciPy 1.14.1, LLVM 19.1.2, LibRaw 0.21.2 ([pull #1520](https://github.com/bytedeco/javacpp-presets/pull/1520)), Leptonica 1.85.0, Tesseract 5.4.1, libffi 3.4.6, CUDA 12.6.0, cuDNN 9.3.0, NCCL 2.22.3, nvCOMP 4.0.0, OpenCL 3.0.16, NVIDIA Video Codec SDK 12.2.72, PyTorch 2.5.0 ([pull #1466](https://github.com/bytedeco/javacpp-presets/pull/1466)), SentencePiece 0.2.0, TensorFlow Lite 2.18.0, TensorRT 10.3.0.26, Triton Inference Server 2.48.0, ONNX 1.17.0, ONNX Runtime 1.19.2, TVM 0.18.0, and their dependencies + * Upgrade presets for OpenCV 4.10.0, FFmpeg 7.1, Spinnaker 4.0.0.116 ([pull #1524](https://github.com/bytedeco/javacpp-presets/pull/1524)), MKL 2024.2, DNNL 3.6, OpenBLAS 0.3.28, CMINPACK 1.3.11, GSL 2.8, CPython 3.13.0, NumPy 2.1.2, SciPy 1.14.1, LLVM 19.1.2, LibRaw 0.21.2 ([pull #1520](https://github.com/bytedeco/javacpp-presets/pull/1520)), Leptonica 1.85.0, Tesseract 5.4.1, libffi 3.4.6, CUDA 12.6.2, cuDNN 9.5.1, NCCL 2.23.4, nvCOMP 4.0.1, OpenCL 3.0.16, NVIDIA Video Codec SDK 12.2.72, PyTorch 2.5.0 ([pull #1466](https://github.com/bytedeco/javacpp-presets/pull/1466)), SentencePiece 0.2.0, TensorFlow Lite 2.18.0, TensorRT 10.5.0.18, Triton Inference Server 2.50.0, ONNX 1.17.0, ONNX Runtime 1.19.2, TVM 0.18.0, and their dependencies ### January 29, 2024 version 1.5.10 * Introduce `macosx-arm64` builds for PyTorch ([pull 
#1463](https://github.com/bytedeco/javacpp-presets/pull/1463)) diff --git a/README.md b/README.md index a1e3e768d4..8f477741dc 100644 --- a/README.md +++ b/README.md @@ -217,8 +217,8 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip * Caffe 1.0 https://github.com/BVLC/caffe * OpenPose 1.7.0 https://github.com/CMU-Perceptual-Computing-Lab/openpose * CUDA 12.6.x https://developer.nvidia.com/cuda-downloads - * cuDNN 9.3.x https://developer.nvidia.com/cudnn - * NCCL 2.22.x https://developer.nvidia.com/nccl + * cuDNN 9.5.x https://developer.nvidia.com/cudnn + * NCCL 2.23.x https://developer.nvidia.com/nccl * nvCOMP 4.0.x https://developer.nvidia.com/nvcomp * NVIDIA Video Codec SDK 12.2.x https://developer.nvidia.com/nvidia-video-codec-sdk * OpenCL 3.0.x https://github.com/KhronosGroup/OpenCL-ICD-Loader @@ -227,8 +227,8 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip * SentencePiece 0.2.0 https://github.com/google/sentencepiece * TensorFlow 1.15.x https://github.com/tensorflow/tensorflow * TensorFlow Lite 2.18.x https://github.com/tensorflow/tensorflow - * TensorRT 10.3.x https://developer.nvidia.com/tensorrt - * Triton Inference Server 2.48.x https://developer.nvidia.com/nvidia-triton-inference-server + * TensorRT 10.5.x https://developer.nvidia.com/tensorrt + * Triton Inference Server 2.50.x https://developer.nvidia.com/nvidia-triton-inference-server * The Arcade Learning Environment 0.8.x https://github.com/mgbellemare/Arcade-Learning-Environment * DepthAI 2.24.x https://github.com/luxonis/depthai-core * ONNX 1.17.x https://github.com/onnx/onnx diff --git a/cuda/README.md b/cuda/README.md index a969df66ec..598d815f7b 100644 --- a/cuda/README.md +++ b/cuda/README.md @@ -25,10 +25,10 @@ Introduction ------------ This directory contains the JavaCPP Presets module for: - * CUDA 12.6.0 https://developer.nvidia.com/cuda-zone - * cuDNN 9.3.0 https://developer.nvidia.com/cudnn - * NCCL 2.22.3 
https://developer.nvidia.com/nccl - * nvCOMP 4.0.0 https://developer.nvidia.com/nvcomp + * CUDA 12.6.2 https://developer.nvidia.com/cuda-zone + * cuDNN 9.5.1 https://developer.nvidia.com/cudnn + * NCCL 2.23.4 https://developer.nvidia.com/nccl + * nvCOMP 4.0.1 https://developer.nvidia.com/nvcomp Please refer to the parent README.md file for more detailed information about the JavaCPP Presets. @@ -67,14 +67,14 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic org.bytedeco cuda-platform - 12.6-9.3-1.5.11-SNAPSHOT + 12.6-9.5-1.5.11-SNAPSHOT org.bytedeco cuda-platform-redist - 12.6-9.3-1.5.11-SNAPSHOT + 12.6-9.5-1.5.11-SNAPSHOT diff --git a/cuda/platform/pom.xml b/cuda/platform/pom.xml index f374c4fc33..b84a14f18a 100644 --- a/cuda/platform/pom.xml +++ b/cuda/platform/pom.xml @@ -12,7 +12,7 @@ org.bytedeco cuda-platform - 12.6-9.3-${project.parent.version} + 12.6-9.5-${project.parent.version} JavaCPP Presets Platform for CUDA diff --git a/cuda/platform/redist/pom.xml b/cuda/platform/redist/pom.xml index 7885ff8e03..056aa0ed30 100644 --- a/cuda/platform/redist/pom.xml +++ b/cuda/platform/redist/pom.xml @@ -12,7 +12,7 @@ org.bytedeco cuda-platform-redist - 12.6-9.3-${project.parent.version} + 12.6-9.5-${project.parent.version} JavaCPP Presets Platform Redist for CUDA diff --git a/cuda/pom.xml b/cuda/pom.xml index d0bb5940d5..48401ddffe 100644 --- a/cuda/pom.xml +++ b/cuda/pom.xml @@ -11,7 +11,7 @@ org.bytedeco cuda - 12.6-9.3-${project.parent.version} + 12.6-9.5-${project.parent.version} JavaCPP Presets for CUDA diff --git a/cuda/samples/pom.xml b/cuda/samples/pom.xml index 0edae05bfd..8a9cc92880 100644 --- a/cuda/samples/pom.xml +++ b/cuda/samples/pom.xml @@ -12,14 +12,14 @@ org.bytedeco cuda-platform - 12.6-9.3-1.5.11-SNAPSHOT + 12.6-9.5-1.5.11-SNAPSHOT org.bytedeco cuda-platform-redist - 12.6-9.3-1.5.11-SNAPSHOT + 12.6-9.5-1.5.11-SNAPSHOT diff --git a/cuda/src/gen/java/org/bytedeco/cuda/cublas/cublasLtMatmulHeuristicResult_t.java 
b/cuda/src/gen/java/org/bytedeco/cuda/cublas/cublasLtMatmulHeuristicResult_t.java index 6be405416e..d672e808b2 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/cublas/cublasLtMatmulHeuristicResult_t.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/cublas/cublasLtMatmulHeuristicResult_t.java @@ -13,7 +13,7 @@ import static org.bytedeco.cuda.global.cublas.*; -/** Results structure used by cublasLtMatmulGetAlgo. +/** Results structure used by cublasLtMatmulAlgoGetHeuristic * * Holds returned configured algo descriptor and its runtime properties. */ diff --git a/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_ActivityMarkerData.java b/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_ActivityMarkerData.java index 7531cf23df..b0e5b26c98 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_ActivityMarkerData.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_ActivityMarkerData.java @@ -16,6 +16,8 @@ /** * \brief The activity record providing detailed information for a marker. * + * User must enable CUPTI_ACTIVITY_KIND_MARKER as well + * to get records for marker data. * The marker data contains color, payload, and category. * (CUPTI_ACTIVITY_KIND_MARKER_DATA). */ diff --git a/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_CallbackData.java b/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_CallbackData.java index 0e79317c8d..533a9e0407 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_CallbackData.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/cupti/CUpti_CallbackData.java @@ -13,6 +13,7 @@ import static org.bytedeco.cuda.global.cupti.*; + /** * \brief Data passed into a runtime or driver API callback function. 
* diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cublas.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cublas.java index 04a7bfcb98..8faa8693ef 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/global/cublas.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cublas.java @@ -103,8 +103,8 @@ public class cublas extends org.bytedeco.cuda.presets.cublas { public static final int CUBLAS_VER_MAJOR = 12; public static final int CUBLAS_VER_MINOR = 6; -public static final int CUBLAS_VER_PATCH = 0; -public static final int CUBLAS_VER_BUILD = 22; +public static final int CUBLAS_VER_PATCH = 3; +public static final int CUBLAS_VER_BUILD = 3; public static final int CUBLAS_VERSION = (CUBLAS_VER_MAJOR * 10000 + CUBLAS_VER_MINOR * 100 + CUBLAS_VER_PATCH); /* CUBLAS status type returns */ @@ -12713,7 +12713,602 @@ public static native void cublasZtrmm(@Cast("char") byte side, CUBLASLT_MATMUL_TILE_128x96 = 33, CUBLASLT_MATMUL_TILE_32x256 = 34, CUBLASLT_MATMUL_TILE_256x32 = 35, - CUBLASLT_MATMUL_TILE_END = 36; + CUBLASLT_MATMUL_TILE_8x128 = 36, + CUBLASLT_MATMUL_TILE_8x192 = 37, + CUBLASLT_MATMUL_TILE_8x256 = 38, + CUBLASLT_MATMUL_TILE_8x320 = 39, + CUBLASLT_MATMUL_TILE_8x384 = 40, + CUBLASLT_MATMUL_TILE_8x448 = 41, + CUBLASLT_MATMUL_TILE_8x512 = 42, + CUBLASLT_MATMUL_TILE_8x576 = 43, + CUBLASLT_MATMUL_TILE_8x640 = 44, + CUBLASLT_MATMUL_TILE_8x704 = 45, + CUBLASLT_MATMUL_TILE_8x768 = 46, + CUBLASLT_MATMUL_TILE_16x64 = 47, + CUBLASLT_MATMUL_TILE_16x128 = 48, + CUBLASLT_MATMUL_TILE_16x192 = 49, + CUBLASLT_MATMUL_TILE_16x256 = 50, + CUBLASLT_MATMUL_TILE_16x320 = 51, + CUBLASLT_MATMUL_TILE_16x384 = 52, + CUBLASLT_MATMUL_TILE_16x448 = 53, + CUBLASLT_MATMUL_TILE_16x512 = 54, + CUBLASLT_MATMUL_TILE_16x576 = 55, + CUBLASLT_MATMUL_TILE_16x640 = 56, + CUBLASLT_MATMUL_TILE_16x704 = 57, + CUBLASLT_MATMUL_TILE_16x768 = 58, + CUBLASLT_MATMUL_TILE_24x64 = 59, + CUBLASLT_MATMUL_TILE_24x128 = 60, + CUBLASLT_MATMUL_TILE_24x192 = 61, + CUBLASLT_MATMUL_TILE_24x256 = 62, + 
CUBLASLT_MATMUL_TILE_24x320 = 63, + CUBLASLT_MATMUL_TILE_24x384 = 64, + CUBLASLT_MATMUL_TILE_24x448 = 65, + CUBLASLT_MATMUL_TILE_24x512 = 66, + CUBLASLT_MATMUL_TILE_24x576 = 67, + CUBLASLT_MATMUL_TILE_24x640 = 68, + CUBLASLT_MATMUL_TILE_24x704 = 69, + CUBLASLT_MATMUL_TILE_24x768 = 70, + CUBLASLT_MATMUL_TILE_32x192 = 71, + CUBLASLT_MATMUL_TILE_32x320 = 72, + CUBLASLT_MATMUL_TILE_32x384 = 73, + CUBLASLT_MATMUL_TILE_32x448 = 74, + CUBLASLT_MATMUL_TILE_32x512 = 75, + CUBLASLT_MATMUL_TILE_32x576 = 76, + CUBLASLT_MATMUL_TILE_32x640 = 77, + CUBLASLT_MATMUL_TILE_32x704 = 78, + CUBLASLT_MATMUL_TILE_32x768 = 79, + CUBLASLT_MATMUL_TILE_40x64 = 80, + CUBLASLT_MATMUL_TILE_40x128 = 81, + CUBLASLT_MATMUL_TILE_40x192 = 82, + CUBLASLT_MATMUL_TILE_40x256 = 83, + CUBLASLT_MATMUL_TILE_40x320 = 84, + CUBLASLT_MATMUL_TILE_40x384 = 85, + CUBLASLT_MATMUL_TILE_40x448 = 86, + CUBLASLT_MATMUL_TILE_40x512 = 87, + CUBLASLT_MATMUL_TILE_40x576 = 88, + CUBLASLT_MATMUL_TILE_40x640 = 89, + CUBLASLT_MATMUL_TILE_40x704 = 90, + CUBLASLT_MATMUL_TILE_40x768 = 91, + CUBLASLT_MATMUL_TILE_48x64 = 92, + CUBLASLT_MATMUL_TILE_48x128 = 93, + CUBLASLT_MATMUL_TILE_48x192 = 94, + CUBLASLT_MATMUL_TILE_48x256 = 95, + CUBLASLT_MATMUL_TILE_48x320 = 96, + CUBLASLT_MATMUL_TILE_48x384 = 97, + CUBLASLT_MATMUL_TILE_48x448 = 98, + CUBLASLT_MATMUL_TILE_48x512 = 99, + CUBLASLT_MATMUL_TILE_48x576 = 100, + CUBLASLT_MATMUL_TILE_48x640 = 101, + CUBLASLT_MATMUL_TILE_48x704 = 102, + CUBLASLT_MATMUL_TILE_48x768 = 103, + CUBLASLT_MATMUL_TILE_56x64 = 104, + CUBLASLT_MATMUL_TILE_56x128 = 105, + CUBLASLT_MATMUL_TILE_56x192 = 106, + CUBLASLT_MATMUL_TILE_56x256 = 107, + CUBLASLT_MATMUL_TILE_56x320 = 108, + CUBLASLT_MATMUL_TILE_56x384 = 109, + CUBLASLT_MATMUL_TILE_56x448 = 110, + CUBLASLT_MATMUL_TILE_56x512 = 111, + CUBLASLT_MATMUL_TILE_56x576 = 112, + CUBLASLT_MATMUL_TILE_56x640 = 113, + CUBLASLT_MATMUL_TILE_56x704 = 114, + CUBLASLT_MATMUL_TILE_56x768 = 115, + CUBLASLT_MATMUL_TILE_64x192 = 116, + CUBLASLT_MATMUL_TILE_64x320 = 117, + 
CUBLASLT_MATMUL_TILE_64x384 = 118, + CUBLASLT_MATMUL_TILE_64x448 = 119, + CUBLASLT_MATMUL_TILE_64x576 = 120, + CUBLASLT_MATMUL_TILE_64x640 = 121, + CUBLASLT_MATMUL_TILE_64x704 = 122, + CUBLASLT_MATMUL_TILE_64x768 = 123, + CUBLASLT_MATMUL_TILE_72x64 = 124, + CUBLASLT_MATMUL_TILE_72x128 = 125, + CUBLASLT_MATMUL_TILE_72x192 = 126, + CUBLASLT_MATMUL_TILE_72x256 = 127, + CUBLASLT_MATMUL_TILE_72x320 = 128, + CUBLASLT_MATMUL_TILE_72x384 = 129, + CUBLASLT_MATMUL_TILE_72x448 = 130, + CUBLASLT_MATMUL_TILE_72x512 = 131, + CUBLASLT_MATMUL_TILE_72x576 = 132, + CUBLASLT_MATMUL_TILE_72x640 = 133, + CUBLASLT_MATMUL_TILE_80x64 = 134, + CUBLASLT_MATMUL_TILE_80x128 = 135, + CUBLASLT_MATMUL_TILE_80x192 = 136, + CUBLASLT_MATMUL_TILE_80x256 = 137, + CUBLASLT_MATMUL_TILE_80x320 = 138, + CUBLASLT_MATMUL_TILE_80x384 = 139, + CUBLASLT_MATMUL_TILE_80x448 = 140, + CUBLASLT_MATMUL_TILE_80x512 = 141, + CUBLASLT_MATMUL_TILE_80x576 = 142, + CUBLASLT_MATMUL_TILE_88x64 = 143, + CUBLASLT_MATMUL_TILE_88x128 = 144, + CUBLASLT_MATMUL_TILE_88x192 = 145, + CUBLASLT_MATMUL_TILE_88x256 = 146, + CUBLASLT_MATMUL_TILE_88x320 = 147, + CUBLASLT_MATMUL_TILE_88x384 = 148, + CUBLASLT_MATMUL_TILE_88x448 = 149, + CUBLASLT_MATMUL_TILE_88x512 = 150, + CUBLASLT_MATMUL_TILE_96x192 = 151, + CUBLASLT_MATMUL_TILE_96x256 = 152, + CUBLASLT_MATMUL_TILE_96x320 = 153, + CUBLASLT_MATMUL_TILE_96x384 = 154, + CUBLASLT_MATMUL_TILE_96x448 = 155, + CUBLASLT_MATMUL_TILE_96x512 = 156, + CUBLASLT_MATMUL_TILE_104x64 = 157, + CUBLASLT_MATMUL_TILE_104x128 = 158, + CUBLASLT_MATMUL_TILE_104x192 = 159, + CUBLASLT_MATMUL_TILE_104x256 = 160, + CUBLASLT_MATMUL_TILE_104x320 = 161, + CUBLASLT_MATMUL_TILE_104x384 = 162, + CUBLASLT_MATMUL_TILE_104x448 = 163, + CUBLASLT_MATMUL_TILE_112x64 = 164, + CUBLASLT_MATMUL_TILE_112x128 = 165, + CUBLASLT_MATMUL_TILE_112x192 = 166, + CUBLASLT_MATMUL_TILE_112x256 = 167, + CUBLASLT_MATMUL_TILE_112x320 = 168, + CUBLASLT_MATMUL_TILE_112x384 = 169, + CUBLASLT_MATMUL_TILE_120x64 = 170, + CUBLASLT_MATMUL_TILE_120x128 = 
171, + CUBLASLT_MATMUL_TILE_120x192 = 172, + CUBLASLT_MATMUL_TILE_120x256 = 173, + CUBLASLT_MATMUL_TILE_120x320 = 174, + CUBLASLT_MATMUL_TILE_120x384 = 175, + CUBLASLT_MATMUL_TILE_128x320 = 176, + CUBLASLT_MATMUL_TILE_128x384 = 177, + CUBLASLT_MATMUL_TILE_136x64 = 178, + CUBLASLT_MATMUL_TILE_136x128 = 179, + CUBLASLT_MATMUL_TILE_136x192 = 180, + CUBLASLT_MATMUL_TILE_136x256 = 181, + CUBLASLT_MATMUL_TILE_136x320 = 182, + CUBLASLT_MATMUL_TILE_144x64 = 183, + CUBLASLT_MATMUL_TILE_144x128 = 184, + CUBLASLT_MATMUL_TILE_144x192 = 185, + CUBLASLT_MATMUL_TILE_144x256 = 186, + CUBLASLT_MATMUL_TILE_144x320 = 187, + CUBLASLT_MATMUL_TILE_152x64 = 188, + CUBLASLT_MATMUL_TILE_152x128 = 189, + CUBLASLT_MATMUL_TILE_152x192 = 190, + CUBLASLT_MATMUL_TILE_152x256 = 191, + CUBLASLT_MATMUL_TILE_152x320 = 192, + CUBLASLT_MATMUL_TILE_160x64 = 193, + CUBLASLT_MATMUL_TILE_160x192 = 194, + CUBLASLT_MATMUL_TILE_160x256 = 195, + CUBLASLT_MATMUL_TILE_168x64 = 196, + CUBLASLT_MATMUL_TILE_168x128 = 197, + CUBLASLT_MATMUL_TILE_168x192 = 198, + CUBLASLT_MATMUL_TILE_168x256 = 199, + CUBLASLT_MATMUL_TILE_176x64 = 200, + CUBLASLT_MATMUL_TILE_176x128 = 201, + CUBLASLT_MATMUL_TILE_176x192 = 202, + CUBLASLT_MATMUL_TILE_176x256 = 203, + CUBLASLT_MATMUL_TILE_184x64 = 204, + CUBLASLT_MATMUL_TILE_184x128 = 205, + CUBLASLT_MATMUL_TILE_184x192 = 206, + CUBLASLT_MATMUL_TILE_184x256 = 207, + CUBLASLT_MATMUL_TILE_192x64 = 208, + CUBLASLT_MATMUL_TILE_192x192 = 209, + CUBLASLT_MATMUL_TILE_192x256 = 210, + CUBLASLT_MATMUL_TILE_200x64 = 211, + CUBLASLT_MATMUL_TILE_200x128 = 212, + CUBLASLT_MATMUL_TILE_200x192 = 213, + CUBLASLT_MATMUL_TILE_208x64 = 214, + CUBLASLT_MATMUL_TILE_208x128 = 215, + CUBLASLT_MATMUL_TILE_208x192 = 216, + CUBLASLT_MATMUL_TILE_216x64 = 217, + CUBLASLT_MATMUL_TILE_216x128 = 218, + CUBLASLT_MATMUL_TILE_216x192 = 219, + CUBLASLT_MATMUL_TILE_224x64 = 220, + CUBLASLT_MATMUL_TILE_224x128 = 221, + CUBLASLT_MATMUL_TILE_224x192 = 222, + CUBLASLT_MATMUL_TILE_232x64 = 223, + CUBLASLT_MATMUL_TILE_232x128 
= 224, + CUBLASLT_MATMUL_TILE_232x192 = 225, + CUBLASLT_MATMUL_TILE_240x64 = 226, + CUBLASLT_MATMUL_TILE_240x128 = 227, + CUBLASLT_MATMUL_TILE_240x192 = 228, + CUBLASLT_MATMUL_TILE_248x64 = 229, + CUBLASLT_MATMUL_TILE_248x128 = 230, + CUBLASLT_MATMUL_TILE_248x192 = 231, + CUBLASLT_MATMUL_TILE_256x192 = 232, + CUBLASLT_MATMUL_TILE_264x64 = 233, + CUBLASLT_MATMUL_TILE_264x128 = 234, + CUBLASLT_MATMUL_TILE_272x64 = 235, + CUBLASLT_MATMUL_TILE_272x128 = 236, + CUBLASLT_MATMUL_TILE_280x64 = 237, + CUBLASLT_MATMUL_TILE_280x128 = 238, + CUBLASLT_MATMUL_TILE_288x64 = 239, + CUBLASLT_MATMUL_TILE_288x128 = 240, + CUBLASLT_MATMUL_TILE_296x64 = 241, + CUBLASLT_MATMUL_TILE_296x128 = 242, + CUBLASLT_MATMUL_TILE_304x64 = 243, + CUBLASLT_MATMUL_TILE_304x128 = 244, + CUBLASLT_MATMUL_TILE_312x64 = 245, + CUBLASLT_MATMUL_TILE_312x128 = 246, + CUBLASLT_MATMUL_TILE_320x64 = 247, + CUBLASLT_MATMUL_TILE_320x128 = 248, + CUBLASLT_MATMUL_TILE_328x64 = 249, + CUBLASLT_MATMUL_TILE_328x128 = 250, + CUBLASLT_MATMUL_TILE_336x64 = 251, + CUBLASLT_MATMUL_TILE_336x128 = 252, + CUBLASLT_MATMUL_TILE_344x64 = 253, + CUBLASLT_MATMUL_TILE_344x128 = 254, + CUBLASLT_MATMUL_TILE_352x64 = 255, + CUBLASLT_MATMUL_TILE_352x128 = 256, + CUBLASLT_MATMUL_TILE_360x64 = 257, + CUBLASLT_MATMUL_TILE_360x128 = 258, + CUBLASLT_MATMUL_TILE_368x64 = 259, + CUBLASLT_MATMUL_TILE_368x128 = 260, + CUBLASLT_MATMUL_TILE_376x64 = 261, + CUBLASLT_MATMUL_TILE_376x128 = 262, + CUBLASLT_MATMUL_TILE_384x64 = 263, + CUBLASLT_MATMUL_TILE_384x128 = 264, + CUBLASLT_MATMUL_TILE_392x64 = 265, + CUBLASLT_MATMUL_TILE_400x64 = 266, + CUBLASLT_MATMUL_TILE_408x64 = 267, + CUBLASLT_MATMUL_TILE_416x64 = 268, + CUBLASLT_MATMUL_TILE_424x64 = 269, + CUBLASLT_MATMUL_TILE_432x64 = 270, + CUBLASLT_MATMUL_TILE_440x64 = 271, + CUBLASLT_MATMUL_TILE_448x64 = 272, + CUBLASLT_MATMUL_TILE_456x64 = 273, + CUBLASLT_MATMUL_TILE_464x64 = 274, + CUBLASLT_MATMUL_TILE_472x64 = 275, + CUBLASLT_MATMUL_TILE_480x64 = 276, + CUBLASLT_MATMUL_TILE_488x64 = 277, + 
CUBLASLT_MATMUL_TILE_496x64 = 278, + CUBLASLT_MATMUL_TILE_504x64 = 279, + CUBLASLT_MATMUL_TILE_520x64 = 280, + CUBLASLT_MATMUL_TILE_528x64 = 281, + CUBLASLT_MATMUL_TILE_536x64 = 282, + CUBLASLT_MATMUL_TILE_544x64 = 283, + CUBLASLT_MATMUL_TILE_552x64 = 284, + CUBLASLT_MATMUL_TILE_560x64 = 285, + CUBLASLT_MATMUL_TILE_568x64 = 286, + CUBLASLT_MATMUL_TILE_576x64 = 287, + CUBLASLT_MATMUL_TILE_584x64 = 288, + CUBLASLT_MATMUL_TILE_592x64 = 289, + CUBLASLT_MATMUL_TILE_600x64 = 290, + CUBLASLT_MATMUL_TILE_608x64 = 291, + CUBLASLT_MATMUL_TILE_616x64 = 292, + CUBLASLT_MATMUL_TILE_624x64 = 293, + CUBLASLT_MATMUL_TILE_632x64 = 294, + CUBLASLT_MATMUL_TILE_640x64 = 295, + CUBLASLT_MATMUL_TILE_648x64 = 296, + CUBLASLT_MATMUL_TILE_656x64 = 297, + CUBLASLT_MATMUL_TILE_664x64 = 298, + CUBLASLT_MATMUL_TILE_672x64 = 299, + CUBLASLT_MATMUL_TILE_680x64 = 300, + CUBLASLT_MATMUL_TILE_688x64 = 301, + CUBLASLT_MATMUL_TILE_696x64 = 302, + CUBLASLT_MATMUL_TILE_704x64 = 303, + CUBLASLT_MATMUL_TILE_712x64 = 304, + CUBLASLT_MATMUL_TILE_720x64 = 305, + CUBLASLT_MATMUL_TILE_728x64 = 306, + CUBLASLT_MATMUL_TILE_736x64 = 307, + CUBLASLT_MATMUL_TILE_744x64 = 308, + CUBLASLT_MATMUL_TILE_752x64 = 309, + CUBLASLT_MATMUL_TILE_760x64 = 310, + CUBLASLT_MATMUL_TILE_768x64 = 311, + CUBLASLT_MATMUL_TILE_64x16 = 312, + CUBLASLT_MATMUL_TILE_64x24 = 313, + CUBLASLT_MATMUL_TILE_64x40 = 314, + CUBLASLT_MATMUL_TILE_64x48 = 315, + CUBLASLT_MATMUL_TILE_64x56 = 316, + CUBLASLT_MATMUL_TILE_64x72 = 317, + CUBLASLT_MATMUL_TILE_64x80 = 318, + CUBLASLT_MATMUL_TILE_64x88 = 319, + CUBLASLT_MATMUL_TILE_64x104 = 320, + CUBLASLT_MATMUL_TILE_64x112 = 321, + CUBLASLT_MATMUL_TILE_64x120 = 322, + CUBLASLT_MATMUL_TILE_64x136 = 323, + CUBLASLT_MATMUL_TILE_64x144 = 324, + CUBLASLT_MATMUL_TILE_64x152 = 325, + CUBLASLT_MATMUL_TILE_64x160 = 326, + CUBLASLT_MATMUL_TILE_64x168 = 327, + CUBLASLT_MATMUL_TILE_64x176 = 328, + CUBLASLT_MATMUL_TILE_64x184 = 329, + CUBLASLT_MATMUL_TILE_64x200 = 330, + CUBLASLT_MATMUL_TILE_64x208 = 331, + 
CUBLASLT_MATMUL_TILE_64x216 = 332, + CUBLASLT_MATMUL_TILE_64x224 = 333, + CUBLASLT_MATMUL_TILE_64x232 = 334, + CUBLASLT_MATMUL_TILE_64x240 = 335, + CUBLASLT_MATMUL_TILE_64x248 = 336, + CUBLASLT_MATMUL_TILE_64x264 = 337, + CUBLASLT_MATMUL_TILE_64x272 = 338, + CUBLASLT_MATMUL_TILE_64x280 = 339, + CUBLASLT_MATMUL_TILE_64x288 = 340, + CUBLASLT_MATMUL_TILE_64x296 = 341, + CUBLASLT_MATMUL_TILE_64x304 = 342, + CUBLASLT_MATMUL_TILE_64x312 = 343, + CUBLASLT_MATMUL_TILE_64x328 = 344, + CUBLASLT_MATMUL_TILE_64x336 = 345, + CUBLASLT_MATMUL_TILE_64x344 = 346, + CUBLASLT_MATMUL_TILE_64x352 = 347, + CUBLASLT_MATMUL_TILE_64x360 = 348, + CUBLASLT_MATMUL_TILE_64x368 = 349, + CUBLASLT_MATMUL_TILE_64x376 = 350, + CUBLASLT_MATMUL_TILE_64x392 = 351, + CUBLASLT_MATMUL_TILE_64x400 = 352, + CUBLASLT_MATMUL_TILE_64x408 = 353, + CUBLASLT_MATMUL_TILE_64x416 = 354, + CUBLASLT_MATMUL_TILE_64x424 = 355, + CUBLASLT_MATMUL_TILE_64x432 = 356, + CUBLASLT_MATMUL_TILE_64x440 = 357, + CUBLASLT_MATMUL_TILE_64x456 = 358, + CUBLASLT_MATMUL_TILE_64x464 = 359, + CUBLASLT_MATMUL_TILE_64x472 = 360, + CUBLASLT_MATMUL_TILE_64x480 = 361, + CUBLASLT_MATMUL_TILE_64x488 = 362, + CUBLASLT_MATMUL_TILE_64x496 = 363, + CUBLASLT_MATMUL_TILE_64x504 = 364, + CUBLASLT_MATMUL_TILE_64x520 = 365, + CUBLASLT_MATMUL_TILE_64x528 = 366, + CUBLASLT_MATMUL_TILE_64x536 = 367, + CUBLASLT_MATMUL_TILE_64x544 = 368, + CUBLASLT_MATMUL_TILE_64x552 = 369, + CUBLASLT_MATMUL_TILE_64x560 = 370, + CUBLASLT_MATMUL_TILE_64x568 = 371, + CUBLASLT_MATMUL_TILE_64x584 = 372, + CUBLASLT_MATMUL_TILE_64x592 = 373, + CUBLASLT_MATMUL_TILE_64x600 = 374, + CUBLASLT_MATMUL_TILE_64x608 = 375, + CUBLASLT_MATMUL_TILE_64x616 = 376, + CUBLASLT_MATMUL_TILE_64x624 = 377, + CUBLASLT_MATMUL_TILE_64x632 = 378, + CUBLASLT_MATMUL_TILE_64x648 = 379, + CUBLASLT_MATMUL_TILE_64x656 = 380, + CUBLASLT_MATMUL_TILE_64x664 = 381, + CUBLASLT_MATMUL_TILE_64x672 = 382, + CUBLASLT_MATMUL_TILE_64x680 = 383, + CUBLASLT_MATMUL_TILE_64x688 = 384, + CUBLASLT_MATMUL_TILE_64x696 = 385, + 
CUBLASLT_MATMUL_TILE_64x712 = 386, + CUBLASLT_MATMUL_TILE_64x720 = 387, + CUBLASLT_MATMUL_TILE_64x728 = 388, + CUBLASLT_MATMUL_TILE_64x736 = 389, + CUBLASLT_MATMUL_TILE_64x744 = 390, + CUBLASLT_MATMUL_TILE_64x752 = 391, + CUBLASLT_MATMUL_TILE_64x760 = 392, + CUBLASLT_MATMUL_TILE_128x8 = 393, + CUBLASLT_MATMUL_TILE_128x16 = 394, + CUBLASLT_MATMUL_TILE_128x24 = 395, + CUBLASLT_MATMUL_TILE_128x40 = 396, + CUBLASLT_MATMUL_TILE_128x48 = 397, + CUBLASLT_MATMUL_TILE_128x56 = 398, + CUBLASLT_MATMUL_TILE_128x72 = 399, + CUBLASLT_MATMUL_TILE_128x80 = 400, + CUBLASLT_MATMUL_TILE_128x88 = 401, + CUBLASLT_MATMUL_TILE_128x104 = 402, + CUBLASLT_MATMUL_TILE_128x112 = 403, + CUBLASLT_MATMUL_TILE_128x120 = 404, + CUBLASLT_MATMUL_TILE_128x136 = 405, + CUBLASLT_MATMUL_TILE_128x144 = 406, + CUBLASLT_MATMUL_TILE_128x152 = 407, + CUBLASLT_MATMUL_TILE_128x168 = 408, + CUBLASLT_MATMUL_TILE_128x176 = 409, + CUBLASLT_MATMUL_TILE_128x184 = 410, + CUBLASLT_MATMUL_TILE_128x200 = 411, + CUBLASLT_MATMUL_TILE_128x208 = 412, + CUBLASLT_MATMUL_TILE_128x216 = 413, + CUBLASLT_MATMUL_TILE_128x224 = 414, + CUBLASLT_MATMUL_TILE_128x232 = 415, + CUBLASLT_MATMUL_TILE_128x240 = 416, + CUBLASLT_MATMUL_TILE_128x248 = 417, + CUBLASLT_MATMUL_TILE_128x264 = 418, + CUBLASLT_MATMUL_TILE_128x272 = 419, + CUBLASLT_MATMUL_TILE_128x280 = 420, + CUBLASLT_MATMUL_TILE_128x288 = 421, + CUBLASLT_MATMUL_TILE_128x296 = 422, + CUBLASLT_MATMUL_TILE_128x304 = 423, + CUBLASLT_MATMUL_TILE_128x312 = 424, + CUBLASLT_MATMUL_TILE_128x328 = 425, + CUBLASLT_MATMUL_TILE_128x336 = 426, + CUBLASLT_MATMUL_TILE_128x344 = 427, + CUBLASLT_MATMUL_TILE_128x352 = 428, + CUBLASLT_MATMUL_TILE_128x360 = 429, + CUBLASLT_MATMUL_TILE_128x368 = 430, + CUBLASLT_MATMUL_TILE_128x376 = 431, + CUBLASLT_MATMUL_TILE_128x392 = 432, + CUBLASLT_MATMUL_TILE_128x400 = 433, + CUBLASLT_MATMUL_TILE_128x408 = 434, + CUBLASLT_MATMUL_TILE_128x416 = 435, + CUBLASLT_MATMUL_TILE_128x424 = 436, + CUBLASLT_MATMUL_TILE_128x432 = 437, + CUBLASLT_MATMUL_TILE_128x440 = 438, + 
CUBLASLT_MATMUL_TILE_128x448 = 439, + CUBLASLT_MATMUL_TILE_128x456 = 440, + CUBLASLT_MATMUL_TILE_128x464 = 441, + CUBLASLT_MATMUL_TILE_128x472 = 442, + CUBLASLT_MATMUL_TILE_128x480 = 443, + CUBLASLT_MATMUL_TILE_128x488 = 444, + CUBLASLT_MATMUL_TILE_128x496 = 445, + CUBLASLT_MATMUL_TILE_128x504 = 446, + CUBLASLT_MATMUL_TILE_128x512 = 447, + CUBLASLT_MATMUL_TILE_192x8 = 448, + CUBLASLT_MATMUL_TILE_192x16 = 449, + CUBLASLT_MATMUL_TILE_192x24 = 450, + CUBLASLT_MATMUL_TILE_192x32 = 451, + CUBLASLT_MATMUL_TILE_192x40 = 452, + CUBLASLT_MATMUL_TILE_192x48 = 453, + CUBLASLT_MATMUL_TILE_192x56 = 454, + CUBLASLT_MATMUL_TILE_192x72 = 455, + CUBLASLT_MATMUL_TILE_192x80 = 456, + CUBLASLT_MATMUL_TILE_192x88 = 457, + CUBLASLT_MATMUL_TILE_192x96 = 458, + CUBLASLT_MATMUL_TILE_192x104 = 459, + CUBLASLT_MATMUL_TILE_192x112 = 460, + CUBLASLT_MATMUL_TILE_192x120 = 461, + CUBLASLT_MATMUL_TILE_192x136 = 462, + CUBLASLT_MATMUL_TILE_192x144 = 463, + CUBLASLT_MATMUL_TILE_192x152 = 464, + CUBLASLT_MATMUL_TILE_192x160 = 465, + CUBLASLT_MATMUL_TILE_192x168 = 466, + CUBLASLT_MATMUL_TILE_192x176 = 467, + CUBLASLT_MATMUL_TILE_192x184 = 468, + CUBLASLT_MATMUL_TILE_192x200 = 469, + CUBLASLT_MATMUL_TILE_192x208 = 470, + CUBLASLT_MATMUL_TILE_192x216 = 471, + CUBLASLT_MATMUL_TILE_192x224 = 472, + CUBLASLT_MATMUL_TILE_192x232 = 473, + CUBLASLT_MATMUL_TILE_192x240 = 474, + CUBLASLT_MATMUL_TILE_192x248 = 475, + CUBLASLT_MATMUL_TILE_192x264 = 476, + CUBLASLT_MATMUL_TILE_192x272 = 477, + CUBLASLT_MATMUL_TILE_192x280 = 478, + CUBLASLT_MATMUL_TILE_192x288 = 479, + CUBLASLT_MATMUL_TILE_192x296 = 480, + CUBLASLT_MATMUL_TILE_192x304 = 481, + CUBLASLT_MATMUL_TILE_192x312 = 482, + CUBLASLT_MATMUL_TILE_192x320 = 483, + CUBLASLT_MATMUL_TILE_192x328 = 484, + CUBLASLT_MATMUL_TILE_192x336 = 485, + CUBLASLT_MATMUL_TILE_256x8 = 486, + CUBLASLT_MATMUL_TILE_256x16 = 487, + CUBLASLT_MATMUL_TILE_256x24 = 488, + CUBLASLT_MATMUL_TILE_256x40 = 489, + CUBLASLT_MATMUL_TILE_256x48 = 490, + CUBLASLT_MATMUL_TILE_256x56 = 491, + 
CUBLASLT_MATMUL_TILE_256x72 = 492, + CUBLASLT_MATMUL_TILE_256x80 = 493, + CUBLASLT_MATMUL_TILE_256x88 = 494, + CUBLASLT_MATMUL_TILE_256x96 = 495, + CUBLASLT_MATMUL_TILE_256x104 = 496, + CUBLASLT_MATMUL_TILE_256x112 = 497, + CUBLASLT_MATMUL_TILE_256x120 = 498, + CUBLASLT_MATMUL_TILE_256x136 = 499, + CUBLASLT_MATMUL_TILE_256x144 = 500, + CUBLASLT_MATMUL_TILE_256x152 = 501, + CUBLASLT_MATMUL_TILE_256x160 = 502, + CUBLASLT_MATMUL_TILE_256x168 = 503, + CUBLASLT_MATMUL_TILE_256x176 = 504, + CUBLASLT_MATMUL_TILE_256x184 = 505, + CUBLASLT_MATMUL_TILE_256x200 = 506, + CUBLASLT_MATMUL_TILE_256x208 = 507, + CUBLASLT_MATMUL_TILE_256x216 = 508, + CUBLASLT_MATMUL_TILE_256x224 = 509, + CUBLASLT_MATMUL_TILE_256x232 = 510, + CUBLASLT_MATMUL_TILE_256x240 = 511, + CUBLASLT_MATMUL_TILE_256x248 = 512, + CUBLASLT_MATMUL_TILE_256x256 = 513, + CUBLASLT_MATMUL_TILE_320x8 = 514, + CUBLASLT_MATMUL_TILE_320x16 = 515, + CUBLASLT_MATMUL_TILE_320x24 = 516, + CUBLASLT_MATMUL_TILE_320x32 = 517, + CUBLASLT_MATMUL_TILE_320x40 = 518, + CUBLASLT_MATMUL_TILE_320x48 = 519, + CUBLASLT_MATMUL_TILE_320x56 = 520, + CUBLASLT_MATMUL_TILE_320x72 = 521, + CUBLASLT_MATMUL_TILE_320x80 = 522, + CUBLASLT_MATMUL_TILE_320x88 = 523, + CUBLASLT_MATMUL_TILE_320x96 = 524, + CUBLASLT_MATMUL_TILE_320x104 = 525, + CUBLASLT_MATMUL_TILE_320x112 = 526, + CUBLASLT_MATMUL_TILE_320x120 = 527, + CUBLASLT_MATMUL_TILE_320x136 = 528, + CUBLASLT_MATMUL_TILE_320x144 = 529, + CUBLASLT_MATMUL_TILE_320x152 = 530, + CUBLASLT_MATMUL_TILE_320x160 = 531, + CUBLASLT_MATMUL_TILE_320x168 = 532, + CUBLASLT_MATMUL_TILE_320x176 = 533, + CUBLASLT_MATMUL_TILE_320x184 = 534, + CUBLASLT_MATMUL_TILE_320x192 = 535, + CUBLASLT_MATMUL_TILE_320x200 = 536, + CUBLASLT_MATMUL_TILE_384x8 = 537, + CUBLASLT_MATMUL_TILE_384x16 = 538, + CUBLASLT_MATMUL_TILE_384x24 = 539, + CUBLASLT_MATMUL_TILE_384x32 = 540, + CUBLASLT_MATMUL_TILE_384x40 = 541, + CUBLASLT_MATMUL_TILE_384x48 = 542, + CUBLASLT_MATMUL_TILE_384x56 = 543, + CUBLASLT_MATMUL_TILE_384x72 = 544, + 
CUBLASLT_MATMUL_TILE_384x80 = 545, + CUBLASLT_MATMUL_TILE_384x88 = 546, + CUBLASLT_MATMUL_TILE_384x96 = 547, + CUBLASLT_MATMUL_TILE_384x104 = 548, + CUBLASLT_MATMUL_TILE_384x112 = 549, + CUBLASLT_MATMUL_TILE_384x120 = 550, + CUBLASLT_MATMUL_TILE_384x136 = 551, + CUBLASLT_MATMUL_TILE_384x144 = 552, + CUBLASLT_MATMUL_TILE_384x152 = 553, + CUBLASLT_MATMUL_TILE_384x160 = 554, + CUBLASLT_MATMUL_TILE_384x168 = 555, + CUBLASLT_MATMUL_TILE_448x8 = 556, + CUBLASLT_MATMUL_TILE_448x16 = 557, + CUBLASLT_MATMUL_TILE_448x24 = 558, + CUBLASLT_MATMUL_TILE_448x32 = 559, + CUBLASLT_MATMUL_TILE_448x40 = 560, + CUBLASLT_MATMUL_TILE_448x48 = 561, + CUBLASLT_MATMUL_TILE_448x56 = 562, + CUBLASLT_MATMUL_TILE_448x72 = 563, + CUBLASLT_MATMUL_TILE_448x80 = 564, + CUBLASLT_MATMUL_TILE_448x88 = 565, + CUBLASLT_MATMUL_TILE_448x96 = 566, + CUBLASLT_MATMUL_TILE_448x104 = 567, + CUBLASLT_MATMUL_TILE_448x112 = 568, + CUBLASLT_MATMUL_TILE_448x120 = 569, + CUBLASLT_MATMUL_TILE_448x128 = 570, + CUBLASLT_MATMUL_TILE_448x136 = 571, + CUBLASLT_MATMUL_TILE_448x144 = 572, + CUBLASLT_MATMUL_TILE_512x8 = 573, + CUBLASLT_MATMUL_TILE_512x16 = 574, + CUBLASLT_MATMUL_TILE_512x24 = 575, + CUBLASLT_MATMUL_TILE_512x32 = 576, + CUBLASLT_MATMUL_TILE_512x40 = 577, + CUBLASLT_MATMUL_TILE_512x48 = 578, + CUBLASLT_MATMUL_TILE_512x56 = 579, + CUBLASLT_MATMUL_TILE_512x72 = 580, + CUBLASLT_MATMUL_TILE_512x80 = 581, + CUBLASLT_MATMUL_TILE_512x88 = 582, + CUBLASLT_MATMUL_TILE_512x96 = 583, + CUBLASLT_MATMUL_TILE_512x104 = 584, + CUBLASLT_MATMUL_TILE_512x112 = 585, + CUBLASLT_MATMUL_TILE_512x120 = 586, + CUBLASLT_MATMUL_TILE_512x128 = 587, + CUBLASLT_MATMUL_TILE_576x8 = 588, + CUBLASLT_MATMUL_TILE_576x16 = 589, + CUBLASLT_MATMUL_TILE_576x24 = 590, + CUBLASLT_MATMUL_TILE_576x32 = 591, + CUBLASLT_MATMUL_TILE_576x40 = 592, + CUBLASLT_MATMUL_TILE_576x48 = 593, + CUBLASLT_MATMUL_TILE_576x56 = 594, + CUBLASLT_MATMUL_TILE_576x72 = 595, + CUBLASLT_MATMUL_TILE_576x80 = 596, + CUBLASLT_MATMUL_TILE_576x88 = 597, + 
CUBLASLT_MATMUL_TILE_576x96 = 598, + CUBLASLT_MATMUL_TILE_576x104 = 599, + CUBLASLT_MATMUL_TILE_576x112 = 600, + CUBLASLT_MATMUL_TILE_640x8 = 601, + CUBLASLT_MATMUL_TILE_640x16 = 602, + CUBLASLT_MATMUL_TILE_640x24 = 603, + CUBLASLT_MATMUL_TILE_640x32 = 604, + CUBLASLT_MATMUL_TILE_640x40 = 605, + CUBLASLT_MATMUL_TILE_640x48 = 606, + CUBLASLT_MATMUL_TILE_640x56 = 607, + CUBLASLT_MATMUL_TILE_640x72 = 608, + CUBLASLT_MATMUL_TILE_640x80 = 609, + CUBLASLT_MATMUL_TILE_640x88 = 610, + CUBLASLT_MATMUL_TILE_640x96 = 611, + CUBLASLT_MATMUL_TILE_704x8 = 612, + CUBLASLT_MATMUL_TILE_704x16 = 613, + CUBLASLT_MATMUL_TILE_704x24 = 614, + CUBLASLT_MATMUL_TILE_704x32 = 615, + CUBLASLT_MATMUL_TILE_704x40 = 616, + CUBLASLT_MATMUL_TILE_704x48 = 617, + CUBLASLT_MATMUL_TILE_704x56 = 618, + CUBLASLT_MATMUL_TILE_704x72 = 619, + CUBLASLT_MATMUL_TILE_704x80 = 620, + CUBLASLT_MATMUL_TILE_704x88 = 621, + CUBLASLT_MATMUL_TILE_768x8 = 622, + CUBLASLT_MATMUL_TILE_768x16 = 623, + CUBLASLT_MATMUL_TILE_768x24 = 624, + CUBLASLT_MATMUL_TILE_768x32 = 625, + CUBLASLT_MATMUL_TILE_768x40 = 626, + CUBLASLT_MATMUL_TILE_768x48 = 627, + CUBLASLT_MATMUL_TILE_768x56 = 628, + CUBLASLT_MATMUL_TILE_768x72 = 629, + CUBLASLT_MATMUL_TILE_768x80 = 630, + CUBLASLT_MATMUL_TILE_END = 631; /** Size and number of stages in which elements are read into shared memory * @@ -13763,7 +14358,19 @@ public static native void cublasZtrmm(@Cast("char") byte side, CUBLASLT_SEARCH_RESERVED_04 = 4, /** reserved for future use */ - CUBLASLT_SEARCH_RESERVED_05 = 5; + CUBLASLT_SEARCH_RESERVED_05 = 5, + /** reserved for future use + */ + CUBLASLT_SEARCH_RESERVED_06 = 6, + /** reserved for future use + */ + CUBLASLT_SEARCH_RESERVED_07 = 7, + /** reserved for future use + */ + CUBLASLT_SEARCH_RESERVED_08 = 8, + /** reserved for future use + */ + CUBLASLT_SEARCH_RESERVED_09 = 9; /** Algo search preference to fine tune the heuristic function. 
*/ /** enum cublasLtMatmulPreferenceAttributes_t */ @@ -14431,7 +15038,7 @@ public static native void cublasZtrmm(@Cast("char") byte side, // #include "driver_types.h" // #include "cuComplex.h" /* import complex data type */ -// #include "cublas_api.h" +// #include "cublas_v2.h" // #if defined(__cplusplus) // Targeting ../cublas/cublasXtContext.java diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cudnn.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cudnn.java index cca6078361..9ef4c848b7 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/global/cudnn.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cudnn.java @@ -146,8 +146,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn { // #define CUDNN_VERSION_H_ public static final int CUDNN_MAJOR = 9; -public static final int CUDNN_MINOR = 3; -public static final int CUDNN_PATCHLEVEL = 0; +public static final int CUDNN_MINOR = 5; +public static final int CUDNN_PATCHLEVEL = 1; public static final int CUDNN_VERSION = (CUDNN_MAJOR * 10000 + CUDNN_MINOR * 100 + CUDNN_PATCHLEVEL); @@ -228,8 +228,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn { /* These version numbers are autogenerated, do not edit manually. */ public static final int CUDNN_GRAPH_MAJOR = 9; -public static final int CUDNN_GRAPH_MINOR = 3; -public static final int CUDNN_GRAPH_PATCH = 0; +public static final int CUDNN_GRAPH_MINOR = 5; +public static final int CUDNN_GRAPH_PATCH = 1; // #if (CUDNN_GRAPH_MAJOR != CUDNN_MAJOR) || (CUDNN_GRAPH_MINOR != CUDNN_MINOR) || (CUDNN_GRAPH_PATCH != CUDNN_PATCHLEVEL) // #error Version mismatch in cuDNN GRAPH!!! 
@@ -301,6 +301,7 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn { CUDNN_STATUS_BAD_PARAM_SHAPE_MISMATCH = 2008, CUDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES = 2009, CUDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE = 2010, + CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH = 2011, CUDNN_STATUS_NOT_SUPPORTED = 3000, CUDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN = 3001, @@ -315,6 +316,7 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn { CUDNN_STATUS_NOT_SUPPORTED_SHARED_MEMORY_INSUFFICIENT = 3010, CUDNN_STATUS_NOT_SUPPORTED_PADDING = 3011, CUDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM = 3012, + CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API = 3013, CUDNN_STATUS_INTERNAL_ERROR = 4000, CUDNN_STATUS_INTERNAL_ERROR_COMPILATION_FAILED = 4001, @@ -639,6 +641,7 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn { CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS = 403, CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS = 404, CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION = 405, + CUDNN_ATTR_EXECUTION_PLAN_KERNEL_CACHE = 406, CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID = 500, CUDNN_ATTR_INTERMEDIATE_INFO_SIZE = 501, @@ -704,6 +707,7 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn { CUDNN_ATTR_OPERATIONGRAPH_HANDLE = 800, CUDNN_ATTR_OPERATIONGRAPH_OPS = 801, CUDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT = 802, + CUDNN_ATTR_OPERATIONGRAPH_IS_DYNAMIC_SHAPE_ENABLED = 803, CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT = 900, CUDNN_ATTR_TENSOR_DATA_TYPE = 901, @@ -806,6 +810,11 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn { CUDNN_ATTR_OPERATION_SIGNAL_XDESC = 1903, CUDNN_ATTR_OPERATION_SIGNAL_YDESC = 1904, + CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_CONTAINER_DESC = 1950, + CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_YDESC = 1951, + CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_SEQUENCE_DESC = 1952, + CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_PAGE_TABLE_DESC = 1953, + CUDNN_ATTR_OPERATION_NORM_FWD_MODE = 2000, CUDNN_ATTR_OPERATION_NORM_FWD_PHASE = 2001, 
CUDNN_ATTR_OPERATION_NORM_FWD_XDESC = 2002, @@ -847,7 +856,10 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn { CUDNN_ATTR_OPERATION_RNG_YDESC = 2310, CUDNN_ATTR_OPERATION_RNG_SEED = 2311, CUDNN_ATTR_OPERATION_RNG_DESC = 2312, - CUDNN_ATTR_OPERATION_RNG_OFFSET_DESC = 2313; + CUDNN_ATTR_OPERATION_RNG_OFFSET_DESC = 2313, + + CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH = 2400, + CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED = 2401; /** enum cudnnBackendAttributeType_t */ public static final int @@ -917,7 +929,9 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn { CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR = 30, CUDNN_BACKEND_OPERATION_RESHAPE_DESCRIPTOR = 31, CUDNN_BACKEND_RNG_DESCRIPTOR = 32, - CUDNN_BACKEND_OPERATION_RNG_DESCRIPTOR = 33; + CUDNN_BACKEND_OPERATION_RNG_DESCRIPTOR = 33, + CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR = 34, + CUDNN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR = 35; /** enum cudnnBackendNumericalNote_t */ public static final int @@ -938,7 +952,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn { CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION = 0, CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER = 1, CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER = 2, - CUDNN_BEHAVIOR_NOTE_TYPE_COUNT = 3; + CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API = 3, + CUDNN_BEHAVIOR_NOTE_TYPE_COUNT = 4; /** enum cudnnBackendKnobType_t */ public static final int @@ -1057,6 +1072,16 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn { public static native @Cast("cudnnStatus_t") int cudnnBackendExecute(cudnnContext handle, cudnnBackendDescriptor_t executionPlan, cudnnBackendDescriptor_t variantPack); +public static native @Cast("cudnnStatus_t") int cudnnBackendPopulateCudaGraph(cudnnContext handle, + cudnnBackendDescriptor_t executionPlan, + cudnnBackendDescriptor_t variantPack, + CUgraph_st graph); + +public static native @Cast("cudnnStatus_t") int cudnnBackendUpdateCudaGraph(cudnnContext handle, + 
cudnnBackendDescriptor_t executionPlan, + cudnnBackendDescriptor_t variantPack, + CUgraph_st graph); + // #if defined(__cplusplus) // #endif @@ -1128,8 +1153,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn { /* These version numbers are autogenerated, do not edit manually. */ public static final int CUDNN_OPS_MAJOR = 9; -public static final int CUDNN_OPS_MINOR = 3; -public static final int CUDNN_OPS_PATCH = 0; +public static final int CUDNN_OPS_MINOR = 5; +public static final int CUDNN_OPS_PATCH = 1; // #if (CUDNN_OPS_MAJOR != CUDNN_MAJOR) || (CUDNN_OPS_MINOR != CUDNN_MINOR) || (CUDNN_OPS_PATCH != CUDNN_PATCHLEVEL) // #error Version mismatch in cuDNN OPS INFER!!! @@ -2303,7 +2328,7 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn { @Cast("size_t") long reserveSpaceSizeInBytes); /* Performs backward pass of Batch Normalization layer. Returns x gradient, -* bnScale gradient and bnBias gradient */ + * bnScale gradient and bnBias gradient */ public static native @Cast("cudnnStatus_t") @Deprecated int cudnnBatchNormalizationBackward(cudnnContext handle, @Cast("cudnnBatchNormMode_t") int mode, @Const Pointer alphaDataDiff, @@ -2557,8 +2582,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn { /* These version numbers are autogenerated, do not edit manually. */ public static final int CUDNN_ADV_MAJOR = 9; -public static final int CUDNN_ADV_MINOR = 3; -public static final int CUDNN_ADV_PATCH = 0; +public static final int CUDNN_ADV_MINOR = 5; +public static final int CUDNN_ADV_PATCH = 1; // #if (CUDNN_ADV_MAJOR != CUDNN_MAJOR) || (CUDNN_ADV_MINOR != CUDNN_MINOR) || (CUDNN_ADV_PATCH != CUDNN_PATCHLEVEL) // #error Version mismatch in cuDNN ADV INFER!!! 
@@ -3374,8 +3399,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn { Pointer reserveSpace); /* -* CTC (Connectionist Temporal Classification) loss descriptor create/destory/set/get functions -*/ + * CTC (Connectionist Temporal Classification) loss descriptor create/destory/set/get functions + */ /* Input normalization mode for loss function */ /** enum cudnnLossNormalizationMode_t */ public static final int @@ -3655,8 +3680,8 @@ public class cudnn extends org.bytedeco.cuda.presets.cudnn { /* These version numbers are autogenerated, do not edit manually. */ public static final int CUDNN_CNN_MAJOR = 9; -public static final int CUDNN_CNN_MINOR = 3; -public static final int CUDNN_CNN_PATCH = 0; +public static final int CUDNN_CNN_MINOR = 5; +public static final int CUDNN_CNN_PATCH = 1; // #if (CUDNN_CNN_MAJOR != CUDNN_MAJOR) || (CUDNN_CNN_MINOR != CUDNN_MINOR) || (CUDNN_CNN_PATCH != CUDNN_PATCHLEVEL) // #error Version mismatch in cuDNN CNN INFER!!! diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cufft.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cufft.java index 61b09ebbe2..14c0058238 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/global/cufft.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cufft.java @@ -90,11 +90,11 @@ public class cufft extends org.bytedeco.cuda.presets.cufft { // #endif public static final int CUFFT_VER_MAJOR = 11; -public static final int CUFFT_VER_MINOR = 2; -public static final int CUFFT_VER_PATCH = 6; -public static final int CUFFT_VER_BUILD = 28; +public static final int CUFFT_VER_MINOR = 3; +public static final int CUFFT_VER_PATCH = 0; +public static final int CUFFT_VER_BUILD = 4; -public static final int CUFFT_VERSION = 11206; +public static final int CUFFT_VERSION = 11300; // CUFFT API function return values /** enum cufftResult */ diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cupti.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cupti.java index 5e744d4351..8bea75dc1d 100644 --- 
a/cuda/src/gen/java/org/bytedeco/cuda/global/cupti.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cupti.java @@ -777,8 +777,9 @@ public class cupti extends org.bytedeco.cuda.presets.cupti { CUPTI_ACTIVITY_KIND_MARKER = 12, /** - * Extended, optional, data about a marker. The corresponding - * activity record structure is \ref CUpti_ActivityMarkerData. + * Extended, optional, data about a marker. User must enable + * CUPTI_ACTIVITY_KIND_MARKER as well to get records for marker data. + * The corresponding activity record structure is \ref CUpti_ActivityMarkerData. */ CUPTI_ACTIVITY_KIND_MARKER_DATA = 13, @@ -4105,7 +4106,7 @@ public class cupti extends org.bytedeco.cuda.presets.cupti { /** * Domain containing callback points for various states. */ - CUPTI_CB_DOMAIN_STATE = 6, + CUPTI_CB_DOMAIN_STATE = 6, CUPTI_CB_DOMAIN_SIZE = 7, @@ -4295,6 +4296,7 @@ public class cupti extends org.bytedeco.cuda.presets.cupti { // Targeting ../cupti/CUpti_StateData.java + /** * \brief An ID for a driver API, runtime API, resource or * synchronization callback. 
diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/curand.java b/cuda/src/gen/java/org/bytedeco/cuda/global/curand.java index 93bccbcccf..7d16bdb85a 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/global/curand.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/global/curand.java @@ -92,7 +92,7 @@ public class curand extends org.bytedeco.cuda.presets.curand { public static final int CURAND_VER_MAJOR = 10; public static final int CURAND_VER_MINOR = 3; public static final int CURAND_VER_PATCH = 7; -public static final int CURAND_VER_BUILD = 37; +public static final int CURAND_VER_BUILD = 77; public static final int CURAND_VERSION = (CURAND_VER_MAJOR * 1000 + CURAND_VER_MINOR * 100 + CURAND_VER_PATCH); diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cusolver.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cusolver.java index d23eac2aea..a6b20b65e1 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/global/cusolver.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cusolver.java @@ -89,9 +89,9 @@ public class cusolver extends org.bytedeco.cuda.presets.cusolver { // #endif public static final int CUSOLVER_VER_MAJOR = 11; - public static final int CUSOLVER_VER_MINOR = 6; - public static final int CUSOLVER_VER_PATCH = 4; - public static final int CUSOLVER_VER_BUILD = 38; + public static final int CUSOLVER_VER_MINOR = 7; + public static final int CUSOLVER_VER_PATCH = 1; + public static final int CUSOLVER_VER_BUILD = 2; public static final int CUSOLVER_VERSION = (CUSOLVER_VER_MAJOR * 1000 + CUSOLVER_VER_MINOR * 100 + CUSOLVER_VER_PATCH); @@ -370,7 +370,7 @@ public class cusolver extends org.bytedeco.cuda.presets.cusolver { // #include // #include "cuComplex.h" /* import complex data type */ -// #include "cublas_api.h" +// #include "cublas_v2.h" // #include "cusolver_common.h" /*******************************************************************************/ @@ -12217,6 +12217,78 @@ public class cusolver extends org.bytedeco.cuda.presets.cusolver { @Cast("size_t") 
long workspaceInBytesOnHost, int[] info); + /* 64-bit API for batched SYEVD */ + public static native @Cast("cusolverStatus_t") int cusolverDnXsyevBatched_bufferSize( + cusolverDnContext handle, + cusolverDnParams params, + @Cast("cusolverEigMode_t") int jobz, + @Cast("cublasFillMode_t") int uplo, + @Cast("int64_t") long n, + @Cast("cudaDataType") int dataTypeA, + @Const Pointer A, + @Cast("int64_t") long lda, + @Cast("cudaDataType") int dataTypeW, + @Const Pointer W, + @Cast("cudaDataType") int computeType, + @Cast("size_t*") SizeTPointer workspaceInBytesOnDevice, + @Cast("size_t*") SizeTPointer workspaceInBytesOnHost, + @Cast("int64_t") long batchSize); + + public static native @Cast("cusolverStatus_t") int cusolverDnXsyevBatched( + cusolverDnContext handle, + cusolverDnParams params, + @Cast("cusolverEigMode_t") int jobz, + @Cast("cublasFillMode_t") int uplo, + @Cast("int64_t") long n, + @Cast("cudaDataType") int dataTypeA, + Pointer A, + @Cast("int64_t") long lda, + @Cast("cudaDataType") int dataTypeW, + Pointer W, + @Cast("cudaDataType") int computeType, + Pointer bufferOnDevice, + @Cast("size_t") long workspaceInBytesOnDevice, + Pointer bufferOnHost, + @Cast("size_t") long workspaceInBytesOnHost, + IntPointer info, + @Cast("int64_t") long batchSize); + public static native @Cast("cusolverStatus_t") int cusolverDnXsyevBatched( + cusolverDnContext handle, + cusolverDnParams params, + @Cast("cusolverEigMode_t") int jobz, + @Cast("cublasFillMode_t") int uplo, + @Cast("int64_t") long n, + @Cast("cudaDataType") int dataTypeA, + Pointer A, + @Cast("int64_t") long lda, + @Cast("cudaDataType") int dataTypeW, + Pointer W, + @Cast("cudaDataType") int computeType, + Pointer bufferOnDevice, + @Cast("size_t") long workspaceInBytesOnDevice, + Pointer bufferOnHost, + @Cast("size_t") long workspaceInBytesOnHost, + IntBuffer info, + @Cast("int64_t") long batchSize); + public static native @Cast("cusolverStatus_t") int cusolverDnXsyevBatched( + cusolverDnContext handle, + 
cusolverDnParams params, + @Cast("cusolverEigMode_t") int jobz, + @Cast("cublasFillMode_t") int uplo, + @Cast("int64_t") long n, + @Cast("cudaDataType") int dataTypeA, + Pointer A, + @Cast("int64_t") long lda, + @Cast("cudaDataType") int dataTypeW, + Pointer W, + @Cast("cudaDataType") int computeType, + Pointer bufferOnDevice, + @Cast("size_t") long workspaceInBytesOnDevice, + Pointer bufferOnHost, + @Cast("size_t") long workspaceInBytesOnHost, + int[] info, + @Cast("int64_t") long batchSize); + /* 64-bit API for SYEVDX */ public static native @Cast("cusolverStatus_t") int cusolverDnXsyevdx_bufferSize( cusolverDnContext handle, @@ -12349,6 +12421,98 @@ public class cusolver extends org.bytedeco.cuda.presets.cusolver { @Cast("size_t") long workspaceInBytesOnHost, int[] info); + /* 64-bit API for GEEV */ + public static native @Cast("cusolverStatus_t") int cusolverDnXgeev_bufferSize( + cusolverDnContext handle, + cusolverDnParams params, + @Cast("cusolverEigMode_t") int jobvl, + @Cast("cusolverEigMode_t") int jobvr, + @Cast("int64_t") long n, + @Cast("cudaDataType") int dataTypeA, + @Const Pointer A, + @Cast("int64_t") long lda, + @Cast("cudaDataType") int dataTypeW, + @Const Pointer W, + @Cast("cudaDataType") int dataTypeVL, + @Const Pointer VL, + @Cast("int64_t") long ldvl, + @Cast("cudaDataType") int dataTypeVR, + @Const Pointer VR, + @Cast("int64_t") long ldvr, + @Cast("cudaDataType") int computeType, + @Cast("size_t*") SizeTPointer workspaceInBytesOnDevice, + @Cast("size_t*") SizeTPointer workspaceInBytesOnHost); + + public static native @Cast("cusolverStatus_t") int cusolverDnXgeev( + cusolverDnContext handle, + cusolverDnParams params, + @Cast("cusolverEigMode_t") int jobvl, + @Cast("cusolverEigMode_t") int jobvr, + @Cast("int64_t") long n, + @Cast("cudaDataType") int dataTypeA, + Pointer A, + @Cast("int64_t") long lda, + @Cast("cudaDataType") int dataTypeW, + Pointer W, + @Cast("cudaDataType") int dataTypeVL, + Pointer VL, + @Cast("int64_t") long ldvl, + 
@Cast("cudaDataType") int dataTypeVR, + Pointer VR, + @Cast("int64_t") long ldvr, + @Cast("cudaDataType") int computeType, + Pointer bufferOnDevice, + @Cast("size_t") long workspaceInBytesOnDevice, + Pointer bufferOnHost, + @Cast("size_t") long workspaceInBytesOnHost, + IntPointer info); + public static native @Cast("cusolverStatus_t") int cusolverDnXgeev( + cusolverDnContext handle, + cusolverDnParams params, + @Cast("cusolverEigMode_t") int jobvl, + @Cast("cusolverEigMode_t") int jobvr, + @Cast("int64_t") long n, + @Cast("cudaDataType") int dataTypeA, + Pointer A, + @Cast("int64_t") long lda, + @Cast("cudaDataType") int dataTypeW, + Pointer W, + @Cast("cudaDataType") int dataTypeVL, + Pointer VL, + @Cast("int64_t") long ldvl, + @Cast("cudaDataType") int dataTypeVR, + Pointer VR, + @Cast("int64_t") long ldvr, + @Cast("cudaDataType") int computeType, + Pointer bufferOnDevice, + @Cast("size_t") long workspaceInBytesOnDevice, + Pointer bufferOnHost, + @Cast("size_t") long workspaceInBytesOnHost, + IntBuffer info); + public static native @Cast("cusolverStatus_t") int cusolverDnXgeev( + cusolverDnContext handle, + cusolverDnParams params, + @Cast("cusolverEigMode_t") int jobvl, + @Cast("cusolverEigMode_t") int jobvr, + @Cast("int64_t") long n, + @Cast("cudaDataType") int dataTypeA, + Pointer A, + @Cast("int64_t") long lda, + @Cast("cudaDataType") int dataTypeW, + Pointer W, + @Cast("cudaDataType") int dataTypeVL, + Pointer VL, + @Cast("int64_t") long ldvl, + @Cast("cudaDataType") int dataTypeVR, + Pointer VR, + @Cast("int64_t") long ldvr, + @Cast("cudaDataType") int computeType, + Pointer bufferOnDevice, + @Cast("size_t") long workspaceInBytesOnDevice, + Pointer bufferOnHost, + @Cast("size_t") long workspaceInBytesOnHost, + int[] info); + /* 64-bit API for GESVD */ public static native @Cast("cusolverStatus_t") int cusolverDnXgesvd_bufferSize( cusolverDnContext handle, @@ -14227,7 +14391,7 @@ public class cusolver extends org.bytedeco.cuda.presets.cusolver { // #define 
CUSOLVERSP_H_ // #include "cusparse.h" -// #include "cublas_api.h" +// #include "cublas_v2.h" // #include "cusolver_common.h" // #if defined(__cplusplus) diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cusparse.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cusparse.java index 616e2af6e5..48ab3b11f1 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/global/cusparse.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cusparse.java @@ -84,8 +84,8 @@ public class cusparse extends org.bytedeco.cuda.presets.cusparse { public static final int CUSPARSE_VER_MAJOR = 12; public static final int CUSPARSE_VER_MINOR = 5; -public static final int CUSPARSE_VER_PATCH = 2; -public static final int CUSPARSE_VER_BUILD = 23; +public static final int CUSPARSE_VER_PATCH = 4; +public static final int CUSPARSE_VER_BUILD = 2; public static final int CUSPARSE_VERSION = (CUSPARSE_VER_MAJOR * 1000 + CUSPARSE_VER_MINOR * 100 + CUSPARSE_VER_PATCH); diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/nccl.java b/cuda/src/gen/java/org/bytedeco/cuda/global/nccl.java index fa5d6f366d..734ac977e3 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/global/nccl.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/global/nccl.java @@ -33,11 +33,11 @@ public class nccl extends org.bytedeco.cuda.presets.nccl { // #endif public static final int NCCL_MAJOR = 2; -public static final int NCCL_MINOR = 22; -public static final int NCCL_PATCH = 3; +public static final int NCCL_MINOR = 23; +public static final int NCCL_PATCH = 4; public static final String NCCL_SUFFIX = ""; -public static final int NCCL_VERSION_CODE = 22203; +public static final int NCCL_VERSION_CODE = 22304; // #define NCCL_VERSION(X,Y,Z) (((X) <= 2 && (Y) <= 8) ? 
(X) * 1000 + (Y) * 100 + (Z) : (X) * 10000 + (Y) * 100 + (Z)) // #ifdef __cplusplus @@ -194,6 +194,15 @@ public class nccl extends org.bytedeco.cuda.presets.nccl { public static native @Cast("ncclResult_t") int pncclCommSplit(ncclComm comm, int color, int key, @ByPtrPtr ncclComm newcomm, ncclConfig_t config); public static native @Cast("ncclResult_t") int pncclCommSplit(ncclComm comm, int color, int key, @Cast("ncclComm**") PointerPointer newcomm, ncclConfig_t config); +/* Creates a new communicator (multi thread/process version), similar to ncclCommInitRankConfig. + * Allows to use more than one ncclUniqueId (up to one per rank), indicated by nId, to accelerate the init operation. + * The number of ncclUniqueIds and their order must be the same for every rank. + */ +public static native @Cast("ncclResult_t") int ncclCommInitRankScalable(@ByPtrPtr ncclComm newcomm, int nranks, int myrank, int nId, ncclUniqueId commIds, ncclConfig_t config); +public static native @Cast("ncclResult_t") int ncclCommInitRankScalable(@Cast("ncclComm**") PointerPointer newcomm, int nranks, int myrank, int nId, ncclUniqueId commIds, ncclConfig_t config); +public static native @Cast("ncclResult_t") int pncclCommInitRankScalable(@ByPtrPtr ncclComm newcomm, int nranks, int myrank, int nId, ncclUniqueId commIds, ncclConfig_t config); +public static native @Cast("ncclResult_t") int pncclCommInitRankScalable(@Cast("ncclComm**") PointerPointer newcomm, int nranks, int myrank, int nId, ncclUniqueId commIds, ncclConfig_t config); + /* Returns a string for each error code. 
*/ public static native @Cast("const char*") BytePointer ncclGetErrorString(@Cast("ncclResult_t") int result); public static native @Cast("const char*") BytePointer pncclGetErrorString(@Cast("ncclResult_t") int result); diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/nppc.java b/cuda/src/gen/java/org/bytedeco/cuda/global/nppc.java index b69843630e..116d7f9aef 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/global/nppc.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/global/nppc.java @@ -88,7 +88,7 @@ public class nppc extends org.bytedeco.cuda.presets.nppc { /** * Build version */ -public static final int NPP_VER_BUILD = 23; +public static final int NPP_VER_BUILD = 54; /** * Full version diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/nvjpeg.java b/cuda/src/gen/java/org/bytedeco/cuda/global/nvjpeg.java index 0a17fa078e..5a3a89a2e6 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/global/nvjpeg.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/global/nvjpeg.java @@ -88,7 +88,7 @@ public class nvjpeg extends org.bytedeco.cuda.presets.nvjpeg { public static final int NVJPEG_VER_MAJOR = 12; public static final int NVJPEG_VER_MINOR = 3; public static final int NVJPEG_VER_PATCH = 3; -public static final int NVJPEG_VER_BUILD = 23; +public static final int NVJPEG_VER_BUILD = 54; /* nvJPEG status enums, returned by nvJPEG API */ /** enum nvjpegStatus_t */ diff --git a/nvcodec/README.md b/nvcodec/README.md index fdf7a1ba99..f961919dfe 100644 --- a/nvcodec/README.md +++ b/nvcodec/README.md @@ -62,7 +62,7 @@ You can find more encoder and decoder samples in the [`samples`](samples) subdir org.bytedeco cuda-platform-redist - 12.6-9.3-1.5.11-SNAPSHOT + 12.6-9.5-1.5.11-SNAPSHOT diff --git a/nvcodec/platform/pom.xml b/nvcodec/platform/pom.xml index b3f7b54c5e..b3dccbc6be 100644 --- a/nvcodec/platform/pom.xml +++ b/nvcodec/platform/pom.xml @@ -23,7 +23,7 @@ org.bytedeco cuda-platform - 12.6-9.3-${project.parent.version} + 12.6-9.5-${project.parent.version} 
${project.groupId} diff --git a/nvcodec/pom.xml b/nvcodec/pom.xml index 78f8677ad0..c0869ff538 100644 --- a/nvcodec/pom.xml +++ b/nvcodec/pom.xml @@ -18,7 +18,7 @@ org.bytedeco cuda - 12.6-9.3-${project.parent.version} + 12.6-9.5-${project.parent.version} org.bytedeco @@ -44,7 +44,7 @@ org.bytedeco cuda - 12.6-9.3-${project.parent.version} + 12.6-9.5-${project.parent.version} diff --git a/nvcodec/samples/pom.xml b/nvcodec/samples/pom.xml index 8766cdee1b..e8687e9d59 100644 --- a/nvcodec/samples/pom.xml +++ b/nvcodec/samples/pom.xml @@ -23,7 +23,7 @@ org.bytedeco cuda-platform-redist - 12.6-9.3-1.5.11-SNAPSHOT + 12.6-9.5-1.5.11-SNAPSHOT diff --git a/nvcodec/samples/simple/pom.xml b/nvcodec/samples/simple/pom.xml index 37581e84db..b3c98a37f9 100644 --- a/nvcodec/samples/simple/pom.xml +++ b/nvcodec/samples/simple/pom.xml @@ -19,7 +19,7 @@ org.bytedeco cuda-platform-redist - 12.6-9.3-1.5.11-SNAPSHOT + 12.6-9.5-1.5.11-SNAPSHOT diff --git a/onnxruntime/README.md b/onnxruntime/README.md index 2167fdab1e..816a796f31 100644 --- a/onnxruntime/README.md +++ b/onnxruntime/README.md @@ -60,7 +60,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic org.bytedeco cuda-platform-redist - 12.6-9.3-1.5.11-SNAPSHOT + 12.6-9.5-1.5.11-SNAPSHOT diff --git a/onnxruntime/samples/pom.xml b/onnxruntime/samples/pom.xml index 77b521a4f6..06a60167c4 100644 --- a/onnxruntime/samples/pom.xml +++ b/onnxruntime/samples/pom.xml @@ -26,7 +26,7 @@ org.bytedeco cuda-platform-redist - 12.6-9.3-1.5.11-SNAPSHOT + 12.6-9.5-1.5.11-SNAPSHOT diff --git a/opencv/README.md b/opencv/README.md index 72fcafdf05..5c17dba09e 100644 --- a/opencv/README.md +++ b/opencv/README.md @@ -63,7 +63,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic org.bytedeco cuda-platform-redist - 12.6-9.3-1.5.11-SNAPSHOT + 12.6-9.5-1.5.11-SNAPSHOT diff --git a/opencv/samples/pom.xml b/opencv/samples/pom.xml index edfb74e1c5..587f109d2c 100644 --- a/opencv/samples/pom.xml 
+++ b/opencv/samples/pom.xml @@ -26,7 +26,7 @@ org.bytedeco cuda-platform-redist - 12.6-9.3-1.5.11-SNAPSHOT + 12.6-9.5-1.5.11-SNAPSHOT diff --git a/platform/pom.xml b/platform/pom.xml index 02a54b050b..830ba19b12 100644 --- a/platform/pom.xml +++ b/platform/pom.xml @@ -272,7 +272,7 @@ org.bytedeco cuda-platform - 12.6-9.3-${project.version} + 12.6-9.5-${project.version} org.bytedeco @@ -312,12 +312,12 @@ org.bytedeco tensorrt-platform - 10.3-${project.version} + 10.5-${project.version} org.bytedeco tritonserver-platform - 2.48.0-${project.version} + 2.50.0-${project.version} diff --git a/pytorch/README.md b/pytorch/README.md index 220d9da081..5ecaba34d7 100644 --- a/pytorch/README.md +++ b/pytorch/README.md @@ -62,7 +62,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic org.bytedeco cuda-platform-redist - 12.6-9.3-1.5.11-SNAPSHOT + 12.6-9.5-1.5.11-SNAPSHOT diff --git a/pytorch/pom.xml b/pytorch/pom.xml index 5f697e12b5..575b759fa7 100644 --- a/pytorch/pom.xml +++ b/pytorch/pom.xml @@ -27,7 +27,7 @@ org.bytedeco cuda - 12.6-9.3-${project.parent.version} + 12.6-9.5-${project.parent.version} true @@ -52,7 +52,7 @@ org.bytedeco cuda-platform - 12.6-9.3-${project.parent.version} + 12.6-9.5-${project.parent.version} org.bytedeco diff --git a/pytorch/samples/pom.xml b/pytorch/samples/pom.xml index 1ee5668173..97d4fb103a 100644 --- a/pytorch/samples/pom.xml +++ b/pytorch/samples/pom.xml @@ -26,7 +26,7 @@ org.bytedeco cuda-platform-redist - 12.6-9.3-1.5.11-SNAPSHOT + 12.6-9.5-1.5.11-SNAPSHOT diff --git a/tensorrt/README.md b/tensorrt/README.md index 99eb8cbe7c..f255089e23 100644 --- a/tensorrt/README.md +++ b/tensorrt/README.md @@ -17,7 +17,7 @@ Introduction ------------ This directory contains the JavaCPP Presets module for: - * TensorRT 10.3.0.26 https://developer.nvidia.com/tensorrt + * TensorRT 10.5.0.18 https://developer.nvidia.com/tensorrt Please refer to the parent README.md file for more detailed information about the JavaCPP 
Presets. @@ -54,19 +54,19 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic org.bytedeco tensorrt-platform - 10.3-1.5.11-SNAPSHOT + 10.5-1.5.11-SNAPSHOT org.bytedeco cuda-platform-redist - 12.6-9.3-1.5.11-SNAPSHOT + 12.6-9.5-1.5.11-SNAPSHOT org.bytedeco tensorrt-platform-redist - 10.3-1.5.11-SNAPSHOT + 10.5-1.5.11-SNAPSHOT diff --git a/tensorrt/platform/pom.xml b/tensorrt/platform/pom.xml index 8b458847cd..afc6e0dc55 100644 --- a/tensorrt/platform/pom.xml +++ b/tensorrt/platform/pom.xml @@ -12,7 +12,7 @@ org.bytedeco tensorrt-platform - 10.3-${project.parent.version} + 10.5-${project.parent.version} JavaCPP Presets Platform for TensorRT @@ -23,7 +23,7 @@ org.bytedeco cuda-platform - 12.6-9.3-${project.parent.version} + 12.6-9.5-${project.parent.version} ${project.groupId} diff --git a/tensorrt/platform/redist/pom.xml b/tensorrt/platform/redist/pom.xml index e0cfebbc47..cabae40357 100644 --- a/tensorrt/platform/redist/pom.xml +++ b/tensorrt/platform/redist/pom.xml @@ -12,7 +12,7 @@ org.bytedeco tensorrt-platform-redist - 10.3-${project.parent.version} + 10.5-${project.parent.version} JavaCPP Presets Platform Redist for TensorRT diff --git a/tensorrt/pom.xml b/tensorrt/pom.xml index 7a827cd556..da69643f07 100644 --- a/tensorrt/pom.xml +++ b/tensorrt/pom.xml @@ -11,14 +11,14 @@ org.bytedeco tensorrt - 10.3-${project.parent.version} + 10.5-${project.parent.version} JavaCPP Presets for TensorRT org.bytedeco cuda - 12.6-9.3-${project.parent.version} + 12.6-9.5-${project.parent.version} org.bytedeco @@ -44,7 +44,7 @@ org.bytedeco cuda - 12.6-9.3-${project.parent.version} + 12.6-9.5-${project.parent.version} diff --git a/tensorrt/samples/pom.xml b/tensorrt/samples/pom.xml index 1e2d76f0a7..f7094fb21a 100644 --- a/tensorrt/samples/pom.xml +++ b/tensorrt/samples/pom.xml @@ -12,19 +12,19 @@ org.bytedeco tensorrt-platform - 10.3-1.5.11-SNAPSHOT + 10.5-1.5.11-SNAPSHOT org.bytedeco cuda-platform-redist - 12.6-9.3-1.5.11-SNAPSHOT + 
12.6-9.5-1.5.11-SNAPSHOT org.bytedeco tensorrt-platform-redist - 10.3-1.5.11-SNAPSHOT + 10.5-1.5.11-SNAPSHOT diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/global/nvinfer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/global/nvinfer.java index 86b1490234..874a32baa8 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/global/nvinfer.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/global/nvinfer.java @@ -51,11 +51,11 @@ public class nvinfer extends org.bytedeco.tensorrt.presets.nvinfer { /** TensorRT major version. */ public static final int NV_TENSORRT_MAJOR = 10; /** TensorRT minor version. */ -public static final int NV_TENSORRT_MINOR = 3; +public static final int NV_TENSORRT_MINOR = 5; /** TensorRT patch version. */ public static final int NV_TENSORRT_PATCH = 0; /** TensorRT build number. */ -public static final int NV_TENSORRT_BUILD = 26; +public static final int NV_TENSORRT_BUILD = 18; /** TensorRT LWS major version. */ public static final int NV_TENSORRT_LWS_MAJOR = 0; diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java index 21d802657a..f27e1a3125 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java @@ -47,22 +47,27 @@ public class IBuilder extends INoCopy { /** * \brief Determine whether the platform has fast native fp16. + * + * @deprecated Deprecated in TensorRT 10.5. Please query data type support from CUDA directly. * */ //! //! - public native @Cast("bool") @NoException(true) boolean platformHasFastFp16(); + //! + public native @Cast("bool") @Deprecated @NoException(true) boolean platformHasFastFp16(); /** * \brief Determine whether the platform has fast native int8. + * + * @deprecated Deprecated in TensorRT 10.5. Please query data type support from CUDA directly. * */ //! //! //! 
- public native @Cast("bool") @NoException(true) boolean platformHasFastInt8(); + public native @Cast("bool") @Deprecated @NoException(true) boolean platformHasFastInt8(); /** * \brief Get the maximum batch size DLA can support. @@ -214,12 +219,15 @@ public class IBuilder extends INoCopy { * */ + //! //! //! public native @NoException(true) void reset(); /** * \brief Determine whether the platform has TF32 support. + * + * @deprecated Deprecated in TensorRT 10.5. Please query data type support from CUDA directly. * */ @@ -230,7 +238,7 @@ public class IBuilder extends INoCopy { //! //! //! - public native @Cast("bool") @NoException(true) boolean platformHasTf32(); + public native @Cast("bool") @Deprecated @NoException(true) boolean platformHasTf32(); /** * \brief Builds and serializes a network for the given INetworkDefinition and IBuilderConfig. diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java index ff6df4d439..6a0da55a28 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java @@ -1076,5 +1076,35 @@ public class IBuilderConfig extends INoCopy { * * @see IBuilderConfig::setRuntimePlatform() * */ + + + //! + //! + //! + //! public native @NoException(true) RuntimePlatform getRuntimePlatform(); + + /** + * \brief Set the maximum number of tactics to time when there is a choice of tactics. + * + * This function controls the number of tactics timed when there are multiple tactics to choose from. + * + * @see getMaxNbTactics() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setMaxNbTactics(int maxNbTactics); + + /** + * \brief Query the maximum number of tactics timed when there is a choice. + * + * By default the value is -1, indicating TensorRT can determine the number of tactics based on its own heuristic. 
+ * + * @see setMaxNbTactics() + * */ + public native @NoException(true) int getMaxNbTactics(); } diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java index c13fb3e733..cf28b124bc 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java @@ -24,8 +24,8 @@ * * \brief A convolution layer in a network definition. * - * This layer performs a correlation operation between 3-dimensional filter with a 4-dimensional tensor to produce - * another 4-dimensional tensor. + * This layer performs a correlation operation between 3 or 4 dimensional filter with a 4 or 5 dimensional tensor to + * produce another 4 or 5 dimensional tensor. * * An optional bias argument is supported, which adds a per-channel constant to each value in the output. * diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java index 7f67dfa1f9..5e010fb8b4 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java @@ -37,7 +37,6 @@ * * GatherMode::kDEFAULT: s = q + r - 1 - nbElementwiseDims * * GatherMode::kND: s = q + r - indices.d[q-1] - 1 - nbElementwiseDims * * GatherMode::kELEMENT: s = q = r. - * The output can be a shape tensor only if the mode is GatherMode::kDEFAULT. 
* * The dimensions of the output likewise depends on the mode: * diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditional.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditional.java index b45379209e..496b7a76da 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditional.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditional.java @@ -64,6 +64,7 @@ public class IIfConditional extends INoCopy { //! //! //! + //! public native @NoException(true) IConditionLayer setCondition(@ByRef ITensor condition); /** @@ -75,6 +76,8 @@ public class IIfConditional extends INoCopy { * Each output layer of an IIfConditional represents a single output of either the true-subgraph or the * false-subgraph of an IIfConditional, depending on which subgraph was executed. * + * The shapes of the two tensors must be equal unless the condition is a build-time constant. + * * @see IIfConditionalOutputLayer * */ diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditionalOutputLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditionalOutputLayer.java index a3f4925524..4de57a9ba9 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditionalOutputLayer.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IIfConditionalOutputLayer.java @@ -24,7 +24,9 @@ * * \brief This layer represents an output of an IIfConditional. * - * An IIfConditionalOutputLayer has exactly one output. + * An IIfConditionalOutputLayer has two inputs and one output. 
+ * + * @see IIfConditional::addOutput * */ @Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) public class IIfConditionalOutputLayer extends IIfConditionalBoundaryLayer { diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java index be4006dce2..d082d0d05c 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java @@ -100,7 +100,7 @@ public class ILoopOutputLayer extends ILoopBoundaryLayer { /** The indices in the kCONCATENATE or kREVERSE cases are as follows: /** /** - 0: Contribution to the output tensor. The contribution must come from inside the loop. - /** - 1: The concatenation length scalar value, must come from outside the loop, as a 0D Int32 or Int64 shape tensor. + /** - 1: The concatenation length scalar value, must come from outside the loop, as a 0D shape tensor of type Int32 or Int64. /** /** If this function is called with the value 1, then the function getNbInputs() changes /** from returning 1 to 2. diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java index 02cc0f8a0c..e6e24b0ec8 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java @@ -1037,7 +1037,7 @@ public class INetworkDefinition extends INoCopy { * * @see IParametricReLULayer * - * \warning Int32 tensors are not valid input tensors. + * \warning Tensors of type Int32, Int64, Bool, or UInt8 are not allowed as inputs. * * @return The new parametric ReLU layer, or nullptr if it could not be created. 
* */ diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IOneHotLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IOneHotLayer.java index f337837446..5d8fd66a95 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IOneHotLayer.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IOneHotLayer.java @@ -28,8 +28,8 @@ * Output, and an axis attribute. * * Indices is an Int32 tensor that determines which locations in Output to set as on_value. * * Values is a two-element (rank=1) tensor that consists of [off_value, on_value] - * * Depth is an Int32 shape tensor of rank 0, which contains the depth (number of classes) of the one-hot encoding. - * The depth tensor must be a build-time constant, and its value should be positive. + * * Depth is an 0D tensor of type Int32 or Int64, which contains the depth (number of classes) of the one-hot encoding. + * The depth tensor must be a positive build-time constant. * * Output is a tensor with rank = rank(indices)+1, where the added dimension contains the one-hot encoding. * The data types of Output is equal to the Values data type. * * Axis is a scalar specifying to which dimension of the output one-hot encoding is added. diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java index 4afc4105e5..a29c09124f 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java @@ -209,7 +209,7 @@ public class IResizeLayer extends ILayer { * The indices in the dynamic case are as follows: * * - 0: Execution tensor to be resized. - * - 1: The output dimensions, as a 1D Int32 shape tensor. + * - 1: The output dimensions, as a 1D tensor of type Int32 or Int64. * * If this function is called with the value 1, then the function getNbInputs() changes * from returning 1 to 2. 
diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java index 53f4045e1a..53422e0d04 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java @@ -26,7 +26,7 @@ * This layer shuffles data by applying in sequence: a transpose operation, a reshape operation * and a second transpose operation. The dimension types of the output are those of the reshape dimension. * - * The layer has an optional second input. If present, it must be a 1D Int32 shape tensor, + * The layer has an optional second input. If present, it must be a 1D tensor of type Int32 or Int64, * and the reshape dimensions are taken from it. * * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. @@ -138,7 +138,7 @@ public class IShuffleLayer extends ILayer { /** The indices in the dynamic case are as follows: /** /** - 0: Data or Shape tensor to be shuffled. - /** - 1: The dimensions for the reshape operation, as a 1D Int32 shape tensor. + /** - 1: The dimensions for the reshape operation, as a 1D tensor of type Int32 or Int64. /** /** If this function is called with the value 1, then the function getNbInputs() changes /** from returning 1 to 2. diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java index c270f9a7c9..e67903077e 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java @@ -34,7 +34,7 @@ * * The slice layer selects for each dimension a start location from within the input tensor, and * copies elements to the output tensor using the specified stride across the input tensor. 
- * Start, size, and stride tensors must be 1D Int32 shape tensors if not specified via Dims. + * Start, size, and stride tensors must be 1D tensors of type Int32 or Int64 if not specified via Dims. * * An example of using slice on a tensor: * input = {{0, 2, 4}, {1, 3, 5}} @@ -72,10 +72,12 @@ * The following constraints must be satisfied to execute this layer on DLA: * * start, size, and stride are build time constants, either as static Dims or as constant input tensors. * * axes, if provided, are build time constants, either as static Dims or as a constant input tensor. - * * sampleMode is kSTRICT_BOUNDS. + * * sampleMode is kDEFAULT, kWRAP, or kFILL. * * Strides are 1 for all dimensions. - * * Slicing is not performed on the first dimension - * * The input tensor has four dimensions + * * Slicing is not performed on the first dimension. + * * The input tensor has four dimensions. + * * For kFILL sliceMode, the fill value input is a scalar output of an IConstantLayer with value 0 that is not + * consumed by any other layer. * * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. * */ @@ -233,15 +235,15 @@ public class ISliceLayer extends ILayer { * The indices are as follows: * * - 0: Tensor to be sliced. - * - 1: The start tensor to begin slicing, as a 1D Int32 shape tensor. - * - 2: The size tensor of the resulting slice, as a 1D Int32 shape tensor. - * - 3: The stride of the slicing operation, as a 1D Int32 shape tensor. + * - 1: The start tensor to begin slicing, as a 1D tensor of type Int32 or Int64. + * - 2: The size tensor of the resulting slice, as a 1D tensor of type Int32 or Int64. + * - 3: The stride of the slicing operation, as a 1D tensor of type Int32 or Int64. * - 4: Value for the kFILL slice mode. The fill value data type should either be the same * or be implicitly convertible to the input data type. * Implicit data type conversion is supported among kFLOAT, kHALF, kINT8, and kFP8 data types. 
* This input is disallowed for other modes. * - 5: The axes tensor indicating the corresponding axes that start, size, and stride - * should apply to, as a 1D Int32 shape tensor. Negative values for axes + * should apply to, as a 1D tensor of type Int32 or Int64. Negative values for axes * indicate indexing from the back of the input tensor. Values must be unique and be * within the interval of [-rank(input), rank(input)-1]. * diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java index e3ae1a877f..b8aa86cdfe 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java @@ -102,4 +102,6 @@ public class VBuilderConfig extends VRoot { public native @NoException(true) void setRuntimePlatform(RuntimePlatform runtimePlatform); public native @NoException(true) void setRuntimePlatform(@Cast("nvinfer1::RuntimePlatform") int runtimePlatform); public native @NoException(true) RuntimePlatform getRuntimePlatform(); + public native @NoException(true) void setMaxNbTactics(int maxTactics); + public native @NoException(true) int getMaxNbTactics(); } diff --git a/tensorrt/src/main/java/org/bytedeco/tensorrt/presets/nvinfer.java b/tensorrt/src/main/java/org/bytedeco/tensorrt/presets/nvinfer.java index 7d1fa17238..631ed304ab 100644 --- a/tensorrt/src/main/java/org/bytedeco/tensorrt/presets/nvinfer.java +++ b/tensorrt/src/main/java/org/bytedeco/tensorrt/presets/nvinfer.java @@ -51,7 +51,7 @@ "NvInferLegacyDims.h", "NvInferRuntime.h", "NvInfer.h", "NvInferImpl.h"/*, "NvUtils.h"*/}, exclude = "NvInferRuntimeBase.h", link = "nvinfer@.10", - preload = "nvinfer_builder_resource@.10.3.0" + preload = "nvinfer_builder_resource@.10.5.0" ), @Platform( value = "linux-arm64", diff --git a/tritonserver/README.md b/tritonserver/README.md index f3cc656be1..3c88de0308 100644 ---
a/tritonserver/README.md +++ b/tritonserver/README.md @@ -23,7 +23,7 @@ Introduction ------------ This directory contains the JavaCPP Presets module for: - * Triton Inference Server 2.48.0 https://github.com/triton-inference-server/server + * Triton Inference Server 2.50.0 https://github.com/triton-inference-server/server Please refer to the parent README.md file for more detailed information about the JavaCPP Presets. @@ -51,9 +51,9 @@ This sample intends to show how to call the Java-mapped C API of Triton to execu 1. Get the source code of Triton Inference Server to prepare the model repository: ```bash - $ wget https://github.com/triton-inference-server/server/archive/refs/tags/v2.48.0.tar.gz - $ tar zxvf v2.48.0.tar.gz - $ cd server-2.48.0/docs/examples/model_repository + $ wget https://github.com/triton-inference-server/server/archive/refs/tags/v2.50.0.tar.gz + $ tar zxvf v2.50.0.tar.gz + $ cd server-2.50.0/docs/examples/model_repository $ mkdir models $ cd models; cp -a ../simple . ``` @@ -61,7 +61,7 @@ Now, this `models` directory will be our model repository. 2. Start the Docker container to run the sample (assuming we are under the `models` directory created above): ```bash - $ docker run -it --gpus=all -v $(pwd):/workspace nvcr.io/nvidia/tritonserver:24.07-py3 bash + $ docker run -it --gpus=all -v $(pwd):/workspace nvcr.io/nvidia/tritonserver:24.09-py3 bash $ apt update $ apt install -y openjdk-11-jdk $ wget https://archive.apache.org/dist/maven/maven-3/3.8.4/binaries/apache-maven-3.8.4-bin.tar.gz diff --git a/tritonserver/cppbuild.sh b/tritonserver/cppbuild.sh index f438e39cad..611be9089e 100755 --- a/tritonserver/cppbuild.sh +++ b/tritonserver/cppbuild.sh @@ -11,9 +11,9 @@ INCLUDE_DEVELOPER_TOOLS_SERVER=${INCLUDE_DEVELOPER_TOOLS_SERVER:=1} if [[ ! -f "/opt/tritonserver/include/triton/developer_tools/generic_server_wrapper.h" ]] && [[ ! 
-f "/opt/tritonserver/lib/libtritondevelopertoolsserver.so" ]] && [[ ${INCLUDE_DEVELOPER_TOOLS_SERVER} -ne 0 ]]; then TOOLS_BRANCH=${TOOLS_BRANCH:="https://github.com/triton-inference-server/developer_tools.git"} - TOOLS_BRANCH_TAG=${TOOLS_BRANCH_TAG:="r24.07"} + TOOLS_BRANCH_TAG=${TOOLS_BRANCH_TAG:="r24.09"} TRITON_CORE_REPO=${TRITON_CORE_REPO:="https://github.com/triton-inference-server/core.git"} - TRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG="r24.07"} + TRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG="r24.09"} TRITON_HOME="/opt/tritonserver" BUILD_HOME="$PWD"/tritonbuild mkdir -p ${BUILD_HOME} && cd ${BUILD_HOME} diff --git a/tritonserver/platform/pom.xml b/tritonserver/platform/pom.xml index d83b464263..82851eac3d 100644 --- a/tritonserver/platform/pom.xml +++ b/tritonserver/platform/pom.xml @@ -12,7 +12,7 @@ org.bytedeco tritonserver-platform - 2.48.0-${project.parent.version} + 2.50.0-${project.parent.version} JavaCPP Presets Platform for Triton Inference Server diff --git a/tritonserver/platform/redist/pom.xml b/tritonserver/platform/redist/pom.xml index 8e138ae38b..638848272a 100644 --- a/tritonserver/platform/redist/pom.xml +++ b/tritonserver/platform/redist/pom.xml @@ -12,7 +12,7 @@ org.bytedeco tritonserver-platform-redist - 2.48.0-${project.parent.version} + 2.50.0-${project.parent.version} JavaCPP Presets Platform Redist for Triton Inference Server diff --git a/tritonserver/pom.xml b/tritonserver/pom.xml index 95a6320848..5c6b326cf9 100644 --- a/tritonserver/pom.xml +++ b/tritonserver/pom.xml @@ -11,7 +11,7 @@ org.bytedeco tritonserver - 2.48.0-${project.parent.version} + 2.50.0-${project.parent.version} JavaCPP Presets for Triton Inference Server diff --git a/tritonserver/samples/simple/pom.xml b/tritonserver/samples/simple/pom.xml index 13e9fa7fc4..817cab07f5 100644 --- a/tritonserver/samples/simple/pom.xml +++ b/tritonserver/samples/simple/pom.xml @@ -12,7 +12,7 @@ org.bytedeco tritonserver-platform - 2.48.0-1.5.11-SNAPSHOT + 2.50.0-1.5.11-SNAPSHOT 
shaded diff --git a/tritonserver/samples/simplecpp/pom.xml b/tritonserver/samples/simplecpp/pom.xml index 5a5ebb7d4e..fa631c00b0 100644 --- a/tritonserver/samples/simplecpp/pom.xml +++ b/tritonserver/samples/simplecpp/pom.xml @@ -12,7 +12,7 @@ org.bytedeco tritonserver-platform - 2.48.0-1.5.11-SNAPSHOT + 2.50.0-1.5.11-SNAPSHOT shaded diff --git a/tritonserver/samples/unsupported/pom.xml b/tritonserver/samples/unsupported/pom.xml index 7145be0246..b3a891ddcd 100644 --- a/tritonserver/samples/unsupported/pom.xml +++ b/tritonserver/samples/unsupported/pom.xml @@ -13,17 +13,17 @@ org.bytedeco cuda-platform - 12.6-9.3-1.5.11-SNAPSHOT + 12.6-9.5-1.5.11-SNAPSHOT org.bytedeco tensorrt-platform - 10.3-1.5.11-SNAPSHOT + 10.5-1.5.11-SNAPSHOT org.bytedeco tritonserver-platform - 2.48.0-1.5.11-SNAPSHOT + 2.50.0-1.5.11-SNAPSHOT shaded diff --git a/tvm/README.md b/tvm/README.md index d40568e7c1..2ae70e5630 100644 --- a/tvm/README.md +++ b/tvm/README.md @@ -63,7 +63,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic org.bytedeco cuda-platform-redist - 12.6-9.3-1.5.11-SNAPSHOT + 12.6-9.5-1.5.11-SNAPSHOT diff --git a/tvm/samples/pom.xml b/tvm/samples/pom.xml index 727d14f5a3..1299336d26 100644 --- a/tvm/samples/pom.xml +++ b/tvm/samples/pom.xml @@ -26,7 +26,7 @@ org.bytedeco cuda-platform-redist - 12.6-9.3-1.5.11-SNAPSHOT + 12.6-9.5-1.5.11-SNAPSHOT