Commit

Upgrade presets for MKL 2025.0, LLVM 19.1.3, nvCOMP 4.1.0.6, PyTorch 2.5.1, Triton Inference Server 2.51.0
saudet committed Oct 31, 2024
1 parent ba960fc commit 52a829d
Showing 64 changed files with 992 additions and 414 deletions.
14 changes: 7 additions & 7 deletions .github/actions/deploy-ubuntu/action.yml
@@ -44,7 +44,7 @@ runs:
export CUDA=cuda-repo-rhel8-12-6-local-12.6.2_560.35.03-1.aarch64.rpm
export CUDNN=cuda-12-9.5.1.17-1.aarch64
export NCCL=2.23.4-1+cuda12.6.aarch64
-export NVCOMP=nvcomp-linux-sbsa-4.0.1-cuda12.x
+export NVCOMP=nvcomp-linux-sbsa-4.1.0.6_cuda12-archive
export USERLAND_BUILDME="buildme --aarch64"
elif [[ "$CI_DEPLOY_PLATFORM" == "linux-ppc64le" ]]; then
export ARCH=ppc64el
@@ -66,7 +66,7 @@ runs:
export CUDA=cuda-repo-rhel8-12-6-local-12.6.2_560.35.03-1.x86_64.rpm
export CUDNN=cuda-12-9.5.1.17-1.x86_64
export NCCL=2.23.4-1+cuda12.6.x86_64
-export NVCOMP=nvcomp-linux-x86_64-4.0.1-cuda12.x
+export NVCOMP=nvcomp-linux-x86_64-4.1.0.6_cuda12-archive
fi
echo "ARCH=$ARCH" >> $GITHUB_ENV
echo "PREFIX=$PREFIX" >> $GITHUB_ENV
@@ -183,9 +183,9 @@ runs:
for f in /usr/local/cuda/lib64/libcudnn*so.9.*; do $SUDO ln -sf $f ${f:0:${#f}-4}; $SUDO ln -sf $f ${f:0:${#f}-6}; done
if [[ -n ${NVCOMP:-} ]]; then
-curl -LO https://developer.download.nvidia.com/compute/nvcomp/4.0.1/local_installers/$NVCOMP.tar.gz
-$SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/lib64/ --strip-components=1 lib/ || $SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/lib64/ --strip-components=2 nvcomp/lib/
-$SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/include/ --strip-components=1 include/ || $SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/include/ --strip-components=2 nvcomp/include/
+curl -LO https://developer.download.nvidia.com/compute/nvcomp/redist/nvcomp/linux-$ARCH_CUDA/$NVCOMP.tar.xz
+$SUDO tar -xvf $NVCOMP.tar.xz -C /usr/local/cuda/lib64/ --strip-components=2 */lib/
+$SUDO tar -xvf $NVCOMP.tar.xz -C /usr/local/cuda/include/ --strip-components=2 */include/
rm -f $NVCOMP.tar.gz
fi
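The new redist tarballs unpack into a single top-level directory named after $NVCOMP, which is why the extraction now uses --strip-components=2 with the */lib/ and */include/ globs instead of the old nvcomp/-prefixed paths. A minimal sketch for sanity-checking the result after this step runs — the exact file names (libnvcomp.so, the nvcomp headers) are assumptions based on nvCOMP's usual packaging, not something the action itself verifies:

```bash
# Sketch: confirm the nvCOMP 4.1.0.6 bits landed inside the CUDA toolkit tree.
ls /usr/local/cuda/lib64/libnvcomp*    # shared libraries (assumed name libnvcomp.so*)
ls /usr/local/cuda/include/nvcomp*     # top-level headers plus the nvcomp/ directory (assumed layout)
```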
@@ -228,8 +228,8 @@ runs:
if [[ "$CI_DEPLOY_MODULE" == "mkl" ]]; then
echo Installing MKL
-curl -LO https://registrationcenter-download.intel.com/akdlm/IRC_NAS/89a381f6-f85d-4dda-ae62-30d51470f53c/l_onemkl_p_2024.2.2.17_offline.sh
-$SUDO bash l_onemkl_p_2024.2.2.17_offline.sh -s -a -s --eula accept
+curl -LO https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940_offline.sh
+$SUDO bash intel-onemkl-2025.0.0.940_offline.sh -s -a -s --eula accept
export MAVEN_OPTIONS="-Djavacpp.platform.compiler=clang++"
fi
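The oneMKL 2025.0 installer is again run in silent mode (-s -a -s --eula accept); only the download URL and package name change. A small sketch of how a later step could locate the freshly installed toolkit — the /opt/intel/oneapi layout and the setvars.sh script are assumptions based on the installer's defaults rather than anything this action configures:

```bash
# Sketch: expose MKL 2025.0 to subsequent build steps (default oneAPI install layout assumed).
source /opt/intel/oneapi/setvars.sh    # sets MKLROOT, LD_LIBRARY_PATH, etc.
echo "MKLROOT=$MKLROOT"                # expected to point at an mkl/2025.0 directory
export MAVEN_OPTIONS="-Djavacpp.platform.compiler=clang++"   # same option the action exports
```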
18 changes: 8 additions & 10 deletions .github/actions/deploy-windows/action.yml
@@ -102,7 +102,7 @@ runs:
curl -LO https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/cuda_12.6.2_560.94_windows.exe
curl -LO https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.5.1.17_cuda12-archive.zip
curl -LO http://www.winimage.com/zLibDll/zlib123dllx64.zip
-curl -LO https://developer.download.nvidia.com/compute/nvcomp/4.0.1/local_installers/nvcomp-windows-x86_64-4.0.1-cuda12.x.zip
+curl -LO https://developer.download.nvidia.com/compute/nvcomp/redist/nvcomp/windows-x86_64/nvcomp-windows-x86_64-4.1.0.6_cuda12-archive.zip
cuda_11.8.0_522.06_windows.exe -s
bash -c "rm -Rf 'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8'"
bash -c "mv 'C:/Program Files/NVIDIA Corporation/NvToolsExt' 'C:/Program Files/NVIDIA Corporation/NvToolsExt_old'"
@@ -111,17 +111,15 @@
bash -c "ls 'C:/Program Files/NVIDIA Corporation/NvToolsExt'"
unzip cudnn-windows-x86_64-9.5.1.17_cuda12-archive.zip
unzip zlib123dllx64.zip
-unzip nvcomp-windows-x86_64-4.0.1-cuda12.x.zip
+unzip nvcomp-windows-x86_64-4.1.0.6_cuda12-archive.zip
move cudnn-windows-x86_64-9.5.1.17_cuda12-archive\bin\*.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin"
move cudnn-windows-x86_64-9.5.1.17_cuda12-archive\include\*.h "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include"
move cudnn-windows-x86_64-9.5.1.17_cuda12-archive\lib\x64\*.lib "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\lib\x64"
move dll_x64\zlibwapi.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin"
-move nvcomp\include\* "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include"
-move nvcomp\include\device "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include"
-move nvcomp\include\native "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include"
-move nvcomp\include\nvcomp "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include"
-move nvcomp\lib\nvcomp*.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin"
-move nvcomp\lib\nvcomp*.lib "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\lib\x64"
+move nvcomp-windows-x86_64-4.1.0.6_cuda12-archive\include\* "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include"
+move nvcomp-windows-x86_64-4.1.0.6_cuda12-archive\include\nvcomp "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include"
+move nvcomp-windows-x86_64-4.1.0.6_cuda12-archive\bin\nvcomp*.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin"
+move nvcomp-windows-x86_64-4.1.0.6_cuda12-archive\bin\nvcomp*.lib "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\lib\x64"
rem echo Applying hotfix to Visual Studio 2019 for CUDA
rem curl -LO https://raw.githubusercontent.com/microsoft/STL/main/stl/inc/cmath
@@ -151,8 +149,8 @@ runs:
if "%CI_DEPLOY_MODULE%"=="mkl" (
echo Installing MKL
-curl -LO https://registrationcenter-download.intel.com/akdlm/IRC_NAS/9fe96489-78fe-4fea-8cc2-2ddf7de0246a/w_onemkl_p_2024.2.2.16_offline.exe
-w_onemkl_p_2024.2.2.16_offline.exe -s -a -s --eula accept
+curl -LO https://registrationcenter-download.intel.com/akdlm/IRC_NAS/e0a45889-f395-47d6-811d-0f3d8caae4a0/intel-onemkl-2025.0.0.929_offline.exe
+intel-onemkl-2025.0.0.929_offline.exe -s -a -s --eula accept
)
if "%CI_DEPLOY_PLATFORM%"=="windows-x86" if "%CI_DEPLOY_MODULE%"=="flycapture" (
2 changes: 1 addition & 1 deletion .github/workflows/tritonserver.yml
@@ -19,6 +19,6 @@ env:
jobs:
linux-x86_64:
runs-on: ubuntu-20.04
-container: nvcr.io/nvidia/tritonserver:24.09-tf2-python-py3
+container: nvcr.io/nvidia/tritonserver:24.10-tf2-python-py3
steps:
- uses: bytedeco/javacpp-presets/.github/actions/deploy-ubuntu@actions
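Triton's monthly NGC containers track the server releases, so the bump from the 24.09 to the 24.10 tag is what brings in Triton Inference Server 2.51.0 from the commit title. A quick sketch for reproducing the workflow's environment locally — the docker invocations are generic, and the version-file path inside the image is an assumption about the container layout:

```bash
# Sketch: pull the same container the workflow runs in and check which Triton release it ships.
docker pull nvcr.io/nvidia/tritonserver:24.10-tf2-python-py3
docker run --rm nvcr.io/nvidia/tritonserver:24.10-tf2-python-py3 \
  cat /opt/tritonserver/TRITON_VERSION   # assumed location of the release marker
```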
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -9,7 +9,7 @@
* Build FFmpeg with zimg to enable zscale filter ([pull #1481](https://github.com/bytedeco/javacpp-presets/pull/1481))
* Enable PulseAudio support for FFmpeg on Linux ([pull #1472](https://github.com/bytedeco/javacpp-presets/pull/1472))
* Virtualize `btCollisionWorld`, `btOverlapFilterCallback`, `btOverlapCallback` from Bullet Physics SDK ([pull #1475](https://github.com/bytedeco/javacpp-presets/pull/1475))
-* Upgrade presets for OpenCV 4.10.0, FFmpeg 7.1, Spinnaker 4.0.0.116 ([pull #1524](https://github.com/bytedeco/javacpp-presets/pull/1524)), MKL 2024.2, DNNL 3.6, OpenBLAS 0.3.28, CMINPACK 1.3.11, GSL 2.8, CPython 3.13.0, NumPy 2.1.2, SciPy 1.14.1, LLVM 19.1.2, LibRaw 0.21.2 ([pull #1520](https://github.com/bytedeco/javacpp-presets/pull/1520)), Leptonica 1.85.0, Tesseract 5.4.1, libffi 3.4.6, CUDA 12.6.2, cuDNN 9.5.1, NCCL 2.23.4, nvCOMP 4.0.1, OpenCL 3.0.16, NVIDIA Video Codec SDK 12.2.72, PyTorch 2.5.0 ([pull #1466](https://github.com/bytedeco/javacpp-presets/pull/1466)), SentencePiece 0.2.0, TensorFlow Lite 2.18.0, TensorRT 10.5.0.18, Triton Inference Server 2.50.0, ONNX 1.17.0, ONNX Runtime 1.19.2, TVM 0.18.0, and their dependencies
+* Upgrade presets for OpenCV 4.10.0, FFmpeg 7.1, Spinnaker 4.0.0.116 ([pull #1524](https://github.com/bytedeco/javacpp-presets/pull/1524)), MKL 2025.0, DNNL 3.6, OpenBLAS 0.3.28, CMINPACK 1.3.11, GSL 2.8, CPython 3.13.0, NumPy 2.1.2, SciPy 1.14.1, LLVM 19.1.3, LibRaw 0.21.2 ([pull #1520](https://github.com/bytedeco/javacpp-presets/pull/1520)), Leptonica 1.85.0, Tesseract 5.4.1, libffi 3.4.6, CUDA 12.6.2, cuDNN 9.5.1, NCCL 2.23.4, nvCOMP 4.1.0.6, OpenCL 3.0.16, NVIDIA Video Codec SDK 12.2.72, PyTorch 2.5.1 ([pull #1466](https://github.com/bytedeco/javacpp-presets/pull/1466)), SentencePiece 0.2.0, TensorFlow Lite 2.18.0, TensorRT 10.5.0.18, Triton Inference Server 2.51.0, ONNX 1.17.0, ONNX Runtime 1.19.2, TVM 0.18.0, and their dependencies

### January 29, 2024 version 1.5.10
* Introduce `macosx-arm64` builds for PyTorch ([pull #1463](https://github.com/bytedeco/javacpp-presets/pull/1463))
6 changes: 3 additions & 3 deletions README.md
@@ -196,7 +196,7 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip
* HDF5 1.14.x https://www.hdfgroup.org/downloads/
* Hyperscan 5.4.x https://github.com/intel/hyperscan
* LZ4 1.9.x https://github.com/lz4/lz4
-* MKL 2024.x https://software.intel.com/mkl
+* MKL 2025.x https://software.intel.com/mkl
* MKL-DNN 0.21.x https://github.com/oneapi-src/oneDNN
* DNNL 3.6.x https://github.com/oneapi-src/oneDNN
* OpenBLAS 0.3.28 http://www.openblas.net/
@@ -219,7 +219,7 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip
* CUDA 12.6.x https://developer.nvidia.com/cuda-downloads
* cuDNN 9.5.x https://developer.nvidia.com/cudnn
* NCCL 2.23.x https://developer.nvidia.com/nccl
-* nvCOMP 4.0.x https://developer.nvidia.com/nvcomp
+* nvCOMP 4.1.x https://developer.nvidia.com/nvcomp
* NVIDIA Video Codec SDK 12.2.x https://developer.nvidia.com/nvidia-video-codec-sdk
* OpenCL 3.0.x https://github.com/KhronosGroup/OpenCL-ICD-Loader
* MXNet 1.9.x https://github.com/apache/incubator-mxnet
@@ -228,7 +228,7 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip
* TensorFlow 1.15.x https://github.com/tensorflow/tensorflow
* TensorFlow Lite 2.18.x https://github.com/tensorflow/tensorflow
* TensorRT 10.5.x https://developer.nvidia.com/tensorrt
-* Triton Inference Server 2.50.x https://developer.nvidia.com/nvidia-triton-inference-server
+* Triton Inference Server 2.51.x https://developer.nvidia.com/nvidia-triton-inference-server
* The Arcade Learning Environment 0.8.x https://github.com/mgbellemare/Arcade-Learning-Environment
* DepthAI 2.24.x https://github.com/luxonis/depthai-core
* ONNX 1.17.x https://github.com/onnx/onnx
2 changes: 1 addition & 1 deletion cuda/README.md
@@ -28,7 +28,7 @@ This directory contains the JavaCPP Presets module for:
* CUDA 12.6.2 https://developer.nvidia.com/cuda-zone
* cuDNN 9.5.1 https://developer.nvidia.com/cudnn
* NCCL 2.23.4 https://developer.nvidia.com/nccl
-* nvCOMP 4.0.1 https://developer.nvidia.com/nvcomp
+* nvCOMP 4.1.0.6 https://developer.nvidia.com/nvcomp

Please refer to the parent README.md file for more detailed information about the JavaCPP Presets.

2 changes: 1 addition & 1 deletion cuda/src/gen/java/org/bytedeco/cuda/global/cublas.java
@@ -15038,7 +15038,7 @@ public static native void cublasZtrmm(@Cast("char") byte side,
// #include "driver_types.h"
// #include "cuComplex.h" /* import complex data type */

// #include "cublas_v2.h"
// #include "cublas_api.h"

// #if defined(__cplusplus)
// Targeting ../cublas/cublasXtContext.java
4 changes: 2 additions & 2 deletions cuda/src/gen/java/org/bytedeco/cuda/global/cusolver.java
@@ -370,7 +370,7 @@ public class cusolver extends org.bytedeco.cuda.presets.cusolver {
// #include <stdio.h>

// #include "cuComplex.h" /* import complex data type */
// #include "cublas_v2.h"
// #include "cublas_api.h"
// #include "cusolver_common.h"

/*******************************************************************************/
@@ -14391,7 +14391,7 @@ public class cusolver extends org.bytedeco.cuda.presets.cusolver {
// #define CUSOLVERSP_H_

// #include "cusparse.h"
// #include "cublas_v2.h"
// #include "cublas_api.h"
// #include "cusolver_common.h"

// #if defined(__cplusplus)
