From 52a829dad24f5e7431c2245c2fa0daa73d774904 Mon Sep 17 00:00:00 2001 From: Samuel Audet Date: Thu, 31 Oct 2024 16:58:36 +0900 Subject: [PATCH] * Upgrade presets for MKL 2025.0, LLVM 19.1.3, nvCOMP 4.1.0.6, PyTorch 2.5.1, Triton Inference Server 2.51.0 --- .github/actions/deploy-ubuntu/action.yml | 14 +- .github/actions/deploy-windows/action.yml | 18 +- .github/workflows/tritonserver.yml | 2 +- CHANGELOG.md | 2 +- README.md | 6 +- cuda/README.md | 2 +- .../java/org/bytedeco/cuda/global/cublas.java | 2 +- .../org/bytedeco/cuda/global/cusolver.java | 4 +- .../java/org/bytedeco/cuda/global/nvcomp.java | 755 ++++++++++++++---- .../bytedeco/cuda/nvcomp/BitcompManager.java | 16 +- .../cuda/nvcomp/DeflateFormatSpecHeader.java | 1 + .../nvcomp/nvcompAlignmentRequirements_t.java | 45 ++ ...s.java => nvcompBatchedBitcompOpts_t.java} | 24 +- .../nvcomp/nvcompBatchedCascadedOpts_t.java | 3 +- .../nvcomp/nvcompBatchedDeflateOpts_t.java | 1 + .../cuda/nvcomp/nvcompBatchedGzipOpts_t.java | 38 + .../cuda/nvcomp/nvcompBatchedLZ4Opts_t.java | 5 + .../cuda/nvcomp/nvcompCascadedFormatOpts.java | 52 -- .../cuda/nvcomp/nvcompLZ4FormatOpts.java | 49 -- .../org/bytedeco/ffmpeg/global/postproc.java | 2 +- gsl/README.md | 2 +- gsl/samples/pom.xml | 2 +- leptonica/cppbuild.sh | 2 +- llvm/README.md | 4 +- llvm/cppbuild.sh | 2 +- llvm/platform/pom.xml | 2 +- llvm/pom.xml | 3 +- llvm/samples/clang/pom.xml | 2 +- llvm/samples/llvm/pom.xml | 2 +- llvm/samples/polly/pom.xml | 6 +- .../llvm/LLVM/LLVMOpaqueDbgRecord.java | 23 - mkl/README.md | 6 +- mkl/platform/pom.xml | 2 +- mkl/platform/redist/pom.xml | 2 +- mkl/pom.xml | 2 +- mkl/samples/pom.xml | 4 +- .../java/org/bytedeco/mkl/global/mkl_rt.java | 217 ++++- numpy/README.md | 2 +- numpy/samples/pom.xml | 2 +- opencv/README.md | 2 +- opencv/samples/pom.xml | 2 +- platform/pom.xml | 8 +- pytorch/README.md | 8 +- pytorch/cppbuild.sh | 2 +- pytorch/platform/gpu/pom.xml | 2 +- pytorch/platform/pom.xml | 2 +- pytorch/pom.xml | 2 +- pytorch/samples/pom.xml | 6 +- scipy/README.md | 2 +- scipy/samples/pom.xml | 2 +- tritonserver/README.md | 10 +- tritonserver/cppbuild.sh | 4 +- tritonserver/platform/pom.xml | 2 +- tritonserver/platform/redist/pom.xml | 2 +- tritonserver/pom.xml | 2 +- tritonserver/samples/simple/pom.xml | 2 +- tritonserver/samples/simplecpp/pom.xml | 2 +- tritonserver/samples/unsupported/pom.xml | 2 +- tvm/README.md | 2 +- tvm/cppbuild.sh | 2 +- tvm/platform/gpu/pom.xml | 2 +- tvm/platform/pom.xml | 2 +- tvm/pom.xml | 4 +- tvm/samples/pom.xml | 2 +- 64 files changed, 992 insertions(+), 414 deletions(-) create mode 100644 cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompAlignmentRequirements_t.java rename cuda/src/gen/java/org/bytedeco/cuda/nvcomp/{nvcompBatchedBitcompFormatOpts.java => nvcompBatchedBitcompOpts_t.java} (64%) create mode 100644 cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedGzipOpts_t.java delete mode 100644 cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompCascadedFormatOpts.java delete mode 100644 cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompLZ4FormatOpts.java delete mode 100644 llvm/src/gen/java/org/bytedeco/llvm/LLVM/LLVMOpaqueDbgRecord.java diff --git a/.github/actions/deploy-ubuntu/action.yml b/.github/actions/deploy-ubuntu/action.yml index 14b944e8f23..58334141cb3 100644 --- a/.github/actions/deploy-ubuntu/action.yml +++ b/.github/actions/deploy-ubuntu/action.yml @@ -44,7 +44,7 @@ runs: export CUDA=cuda-repo-rhel8-12-6-local-12.6.2_560.35.03-1.aarch64.rpm export CUDNN=cuda-12-9.5.1.17-1.aarch64 export 
NCCL=2.23.4-1+cuda12.6.aarch64 - export NVCOMP=nvcomp-linux-sbsa-4.0.1-cuda12.x + export NVCOMP=nvcomp-linux-sbsa-4.1.0.6_cuda12-archive export USERLAND_BUILDME="buildme --aarch64" elif [[ "$CI_DEPLOY_PLATFORM" == "linux-ppc64le" ]]; then export ARCH=ppc64el @@ -66,7 +66,7 @@ runs: export CUDA=cuda-repo-rhel8-12-6-local-12.6.2_560.35.03-1.x86_64.rpm export CUDNN=cuda-12-9.5.1.17-1.x86_64 export NCCL=2.23.4-1+cuda12.6.x86_64 - export NVCOMP=nvcomp-linux-x86_64-4.0.1-cuda12.x + export NVCOMP=nvcomp-linux-x86_64-4.1.0.6_cuda12-archive fi echo "ARCH=$ARCH" >> $GITHUB_ENV echo "PREFIX=$PREFIX" >> $GITHUB_ENV @@ -183,9 +183,9 @@ runs: for f in /usr/local/cuda/lib64/libcudnn*so.9.*; do $SUDO ln -sf $f ${f:0:${#f}-4}; $SUDO ln -sf $f ${f:0:${#f}-6}; done if [[ -n ${NVCOMP:-} ]]; then - curl -LO https://developer.download.nvidia.com/compute/nvcomp/4.0.1/local_installers/$NVCOMP.tar.gz - $SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/lib64/ --strip-components=1 lib/ || $SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/lib64/ --strip-components=2 nvcomp/lib/ - $SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/include/ --strip-components=1 include/ || $SUDO tar -xvf $NVCOMP.tar.gz -C /usr/local/cuda/include/ --strip-components=2 nvcomp/include/ + curl -LO https://developer.download.nvidia.com/compute/nvcomp/redist/nvcomp/linux-$ARCH_CUDA/$NVCOMP.tar.xz + $SUDO tar -xvf $NVCOMP.tar.xz -C /usr/local/cuda/lib64/ --strip-components=2 */lib/ + $SUDO tar -xvf $NVCOMP.tar.xz -C /usr/local/cuda/include/ --strip-components=2 */include/ rm -f $NVCOMP.tar.gz fi @@ -228,8 +228,8 @@ runs: if [[ "$CI_DEPLOY_MODULE" == "mkl" ]]; then echo Installing MKL - curl -LO https://registrationcenter-download.intel.com/akdlm/IRC_NAS/89a381f6-f85d-4dda-ae62-30d51470f53c/l_onemkl_p_2024.2.2.17_offline.sh - $SUDO bash l_onemkl_p_2024.2.2.17_offline.sh -s -a -s --eula accept + curl -LO https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940_offline.sh + $SUDO bash intel-onemkl-2025.0.0.940_offline.sh -s -a -s --eula accept export MAVEN_OPTIONS="-Djavacpp.platform.compiler=clang++" fi diff --git a/.github/actions/deploy-windows/action.yml b/.github/actions/deploy-windows/action.yml index 1869894efa2..f795b0b0175 100644 --- a/.github/actions/deploy-windows/action.yml +++ b/.github/actions/deploy-windows/action.yml @@ -102,7 +102,7 @@ runs: curl -LO https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/cuda_12.6.2_560.94_windows.exe curl -LO https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.5.1.17_cuda12-archive.zip curl -LO http://www.winimage.com/zLibDll/zlib123dllx64.zip - curl -LO https://developer.download.nvidia.com/compute/nvcomp/4.0.1/local_installers/nvcomp-windows-x86_64-4.0.1-cuda12.x.zip + curl -LO https://developer.download.nvidia.com/compute/nvcomp/redist/nvcomp/windows-x86_64/nvcomp-windows-x86_64-4.1.0.6_cuda12-archive.zip cuda_11.8.0_522.06_windows.exe -s bash -c "rm -Rf 'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8'" bash -c "mv 'C:/Program Files/NVIDIA Corporation/NvToolsExt' 'C:/Program Files/NVIDIA Corporation/NvToolsExt_old'" @@ -111,17 +111,15 @@ runs: bash -c "ls 'C:/Program Files/NVIDIA Corporation/NvToolsExt'" unzip cudnn-windows-x86_64-9.5.1.17_cuda12-archive.zip unzip zlib123dllx64.zip - unzip nvcomp-windows-x86_64-4.0.1-cuda12.x.zip + unzip nvcomp-windows-x86_64-4.1.0.6_cuda12-archive.zip move cudnn-windows-x86_64-9.5.1.17_cuda12-archive\bin\*.dll 
"%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin" move cudnn-windows-x86_64-9.5.1.17_cuda12-archive\include\*.h "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include" move cudnn-windows-x86_64-9.5.1.17_cuda12-archive\lib\x64\*.lib "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\lib\x64" move dll_x64\zlibwapi.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin" - move nvcomp\include\* "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include" - move nvcomp\include\device "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include" - move nvcomp\include\native "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include" - move nvcomp\include\nvcomp "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include" - move nvcomp\lib\nvcomp*.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin" - move nvcomp\lib\nvcomp*.lib "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\lib\x64" + move nvcomp-windows-x86_64-4.1.0.6_cuda12-archive\include\* "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include" + move nvcomp-windows-x86_64-4.1.0.6_cuda12-archive\include\nvcomp "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include" + move nvcomp-windows-x86_64-4.1.0.6_cuda12-archive\bin\nvcomp*.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin" + move nvcomp-windows-x86_64-4.1.0.6_cuda12-archive\bin\nvcomp*.lib "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.6\lib\x64" rem echo Applying hotfix to Visual Studio 2019 for CUDA rem curl -LO https://raw.githubusercontent.com/microsoft/STL/main/stl/inc/cmath @@ -151,8 +149,8 @@ runs: if "%CI_DEPLOY_MODULE%"=="mkl" ( echo Installing MKL - curl -LO https://registrationcenter-download.intel.com/akdlm/IRC_NAS/9fe96489-78fe-4fea-8cc2-2ddf7de0246a/w_onemkl_p_2024.2.2.16_offline.exe - w_onemkl_p_2024.2.2.16_offline.exe -s -a -s --eula accept + curl -LO https://registrationcenter-download.intel.com/akdlm/IRC_NAS/e0a45889-f395-47d6-811d-0f3d8caae4a0/intel-onemkl-2025.0.0.929_offline.exe + intel-onemkl-2025.0.0.929_offline.exe -s -a -s --eula accept ) if "%CI_DEPLOY_PLATFORM%"=="windows-x86" if "%CI_DEPLOY_MODULE%"=="flycapture" ( diff --git a/.github/workflows/tritonserver.yml b/.github/workflows/tritonserver.yml index 9c1cfa0c286..6dbe2418a02 100644 --- a/.github/workflows/tritonserver.yml +++ b/.github/workflows/tritonserver.yml @@ -19,6 +19,6 @@ env: jobs: linux-x86_64: runs-on: ubuntu-20.04 - container: nvcr.io/nvidia/tritonserver:24.09-tf2-python-py3 + container: nvcr.io/nvidia/tritonserver:24.10-tf2-python-py3 steps: - uses: bytedeco/javacpp-presets/.github/actions/deploy-ubuntu@actions diff --git a/CHANGELOG.md b/CHANGELOG.md index 78f7f723aa6..e70e3dc4004 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ * Build FFmpeg with zimg to enable zscale filter ([pull #1481](https://github.com/bytedeco/javacpp-presets/pull/1481)) * Enable PulseAudio support for FFmpeg on Linux ([pull #1472](https://github.com/bytedeco/javacpp-presets/pull/1472)) * Virtualize `btCollisionWorld`, `btOverlapFilterCallback`, `btOverlapCallback` from Bullet Physics SDK ([pull #1475](https://github.com/bytedeco/javacpp-presets/pull/1475)) - * Upgrade presets for OpenCV 4.10.0, FFmpeg 7.1, Spinnaker 4.0.0.116 ([pull #1524](https://github.com/bytedeco/javacpp-presets/pull/1524)), MKL 2024.2, DNNL 3.6, OpenBLAS 0.3.28, CMINPACK 1.3.11, GSL 2.8, CPython 3.13.0, NumPy 2.1.2, SciPy 1.14.1, LLVM 19.1.2, LibRaw 0.21.2 ([pull 
#1520](https://github.com/bytedeco/javacpp-presets/pull/1520)), Leptonica 1.85.0, Tesseract 5.4.1, libffi 3.4.6, CUDA 12.6.2, cuDNN 9.5.1, NCCL 2.23.4, nvCOMP 4.0.1, OpenCL 3.0.16, NVIDIA Video Codec SDK 12.2.72, PyTorch 2.5.0 ([pull #1466](https://github.com/bytedeco/javacpp-presets/pull/1466)), SentencePiece 0.2.0, TensorFlow Lite 2.18.0, TensorRT 10.5.0.18, Triton Inference Server 2.50.0, ONNX 1.17.0, ONNX Runtime 1.19.2, TVM 0.18.0, and their dependencies + * Upgrade presets for OpenCV 4.10.0, FFmpeg 7.1, Spinnaker 4.0.0.116 ([pull #1524](https://github.com/bytedeco/javacpp-presets/pull/1524)), MKL 2025.0, DNNL 3.6, OpenBLAS 0.3.28, CMINPACK 1.3.11, GSL 2.8, CPython 3.13.0, NumPy 2.1.2, SciPy 1.14.1, LLVM 19.1.3, LibRaw 0.21.2 ([pull #1520](https://github.com/bytedeco/javacpp-presets/pull/1520)), Leptonica 1.85.0, Tesseract 5.4.1, libffi 3.4.6, CUDA 12.6.2, cuDNN 9.5.1, NCCL 2.23.4, nvCOMP 4.1.0.6, OpenCL 3.0.16, NVIDIA Video Codec SDK 12.2.72, PyTorch 2.5.1 ([pull #1466](https://github.com/bytedeco/javacpp-presets/pull/1466)), SentencePiece 0.2.0, TensorFlow Lite 2.18.0, TensorRT 10.5.0.18, Triton Inference Server 2.51.0, ONNX 1.17.0, ONNX Runtime 1.19.2, TVM 0.18.0, and their dependencies ### January 29, 2024 version 1.5.10 * Introduce `macosx-arm64` builds for PyTorch ([pull #1463](https://github.com/bytedeco/javacpp-presets/pull/1463)) diff --git a/README.md b/README.md index 8f477741dc8..f6cc649e5f9 100644 --- a/README.md +++ b/README.md @@ -196,7 +196,7 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip * HDF5 1.14.x https://www.hdfgroup.org/downloads/ * Hyperscan 5.4.x https://github.com/intel/hyperscan * LZ4 1.9.x https://github.com/lz4/lz4 - * MKL 2024.x https://software.intel.com/mkl + * MKL 2025.x https://software.intel.com/mkl * MKL-DNN 0.21.x https://github.com/oneapi-src/oneDNN * DNNL 3.6.x https://github.com/oneapi-src/oneDNN * OpenBLAS 0.3.28 http://www.openblas.net/ @@ -219,7 +219,7 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip * CUDA 12.6.x https://developer.nvidia.com/cuda-downloads * cuDNN 9.5.x https://developer.nvidia.com/cudnn * NCCL 2.23.x https://developer.nvidia.com/nccl - * nvCOMP 4.0.x https://developer.nvidia.com/nvcomp + * nvCOMP 4.1.x https://developer.nvidia.com/nvcomp * NVIDIA Video Codec SDK 12.2.x https://developer.nvidia.com/nvidia-video-codec-sdk * OpenCL 3.0.x https://github.com/KhronosGroup/OpenCL-ICD-Loader * MXNet 1.9.x https://github.com/apache/incubator-mxnet @@ -228,7 +228,7 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip * TensorFlow 1.15.x https://github.com/tensorflow/tensorflow * TensorFlow Lite 2.18.x https://github.com/tensorflow/tensorflow * TensorRT 10.5.x https://developer.nvidia.com/tensorrt - * Triton Inference Server 2.50.x https://developer.nvidia.com/nvidia-triton-inference-server + * Triton Inference Server 2.51.x https://developer.nvidia.com/nvidia-triton-inference-server * The Arcade Learning Environment 0.8.x https://github.com/mgbellemare/Arcade-Learning-Environment * DepthAI 2.24.x https://github.com/luxonis/depthai-core * ONNX 1.17.x https://github.com/onnx/onnx diff --git a/cuda/README.md b/cuda/README.md index 598d815f7b7..cedef43f833 100644 --- a/cuda/README.md +++ b/cuda/README.md @@ -28,7 +28,7 @@ This directory contains the JavaCPP Presets module for: * CUDA 12.6.2 https://developer.nvidia.com/cuda-zone * cuDNN 9.5.1 https://developer.nvidia.com/cudnn * NCCL 2.23.4 https://developer.nvidia.com/nccl - * nvCOMP 
4.0.1 https://developer.nvidia.com/nvcomp + * nvCOMP 4.1.0.6 https://developer.nvidia.com/nvcomp Please refer to the parent README.md file for more detailed information about the JavaCPP Presets. diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cublas.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cublas.java index 8faa8693ef9..9210cc96b78 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/global/cublas.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cublas.java @@ -15038,7 +15038,7 @@ public static native void cublasZtrmm(@Cast("char") byte side, // #include "driver_types.h" // #include "cuComplex.h" /* import complex data type */ -// #include "cublas_v2.h" +// #include "cublas_api.h" // #if defined(__cplusplus) // Targeting ../cublas/cublasXtContext.java diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cusolver.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cusolver.java index a6b20b65e17..d118d4f25dc 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/global/cusolver.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/global/cusolver.java @@ -370,7 +370,7 @@ public class cusolver extends org.bytedeco.cuda.presets.cusolver { // #include // #include "cuComplex.h" /* import complex data type */ -// #include "cublas_v2.h" +// #include "cublas_api.h" // #include "cusolver_common.h" /*******************************************************************************/ @@ -14391,7 +14391,7 @@ public class cusolver extends org.bytedeco.cuda.presets.cusolver { // #define CUSOLVERSP_H_ // #include "cusparse.h" -// #include "cublas_v2.h" +// #include "cublas_api.h" // #include "cusolver_common.h" // #if defined(__cplusplus) diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/nvcomp.java b/cuda/src/gen/java/org/bytedeco/cuda/global/nvcomp.java index 5a486259ac9..c59cbb5f301 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/global/nvcomp.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/global/nvcomp.java @@ -31,6 +31,8 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { // #pragma once +// #include + /** enum nvcompStatus_t */ public static final int nvcompSuccess = 0, @@ -45,6 +47,9 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { nvcompErrorChunkSizeTooLarge = 18, nvcompErrorCudaError = 1000, nvcompErrorInternal = 10000; +// Targeting ../nvcomp/nvcompAlignmentRequirements_t.java + + // Parsed from @@ -291,6 +296,7 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { // #include "cascaded.hpp" // #include "zstd.hpp" // #include "deflate.hpp" +// #include "gzip.hpp" // #include // #include @@ -371,7 +377,7 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { /** enum nvcompANSDataType_t */ public static final int uint8 = 0, - float16 = 1; // requires uncomp chunk size to be multiple of 2 + float16 = 1; // Targeting ../nvcomp/nvcompBatchedANSOpts_t.java @@ -381,12 +387,25 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { @MemberGetter public static native @Cast("const size_t") long nvcompANSCompressionMaxAllowedChunkSize(); /** - * This is the minimum alignment required for void type CUDA memory buffers - * passed to compression or decompression functions. Typed memory buffers must - * still be aligned to their type's size, e.g. 8 bytes for size_t. + * The most restrictive of minimum alignment requirements for void-type CUDA memory buffers + * used for input, output, or temporary memory, passed to compression or decompression functions. 
+ * In all cases, typed memory buffers must still be aligned to their type's size, e.g., 4 bytes for {@code int}. */ @MemberGetter public static native @Cast("const size_t") long nvcompANSRequiredAlignment(); +/** + * \brief Get the minimum buffer alignment requirements for compression. + * + * @param format_opts [in] Compression options. + * @param alignment_requirements [out] The minimum buffer alignment requirements + * for compression. + * + * @return nvcompSuccess if successful, and an error code otherwise. + */ +public static native @Cast("nvcompStatus_t") int nvcompBatchedANSCompressGetRequiredAlignments( + @ByVal nvcompBatchedANSOpts_t format_opts, + nvcompAlignmentRequirements_t alignment_requirements); + /** * \brief Get the amount of temporary memory required on the GPU for compression. * @@ -446,13 +465,16 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { /** * \brief Perform batched asynchronous compression. * - * The caller is responsible for passing device_compressed_chunk_bytes of size - * sufficient to hold compressed data + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. * * @param device_uncompressed_chunk_ptrs [in] Array with size \p num_chunks of pointers * to the uncompressed data chunks. Both the pointers and the uncompressed data * should reside in device-accessible memory. - * Each pointer must be aligned to an 8-byte boundary. + * Each chunk must be aligned to the value in the {@code input} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedANSCompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param device_uncompressed_chunk_bytes [in] Array with size \p num_chunks of * sizes of the uncompressed chunks in bytes. * The sizes should reside in device-accessible memory. @@ -460,6 +482,10 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * @param num_chunks [in] Number of chunks of data to compress. * @param device_temp_ptr [in] The temporary GPU workspace, could be NULL in case * temporary memory is not needed. + * Must be aligned to the value in the {@code temp} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedANSCompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param temp_bytes [in] The size of the temporary GPU memory pointed to by * {@code device_temp_ptr}. * @param device_compressed_chunk_ptrs [out] Array with size \p num_chunks of pointers @@ -467,7 +493,10 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * buffers should reside in device-accessible memory. Each compressed buffer * should be preallocated with the size given by * {@code nvcompBatchedANSCompressGetMaxOutputChunkSize}. - * Each pointer must be aligned to an 8-byte boundary. + * Each compressed buffer must be aligned to the value in the {@code output} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedANSCompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param device_compressed_chunk_bytes [out] Array with size \p num_chunks, * to be filled with the compressed sizes of each chunk. * The buffer should be preallocated in device-accessible memory. @@ -499,6 +528,11 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { @ByVal nvcompBatchedANSOpts_t format_opts, CUstream_st stream); +/** + * Minimum buffer alignment requirements for decompression. 
+ */ +@MemberGetter public static native @Const @ByRef nvcompAlignmentRequirements_t nvcompBatchedANSDecompressRequiredAlignments(); + /** * \brief Get the amount of temporary memory required on the GPU for decompression. * @@ -535,15 +569,20 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * \brief Asynchronously compute the number of bytes of uncompressed data for * each compressed chunk. * + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. + * * @param device_compressed_chunk_ptrs [in] Array with size \p num_chunks of * pointers in device-accessible memory to compressed buffers. + * Each buffer must be aligned to the value in + * {@code nvcompBatchedANSDecompressRequiredAlignments.input}. * @param device_compressed_chunk_bytes [in] Array with size \p num_chunks of sizes * of the compressed buffers in bytes. The sizes should reside in device-accessible memory. * @param device_uncompressed_chunk_bytes [out] Array with size \p num_chunks * to be filled with the sizes, in bytes, of each uncompressed data chunk. * If there is an error when retrieving the size of a chunk, the * uncompressed size of that chunk will be set to 0. This argument needs to - * be prealloated in device-accessible memory. + * be preallocated in device-accessible memory. * @param num_chunks [in] Number of data chunks to compute sizes of. * @param stream [in] The CUDA stream to operate on. * @@ -565,13 +604,16 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { /** * \brief Perform batched asynchronous decompression. * - * NOTE: This function is used to decompress compressed buffers produced by + * This function is used to decompress compressed buffers produced by * {@code nvcompBatchedANSCompressAsync}. * + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. + * * @param device_compressed_chunk_ptrs [in] Array with size \p num_chunks of pointers - * in device-accessible memory to compressed buffers. Each compressed buffer - * should reside in device-accessible memory and start at a location with - * 8-byte alignment. + * in device-accessible memory to device-accessible compressed buffers. + * Each buffer must be aligned to the value in + * {@code nvcompBatchedANSDecompressRequiredAlignments.input}. * @param device_compressed_chunk_bytes [in] Array with size \p num_chunks of sizes of * the compressed buffers in bytes. The sizes should reside in device-accessible memory. * @param device_uncompressed_buffer_bytes [in] Array with size \p num_chunks of sizes, @@ -585,12 +627,14 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * This argument needs to be preallocated. * @param num_chunks [in] Number of chunks of data to decompress. * @param device_temp_ptr [in] The temporary GPU space, could be NULL in case temporary space is not needed. + * Must be aligned to the value in {@code nvcompBatchedANSDecompressRequiredAlignments.temp}. * @param temp_bytes [in] The size of the temporary GPU space. * @param device_uncompressed_chunk_ptrs [out] Array with size \p num_chunks of * pointers in device-accessible memory to decompressed data. Each uncompressed * buffer needs to be preallocated in device-accessible memory, have the size - * specified by the corresponding entry in device_uncompressed_buffer_bytes, - * and start at a location with 8-byte alignment. 
+ * specified by the corresponding entry in \p device_uncompressed_buffer_bytes, + * and be aligned to the value in + * {@code nvcompBatchedANSDecompressRequiredAlignments.output}. * @param device_statuses [out] Array with size \p num_chunks of statuses in * device-accessible memory. This argument needs to be preallocated. For each * chunk, if the decompression is successful, the status will be set to @@ -704,21 +748,36 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { // #include // #ifdef __cplusplus -// Targeting ../nvcomp/nvcompBatchedBitcompFormatOpts.java +// Targeting ../nvcomp/nvcompBatchedBitcompOpts_t.java + +/** Legacy alias for \ref nvcompBatchedBitcompOpts_t. */ -@MemberGetter public static native @Const @ByRef nvcompBatchedBitcompFormatOpts nvcompBatchedBitcompDefaultOpts(); +@MemberGetter public static native @Const @ByRef nvcompBatchedBitcompOpts_t nvcompBatchedBitcompDefaultOpts(); @MemberGetter public static native @Cast("const size_t") long nvcompBitcompCompressionMaxAllowedChunkSize(); /** - * This is the minimum alignment required for void type CUDA memory buffers - * passed to compression or decompression functions. Typed memory buffers must - * still be aligned to their type's size, e.g. 8 bytes for size_t. + * The most restrictive of minimum alignment requirements for void-type CUDA memory buffers + * used for input, output, or temporary memory, passed to compression or decompression functions. + * In all cases, typed memory buffers must still be aligned to their type's size, e.g., 4 bytes for {@code int}. */ @MemberGetter public static native @Cast("const size_t") long nvcompBitcompRequiredAlignment(); +/** + * \brief Get the minimum buffer alignment requirements for compression. + * + * @param format_opts [in] Compression options. + * @param alignment_requirements [out] The minimum buffer alignment requirements + * for compression. + * + * @return nvcompSuccess if successful, and an error code otherwise. + */ +public static native @Cast("nvcompStatus_t") int nvcompBatchedBitcompCompressGetRequiredAlignments( + @ByVal nvcompBatchedBitcompOpts_t format_opts, + nvcompAlignmentRequirements_t alignment_requirements); + /** * \brief Get the amount of temporary memory required on the GPU for compression. * @@ -726,7 +785,8 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * * @param num_chunks [in] The number of chunks of memory in the batch. * @param max_uncompressed_chunk_bytes [in] The maximum size of a chunk in the - * batch. + * batch. This parameter is currently unused. Set it to either the actual value + * or zero. * @param format_opts [in] Compression options. * @param temp_bytes [out] The amount of GPU memory that will be temporarily * required during compression. @@ -736,7 +796,7 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { public static native @Cast("nvcompStatus_t") int nvcompBatchedBitcompCompressGetTempSize( @Cast("size_t") long num_chunks, @Cast("size_t") long max_uncompressed_chunk_bytes, - @ByVal nvcompBatchedBitcompFormatOpts format_opts, + @ByVal nvcompBatchedBitcompOpts_t format_opts, @Cast("size_t*") SizeTPointer temp_bytes); /** @@ -746,8 +806,9 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * NOTE: Bitcomp currently doesn't use any temp memory. * * @param num_chunks [in] The number of chunks of memory in the batch. - * @param max_uncompressed_chunk_bytes [in] The maximum size of a chunk in the - * batch. 
+ * @param max_uncompressed_chunk_bytes [in] The maximum size of a chunk + * in the batch. This parameter is currently unused. Set it to either + * the actual value or zero. * @param format_opts [in] Compression options. * @param temp_bytes [out] The amount of GPU memory that will be temporarily * required during compression. @@ -759,7 +820,7 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { public static native @Cast("nvcompStatus_t") int nvcompBatchedBitcompCompressGetTempSizeEx( @Cast("size_t") long num_chunks, @Cast("size_t") long max_uncompressed_chunk_bytes, - @ByVal nvcompBatchedBitcompFormatOpts format_opts, + @ByVal nvcompBatchedBitcompOpts_t format_opts, @Cast("size_t*") SizeTPointer temp_bytes, @Cast("const size_t") long max_total_uncompressed_bytes); @@ -776,24 +837,30 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { */ public static native @Cast("nvcompStatus_t") int nvcompBatchedBitcompCompressGetMaxOutputChunkSize( @Cast("size_t") long max_uncompressed_chunk_bytes, - @ByVal nvcompBatchedBitcompFormatOpts format_opts, + @ByVal nvcompBatchedBitcompOpts_t format_opts, @Cast("size_t*") SizeTPointer max_compressed_chunk_bytes); /** * \brief Perform batched asynchronous compression. * - * NOTE: The maximum number of chunks allowed is 2^31. + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. * * @param device_uncompressed_chunk_ptrs [in] Array with size \p num_chunks of pointers * to the uncompressed data chunks. Both the pointers and the uncompressed data - * should reside in device-accessible memory. The uncompressed data must start - * at locations with alignments of the data type size. + * should reside in device-accessible memory. + * Each chunk must be aligned to the value in the {@code input} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedBitcompCompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param device_uncompressed_chunk_bytes [in] Array with size \p num_chunks of * sizes of the uncompressed chunks in bytes. * The sizes should reside in device-accessible memory. - * Each chunk size MUST be a multiple of the size of the data type specified by - * format_opts.data_type, else this may crash or produce invalid output. - * @param max_uncompressed_chunk_bytes [in] This argument is not used. + * Each chunk size must be a multiple of the size of the data type specified by + * format_opts.data_type. + * @param max_uncompressed_chunk_bytes [in] The maximum size of a chunk in the + * batch. This parameter is currently unused. + * Set it to either the actual value or zero. * @param num_chunks [in] Number of chunks of data to compress. * @param device_temp_ptr [in] This argument is not used. * @param temp_bytes [in] This argument is not used. @@ -802,7 +869,10 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * buffers should reside in device-accessible memory. Each compressed buffer * should be preallocated with the size given by * {@code nvcompBatchedBitcompCompressGetMaxOutputChunkSize}. - * Each compressed buffer should start at a location with 8-byte alignment. + * Each compressed buffer must be aligned to the value in the {@code output} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedBitcompCompressGetRequiredAlignments} when called with the same + * \p format_opts. 
* @param device_compressed_chunk_bytes [out] Array with size \p num_chunks, * to be filled with the compressed sizes of each chunk. * The buffer should be preallocated in device-accessible memory. @@ -820,7 +890,7 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { @Cast("size_t") long temp_bytes, @Cast("void*const*") PointerPointer device_compressed_chunk_ptrs, @Cast("size_t*") SizeTPointer device_compressed_chunk_bytes, - @ByVal nvcompBatchedBitcompFormatOpts format_opts, + @ByVal nvcompBatchedBitcompOpts_t format_opts, CUstream_st stream); public static native @Cast("nvcompStatus_t") int nvcompBatchedBitcompCompressAsync( @Cast("const void*const*") @ByPtrPtr Pointer device_uncompressed_chunk_ptrs, @@ -831,9 +901,14 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { @Cast("size_t") long temp_bytes, @Cast("void*const*") @ByPtrPtr Pointer device_compressed_chunk_ptrs, @Cast("size_t*") SizeTPointer device_compressed_chunk_bytes, - @ByVal nvcompBatchedBitcompFormatOpts format_opts, + @ByVal nvcompBatchedBitcompOpts_t format_opts, CUstream_st stream); +/** + * Minimum buffer alignment requirements for decompression. + */ +@MemberGetter public static native @Const @ByRef nvcompAlignmentRequirements_t nvcompBatchedBitcompDecompressRequiredAlignments(); + /** * \brief Get the amount of temporary memory required on the GPU for decompression. * @@ -871,14 +946,19 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * \brief Asynchronously compute the number of bytes of uncompressed data for * each compressed chunk. * + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. + * * @param device_compressed_chunk_ptrs [in] Array with size \p num_chunks of * pointers in device-accessible memory to compressed buffers. + * Each buffer must be aligned to the value in + * {@code nvcompBatchedBitcompDecompressRequiredAlignments.input}. * @param device_compressed_chunk_bytes [in] This argument is not used. * @param device_uncompressed_chunk_bytes [out] Array with size \p num_chunks * to be filled with the sizes, in bytes, of each uncompressed data chunk. * If there is an error when retrieving the size of a chunk, the * uncompressed size of that chunk will be set to 0. This argument needs to - * be prealloated in device-accessible memory. + * be preallocated in device-accessible memory. * @param num_chunks [in] Number of data chunks to compute sizes of. * @param stream [in] The CUDA stream to operate on. * @@ -900,19 +980,25 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { /** * \brief Perform batched asynchronous decompression. * - * NOTE: This function is used to decompress compressed buffers produced by + * This function is used to decompress compressed buffers produced by * {@code nvcompBatchedBitcompCompressAsync}. It can also decompress buffers * compressed with the standalone Bitcomp library. * - * NOTE: The function is not completely asynchronous, as it needs to look + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. + * + * \note The function is not completely asynchronous, as it needs to look * at the compressed data in order to create the proper bitcomp handle. * The stream is synchronized, the data is examined, then the asynchronous * decompression is launched. 
+ * + * \note An asynchronous, faster version of batched Bitcomp asynchrnous decompression + * is available, and can be launched via the HLIF manager. * * @param device_compressed_chunk_ptrs [in] Array with size \p num_chunks of pointers - * in device-accessible memory to compressed buffers. Each compressed buffer - * should reside in device-accessible memory and start at a location with - * 8-byte alignment. + * in device-accessible memory to device-accessible compressed buffers. + * Each buffer must be aligned to the value in + * {@code nvcompBatchedBitcompDecompressRequiredAlignments.input}. * @param device_compressed_chunk_bytes [in] This argument is not used. * @param device_uncompressed_buffer_bytes [in] Array with size \p num_chunks of sizes, * in bytes, of the output buffers to be filled with uncompressed data for each chunk. @@ -929,7 +1015,9 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * @param device_uncompressed_chunk_ptrs [out] Array with size \p num_chunks of * pointers in device-accessible memory to decompressed data. Each uncompressed * buffer needs to be preallocated in device-accessible memory, have the size - * specified by the corresponding entry in device_uncompressed_buffer_bytes. + * specified by the corresponding entry in \p device_uncompressed_buffer_bytes, + * and be aligned to the value in + * {@code nvcompBatchedBitcompDecompressRequiredAlignments.output}. * @param device_statuses [out] Array with size \p num_chunks of statuses in * device-accessible memory. This argument needs to be preallocated. For each * chunk, if the decompression is successful, the status will be set to @@ -1041,9 +1129,6 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { // #include // #ifdef __cplusplus -// Targeting ../nvcomp/nvcompCascadedFormatOpts.java - - // Targeting ../nvcomp/nvcompBatchedCascadedOpts_t.java @@ -1054,12 +1139,25 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { @MemberGetter public static native @Cast("const size_t") long nvcompCascadedCompressionMaxAllowedChunkSize(); /** - * This is the minimum alignment required for void type CUDA memory buffers - * passed to compression or decompression functions. Typed memory buffers must - * still be aligned to their type's size, e.g. 8 bytes for size_t. + * The most restrictive of minimum alignment requirements for void-type CUDA memory buffers + * used for input, output, or temporary memory, passed to compression or decompression functions. + * In all cases, typed memory buffers must still be aligned to their type's size, e.g., 4 bytes for {@code int}. */ @MemberGetter public static native @Cast("const size_t") long nvcompCascadedRequiredAlignment(); +/** + * \brief Get the minimum buffer alignment requirements for compression. + * + * @param format_opts [in] Compression options. + * @param alignment_requirements [out] The minimum buffer alignment requirements + * for compression. + * + * @return nvcompSuccess if successful, and an error code otherwise. + */ +public static native @Cast("nvcompStatus_t") int nvcompBatchedCascadedCompressGetRequiredAlignments( + @ByVal nvcompBatchedCascadedOpts_t format_opts, + nvcompAlignmentRequirements_t alignment_requirements); + /** * \brief Get the amount of temporary memory required on the GPU for compression. * @@ -1068,7 +1166,8 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * * @param num_chunks [in] The number of chunks of memory in the batch. 
* @param max_uncompressed_chunk_bytes [in] The maximum size of a chunk in the - * batch. + * batch. This parameter is currently unused. Set it to either the actual value + * or zero. * @param format_opts [in] The Cascaded compression options and datatype to use. * @param temp_bytes [out] The amount of GPU memory that will be temporarily * required during compression. @@ -1090,7 +1189,8 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * * @param num_chunks [in] The number of chunks of memory in the batch. * @param max_uncompressed_chunk_bytes [in] The maximum size of a chunk in the - * batch. + * batch. This parameter is currently unused. Set it to either the actual value + * or zero. * @param format_opts [in] The Cascaded compression options and datatype to use. * @param temp_bytes [out] The amount of GPU memory that will be temporarily * required during compression. @@ -1128,16 +1228,24 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * \note The current implementation does not support uncompressed size larger * than 4,294,967,295 bytes (max uint32_t). * + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. + * * @param device_uncompressed_chunk_ptrs [in] Array with size \p num_chunks of pointers * to the uncompressed data chunks. Both the pointers and the uncompressed data - * should reside in device-accessible memory. The uncompressed data must start - * at locations with alignments of the data type size. + * should reside in device-accessible memory. + * Each chunk must be aligned to the value in the {@code input} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedCascadedCompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param device_uncompressed_chunk_bytes [in] Array with size \p num_chunks of * sizes of the uncompressed chunks in bytes. * The sizes should reside in device-accessible memory. - * Each chunk size MUST be a multiple of the size of the data type specified by + * Each chunk size must be a multiple of the size of the data type specified by * format_opts.type, else this may crash or produce invalid output. - * @param max_uncompressed_chunk_bytes [in] This argument is not used. + * @param max_uncompressed_chunk_bytes [in] The size of the largest uncompressed chunk. + * This parameter is currently unused. Set it to either the actual value + * or zero. * @param num_chunks [in] Number of chunks of data to compress. * @param device_temp_ptr [in] This argument is not used. * @param temp_bytes [in] This argument is not used. @@ -1145,9 +1253,11 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * to the output compressed buffers. Both the pointers and the compressed * buffers should reside in device-accessible memory. Each compressed buffer * should be preallocated with the size given by - * {@code nvcompBatchedCascadedCompressGetMaxOutputChunkSize}. Each - * compressed buffer should start at a location with alignment of both 4B and - * the data type. + * {@code nvcompBatchedCascadedCompressGetMaxOutputChunkSize}. + * Each compressed buffer must be aligned to the value in the {@code output} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedCascadedCompressGetRequiredAlignments} when called with the same + * \p format_opts. 
* @param device_compressed_chunk_bytes [out] Array with size \p num_chunks, * to be filled with the compressed sizes of each chunk. * The buffer should be preallocated in device-accessible memory. @@ -1179,6 +1289,11 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { @ByVal nvcompBatchedCascadedOpts_t format_opts, CUstream_st stream); +/** + * Minimum buffer alignment requirements for decompression. + */ +@MemberGetter public static native @Const @ByRef nvcompAlignmentRequirements_t nvcompBatchedCascadedDecompressRequiredAlignments(); + /** * \brief Get the amount of temporary memory required on the GPU for decompression. * @@ -1215,15 +1330,20 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * \brief Asynchronously compute the number of bytes of uncompressed data for * each compressed chunk. * + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. + * * @param device_compressed_chunk_ptrs [in] Array with size \p num_chunks of * pointers in device-accessible memory to compressed buffers. + * Each buffer must be aligned to the value in + * {@code nvcompBatchedCascadedDecompressRequiredAlignments.input}. * @param device_compressed_chunk_bytes [in] Array with size \p num_chunks of sizes * of the compressed buffers in bytes. The sizes should reside in device-accessible memory. * @param device_uncompressed_chunk_bytes [out] Array with size \p num_chunks * to be filled with the sizes, in bytes, of each uncompressed data chunk. * If there is an error when retrieving the size of a chunk, the * uncompressed size of that chunk will be set to 0. This argument needs to - * be prealloated in device-accessible memory. + * be preallocated in device-accessible memory. * @param num_chunks [in] Number of data chunks to compute sizes of. * @param stream [in] The CUDA stream to operate on. * @@ -1245,13 +1365,16 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { /** * \brief Perform batched asynchronous decompression. * - * \note This function is used to decompress compressed buffers produced by + * This function is used to decompress compressed buffers produced by * {@code nvcompBatchedCascadedCompressAsync}. * + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. + * * @param device_compressed_chunk_ptrs [in] Array with size \p num_chunks of pointers - * in device-accessible memory to compressed buffers. Each compressed buffer - * should reside in device-accessible memory and start at a location with - * alignment of both 4B and the data type. + * in device-accessible memory to device-accessible compressed buffers. + * Each buffer must be aligned to the value in + * {@code nvcompBatchedCascadedDecompressRequiredAlignments.input}. * @param device_compressed_chunk_bytes [in] Array with size \p num_chunks of sizes of * the compressed buffers in bytes. The sizes should reside in device-accessible memory. * @param device_uncompressed_buffer_bytes [in] Array with size \p num_chunks of sizes, @@ -1269,8 +1392,9 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * @param device_uncompressed_chunk_ptrs [out] Array with size \p num_chunks of * pointers in device-accessible memory to decompressed data. 
Each uncompressed * buffer needs to be preallocated in device-accessible memory, have the size - * specified by the corresponding entry in device_uncompressed_buffer_bytes, - * and start at a location with alignment of the data type. + * specified by the corresponding entry in \p device_uncompressed_buffer_bytes, + * and be aligned to the value in + * {@code nvcompBatchedCascadedDecompressRequiredAlignments.output}. * @param device_statuses [out] Array with size \p num_chunks of statuses in * device-accessible memory. This argument needs to be preallocated. For each * chunk, if the decompression is successful, the status will be set to @@ -1325,7 +1449,7 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { @Cast("void*const*") @ByPtrPtr Pointer device_uncompressed_chunk_ptrs, @Cast("nvcompStatus_t*") int[] device_statuses, CUstream_st stream); - + // #ifdef __cplusplus // #endif @@ -1440,12 +1564,25 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { @MemberGetter public static native @Cast("const size_t") long nvcompDeflateCompressionMaxAllowedChunkSize(); /** - * This is the minimum alignment required for void type CUDA memory buffers - * passed to compression or decompression functions. Typed memory buffers must - * still be aligned to their type's size, e.g. 8 bytes for size_t. + * The most restrictive of minimum alignment requirements for void-type CUDA memory buffers + * used for input, output, or temporary memory, passed to compression or decompression functions. + * In all cases, typed memory buffers must still be aligned to their type's size, e.g., 4 bytes for {@code int}. */ @MemberGetter public static native @Cast("const size_t") long nvcompDeflateRequiredAlignment(); +/** + * \brief Get the minimum buffer alignment requirements for compression. + * + * @param format_opts [in] Compression options. + * @param alignment_requirements [out] The minimum buffer alignment requirements + * for compression. + * + * @return nvcompSuccess if successful, and an error code otherwise. + */ +public static native @Cast("nvcompStatus_t") int nvcompBatchedDeflateCompressGetRequiredAlignments( + @ByVal nvcompBatchedDeflateOpts_t format_opts, + nvcompAlignmentRequirements_t alignment_requirements); + /** * \brief Get the amount of temporary memory required on the GPU for compression. * @@ -1517,19 +1654,28 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { /** * \brief Perform batched asynchronous compression. * - * The individual chunk size must not exceed - * 65536 bytes. For best performance, a chunk size of 65536 bytes is - * recommended. The output buffers must be 8-byte aligned. + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. * * @param device_uncompressed_chunk_ptrs [in] Array with size \p num_chunks of pointers * to the uncompressed data chunks. Both the pointers and the uncompressed data * should reside in device-accessible memory. + * Each chunk must be aligned to the value in the {@code input} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedDeflateCompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param device_uncompressed_chunk_bytes [in] Array with size \p num_chunks of * sizes of the uncompressed chunks in bytes. * The sizes should reside in device-accessible memory. + * Chunk sizes must not exceed 65536 bytes. For best performance, a chunk size + * of 65536 bytes is recommended. 
* @param max_uncompressed_chunk_bytes [in] The size of the largest uncompressed chunk. * @param num_chunks [in] Number of chunks of data to compress. * @param device_temp_ptr [in] The temporary GPU workspace. + * Must be aligned to the value in the {@code temp} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedDeflateCompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param temp_bytes [in] The size of the temporary GPU memory pointed to by * {@code device_temp_ptr}. * @param device_compressed_chunk_ptrs [out] Array with size \p num_chunks of pointers @@ -1537,6 +1683,10 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * buffers should reside in device-accessible memory. Each compressed buffer * should be preallocated with the size given by * {@code nvcompBatchedDeflateCompressGetMaxOutputChunkSize}. + * Each compressed buffer must be aligned to the value in the {@code output} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedDeflateCompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param device_compressed_chunk_bytes [out] Array with size \p num_chunks, * to be filled with the compressed sizes of each chunk. * The buffer should be preallocated in device-accessible memory. @@ -1568,6 +1718,11 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { @ByVal nvcompBatchedDeflateOpts_t format_opts, CUstream_st stream); +/** + * Minimum buffer alignment requirements for decompression. + */ +@MemberGetter public static native @Const @ByRef nvcompAlignmentRequirements_t nvcompBatchedDeflateDecompressRequiredAlignments(); + /** * \brief Get the amount of temporary memory required on the GPU for decompression. * @@ -1605,10 +1760,16 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * each compressed chunk. * * This is needed when we do not know the expected output size. - * NOTE: If the stream is corrupt, the sizes will be garbage. + * + * \note If the stream is corrupt, the calculated sizes will be invalid. + * + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. * * @param device_compressed_chunk_ptrs [in] Array with size \p num_chunks of * pointers in device-accessible memory to compressed buffers. + * Each buffer must be aligned to the value in + * {@code nvcompBatchedDeflateDecompressRequiredAlignments.input}. * @param device_compressed_chunk_bytes [in] Array with size \p num_chunks of sizes * of the compressed buffers in bytes. The sizes should reside in device-accessible memory. * @param device_uncompressed_chunk_bytes [out] Array with size \p num_chunks @@ -1634,13 +1795,17 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { /** * \brief Perform batched asynchronous decompression. * - * In the case where a chunk of compressed data is not a valid Deflate + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. + * + * \note In the case where a chunk of compressed data is not a valid Deflate * stream, 0 will be written for the size of the invalid chunk and * nvcompStatusCannotDecompress will be flagged for that chunk. * * @param device_compressed_chunk_ptrs [in] Array with size \p num_chunks of pointers - * in device-accessible memory to compressed buffers. Each compressed buffer - * should reside in device-accessible memory. 
+ * in device-accessible memory to device-accessible compressed buffers. + * Each buffer must be aligned to the value in + * {@code nvcompBatchedDeflateDecompressRequiredAlignments.input}. * @param device_compressed_chunk_bytes [in] Array with size \p num_chunks of sizes of * the compressed buffers in bytes. The sizes should reside in device-accessible memory. * @param device_uncompressed_buffer_bytes [in] Array with size \p num_chunks of sizes, @@ -1655,11 +1820,14 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * in which case the actual sizes are not reported. * @param num_chunks [in] Number of chunks of data to decompress. * @param device_temp_ptr [in] The temporary GPU space. + * Must be aligned to the value in {@code nvcompBatchedDeflateDecompressRequiredAlignments.temp}. * @param temp_bytes [in] The size of the temporary GPU space. * @param device_uncompressed_chunk_ptrs [out] Array with size \p num_chunks of * pointers in device-accessible memory to decompressed data. Each uncompressed * buffer needs to be preallocated in device-accessible memory, have the size - * specified by the corresponding entry in device_uncompressed_buffer_bytes. + * specified by the corresponding entry in \p device_uncompressed_buffer_bytes, + * and be aligned to the value in + * {@code nvcompBatchedDeflateDecompressRequiredAlignments.output}. * @param device_statuses [out] Array with size \p num_chunks of statuses in * device-accessible memory. This argument needs to be preallocated. For each * chunk, if the decompression is successful, the status will be set to @@ -1781,12 +1949,25 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { @MemberGetter public static native @Cast("const size_t") long nvcompGdeflateCompressionMaxAllowedChunkSize(); /** - * This is the minimum alignment required for void type CUDA memory buffers - * passed to compression or decompression functions. Typed memory buffers must - * still be aligned to their type's size, e.g. 8 bytes for size_t. + * The most restrictive of minimum alignment requirements for void-type CUDA memory buffers + * used for input, output, or temporary memory, passed to compression or decompression functions. + * In all cases, typed memory buffers must still be aligned to their type's size, e.g., 4 bytes for {@code int}. */ @MemberGetter public static native @Cast("const size_t") long nvcompGdeflateRequiredAlignment(); +/** + * \brief Get the minimum buffer alignment requirements for compression. + * + * @param format_opts [in] Compression options. + * @param alignment_requirements [out] The minimum buffer alignment requirements + * for compression. + * + * @return nvcompSuccess if successful, and an error code otherwise. + */ +public static native @Cast("nvcompStatus_t") int nvcompBatchedGdeflateCompressGetRequiredAlignments( + @ByVal nvcompBatchedGdeflateOpts_t format_opts, + nvcompAlignmentRequirements_t alignment_requirements); + /** * \brief Get the amount of temporary memory required on the GPU for compression. * @@ -1858,19 +2039,28 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { /** * \brief Perform batched asynchronous compression. * - * The individual chunk size must not exceed - * 65536 bytes. For best performance, a chunk size of 65536 bytes is - * recommended. The output buffers must be 8-byte aligned. + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. 
* * @param device_uncompressed_chunk_ptrs [in] Array with size \p num_chunks of pointers * to the uncompressed data chunks. Both the pointers and the uncompressed data * should reside in device-accessible memory. + * Each chunk must be aligned to the value in the {@code input} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedGdeflateCompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param device_uncompressed_chunk_bytes [in] Array with size \p num_chunks of * sizes of the uncompressed chunks in bytes. * The sizes should reside in device-accessible memory. + * Chunk sizes must not exceed 65536 bytes. For best performance, a chunk size + * of 65536 bytes is recommended. * @param max_uncompressed_chunk_bytes [in] The size of the largest uncompressed chunk. * @param num_chunks [in] Number of chunks of data to compress. * @param device_temp_ptr [in] The temporary GPU workspace. + * Must be aligned to the value in the {@code temp} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedGdeflateCompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param temp_bytes [in] The size of the temporary GPU memory pointed to by * {@code device_temp_ptr}. * @param device_compressed_chunk_ptrs [out] Array with size \p num_chunks of pointers @@ -1878,6 +2068,10 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * buffers should reside in device-accessible memory. Each compressed buffer * should be preallocated with the size given by * {@code nvcompBatchedGdeflateCompressGetMaxOutputChunkSize}. + * Each compressed buffer must be aligned to the value in the {@code output} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedGdeflateCompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param device_compressed_chunk_bytes [out] Array with size \p num_chunks, * to be filled with the compressed sizes of each chunk. * The buffer should be preallocated in device-accessible memory. @@ -1909,6 +2103,11 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { @ByVal nvcompBatchedGdeflateOpts_t format_opts, CUstream_st stream); +/** + * Minimum buffer alignment requirements for decompression. + */ +@MemberGetter public static native @Const @ByRef nvcompAlignmentRequirements_t nvcompBatchedGdeflateDecompressRequiredAlignments(); + /** * \brief Get the amount of temporary memory required on the GPU for decompression. * @@ -1946,10 +2145,16 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * each compressed chunk. * * This is needed when we do not know the expected output size. - * NOTE: If the stream is corrupt, the sizes will be garbage. + * + * \note If the stream is corrupt, the calculated sizes will be invalid. + * + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. * * @param device_compressed_chunk_ptrs [in] Array with size \p num_chunks of * pointers in device-accessible memory to compressed buffers. + * Each buffer must be aligned to the value in + * {@code nvcompBatchedGdeflateDecompressRequiredAlignments.input}. * @param device_compressed_chunk_bytes [in] Array with size \p num_chunks of sizes * of the compressed buffers in bytes. The sizes should reside in device-accessible memory. 
* @param device_uncompressed_chunk_bytes [out] Array with size \p num_chunks @@ -1975,13 +2180,17 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { /** * \brief Perform batched asynchronous decompression. * - * In the case where a chunk of compressed data is not a valid GDeflate + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. + * + * \note In the case where a chunk of compressed data is not a valid GDeflate * stream, 0 will be written for the size of the invalid chunk and * nvcompStatusCannotDecompress will be flagged for that chunk. * * @param device_compressed_chunk_ptrs [in] Array with size \p num_chunks of pointers - * in device-accessible memory to compressed buffers. Each compressed buffer - * should reside in device-accessible memory. + * in device-accessible memory to device-accessible compressed buffers. + * Each buffer must be aligned to the value in + * {@code nvcompBatchedGdeflateDecompressRequiredAlignments.input}. * @param device_compressed_chunk_bytes [in] Array with size \p num_chunks of sizes of * the compressed buffers in bytes. The sizes should reside in device-accessible memory. * @param device_uncompressed_buffer_bytes [in] Array with size \p num_chunks of sizes, @@ -1996,11 +2205,14 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * in which case the actual sizes are not reported. * @param num_chunks [in] Number of chunks of data to decompress. * @param device_temp_ptr [in] The temporary GPU space. + * Must be aligned to the value in {@code nvcompBatchedGdeflateDecompressRequiredAlignments.temp}. * @param temp_bytes [in] The size of the temporary GPU space. * @param device_uncompressed_chunk_ptrs [out] Array with size \p num_chunks of * pointers in device-accessible memory to decompressed data. Each uncompressed * buffer needs to be preallocated in device-accessible memory, have the size - * specified by the corresponding entry in device_uncompressed_buffer_bytes. + * specified by the corresponding entry in \p device_uncompressed_buffer_bytes, + * and be aligned to the value in + * {@code nvcompBatchedGdeflateDecompressRequiredAlignments.output}. * @param device_statuses [out] Array with size \p num_chunks of statuses in * device-accessible memory. This argument needs to be preallocated. For each * chunk, if the decompression is successful, the status will be set to @@ -2122,6 +2334,138 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * Batched decompression interface for gzip *****************************************************************************/ +/** + * Minimum buffer alignment requirements for decompression. + */ +@MemberGetter public static native @Const @ByRef nvcompAlignmentRequirements_t nvcompBatchedGzipDecompressRequiredAlignments(); +// Targeting ../nvcomp/nvcompBatchedGzipOpts_t.java + + + +@MemberGetter public static native @Const @ByRef nvcompBatchedGzipOpts_t nvcompBatchedGzipDefaultOpts(); + +/** + * \brief Get the amount of temporary memory required on the GPU for compression. + * + * Chunk size must not exceed + * 65536 bytes. For best performance, a chunk size of 65536 bytes is + * recommended. + * + * @param num_chunks [in] The number of chunks of memory in the batch. + * @param max_uncompressed_chunk_bytes [in] The maximum size of a chunk in the + * batch. + * @param format_opts [in] The Gzip compression options to use. 
+ * @param temp_bytes [out] The amount of GPU memory that will be temporarily + * required during compression. + * + * @return nvcompSuccess if successful, and an error code otherwise. + */ +public static native @Cast("nvcompStatus_t") int nvcompBatchedGzipCompressGetTempSize( + @Cast("size_t") long num_chunks, + @Cast("size_t") long max_uncompressed_chunk_bytes, + @ByVal nvcompBatchedGzipOpts_t format_opts, + @Cast("size_t*") SizeTPointer temp_bytes); + +/** + * \brief Get the amount of temporary memory required on the GPU for compression + * with extra total bytes argument. + * + * Chunk size must not exceed + * 65536 bytes. For best performance, a chunk size of 65536 bytes is + * recommended. + * + * @param num_chunks [in] The number of chunks of memory in the batch. + * @param max_uncompressed_chunk_bytes [in] The maximum size of a chunk in the + * batch. + * @param format_opts [in] The Gzip compression options to use. + * @param temp_bytes [out] The amount of GPU memory that will be temporarily + * required during compression. + * @param max_total_uncompressed_bytes [in] Upper bound on the total uncompressed + * size of all chunks + * + * @return nvcompSuccess if successful, and an error code otherwise. + */ +public static native @Cast("nvcompStatus_t") int nvcompBatchedGzipCompressGetTempSizeEx( + @Cast("size_t") long num_chunks, + @Cast("size_t") long max_uncompressed_chunk_bytes, + @ByVal nvcompBatchedGzipOpts_t format_opts, + @Cast("size_t*") SizeTPointer temp_bytes, + @Cast("const size_t") long max_total_uncompressed_bytes); + +/** + * \brief Get the maximum size that a chunk of size at most max_uncompressed_chunk_bytes + * could compress to. That is, the minimum amount of output memory required to be given + * nvcompBatchedGzipCompressAsync() for each chunk. + * + * Chunk size must not exceed + * 65536 bytes. For best performance, a chunk size of 65536 bytes is + * recommended. + * + * @param max_uncompressed_chunk_bytes [in] The maximum size of a chunk before compression. + * @param format_opts [in] The Gzip compression options to use. + * @param max_compressed_chunk_bytes [out] The maximum possible compressed size of the chunk. + * + * @return nvcompSuccess if successful, and an error code otherwise. + */ +public static native @Cast("nvcompStatus_t") int nvcompBatchedGzipCompressGetMaxOutputChunkSize( + @Cast("size_t") long max_uncompressed_chunk_bytes, + @ByVal nvcompBatchedGzipOpts_t format_opts, + @Cast("size_t*") SizeTPointer max_compressed_chunk_bytes); + +/** + * \brief Perform batched asynchronous compression. + * + * The individual chunk size must not exceed + * 65536 bytes. For best performance, a chunk size of 65536 bytes is + * recommended. The output buffers must be 8-byte aligned. + * + * @param device_uncompressed_chunk_ptrs [in] Array with size \p num_chunks of pointers + * to the uncompressed data chunks. Both the pointers and the uncompressed data + * should reside in device-accessible memory. + * @param device_uncompressed_chunk_bytes [in] Array with size \p num_chunks of + * sizes of the uncompressed chunks in bytes. + * The sizes should reside in device-accessible memory. + * @param max_uncompressed_chunk_bytes [in] The size of the largest uncompressed chunk. + * @param num_chunks [in] Number of chunks of data to compress. + * @param device_temp_ptr [in] The temporary GPU workspace. + * @param temp_bytes [in] The size of the temporary GPU memory pointed to by + * {@code device_temp_ptr}. 
+ * @param device_compressed_chunk_ptrs [out] Array with size \p num_chunks of pointers + * to the output compressed buffers. Both the pointers and the compressed + * buffers should reside in device-accessible memory. Each compressed buffer + * should be preallocated with the size given by + * {@code nvcompBatchedGzipCompressGetMaxOutputChunkSize}. + * @param device_compressed_chunk_bytes [out] Array with size \p num_chunks, + * to be filled with the compressed sizes of each chunk. + * The buffer should be preallocated in device-accessible memory. + * @param format_opts [in] The Gzip compression options to use. + * @param stream [in] The CUDA stream to operate on. + * + * @return nvcompSuccess if successfully launched, and an error code otherwise. + */ +public static native @Cast("nvcompStatus_t") int nvcompBatchedGzipCompressAsync( + @Cast("const void*const*") PointerPointer device_uncompressed_chunk_ptrs, + @Cast("const size_t*") SizeTPointer device_uncompressed_chunk_bytes, + @Cast("size_t") long max_uncompressed_chunk_bytes, + @Cast("size_t") long num_chunks, + Pointer device_temp_ptr, + @Cast("size_t") long temp_bytes, + @Cast("void*const*") PointerPointer device_compressed_chunk_ptrs, + @Cast("size_t*") SizeTPointer device_compressed_chunk_bytes, + @ByVal nvcompBatchedGzipOpts_t format_opts, + CUstream_st stream); +public static native @Cast("nvcompStatus_t") int nvcompBatchedGzipCompressAsync( + @Cast("const void*const*") @ByPtrPtr Pointer device_uncompressed_chunk_ptrs, + @Cast("const size_t*") SizeTPointer device_uncompressed_chunk_bytes, + @Cast("size_t") long max_uncompressed_chunk_bytes, + @Cast("size_t") long num_chunks, + Pointer device_temp_ptr, + @Cast("size_t") long temp_bytes, + @Cast("void*const*") @ByPtrPtr Pointer device_compressed_chunk_ptrs, + @Cast("size_t*") SizeTPointer device_compressed_chunk_bytes, + @ByVal nvcompBatchedGzipOpts_t format_opts, + CUstream_st stream); + /** * \brief Get the amount of temporary memory required on the GPU for decompression. * @@ -2159,10 +2503,16 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * each compressed chunk. * * This is needed when we do not know the expected output size. - * NOTE: If the stream is corrupt, the sizes will be garbage. + * + * \note If the stream is corrupt, the calculated sizes will be invalid. + * + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. * * @param device_compressed_chunk_ptrs [in] Array with size \p num_chunks of * pointers in device-accessible memory to compressed buffers. + * Each buffer must be aligned to the value in + * {@code nvcompBatchedGzipDecompressRequiredAlignments.input}. * @param device_compressed_chunk_bytes [in] Array with size \p num_chunks of sizes * of the compressed buffers in bytes. The sizes should reside in device-accessible memory. * @param device_uncompressed_chunk_bytes [out] Array with size \p num_chunks @@ -2188,13 +2538,17 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { /** * \brief Perform batched asynchronous decompression. * - * In the case where a chunk of compressed data is not a valid gzip + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. + * + * \note In the case where a chunk of compressed data is not a valid Deflate * stream, 0 will be written for the size of the invalid chunk and * nvcompStatusCannotDecompress will be flagged for that chunk. 
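For orientation, a hypothetical end-to-end call sequence for the batched Gzip compression entry points declared above might look as follows in Java. All device-side arrays and buffers are assumed to have been allocated in device-accessible memory by the caller, and 0 is assumed to equal nvcompSuccess; this is a sketch of the call order only, not part of the generated bindings:

import org.bytedeco.javacpp.Pointer;
import org.bytedeco.javacpp.PointerPointer;
import org.bytedeco.javacpp.SizeTPointer;
import org.bytedeco.cuda.cudart.CUstream_st;
import org.bytedeco.cuda.nvcomp.nvcompBatchedGzipOpts_t;
import static org.bytedeco.cuda.global.nvcomp.*;

public class GzipBatchedCompressSketch {
    /** Launches batched Gzip compression; all pointer arguments wrap device-accessible memory. */
    public static void compress(PointerPointer dUncompChunkPtrs, SizeTPointer dUncompChunkBytes,
                                long maxUncompChunkBytes, long numChunks,
                                Pointer dTemp, long tempBytes,
                                PointerPointer dCompChunkPtrs, SizeTPointer dCompChunkBytes,
                                CUstream_st stream) {
        nvcompBatchedGzipOpts_t opts = nvcompBatchedGzipDefaultOpts();

        // 1. Ask how much temporary device memory the launch needs.
        SizeTPointer requiredTemp = new SizeTPointer(1);
        int status = nvcompBatchedGzipCompressGetTempSize(numChunks, maxUncompChunkBytes, opts, requiredTemp);
        if (status != 0 || requiredTemp.get() > tempBytes) {
            throw new IllegalStateException("temp size query failed or workspace too small");
        }

        // 2. Worst-case compressed chunk size; each output buffer must be at least this large.
        SizeTPointer maxCompChunkBytes = new SizeTPointer(1);
        status = nvcompBatchedGzipCompressGetMaxOutputChunkSize(maxUncompChunkBytes, opts, maxCompChunkBytes);
        if (status != 0) {
            throw new IllegalStateException("max output chunk size query failed: " + status);
        }

        // 3. Launch the asynchronous batched compression on the given CUDA stream.
        status = nvcompBatchedGzipCompressAsync(dUncompChunkPtrs, dUncompChunkBytes,
                maxUncompChunkBytes, numChunks, dTemp, tempBytes,
                dCompChunkPtrs, dCompChunkBytes, opts, stream);
        if (status != 0) {
            throw new IllegalStateException("compress launch failed: " + status);
        }
        // The caller synchronizes the stream before reading dCompChunkBytes back from the device.
    }
}

The decompression path follows the same shape, with each input buffer honoring nvcompBatchedGzipDecompressRequiredAlignments.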
* * @param device_compressed_chunk_ptrs [in] Array with size \p num_chunks of pointers - * in device-accessible memory to compressed buffers. Each compressed buffer - * should reside in device-accessible memory. + * in device-accessible memory to device-accessible compressed buffers. + * Each buffer must be aligned to the value in + * {@code nvcompBatchedGzipDecompressRequiredAlignments.input}. * @param device_compressed_chunk_bytes [in] Array with size \p num_chunks of sizes of * the compressed buffers in bytes. The sizes should reside in device-accessible memory. * @param device_uncompressed_buffer_bytes [in] Array with size \p num_chunks of sizes, @@ -2209,11 +2563,14 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * in which case the actual sizes are not reported. * @param num_chunks [in] Number of chunks of data to decompress. * @param device_temp_ptr [in] The temporary GPU space. + * Must be aligned to the value in {@code nvcompBatchedGzipDecompressRequiredAlignments.temp}. * @param temp_bytes [in] The size of the temporary GPU space. * @param device_uncompressed_chunk_ptrs [out] Array with size \p num_chunks of * pointers in device-accessible memory to decompressed data. Each uncompressed * buffer needs to be preallocated in device-accessible memory, have the size - * specified by the corresponding entry in device_uncompressed_buffer_bytes. + * specified by the corresponding entry in \p device_uncompressed_buffer_bytes, + * and be aligned to the value in + * {@code nvcompBatchedGzipDecompressRequiredAlignments.output}. * @param device_statuses [out] Array with size \p num_chunks of statuses in * device-accessible memory. This argument needs to be preallocated. For each * chunk, if the decompression is successful, the status will be set to @@ -2299,9 +2656,6 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { // #include // #ifdef __cplusplus -// Targeting ../nvcomp/nvcompLZ4FormatOpts.java - - // Targeting ../nvcomp/nvcompBatchedLZ4Opts_t.java @@ -2311,9 +2665,9 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { @MemberGetter public static native @Cast("const size_t") long nvcompLZ4CompressionMaxAllowedChunkSize(); /** - * This is the minimum alignment required for void type CUDA memory buffers - * passed to compression or decompression functions. Typed memory buffers must - * still be aligned to their type's size, e.g. 8 bytes for size_t. + * The most restrictive of minimum alignment requirements for void-type CUDA memory buffers + * used for input, output, or temporary memory, passed to compression or decompression functions. + * In all cases, typed memory buffers must still be aligned to their type's size, e.g., 4 bytes for {@code int}. */ @MemberGetter public static native @Cast("const size_t") long nvcompLZ4RequiredAlignment(); @@ -2321,6 +2675,19 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * Batched compression/decompression interface *****************************************************************************/ +/** + * \brief Get the minimum buffer alignment requirements for compression. + * + * @param format_opts [in] Compression options. + * @param alignment_requirements [out] The minimum buffer alignment requirements + * for compression. + * + * @return nvcompSuccess if successful, and an error code otherwise. 
+ */ +public static native @Cast("nvcompStatus_t") int nvcompBatchedLZ4CompressGetRequiredAlignments( + @ByVal nvcompBatchedLZ4Opts_t format_opts, + nvcompAlignmentRequirements_t alignment_requirements); + /** * \brief Get the amount of temporary memory required on the GPU for compression. * @@ -2389,23 +2756,30 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { /** * \brief Perform batched asynchronous compression. * - * The individual chunk size must not exceed 16777216 bytes. - * For best performance, a chunk size of 65536 bytes is recommended. + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. * * @param device_uncompressed_chunk_ptrs [in] Array with size \p num_chunks of pointers * to the uncompressed data chunks. Both the pointers and the uncompressed data * should reside in device-accessible memory. + * Each chunk must be aligned to the value in the {@code input} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedLZ4CompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param device_uncompressed_chunk_bytes [in] Array with size \p num_chunks of * sizes of the uncompressed chunks in bytes. * The sizes should reside in device-accessible memory. - * Each chunk size MUST be a multiple of the size of the data type specified by - * format_opts.data_type, else this may crash or produce invalid output. + * Each chunk size must be a multiple of the size of the data type specified by + * format_opts.data_type. + * Chunk sizes must not exceed 16777216 bytes. For best performance, a chunk size + * of 65536 bytes is recommended. * @param max_uncompressed_chunk_bytes [in] The size of the largest uncompressed chunk. - * This parameter is currently unused, so if it is not set - * with the maximum size, it should be set to zero. If a future version makes - * use of it, it will return an error if it is set to zero. * @param num_chunks [in] Number of chunks of data to compress. * @param device_temp_ptr [in] The temporary GPU workspace. + * Must be aligned to the value in the {@code temp} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedLZ4CompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param temp_bytes [in] The size of the temporary GPU memory pointed to by * {@code device_temp_ptr}. * @param device_compressed_chunk_ptrs [out] Array with size \p num_chunks of pointers @@ -2413,6 +2787,10 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * buffers should reside in device-accessible memory. Each compressed buffer * should be preallocated with the size given by * {@code nvcompBatchedLZ4CompressGetMaxOutputChunkSize}. + * Each compressed buffer must be aligned to the value in the {@code output} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedLZ4CompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param device_compressed_chunk_bytes [out] Array with size \p num_chunks, * to be filled with the compressed sizes of each chunk. * The buffer should be preallocated in device-accessible memory. @@ -2444,6 +2822,11 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { @ByVal nvcompBatchedLZ4Opts_t format_opts, CUstream_st stream); +/** + * Minimum buffer alignment requirements for decompression. 
+ */ +@MemberGetter public static native @Const @ByRef nvcompAlignmentRequirements_t nvcompBatchedLZ4DecompressRequiredAlignments(); + /** * \brief Get the amount of temporary memory required on the GPU for decompression. * @@ -2481,15 +2864,21 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * each compressed chunk. * * This is needed when we do not know the expected output size. - * NOTE: If the stream is corrupt, the sizes will be garbage. + * + * \note If the stream is corrupt, the calculated sizes will be invalid. + * + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. * * @param device_compressed_chunk_ptrs [in] Array with size \p num_chunks of * pointers in device-accessible memory to compressed buffers. + * Each buffer must be aligned to the value in + * {@code nvcompBatchedLZ4DecompressRequiredAlignments.input}. * @param device_compressed_chunk_bytes [in] Array with size \p num_chunks of sizes * of the compressed buffers in bytes. The sizes should reside in device-accessible memory. * @param device_uncompressed_chunk_bytes [out] Array with size \p num_chunks * to be filled with the sizes, in bytes, of each uncompressed data chunk. - * This argument needs to be prealloated in device-accessible memory. + * This argument needs to be preallocated in device-accessible memory. * @param num_chunks [in] Number of data chunks to compute sizes of. * @param stream [in] The CUDA stream to operate on. * @@ -2511,13 +2900,17 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { /** * \brief Perform batched asynchronous decompression. * - * In the case where a chunk of compressed data is not a valid LZ4 + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. + * + * \note In the case where a chunk of compressed data is not a valid LZ4 * block, 0 will be written for the size of the invalid chunk and * nvcompStatusCannotDecompress will be flagged for that chunk. * * @param device_compressed_chunk_ptrs [in] Array with size \p num_chunks of pointers - * in device-accessible memory to compressed buffers. Each compressed buffer - * should reside in device-accessible memory. + * in device-accessible memory to device-accessible compressed buffers. + * Each buffer must be aligned to the value in + * {@code nvcompBatchedLZ4DecompressRequiredAlignments.input}. * @param device_compressed_chunk_bytes [in] Array with size \p num_chunks of sizes of * the compressed buffers in bytes. The sizes should reside in device-accessible memory. * @param device_uncompressed_buffer_bytes [in] Array with size \p num_chunks of sizes, @@ -2532,11 +2925,14 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * in which case the actual sizes are not reported. * @param num_chunks [in] Number of chunks of data to decompress. * @param device_temp_ptr [in] The temporary GPU space. + * Must be aligned to the value in {@code nvcompBatchedLZ4DecompressRequiredAlignments.temp}. * @param temp_bytes [in] The size of the temporary GPU space. * @param device_uncompressed_chunk_ptrs [out] Array with size \p num_chunks of * pointers in device-accessible memory to decompressed data. Each uncompressed * buffer needs to be preallocated in device-accessible memory, have the size - * specified by the corresponding entry in device_uncompressed_buffer_bytes. 
+ * specified by the corresponding entry in \p device_uncompressed_buffer_bytes, + * and be aligned to the value in + * {@code nvcompBatchedLZ4DecompressRequiredAlignments.output}. * @param device_statuses [out] Array with size \p num_chunks of statuses in * device-accessible memory. This argument needs to be preallocated. For each * chunk, if the decompression is successful, the status will be set to @@ -2660,20 +3056,32 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { @MemberGetter public static native @Cast("const size_t") long nvcompSnappyCompressionMaxAllowedChunkSize(); /** - * This is the minimum alignment required for void type CUDA memory buffers - * passed to compression or decompression functions. Typed memory buffers must - * still be aligned to their type's size, e.g. 8 bytes for size_t. - * - * The Snappy compressor supports unaligned data, so this value is 1. + * The most restrictive of minimum alignment requirements for void-type CUDA memory buffers + * used for input, output, or temporary memory, passed to compression or decompression functions. + * In all cases, typed memory buffers must still be aligned to their type's size, e.g., 4 bytes for {@code int}. */ @MemberGetter public static native @Cast("const size_t") long nvcompSnappyRequiredAlignment(); +/** + * \brief Get the minimum buffer alignment requirements for compression. + * + * @param format_opts [in] Compression options. + * @param alignment_requirements [out] The minimum buffer alignment requirements + * for compression. + * + * @return nvcompSuccess if successful, and an error code otherwise. + */ +public static native @Cast("nvcompStatus_t") int nvcompBatchedSnappyCompressGetRequiredAlignments( + @ByVal nvcompBatchedSnappyOpts_t format_opts, + nvcompAlignmentRequirements_t alignment_requirements); + /** * \brief Get the amount of temporary memory required on the GPU for compression. * * @param num_chunks [in] The number of chunks of memory in the batch. * @param max_uncompressed_chunk_bytes [in] The maximum size of a chunk in the - * batch. + * batch. This parameter is currently unused. Set it to either the actual value + * or zero. * @param format_opts [in] Snappy compression options. * @param temp_bytes [out] The amount of GPU memory that will be temporarily * required during compression. @@ -2692,7 +3100,8 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * * @param num_chunks [in] The number of chunks of memory in the batch. * @param max_uncompressed_chunk_bytes [in] The maximum size of a chunk in the - * batch. + * batch. This parameter is currently unused. Set it to either the actual value + * or zero. * @param format_opts [in] Snappy compression options. * @param temp_bytes [out] The amount of GPU memory that will be temporarily * required during compression. @@ -2727,19 +3136,29 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { /** * \brief Perform batched asynchronous compression. * - * The caller is responsible for passing device_compressed_chunk_bytes of size - * sufficient to hold compressed data + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. * * @param device_uncompressed_chunk_ptrs [in] Array with size \p num_chunks of pointers * to the uncompressed data chunks. Both the pointers and the uncompressed data * should reside in device-accessible memory. 
+ * Each chunk must be aligned to the value in the {@code input} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedSnappyCompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param device_uncompressed_chunk_bytes [in] Array with size \p num_chunks of * sizes of the uncompressed chunks in bytes. * The sizes should reside in device-accessible memory. * @param max_uncompressed_chunk_bytes [in] The size of the largest uncompressed chunk. + * This parameter is currently unused. Set it to either the actual value + * or zero. * @param num_chunks [in] Number of chunks of data to compress. * @param device_temp_ptr [in] The temporary GPU workspace, could be NULL in case * temporary memory is not needed. + * Must be aligned to the value in the {@code temp} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedSnappyCompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param temp_bytes [in] The size of the temporary GPU memory pointed to by * {@code device_temp_ptr}. * @param device_compressed_chunk_ptrs [out] Array with size \p num_chunks of pointers @@ -2747,6 +3166,10 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * buffers should reside in device-accessible memory. Each compressed buffer * should be preallocated with the size given by * {@code nvcompBatchedSnappyCompressGetMaxOutputChunkSize}. + * Each compressed buffer must be aligned to the value in the {@code output} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedSnappyCompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param device_compressed_chunk_bytes [out] Array with size \p num_chunks, * to be filled with the compressed sizes of each chunk. * The buffer should be preallocated in device-accessible memory. @@ -2778,12 +3201,17 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { @ByVal nvcompBatchedSnappyOpts_t format_opts, CUstream_st stream); +/** + * Minimum buffer alignment requirements for decompression. + */ +@MemberGetter public static native @Const @ByRef nvcompAlignmentRequirements_t nvcompBatchedSnappyDecompressRequiredAlignments(); + /** * \brief Get the amount of temporary memory required on the GPU for decompression. * * @param num_chunks [in] Number of chunks of data to be decompressed. * @param max_uncompressed_chunk_bytes [in] The size of the largest chunk in bytes - * when uncompressed. + * when uncompressed. * @param temp_bytes [out] The amount of GPU memory that will be temporarily required * during decompression. * @@ -2814,15 +3242,20 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * \brief Asynchronously compute the number of bytes of uncompressed data for * each compressed chunk. * + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. + * * @param device_compressed_chunk_ptrs [in] Array with size \p num_chunks of * pointers in device-accessible memory to compressed buffers. + * Each buffer must be aligned to the value in + * {@code nvcompBatchedSnappyDecompressRequiredAlignments.input}. * @param device_compressed_chunk_bytes [in] Array with size \p num_chunks of sizes * of the compressed buffers in bytes. The sizes should reside in device-accessible memory. 
* @param device_uncompressed_chunk_bytes [out] Array with size \p num_chunks * to be filled with the sizes, in bytes, of each uncompressed data chunk. * If there is an error when retrieving the size of a chunk, the * uncompressed size of that chunk will be set to 0. This argument needs to - * be prealloated in device-accessible memory. + * be preallocated in device-accessible memory. * @param num_chunks [in] Number of data chunks to compute sizes of. * @param stream [in] The CUDA stream to operate on. * @@ -2844,9 +3277,13 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { /** * \brief Perform batched asynchronous decompression. * + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. + * * @param device_compressed_chunk_ptrs [in] Array with size \p num_chunks of pointers - * in device-accessible memory to compressed buffers. Each compressed buffer - * should reside in device-accessible memory. + * in device-accessible memory to device-accessible compressed buffers. + * Each buffer must be aligned to the value in + * {@code nvcompBatchedSnappyDecompressRequiredAlignments.input}. * @param device_compressed_chunk_bytes [in] Array with size \p num_chunks of sizes of * the compressed buffers in bytes. The sizes should reside in device-accessible memory. * @param device_uncompressed_buffer_bytes [in] Array with size \p num_chunks of sizes, @@ -2861,11 +3298,14 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * in which case the actual sizes are not reported. * @param num_chunks [in] Number of chunks of data to decompress. * @param device_temp_ptr [in] The temporary GPU space, could be NULL in case temporary space is not needed. + * Must be aligned to the value in {@code nvcompBatchedSnappyDecompressRequiredAlignments.temp}. * @param temp_bytes [in] The size of the temporary GPU space. * @param device_uncompressed_chunk_ptrs [out] Array with size \p num_chunks of * pointers in device-accessible memory to decompressed data. Each uncompressed * buffer needs to be preallocated in device-accessible memory, have the size - * specified by the corresponding entry in device_uncompressed_buffer_bytes. + * specified by the corresponding entry in \p device_uncompressed_buffer_bytes, + * and be aligned to the value in + * {@code nvcompBatchedSnappyDecompressRequiredAlignments.output}. * @param device_statuses [out] Array with size \p num_chunks of statuses in * device-accessible memory. This argument needs to be preallocated. For each * chunk, if the decompression is successful, the status will be set to @@ -2991,12 +3431,25 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { @MemberGetter public static native @Cast("const size_t") long nvcompZstdCompressionMaxAllowedChunkSize(); /** - * This is the minimum alignment required for void type CUDA memory buffers - * passed to compression or decompression functions. Typed memory buffers must - * still be aligned to their type's size, e.g. 8 bytes for size_t. + * The most restrictive of minimum alignment requirements for void-type CUDA memory buffers + * used for input, output, or temporary memory, passed to compression or decompression functions. + * In all cases, typed memory buffers must still be aligned to their type's size, e.g., 4 bytes for {@code int}. */ @MemberGetter public static native @Cast("const size_t") long nvcompZstdRequiredAlignment(); +/** + * \brief Get the minimum buffer alignment requirements for compression. 
+ * + * @param format_opts [in] Compression options. + * @param alignment_requirements [out] The minimum buffer alignment requirements + * for compression. + * + * @return nvcompSuccess if successful, and an error code otherwise. + */ +public static native @Cast("nvcompStatus_t") int nvcompBatchedZstdCompressGetRequiredAlignments( + @ByVal nvcompBatchedZstdOpts_t format_opts, + nvcompAlignmentRequirements_t alignment_requirements); + /** * \brief Get the amount of temporary memory required on the GPU for compression. * @@ -3069,22 +3522,29 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { /** * \brief Perform batched asynchronous compression. * - * The individual chunk size must not exceed 16 MB. - * For best performance, a chunk size of 64 KB is recommended. + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. * * @param device_uncompressed_chunk_ptrs [in] Array with size \p num_chunks of pointers * to the uncompressed data chunks. Both the pointers and the uncompressed data * should reside in device-accessible memory. + * Each chunk must be aligned to the value in the {@code input} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedZstdCompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param device_uncompressed_chunk_bytes [in] Array with size \p num_chunks of * sizes of the uncompressed chunks in bytes. * The sizes should reside in device-accessible memory. + * Chunk sizes must not exceed 16 MB. For best performance, a chunk size of + * 64 KB is recommended. * @param max_uncompressed_chunk_bytes [in] The size of the largest uncompressed chunk. - * This parameter is currently unused, so if it is not set - * with the maximum size, it should be set to zero. If a future version makes - * use of it, it will return an error if it is set to zero. * @param num_chunks [in] Number of chunks of data to compress. * @param device_temp_ptr [in] The temporary GPU workspace, could be NULL in case * temporary memory is not needed. + * Must be aligned to the value in the {@code temp} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedZstdCompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param temp_bytes [in] The size of the temporary GPU memory pointed to by * {@code device_temp_ptr}. * @param device_compressed_chunk_ptrs [out] Array with size \p num_chunks of pointers @@ -3092,6 +3552,10 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * buffers should reside in device-accessible memory. Each compressed buffer * should be preallocated with the size given by * {@code nvcompBatchedZstdCompressGetMaxOutputChunkSize}. + * Each compressed buffer must be aligned to the value in the {@code output} member of the + * \ref nvcompAlignmentRequirements_t object output by + * {@code nvcompBatchedZstdCompressGetRequiredAlignments} when called with the same + * \p format_opts. * @param device_compressed_chunk_bytes [out] Array with size \p num_chunks, * to be filled with the compressed sizes of each chunk. * The buffer should be preallocated in device-accessible memory. @@ -3123,6 +3587,11 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { @ByVal nvcompBatchedZstdOpts_t format_opts, CUstream_st stream); +/** + * Minimum buffer alignment requirements for decompression. 
+ */ +@MemberGetter public static native @Const @ByRef nvcompAlignmentRequirements_t nvcompBatchedZstdDecompressRequiredAlignments(); + /** * \brief Get the amount of temporary memory required on the GPU for decompression. * @@ -3158,15 +3627,20 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * \brief Asynchronously compute the number of bytes of uncompressed data for * each compressed chunk. * + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. + * * @param device_compressed_chunk_ptrs [in] Array with size \p num_chunks of * pointers in device-accessible memory to compressed buffers. + * Each buffer must be aligned to the value in + * {@code nvcompBatchedZstdDecompressRequiredAlignments.input}. * @param device_compressed_chunk_bytes [in] Array with size \p num_chunks of sizes * of the compressed buffers in bytes. The sizes should reside in device-accessible memory. * @param device_uncompressed_chunk_bytes [out] Array with size \p num_chunks * to be filled with the sizes, in bytes, of each uncompressed data chunk. * If there is an error when retrieving the size of a chunk, the * uncompressed size of that chunk will be set to 0. This argument needs to - * be prealloated in device-accessible memory. + * be preallocated in device-accessible memory. * @param num_chunks [in] Number of data chunks to compute sizes of. * @param stream [in] The CUDA stream to operate on. * @@ -3188,9 +3662,13 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { /** * \brief Perform batched asynchronous decompression. * + * \note Violating any of the conditions listed in the parameter descriptions + * below may result in undefined behaviour. + * * @param device_compressed_chunk_ptrs [in] Array with size \p num_chunks of pointers - * in device-accessible memory to compressed buffers. Each compressed buffer - * should reside in device-accessible memory. + * in device-accessible memory to device-accessible compressed buffers. + * Each buffer must be aligned to the value in + * {@code nvcompBatchedZstdDecompressRequiredAlignments.input}. * @param device_compressed_chunk_bytes [in] Array with size \p num_chunks of sizes of * the compressed buffers in bytes. The sizes should reside in device-accessible memory. * @param device_uncompressed_buffer_bytes [in] Array with size \p num_chunks of sizes, @@ -3203,11 +3681,14 @@ public class nvcomp extends org.bytedeco.cuda.presets.nvcomp { * be filled with the actual number of bytes decompressed for every chunk. * @param num_chunks [in] Number of chunks of data to decompress. * @param device_temp_ptr [in] The temporary GPU space, could be NULL in case temporary space is not needed. + * Must be aligned to the value in {@code nvcompBatchedZstdDecompressRequiredAlignments.temp}. * @param temp_bytes [in] The size of the temporary GPU space. * @param device_uncompressed_chunk_ptrs [out] Array with size \p num_chunks of * pointers in device-accessible memory to decompressed data. Each uncompressed * buffer needs to be preallocated in device-accessible memory, have the size - * specified by the corresponding entry in device_uncompressed_buffer_bytes. + * specified by the corresponding entry in \p device_uncompressed_buffer_bytes, + * and be aligned to the value in + * {@code nvcompBatchedZstdDecompressRequiredAlignments.output}. * @param device_statuses [out] Array with size \p num_chunks of statuses in * device-accessible memory. This argument needs to be preallocated. 
For each * chunk, if the decompression is successful, the status will be set to diff --git a/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/BitcompManager.java b/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/BitcompManager.java index 3d9560a719f..9d4d2f94c28 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/BitcompManager.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/BitcompManager.java @@ -30,32 +30,32 @@ public class BitcompManager extends PimplManager { // If user_stream is specified, the lifetime of the BitcompManager instance must // extend beyond that of the user_stream public BitcompManager( - @Cast("size_t") long uncomp_chunk_size, @Const @ByRef nvcompBatchedBitcompFormatOpts format_opts, + @Cast("size_t") long uncomp_chunk_size, @Const @ByRef nvcompBatchedBitcompOpts_t format_opts, CUstream_st user_stream/*=0*/, @Cast("nvcomp::ChecksumPolicy") int checksum_policy/*=nvcomp::NoComputeNoVerify*/, @Cast("nvcomp::BitstreamKind") int bitstream_kind/*=nvcomp::BitstreamKind::NVCOMP_NATIVE*/) { super((Pointer)null); allocate(uncomp_chunk_size, format_opts, user_stream, checksum_policy, bitstream_kind); } private native void allocate( - @Cast("size_t") long uncomp_chunk_size, @Const @ByRef nvcompBatchedBitcompFormatOpts format_opts, + @Cast("size_t") long uncomp_chunk_size, @Const @ByRef nvcompBatchedBitcompOpts_t format_opts, CUstream_st user_stream/*=0*/, @Cast("nvcomp::ChecksumPolicy") int checksum_policy/*=nvcomp::NoComputeNoVerify*/, @Cast("nvcomp::BitstreamKind") int bitstream_kind/*=nvcomp::BitstreamKind::NVCOMP_NATIVE*/); public BitcompManager( - @Cast("size_t") long uncomp_chunk_size, @Const @ByRef nvcompBatchedBitcompFormatOpts format_opts) { super((Pointer)null); allocate(uncomp_chunk_size, format_opts); } + @Cast("size_t") long uncomp_chunk_size, @Const @ByRef nvcompBatchedBitcompOpts_t format_opts) { super((Pointer)null); allocate(uncomp_chunk_size, format_opts); } private native void allocate( - @Cast("size_t") long uncomp_chunk_size, @Const @ByRef nvcompBatchedBitcompFormatOpts format_opts); + @Cast("size_t") long uncomp_chunk_size, @Const @ByRef nvcompBatchedBitcompOpts_t format_opts); // This signature is deprecated, in favour of the one that does not accept a // device_id, and instead gets the device from the stream. 
@Deprecated public BitcompManager( - @Cast("size_t") long uncomp_chunk_size, @Const @ByRef nvcompBatchedBitcompFormatOpts format_opts, + @Cast("size_t") long uncomp_chunk_size, @Const @ByRef nvcompBatchedBitcompOpts_t format_opts, CUstream_st user_stream, int device_id, @Cast("nvcomp::ChecksumPolicy") int checksum_policy/*=nvcomp::NoComputeNoVerify*/, @Cast("nvcomp::BitstreamKind") int bitstream_kind/*=nvcomp::BitstreamKind::NVCOMP_NATIVE*/) { super((Pointer)null); allocate(uncomp_chunk_size, format_opts, user_stream, device_id, checksum_policy, bitstream_kind); } @Deprecated private native void allocate( - @Cast("size_t") long uncomp_chunk_size, @Const @ByRef nvcompBatchedBitcompFormatOpts format_opts, + @Cast("size_t") long uncomp_chunk_size, @Const @ByRef nvcompBatchedBitcompOpts_t format_opts, CUstream_st user_stream, int device_id, @Cast("nvcomp::ChecksumPolicy") int checksum_policy/*=nvcomp::NoComputeNoVerify*/, @Cast("nvcomp::BitstreamKind") int bitstream_kind/*=nvcomp::BitstreamKind::NVCOMP_NATIVE*/); @Deprecated public BitcompManager( - @Cast("size_t") long uncomp_chunk_size, @Const @ByRef nvcompBatchedBitcompFormatOpts format_opts, + @Cast("size_t") long uncomp_chunk_size, @Const @ByRef nvcompBatchedBitcompOpts_t format_opts, CUstream_st user_stream, int device_id) { super((Pointer)null); allocate(uncomp_chunk_size, format_opts, user_stream, device_id); } @Deprecated private native void allocate( - @Cast("size_t") long uncomp_chunk_size, @Const @ByRef nvcompBatchedBitcompFormatOpts format_opts, + @Cast("size_t") long uncomp_chunk_size, @Const @ByRef nvcompBatchedBitcompOpts_t format_opts, CUstream_st user_stream, int device_id); } diff --git a/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/DeflateFormatSpecHeader.java b/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/DeflateFormatSpecHeader.java index 0ddcdc6b040..3f09f51be05 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/DeflateFormatSpecHeader.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/DeflateFormatSpecHeader.java @@ -36,6 +36,7 @@ public class DeflateFormatSpecHeader extends Pointer { /** * Compression algorithm to use. Permitted values are: + * - 0: highest-throughput, entropy-only compression (use for symmetric compression/decompression performance) * - 1: high-throughput, low compression ratio (default) * - 2: medium-througput, medium compression ratio, beat Zlib level 1 on the compression ratio * - 3: placeholder for further compression level support, will fall into MEDIUM_COMPRESSION at this point diff --git a/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompAlignmentRequirements_t.java b/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompAlignmentRequirements_t.java new file mode 100644 index 00000000000..dd81941eb06 --- /dev/null +++ b/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompAlignmentRequirements_t.java @@ -0,0 +1,45 @@ +// Targeted by JavaCPP version 1.5.11-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.cuda.nvcomp; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; + +import static org.bytedeco.cuda.global.nvcomp.*; + + +/** + * \brief Per-algorithm buffer alignment requirements. + */ +@Properties(inherit = org.bytedeco.cuda.presets.nvcomp.class) +public class nvcompAlignmentRequirements_t extends Pointer { + static { Loader.load(); } + /** Default native constructor. 
*/ + public nvcompAlignmentRequirements_t() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public nvcompAlignmentRequirements_t(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public nvcompAlignmentRequirements_t(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public nvcompAlignmentRequirements_t position(long position) { + return (nvcompAlignmentRequirements_t)super.position(position); + } + @Override public nvcompAlignmentRequirements_t getPointer(long i) { + return new nvcompAlignmentRequirements_t((Pointer)this).offsetAddress(i); + } + + /** Minimum alignment requirement of each input buffer. */ + public native @Cast("size_t") long input(); public native nvcompAlignmentRequirements_t input(long setter); + /** Minimum alignment requirement of each output buffer. */ + public native @Cast("size_t") long output(); public native nvcompAlignmentRequirements_t output(long setter); + /** Minimum alignment requirement of temporary-storage buffer, if any. For + * algorithms that do not use temporary storage, this field is always equal + * to 1. */ + public native @Cast("size_t") long temp(); public native nvcompAlignmentRequirements_t temp(long setter); +} diff --git a/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedBitcompFormatOpts.java b/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedBitcompOpts_t.java similarity index 64% rename from cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedBitcompFormatOpts.java rename to cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedBitcompOpts_t.java index 1715c26fa8f..9e52603b99a 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedBitcompFormatOpts.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedBitcompOpts_t.java @@ -22,33 +22,33 @@ * \brief Structure for configuring Bitcomp compression. */ @Properties(inherit = org.bytedeco.cuda.presets.nvcomp.class) -public class nvcompBatchedBitcompFormatOpts extends Pointer { +public class nvcompBatchedBitcompOpts_t extends Pointer { static { Loader.load(); } /** Default native constructor. */ - public nvcompBatchedBitcompFormatOpts() { super((Pointer)null); allocate(); } + public nvcompBatchedBitcompOpts_t() { super((Pointer)null); allocate(); } /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public nvcompBatchedBitcompFormatOpts(long size) { super((Pointer)null); allocateArray(size); } + public nvcompBatchedBitcompOpts_t(long size) { super((Pointer)null); allocateArray(size); } /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public nvcompBatchedBitcompFormatOpts(Pointer p) { super(p); } + public nvcompBatchedBitcompOpts_t(Pointer p) { super(p); } private native void allocate(); private native void allocateArray(long size); - @Override public nvcompBatchedBitcompFormatOpts position(long position) { - return (nvcompBatchedBitcompFormatOpts)super.position(position); + @Override public nvcompBatchedBitcompOpts_t position(long position) { + return (nvcompBatchedBitcompOpts_t)super.position(position); } - @Override public nvcompBatchedBitcompFormatOpts getPointer(long i) { - return new nvcompBatchedBitcompFormatOpts((Pointer)this).offsetAddress(i); + @Override public nvcompBatchedBitcompOpts_t getPointer(long i) { + return new nvcompBatchedBitcompOpts_t((Pointer)this).offsetAddress(i); } /** * \brief Bitcomp algorithm options. * * - 0 : Default algorithm, usually gives the best compression ratios - * - 1 : "Sparse" algorithm, works well on sparse data (with lots of zeroes). - * and is usually a faster than the default algorithm. + * - 1 : "Sparse" algorithm, works well on sparse data (with lots of zeroes) + * and is usually faster than the default algorithm. */ - public native int algorithm_type(); public native nvcompBatchedBitcompFormatOpts algorithm_type(int setter); + public native int algorithm_type(); public native nvcompBatchedBitcompOpts_t algorithm_type(int setter); /** * \brief One of nvcomp's possible data types */ - public native @Cast("nvcompType_t") int data_type(); public native nvcompBatchedBitcompFormatOpts data_type(int setter); + public native @Cast("nvcompType_t") int data_type(); public native nvcompBatchedBitcompOpts_t data_type(int setter); } diff --git a/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedCascadedOpts_t.java b/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedCascadedOpts_t.java index 477df87ead7..55fa625429a 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedCascadedOpts_t.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedCascadedOpts_t.java @@ -12,6 +12,7 @@ import static org.bytedeco.cuda.global.nvcomp.*; +// #endif /****************************************************************************** * Batched compression/decompression interface @@ -39,7 +40,7 @@ public class nvcompBatchedCascadedOpts_t extends Pointer { } /** - * \brief The size of each internal chunk of data to decompress indepentently with + * \brief The size of each internal chunk of data to decompress independently with * * Cascaded compression. The value should be in the range of [512, 16384] * depending on the datatype of the input and the shared memory size of diff --git a/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedDeflateOpts_t.java b/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedDeflateOpts_t.java index 54a9a58681b..ed9b0f728f3 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedDeflateOpts_t.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedDeflateOpts_t.java @@ -41,6 +41,7 @@ public class nvcompBatchedDeflateOpts_t extends Pointer { /** * Compression algorithm to use. 
Permitted values are: + * - 0: highest-throughput, entropy-only compression (use for symmetric compression/decompression performance) * - 1: high-throughput, low compression ratio (default) * - 2: medium-througput, medium compression ratio, beat Zlib level 1 on the compression ratio * - 3: placeholder for further compression level support, will fall into MEDIUM_COMPRESSION at this point diff --git a/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedGzipOpts_t.java b/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedGzipOpts_t.java new file mode 100644 index 00000000000..a23cf8e559d --- /dev/null +++ b/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedGzipOpts_t.java @@ -0,0 +1,38 @@ +// Targeted by JavaCPP version 1.5.11-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.cuda.nvcomp; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; + +import static org.bytedeco.cuda.global.nvcomp.*; + + +/** + * Gzip compression options for the low-level API + */ +@Properties(inherit = org.bytedeco.cuda.presets.nvcomp.class) +public class nvcompBatchedGzipOpts_t extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public nvcompBatchedGzipOpts_t() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public nvcompBatchedGzipOpts_t(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public nvcompBatchedGzipOpts_t(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public nvcompBatchedGzipOpts_t position(long position) { + return (nvcompBatchedGzipOpts_t)super.position(position); + } + @Override public nvcompBatchedGzipOpts_t getPointer(long i) { + return new nvcompBatchedGzipOpts_t((Pointer)this).offsetAddress(i); + } + + public native int reserved(); public native nvcompBatchedGzipOpts_t reserved(int setter); +} diff --git a/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedLZ4Opts_t.java b/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedLZ4Opts_t.java index c723daf360e..b690121ecac 100644 --- a/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedLZ4Opts_t.java +++ b/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompBatchedLZ4Opts_t.java @@ -12,6 +12,11 @@ import static org.bytedeco.cuda.global.nvcomp.*; +// #endif + +/****************************************************************************** + * Batched compression/decompression interface for LZ4 + *****************************************************************************/ /** * LZ4 compression options for the low-level API diff --git a/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompCascadedFormatOpts.java b/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompCascadedFormatOpts.java deleted file mode 100644 index 36dd42a60bf..00000000000 --- a/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompCascadedFormatOpts.java +++ /dev/null @@ -1,52 +0,0 @@ -// Targeted by JavaCPP version 1.5.11-SNAPSHOT: DO NOT EDIT THIS FILE - -package org.bytedeco.cuda.nvcomp; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static 
org.bytedeco.cuda.global.cudart.*; - -import static org.bytedeco.cuda.global.nvcomp.*; - -// #endif - -/** - * \brief Structure that stores the compression configuration - */ -@Properties(inherit = org.bytedeco.cuda.presets.nvcomp.class) -public class nvcompCascadedFormatOpts extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public nvcompCascadedFormatOpts() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public nvcompCascadedFormatOpts(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public nvcompCascadedFormatOpts(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public nvcompCascadedFormatOpts position(long position) { - return (nvcompCascadedFormatOpts)super.position(position); - } - @Override public nvcompCascadedFormatOpts getPointer(long i) { - return new nvcompCascadedFormatOpts((Pointer)this).offsetAddress(i); - } - - /** - * \brief The number of Run Length Encodings to perform. - */ - public native int num_RLEs(); public native nvcompCascadedFormatOpts num_RLEs(int setter); - - /** - * \brief The number of Delta Encodings to perform. - */ - public native int num_deltas(); public native nvcompCascadedFormatOpts num_deltas(int setter); - - /** - * \brief Whether or not to bitpack the final layers. - */ - public native int use_bp(); public native nvcompCascadedFormatOpts use_bp(int setter); -} diff --git a/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompLZ4FormatOpts.java b/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompLZ4FormatOpts.java deleted file mode 100644 index 7765b9848ab..00000000000 --- a/cuda/src/gen/java/org/bytedeco/cuda/nvcomp/nvcompLZ4FormatOpts.java +++ /dev/null @@ -1,49 +0,0 @@ -// Targeted by JavaCPP version 1.5.11-SNAPSHOT: DO NOT EDIT THIS FILE - -package org.bytedeco.cuda.nvcomp; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; - -import static org.bytedeco.cuda.global.nvcomp.*; - -// #endif - -/****************************************************************************** - * Batched compression/decompression interface for LZ4 - *****************************************************************************/ - -/** - * \brief Structure for configuring LZ4 compression. - */ -@Properties(inherit = org.bytedeco.cuda.presets.nvcomp.class) -public class nvcompLZ4FormatOpts extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public nvcompLZ4FormatOpts() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public nvcompLZ4FormatOpts(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public nvcompLZ4FormatOpts(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public nvcompLZ4FormatOpts position(long position) { - return (nvcompLZ4FormatOpts)super.position(position); - } - @Override public nvcompLZ4FormatOpts getPointer(long i) { - return new nvcompLZ4FormatOpts((Pointer)this).offsetAddress(i); - } - - /** - * \brief The size of each chunk of data to decompress indepentently with - * LZ4. Must be within the range of [32768, 16777216]. Larger sizes will - * result in higher compression, but with decreased parallelism. The - * recommended size is 65536. - */ - public native @Cast("size_t") long chunk_size(); public native nvcompLZ4FormatOpts chunk_size(long setter); -} diff --git a/ffmpeg/src/gen/java/org/bytedeco/ffmpeg/global/postproc.java b/ffmpeg/src/gen/java/org/bytedeco/ffmpeg/global/postproc.java index 86aa7b289d5..26c73702a77 100644 --- a/ffmpeg/src/gen/java/org/bytedeco/ffmpeg/global/postproc.java +++ b/ffmpeg/src/gen/java/org/bytedeco/ffmpeg/global/postproc.java @@ -217,7 +217,7 @@ public class postproc extends org.bytedeco.ffmpeg.presets.postproc { // #include "version_major.h" -public static final int LIBPOSTPROC_VERSION_MINOR = 1; +public static final int LIBPOSTPROC_VERSION_MINOR = 3; public static final int LIBPOSTPROC_VERSION_MICRO = 100; // #define LIBPOSTPROC_VERSION_INT AV_VERSION_INT(LIBPOSTPROC_VERSION_MAJOR, diff --git a/gsl/README.md b/gsl/README.md index ecc9c9bcef9..e0b83472af5 100644 --- a/gsl/README.md +++ b/gsl/README.md @@ -53,7 +53,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic org.bytedeco mkl-platform-redist - 2024.2-1.5.11-SNAPSHOT + 2025.0-1.5.11-SNAPSHOT diff --git a/gsl/samples/pom.xml b/gsl/samples/pom.xml index 445b4e82444..f268edb249b 100644 --- a/gsl/samples/pom.xml +++ b/gsl/samples/pom.xml @@ -19,7 +19,7 @@ org.bytedeco mkl-platform-redist - 2024.2-1.5.11-SNAPSHOT + 2025.0-1.5.11-SNAPSHOT diff --git a/leptonica/cppbuild.sh b/leptonica/cppbuild.sh index 8cedbc84657..c1e32fb0d65 100755 --- a/leptonica/cppbuild.sh +++ b/leptonica/cppbuild.sh @@ -71,7 +71,7 @@ cd .. export PATH=$INSTALL_PATH/bin:$PATH export PKG_CONFIG_PATH=$INSTALL_PATH/lib/pkgconfig/ -CMAKE_CONFIG="-DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=$INSTALL_PATH -DCMAKE_INSTALL_PREFIX=$INSTALL_PATH -DCMAKE_INSTALL_LIBDIR=$INSTALL_PATH/lib -DBUILD_SHARED_LIBS=OFF -DENABLE_SHARED=FALSE -DPNG_SHARED=OFF" +CMAKE_CONFIG="-DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=$INSTALL_PATH -DCMAKE_INSTALL_PREFIX=$INSTALL_PATH -DCMAKE_INSTALL_LIBDIR=$INSTALL_PATH/lib -DBUILD_SHARED_LIBS=OFF -DENABLE_SHARED=FALSE -DPNG_SHARED=OFF -DPNG_FRAMEWORK=OFF" WEBP_CONFIG="-DWEBP_BUILD_ANIM_UTILS=OFF -DWEBP_BUILD_CWEBP=OFF -DWEBP_BUILD_DWEBP=OFF -DWEBP_BUILD_EXTRAS=OFF -DWEBP_BUILD_GIF2WEBP=OFF -DWEBP_BUILD_IMG2WEBP=OFF -DWEBP_BUILD_VWEBP=OFF -DWEBP_BUILD_WEBPINFO=OFF -DWEBP_BUILD_WEBPMUX=OFF -DWEBP_BUILD_WEBP_JS=OFF" case $PLATFORM in diff --git a/llvm/README.md b/llvm/README.md index 217e42a15e8..6e03030df4c 100644 --- a/llvm/README.md +++ b/llvm/README.md @@ -9,7 +9,7 @@ Introduction ------------ This directory contains the JavaCPP Presets module for: - * LLVM 19.1.2 http://llvm.org/ + * LLVM 19.1.3 http://llvm.org/ Please refer to the parent README.md file for more detailed information about the JavaCPP Presets. 
@@ -50,7 +50,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic org.bytedeco llvm-platform - 19.1.2-1.5.11-SNAPSHOT + 19.1.3-1.5.11-SNAPSHOT diff --git a/llvm/cppbuild.sh b/llvm/cppbuild.sh index 653c2e85473..5fe69a69a9a 100755 --- a/llvm/cppbuild.sh +++ b/llvm/cppbuild.sh @@ -7,7 +7,7 @@ if [[ -z "$PLATFORM" ]]; then exit fi -LLVM_VERSION=19.1.2 +LLVM_VERSION=19.1.3 download https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/llvm-project-$LLVM_VERSION.src.tar.xz llvm-project-$LLVM_VERSION.src.tar.xz mkdir -p $PLATFORM diff --git a/llvm/platform/pom.xml b/llvm/platform/pom.xml index f43565e4d7e..14072e30071 100644 --- a/llvm/platform/pom.xml +++ b/llvm/platform/pom.xml @@ -12,7 +12,7 @@ org.bytedeco llvm-platform - 19.1.2-${project.parent.version} + 19.1.3-${project.parent.version} JavaCPP Presets Platform for LLVM diff --git a/llvm/pom.xml b/llvm/pom.xml index 364f82456d3..f4775feac05 100644 --- a/llvm/pom.xml +++ b/llvm/pom.xml @@ -11,7 +11,7 @@ org.bytedeco llvm - 19.1.2-${project.parent.version} + 19.1.3-${project.parent.version} JavaCPP Presets for LLVM @@ -102,7 +102,6 @@ /link /FORCE:MULTIPLE - /WHOLEARCHIVE diff --git a/llvm/samples/clang/pom.xml b/llvm/samples/clang/pom.xml index 9da5c0aec10..07fc1f8fdff 100644 --- a/llvm/samples/clang/pom.xml +++ b/llvm/samples/clang/pom.xml @@ -12,7 +12,7 @@ org.bytedeco llvm-platform - 19.1.2-1.5.11-SNAPSHOT + 19.1.3-1.5.11-SNAPSHOT diff --git a/llvm/samples/llvm/pom.xml b/llvm/samples/llvm/pom.xml index 2785b5727e2..943058e4c6b 100644 --- a/llvm/samples/llvm/pom.xml +++ b/llvm/samples/llvm/pom.xml @@ -12,7 +12,7 @@ org.bytedeco llvm-platform - 19.1.2-1.5.11-SNAPSHOT + 19.1.3-1.5.11-SNAPSHOT org.bytedeco diff --git a/llvm/samples/polly/pom.xml b/llvm/samples/polly/pom.xml index 4b080d15433..b0b0ef821b7 100644 --- a/llvm/samples/polly/pom.xml +++ b/llvm/samples/polly/pom.xml @@ -13,7 +13,7 @@ org.bytedeco llvm-platform - 19.1.2-1.5.11-SNAPSHOT + 19.1.3-1.5.11-SNAPSHOT org.bytedeco @@ -23,12 +23,12 @@ org.bytedeco mkl-platform - 2024.2-1.5.11-SNAPSHOT + 2025.0-1.5.11-SNAPSHOT org.bytedeco mkl-platform-redist - 2024.2-1.5.11-SNAPSHOT + 2025.0-1.5.11-SNAPSHOT diff --git a/llvm/src/gen/java/org/bytedeco/llvm/LLVM/LLVMOpaqueDbgRecord.java b/llvm/src/gen/java/org/bytedeco/llvm/LLVM/LLVMOpaqueDbgRecord.java deleted file mode 100644 index 731db67f6b6..00000000000 --- a/llvm/src/gen/java/org/bytedeco/llvm/LLVM/LLVMOpaqueDbgRecord.java +++ /dev/null @@ -1,23 +0,0 @@ -// Targeted by JavaCPP version 1.5.11-SNAPSHOT: DO NOT EDIT THIS FILE - -package org.bytedeco.llvm.LLVM; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; - -import static org.bytedeco.llvm.global.LLVM.*; - - -/** - * @see llvm::DbgRecord - */ -@Opaque @Properties(inherit = org.bytedeco.llvm.presets.LLVM.class) -public class LLVMOpaqueDbgRecord extends Pointer { - /** Empty constructor. Calls {@code super((Pointer)null)}. */ - public LLVMOpaqueDbgRecord() { super((Pointer)null); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public LLVMOpaqueDbgRecord(Pointer p) { super(p); } -} diff --git a/mkl/README.md b/mkl/README.md index dc0f0da4e29..bfbe6931a7c 100644 --- a/mkl/README.md +++ b/mkl/README.md @@ -9,7 +9,7 @@ Introduction ------------ This directory contains the JavaCPP Presets module for: - * MKL 2024.2.2 https://software.intel.com/mkl + * MKL 2025.0.0 https://software.intel.com/mkl Please refer to the parent README.md file for more detailed information about the JavaCPP Presets. @@ -48,14 +48,14 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic org.bytedeco mkl-platform - 2024.2-1.5.11-SNAPSHOT + 2025.0-1.5.11-SNAPSHOT org.bytedeco mkl-platform-redist - 2024.2-1.5.11-SNAPSHOT + 2025.0-1.5.11-SNAPSHOT diff --git a/mkl/platform/pom.xml b/mkl/platform/pom.xml index ddcd5e42a49..2d65f60a51b 100644 --- a/mkl/platform/pom.xml +++ b/mkl/platform/pom.xml @@ -12,7 +12,7 @@ org.bytedeco mkl-platform - 2024.2-${project.parent.version} + 2025.0-${project.parent.version} JavaCPP Presets Platform for MKL diff --git a/mkl/platform/redist/pom.xml b/mkl/platform/redist/pom.xml index 27c834abced..922a371fbc8 100644 --- a/mkl/platform/redist/pom.xml +++ b/mkl/platform/redist/pom.xml @@ -12,7 +12,7 @@ org.bytedeco mkl-platform-redist - 2024.2-${project.parent.version} + 2025.0-${project.parent.version} JavaCPP Presets Platform Redist for MKL diff --git a/mkl/pom.xml b/mkl/pom.xml index 4eb85481757..4c658303c48 100644 --- a/mkl/pom.xml +++ b/mkl/pom.xml @@ -11,7 +11,7 @@ org.bytedeco mkl - 2024.2-${project.parent.version} + 2025.0-${project.parent.version} JavaCPP Presets for MKL diff --git a/mkl/samples/pom.xml b/mkl/samples/pom.xml index 4741e9b9647..0c3865c9c89 100644 --- a/mkl/samples/pom.xml +++ b/mkl/samples/pom.xml @@ -12,14 +12,14 @@ org.bytedeco mkl-platform - 2024.2-1.5.11-SNAPSHOT + 2025.0-1.5.11-SNAPSHOT org.bytedeco mkl-platform-redist - 2024.2-1.5.11-SNAPSHOT + 2025.0-1.5.11-SNAPSHOT diff --git a/mkl/src/gen/java/org/bytedeco/mkl/global/mkl_rt.java b/mkl/src/gen/java/org/bytedeco/mkl/global/mkl_rt.java index 951ef2aca26..1280cac022f 100644 --- a/mkl/src/gen/java/org/bytedeco/mkl/global/mkl_rt.java +++ b/mkl/src/gen/java/org/bytedeco/mkl/global/mkl_rt.java @@ -83,13 +83,14 @@ public class mkl_rt extends org.bytedeco.mkl.presets.mkl_rt { // #ifndef _MKL_VERSION_H_ // #define _MKL_VERSION_H_ -public static final int __INTEL_MKL_BUILD_DATE = 20240823; +public static final int __INTEL_MKL_BUILD_DATE = 20241009; -public static final int __INTEL_MKL__ = 2024; +public static final int __INTEL_MKL__ = 2025; public static final int __INTEL_MKL_MINOR__ = 0; -public static final int __INTEL_MKL_UPDATE__ = 2; +public static final int __INTEL_MKL_UPDATE__ = 0; +public static final int __INTEL_MKL_PATCH__ = 0; -public static final int INTEL_MKL_VERSION = 20240002; +public static final int INTEL_MKL_VERSION = 20250000; // #endif @@ -97,7 +98,7 @@ public class mkl_rt extends org.bytedeco.mkl.presets.mkl_rt { // Parsed from mkl_types.h /******************************************************************************* -* Copyright 1999-2022 Intel Corporation. +* Copyright 1999 Intel Corporation. 
* * This software and the related documents are Intel copyrighted materials, and * your use of them is governed by the express license under which they were @@ -150,6 +151,7 @@ public static class MKLVersion extends Pointer { public native int MajorVersion(); public native MKLVersion MajorVersion(int setter); public native int MinorVersion(); public native MKLVersion MinorVersion(int setter); public native int UpdateVersion(); public native MKLVersion UpdateVersion(int setter); + public native int PatchVersion(); public native MKLVersion PatchVersion(int setter); public native @Cast("char*") BytePointer ProductStatus(); public native MKLVersion ProductStatus(BytePointer setter); public native @Cast("char*") BytePointer Build(); public native MKLVersion Build(BytePointer setter); public native @Cast("char*") BytePointer Processor(); public native MKLVersion Processor(BytePointer setter); @@ -2451,6 +2453,24 @@ public static native void cblas_strmm(@Cast("const CBLAS_LAYOUT") int Layout, @C @Cast("const CBLAS_DIAG") int Diag, int M, int N, float alpha, @Const float[] A, int lda, float[] B, int ldb); +public static native void cblas_strmm_oop(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, + @Cast("const CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, + @Cast("const CBLAS_DIAG") int Diag, int M, int N, + float alpha, @Const FloatPointer A, int lda, + @Const FloatPointer B, int ldb, float beta, + FloatPointer C, int ldc); +public static native void cblas_strmm_oop(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, + @Cast("const CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, + @Cast("const CBLAS_DIAG") int Diag, int M, int N, + float alpha, @Const FloatBuffer A, int lda, + @Const FloatBuffer B, int ldb, float beta, + FloatBuffer C, int ldc); +public static native void cblas_strmm_oop(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, + @Cast("const CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, + @Cast("const CBLAS_DIAG") int Diag, int M, int N, + float alpha, @Const float[] A, int lda, + @Const float[] B, int ldb, float beta, + float[] C, int ldc); public static native void cblas_strsm(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, @Cast("const CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, @Cast("const CBLAS_DIAG") int Diag, int M, int N, @@ -2466,6 +2486,24 @@ public static native void cblas_strsm(@Cast("const CBLAS_LAYOUT") int Layout, @C @Cast("const CBLAS_DIAG") int Diag, int M, int N, float alpha, @Const float[] A, int lda, float[] B, int ldb); +public static native void cblas_strsm_oop(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, + @Cast("const CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, + @Cast("const CBLAS_DIAG") int Diag, int M, int N, + float alpha, @Const FloatPointer A, int lda, + @Const FloatPointer B, int ldb, float beta, + FloatPointer C, int ldc); +public static native void cblas_strsm_oop(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, + @Cast("const CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, + @Cast("const CBLAS_DIAG") int Diag, int M, int N, + float alpha, @Const FloatBuffer A, int lda, + @Const FloatBuffer B, int ldb, float beta, + FloatBuffer C, int ldc); +public static native void cblas_strsm_oop(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, + @Cast("const 
CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, + @Cast("const CBLAS_DIAG") int Diag, int M, int N, + float alpha, @Const float[] A, int lda, + @Const float[] B, int ldb, float beta, + float[] C, int ldc); public static native void cblas_strsm_batch(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE*") IntPointer Side_Array, @Cast("const CBLAS_UPLO*") IntPointer Uplo_Array, @Cast("const CBLAS_TRANSPOSE*") IntPointer TransA_Array, @Cast("const CBLAS_DIAG*") IntPointer Diag_Array, @Const IntPointer M_Array, @@ -2686,6 +2724,24 @@ public static native void cblas_dtrmm(@Cast("const CBLAS_LAYOUT") int Layout, @C @Cast("const CBLAS_DIAG") int Diag, int M, int N, double alpha, @Const double[] A, int lda, double[] B, int ldb); +public static native void cblas_dtrmm_oop(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, + @Cast("const CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, + @Cast("const CBLAS_DIAG") int Diag, int M, int N, + double alpha, @Const DoublePointer A, int lda, + @Const DoublePointer B, int ldb, double beta, + DoublePointer C, int ldc); +public static native void cblas_dtrmm_oop(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, + @Cast("const CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, + @Cast("const CBLAS_DIAG") int Diag, int M, int N, + double alpha, @Const DoubleBuffer A, int lda, + @Const DoubleBuffer B, int ldb, double beta, + DoubleBuffer C, int ldc); +public static native void cblas_dtrmm_oop(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, + @Cast("const CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, + @Cast("const CBLAS_DIAG") int Diag, int M, int N, + double alpha, @Const double[] A, int lda, + @Const double[] B, int ldb, double beta, + double[] C, int ldc); public static native void cblas_dtrsm(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, @Cast("const CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, @Cast("const CBLAS_DIAG") int Diag, int M, int N, @@ -2701,6 +2757,24 @@ public static native void cblas_dtrsm(@Cast("const CBLAS_LAYOUT") int Layout, @C @Cast("const CBLAS_DIAG") int Diag, int M, int N, double alpha, @Const double[] A, int lda, double[] B, int ldb); +public static native void cblas_dtrsm_oop(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, + @Cast("const CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, + @Cast("const CBLAS_DIAG") int Diag, int M, int N, + double alpha, @Const DoublePointer A, int lda, + @Const DoublePointer B, int ldb, double beta, + DoublePointer C, int ldc); +public static native void cblas_dtrsm_oop(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, + @Cast("const CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, + @Cast("const CBLAS_DIAG") int Diag, int M, int N, + double alpha, @Const DoubleBuffer A, int lda, + @Const DoubleBuffer B, int ldb, double beta, + DoubleBuffer C, int ldc); +public static native void cblas_dtrsm_oop(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, + @Cast("const CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, + @Cast("const CBLAS_DIAG") int Diag, int M, int N, + double alpha, @Const double[] A, int lda, + @Const double[] B, int ldb, double beta, + double[] C, int ldc); public static native void cblas_dtrsm_batch(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const 
CBLAS_SIDE*") IntPointer Side_Array, @Cast("const CBLAS_UPLO*") IntPointer Uplo_Array, @Cast("const CBLAS_TRANSPOSE*") IntPointer Transa_Array, @Cast("const CBLAS_DIAG*") IntPointer Diag_Array, @Const IntPointer M_Array, @@ -2873,11 +2947,23 @@ public static native void cblas_ctrmm(@Cast("const CBLAS_LAYOUT") int Layout, @C @Cast("const CBLAS_DIAG") int Diag, int M, int N, @Const Pointer alpha, @Const Pointer A, int lda, Pointer B, int ldb); +public static native void cblas_ctrmm_oop(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, + @Cast("const CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, + @Cast("const CBLAS_DIAG") int Diag, int M, int N, + @Const Pointer alpha, @Const Pointer A, int lda, + @Const Pointer B, int ldb, @Const Pointer beta, + Pointer C, int ldc); public static native void cblas_ctrsm(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, @Cast("const CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, @Cast("const CBLAS_DIAG") int Diag, int M, int N, @Const Pointer alpha, @Const Pointer A, int lda, Pointer B, int ldb); +public static native void cblas_ctrsm_oop(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, + @Cast("const CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, + @Cast("const CBLAS_DIAG") int Diag, int M, int N, + @Const Pointer alpha, @Const Pointer A, int lda, + @Const Pointer B, int ldb, @Const Pointer beta, + Pointer C, int ldc); public static native void cblas_ctrsm_batch(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE*") IntPointer Side_Array, @Cast("const CBLAS_UPLO*") IntPointer Uplo_Array, @Cast("const CBLAS_TRANSPOSE*") IntPointer Transa_Array, @Cast("const CBLAS_DIAG*") IntPointer Diag_Array, @Const IntPointer M_Array, @@ -3036,11 +3122,23 @@ public static native void cblas_ztrmm(@Cast("const CBLAS_LAYOUT") int Layout, @C @Cast("const CBLAS_DIAG") int Diag, int M, int N, @Const Pointer alpha, @Const Pointer A, int lda, Pointer B, int ldb); +public static native void cblas_ztrmm_oop(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, + @Cast("const CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, + @Cast("const CBLAS_DIAG") int Diag, int M, int N, + @Const Pointer alpha, @Const Pointer A, int lda, + @Const Pointer B, int ldb, @Const Pointer beta, + Pointer C, int ldc); public static native void cblas_ztrsm(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, @Cast("const CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, @Cast("const CBLAS_DIAG") int Diag, int M, int N, @Const Pointer alpha, @Const Pointer A, int lda, Pointer B, int ldb); +public static native void cblas_ztrsm_oop(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE") int Side, + @Cast("const CBLAS_UPLO") int Uplo, @Cast("const CBLAS_TRANSPOSE") int TransA, + @Cast("const CBLAS_DIAG") int Diag, int M, int N, + @Const Pointer alpha, @Const Pointer A, int lda, + @Const Pointer B, int ldb, @Const Pointer beta, + Pointer C, int ldc); public static native void cblas_ztrsm_batch(@Cast("const CBLAS_LAYOUT") int Layout, @Cast("const CBLAS_SIDE*") IntPointer Side_Array, @Cast("const CBLAS_UPLO*") IntPointer Uplo_Array, @Cast("const CBLAS_TRANSPOSE*") IntPointer Transa_Array, @Cast("const CBLAS_DIAG*") IntPointer Diag_Array, @Const IntPointer M_Array, @@ -233226,7 +233324,6 @@ public static class sparse_struct extends Pointer { /* Single Dynamic library threading */ 
public static final int MKL_THREADING_INTEL = 0; public static final int MKL_THREADING_SEQUENTIAL = 1; -public static final int MKL_THREADING_PGI = 2; public static final int MKL_THREADING_GNU = 3; public static final int MKL_THREADING_TBB = 4; public static native int MKL_Set_Threading_Layer(int code); @@ -233678,6 +233775,8 @@ public static class USRFCNXS extends FunctionPointer { // #ifndef __MKL_VML_DEFINES_H__ // #define __MKL_VML_DEFINES_H__ +// #include + // #ifdef __cplusplus // #endif /* __cplusplus */ @@ -233709,9 +233808,12 @@ public static class USRFCNXS extends FunctionPointer { public static final int VML_HA = 0x00000002; public static final int VML_EP = 0x00000003; -public static final int VML_LA_64 = 0x0000000000000001; -public static final int VML_HA_64 = 0x0000000000000002; -public static final int VML_EP_64 = 0x0000000000000003; +public static native @MemberGetter int VML_LA_64(); +public static final int VML_LA_64 = VML_LA_64(); +public static native @MemberGetter int VML_HA_64(); +public static final int VML_HA_64 = VML_HA_64(); +public static native @MemberGetter int VML_EP_64(); +public static final int VML_EP_64 = VML_EP_64(); /* // SETTING OPTIMAL FLOATING-POINT PRECISION AND ROUNDING MODE @@ -233739,10 +233841,14 @@ public static class USRFCNXS extends FunctionPointer { public static final int VML_DOUBLE_CONSISTENT = 0x00000020; public static final int VML_RESTORE = 0x00000030; -public static final int VML_DEFAULT_PRECISION_64 = 0x0000000000000000; -public static final int VML_FLOAT_CONSISTENT_64 = 0x0000000000000010; -public static final int VML_DOUBLE_CONSISTENT_64 = 0x0000000000000020; -public static final int VML_RESTORE_64 = 0x0000000000000030; +public static native @MemberGetter int VML_DEFAULT_PRECISION_64(); +public static final int VML_DEFAULT_PRECISION_64 = VML_DEFAULT_PRECISION_64(); +public static native @MemberGetter int VML_FLOAT_CONSISTENT_64(); +public static final int VML_FLOAT_CONSISTENT_64 = VML_FLOAT_CONSISTENT_64(); +public static native @MemberGetter int VML_DOUBLE_CONSISTENT_64(); +public static final int VML_DOUBLE_CONSISTENT_64 = VML_DOUBLE_CONSISTENT_64(); +public static native @MemberGetter int VML_RESTORE_64(); +public static final int VML_RESTORE_64 = VML_RESTORE_64(); /* // VML ERROR HANDLING CONTROL @@ -233771,14 +233877,20 @@ public static class USRFCNXS extends FunctionPointer { public static final int VML_ERRMODE_DEFAULT = VML_ERRMODE_ERRNO | VML_ERRMODE_CALLBACK | VML_ERRMODE_EXCEPT; -public static final int VML_ERRMODE_IGNORE_64 = 0x0000000000000100; -public static final int VML_ERRMODE_ERRNO_64 = 0x0000000000000200; -public static final int VML_ERRMODE_STDERR_64 = 0x0000000000000400; -public static final int VML_ERRMODE_EXCEPT_64 = 0x0000000000000800; -public static final int VML_ERRMODE_CALLBACK_64 = 0x0000000000001000; -public static final int VML_ERRMODE_NOERR_64 = 0x0000000000002000; -public static final int VML_ERRMODE_DEFAULT_64 = -VML_ERRMODE_ERRNO_64 | VML_ERRMODE_CALLBACK_64 | VML_ERRMODE_EXCEPT_64; +public static native @MemberGetter int VML_ERRMODE_IGNORE_64(); +public static final int VML_ERRMODE_IGNORE_64 = VML_ERRMODE_IGNORE_64(); +public static native @MemberGetter int VML_ERRMODE_ERRNO_64(); +public static final int VML_ERRMODE_ERRNO_64 = VML_ERRMODE_ERRNO_64(); +public static native @MemberGetter int VML_ERRMODE_STDERR_64(); +public static final int VML_ERRMODE_STDERR_64 = VML_ERRMODE_STDERR_64(); +public static native @MemberGetter int VML_ERRMODE_EXCEPT_64(); +public static final int VML_ERRMODE_EXCEPT_64 = 
VML_ERRMODE_EXCEPT_64(); +public static native @MemberGetter int VML_ERRMODE_CALLBACK_64(); +public static final int VML_ERRMODE_CALLBACK_64 = VML_ERRMODE_CALLBACK_64(); +public static native @MemberGetter int VML_ERRMODE_NOERR_64(); +public static final int VML_ERRMODE_NOERR_64 = VML_ERRMODE_NOERR_64(); +public static native @MemberGetter int VML_ERRMODE_DEFAULT_64(); +public static final int VML_ERRMODE_DEFAULT_64 = VML_ERRMODE_DEFAULT_64(); /* // OpenMP(R) number of threads mode macros @@ -233792,8 +233904,10 @@ public static class USRFCNXS extends FunctionPointer { public static final int VML_NUM_THREADS_OMP_AUTO = 0x00000000; public static final int VML_NUM_THREADS_OMP_FIXED = 0x00010000; -public static final int VML_NUM_THREADS_OMP_AUTO_64 = 0x0000000000000000; -public static final int VML_NUM_THREADS_OMP_FIXED_64 = 0x0000000000010000; +public static native @MemberGetter int VML_NUM_THREADS_OMP_AUTO_64(); +public static final int VML_NUM_THREADS_OMP_AUTO_64 = VML_NUM_THREADS_OMP_AUTO_64(); +public static native @MemberGetter int VML_NUM_THREADS_OMP_FIXED_64(); +public static final int VML_NUM_THREADS_OMP_FIXED_64 = VML_NUM_THREADS_OMP_FIXED_64(); /* // TBB partitioner control macros @@ -233810,9 +233924,12 @@ public static class USRFCNXS extends FunctionPointer { public static final int VML_TBB_PARTITIONER_STATIC = 0x00010000; public static final int VML_TBB_PARTITIONER_SIMPLE = 0x00020000; -public static final int VML_TBB_PARTITIONER_AUTO_64 = 0x0000000000000000; -public static final int VML_TBB_PARTITIONER_STATIC_64 = 0x0000000000010000; -public static final int VML_TBB_PARTITIONER_SIMPLE_64 = 0x0000000000020000; +public static native @MemberGetter int VML_TBB_PARTITIONER_AUTO_64(); +public static final int VML_TBB_PARTITIONER_AUTO_64 = VML_TBB_PARTITIONER_AUTO_64(); +public static native @MemberGetter int VML_TBB_PARTITIONER_STATIC_64(); +public static final int VML_TBB_PARTITIONER_STATIC_64 = VML_TBB_PARTITIONER_STATIC_64(); +public static native @MemberGetter int VML_TBB_PARTITIONER_SIMPLE_64(); +public static final int VML_TBB_PARTITIONER_SIMPLE_64 = VML_TBB_PARTITIONER_SIMPLE_64(); /* // FTZ & DAZ mode macros @@ -233828,9 +233945,12 @@ public static class USRFCNXS extends FunctionPointer { public static final int VML_FTZDAZ_OFF = 0x00140000; public static final int VML_FTZDAZ_CURRENT = 0x00000000; -public static final int VML_FTZDAZ_ON_64 = 0x0000000000280000; -public static final int VML_FTZDAZ_OFF_64 = 0x0000000000140000; -public static final int VML_FTZDAZ_CURRENT_64 = 0x0000000000000000; +public static native @MemberGetter int VML_FTZDAZ_ON_64(); +public static final int VML_FTZDAZ_ON_64 = VML_FTZDAZ_ON_64(); +public static native @MemberGetter int VML_FTZDAZ_OFF_64(); +public static final int VML_FTZDAZ_OFF_64 = VML_FTZDAZ_OFF_64(); +public static native @MemberGetter int VML_FTZDAZ_CURRENT_64(); +public static final int VML_FTZDAZ_CURRENT_64 = VML_FTZDAZ_CURRENT_64(); /* // Exception trap macros @@ -233844,10 +233964,14 @@ public static class USRFCNXS extends FunctionPointer { public static final int VML_TRAP_OVERFLOW = 0x04000000; public static final int VML_TRAP_UNDERFLOW = 0x08000000; -public static final int VML_TRAP_INVALID_64 = 0x0000000001000000; -public static final int VML_TRAP_DIVBYZERO_64 = 0x0000000002000000; -public static final int VML_TRAP_OVERFLOW_64 = 0x0000000004000000; -public static final int VML_TRAP_UNDERFLOW_64 = 0x0000000008000000; +public static native @MemberGetter int VML_TRAP_INVALID_64(); +public static final int VML_TRAP_INVALID_64 = 
VML_TRAP_INVALID_64(); +public static native @MemberGetter int VML_TRAP_DIVBYZERO_64(); +public static final int VML_TRAP_DIVBYZERO_64 = VML_TRAP_DIVBYZERO_64(); +public static native @MemberGetter int VML_TRAP_OVERFLOW_64(); +public static final int VML_TRAP_OVERFLOW_64 = VML_TRAP_OVERFLOW_64(); +public static native @MemberGetter int VML_TRAP_UNDERFLOW_64(); +public static final int VML_TRAP_UNDERFLOW_64 = VML_TRAP_UNDERFLOW_64(); /* // ACCURACY, FLOATING-POINT CONTROL, FTZDAZ AND ERROR HANDLING MASKS @@ -233877,15 +234001,24 @@ public static class USRFCNXS extends FunctionPointer { public static final int VML_FTZDAZ_MASK = 0x003C0000; public static final int VML_TRAP_EXCEPTIONS_MASK = 0x0F000000; -public static final int VML_ACCURACY_MASK_64 = 0x000000000000000F; -public static final int VML_FPUMODE_MASK_64 = 0x00000000000000F0; -public static final int VML_ERRMODE_MASK_64 = 0x000000000000FF00; -public static final int VML_ERRMODE_STDHANDLER_MASK_64 = 0x0000000000002F00; -public static final int VML_ERRMODE_CALLBACK_MASK_64 = 0x0000000000001000; -public static final int VML_NUM_THREADS_OMP_MASK_64 = 0x0000000000030000; -public static final int VML_TBB_PARTITIONER_MASK_64 = 0x0000000000030000; -public static final int VML_FTZDAZ_MASK_64 = 0x00000000003C0000; -public static final int VML_TRAP_EXCEPTIONS_MASK_64 = 0x000000000F000000; +public static native @MemberGetter int VML_ACCURACY_MASK_64(); +public static final int VML_ACCURACY_MASK_64 = VML_ACCURACY_MASK_64(); +public static native @MemberGetter int VML_FPUMODE_MASK_64(); +public static final int VML_FPUMODE_MASK_64 = VML_FPUMODE_MASK_64(); +public static native @MemberGetter int VML_ERRMODE_MASK_64(); +public static final int VML_ERRMODE_MASK_64 = VML_ERRMODE_MASK_64(); +public static native @MemberGetter int VML_ERRMODE_STDHANDLER_MASK_64(); +public static final int VML_ERRMODE_STDHANDLER_MASK_64 = VML_ERRMODE_STDHANDLER_MASK_64(); +public static native @MemberGetter int VML_ERRMODE_CALLBACK_MASK_64(); +public static final int VML_ERRMODE_CALLBACK_MASK_64 = VML_ERRMODE_CALLBACK_MASK_64(); +public static native @MemberGetter int VML_NUM_THREADS_OMP_MASK_64(); +public static final int VML_NUM_THREADS_OMP_MASK_64 = VML_NUM_THREADS_OMP_MASK_64(); +public static native @MemberGetter int VML_TBB_PARTITIONER_MASK_64(); +public static final int VML_TBB_PARTITIONER_MASK_64 = VML_TBB_PARTITIONER_MASK_64(); +public static native @MemberGetter int VML_FTZDAZ_MASK_64(); +public static final int VML_FTZDAZ_MASK_64 = VML_FTZDAZ_MASK_64(); +public static native @MemberGetter int VML_TRAP_EXCEPTIONS_MASK_64(); +public static final int VML_TRAP_EXCEPTIONS_MASK_64 = VML_TRAP_EXCEPTIONS_MASK_64(); /* // ERROR STATUS MACROS diff --git a/numpy/README.md b/numpy/README.md index 467262dcd8e..ebeb065cfc3 100644 --- a/numpy/README.md +++ b/numpy/README.md @@ -55,7 +55,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic org.bytedeco mkl-platform-redist - 2024.2-1.5.11-SNAPSHOT + 2025.0-1.5.11-SNAPSHOT diff --git a/numpy/samples/pom.xml b/numpy/samples/pom.xml index 1d6e4031cef..bbc76018ee1 100644 --- a/numpy/samples/pom.xml +++ b/numpy/samples/pom.xml @@ -19,7 +19,7 @@ org.bytedeco mkl-platform-redist - 2024.2-1.5.11-SNAPSHOT + 2025.0-1.5.11-SNAPSHOT diff --git a/opencv/README.md b/opencv/README.md index 5c17dba09ec..6acc688ef52 100644 --- a/opencv/README.md +++ b/opencv/README.md @@ -70,7 +70,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic org.bytedeco mkl-platform-redist - 
2024.2-1.5.11-SNAPSHOT + 2025.0-1.5.11-SNAPSHOT diff --git a/opencv/samples/pom.xml b/opencv/samples/pom.xml index 587f109d2cf..66083df6add 100644 --- a/opencv/samples/pom.xml +++ b/opencv/samples/pom.xml @@ -33,7 +33,7 @@ org.bytedeco mkl-platform-redist - 2024.2-1.5.11-SNAPSHOT + 2025.0-1.5.11-SNAPSHOT diff --git a/platform/pom.xml b/platform/pom.xml index 830ba19b126..80fcbe436f7 100644 --- a/platform/pom.xml +++ b/platform/pom.xml @@ -172,7 +172,7 @@ org.bytedeco mkl-platform - 2024.2-${project.version} + 2025.0-${project.version} @@ -232,7 +232,7 @@ org.bytedeco llvm-platform - 19.1.2-${project.version} + 19.1.3-${project.version} org.bytedeco @@ -292,7 +292,7 @@ org.bytedeco pytorch-platform - 2.5.0-${project.version} + 2.5.1-${project.version} org.bytedeco @@ -317,7 +317,7 @@ org.bytedeco tritonserver-platform - 2.50.0-${project.version} + 2.51.0-${project.version} diff --git a/pytorch/README.md b/pytorch/README.md index 5ecaba34d7b..6147925d113 100644 --- a/pytorch/README.md +++ b/pytorch/README.md @@ -9,7 +9,7 @@ Introduction ------------ This directory contains the JavaCPP Presets module for: - * PyTorch 2.5.0 https://pytorch.org/ + * PyTorch 2.5.1 https://pytorch.org/ Please refer to the parent README.md file for more detailed information about the JavaCPP Presets. @@ -48,14 +48,14 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic org.bytedeco pytorch-platform - 2.5.0-1.5.11-SNAPSHOT + 2.5.1-1.5.11-SNAPSHOT org.bytedeco pytorch-platform-gpu - 2.5.0-1.5.11-SNAPSHOT + 2.5.1-1.5.11-SNAPSHOT @@ -69,7 +69,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic org.bytedeco mkl-platform-redist - 2024.2-1.5.11-SNAPSHOT + 2025.0-1.5.11-SNAPSHOT diff --git a/pytorch/cppbuild.sh b/pytorch/cppbuild.sh index 7434bfa9eeb..ab299ca92df 100755 --- a/pytorch/cppbuild.sh +++ b/pytorch/cppbuild.sh @@ -38,7 +38,7 @@ if [[ $PLATFORM == windows* ]]; then export PYTHON_BIN_PATH=$(which python.exe) fi -PYTORCH_VERSION=2.5.0 +PYTORCH_VERSION=2.5.1 export PYTORCH_BUILD_VERSION="$PYTORCH_VERSION" export PYTORCH_BUILD_NUMBER=1 diff --git a/pytorch/platform/gpu/pom.xml b/pytorch/platform/gpu/pom.xml index 0d8bd29ae81..ed09efb6122 100644 --- a/pytorch/platform/gpu/pom.xml +++ b/pytorch/platform/gpu/pom.xml @@ -12,7 +12,7 @@ org.bytedeco pytorch-platform-gpu - 2.5.0-${project.parent.version} + 2.5.1-${project.parent.version} JavaCPP Presets Platform GPU for PyTorch diff --git a/pytorch/platform/pom.xml b/pytorch/platform/pom.xml index 4ff46efa3fd..1aa3be3cc72 100644 --- a/pytorch/platform/pom.xml +++ b/pytorch/platform/pom.xml @@ -12,7 +12,7 @@ org.bytedeco pytorch-platform - 2.5.0-${project.parent.version} + 2.5.1-${project.parent.version} JavaCPP Presets Platform for PyTorch diff --git a/pytorch/pom.xml b/pytorch/pom.xml index 575b759fa79..9a8baf0c921 100644 --- a/pytorch/pom.xml +++ b/pytorch/pom.xml @@ -11,7 +11,7 @@ org.bytedeco pytorch - 2.5.0-${project.parent.version} + 2.5.1-${project.parent.version} JavaCPP Presets for PyTorch diff --git a/pytorch/samples/pom.xml b/pytorch/samples/pom.xml index 97d4fb103ac..8a1fc21f8b2 100644 --- a/pytorch/samples/pom.xml +++ b/pytorch/samples/pom.xml @@ -12,14 +12,14 @@ org.bytedeco pytorch-platform - 2.5.0-1.5.11-SNAPSHOT + 2.5.1-1.5.11-SNAPSHOT org.bytedeco pytorch-platform-gpu - 2.5.0-1.5.11-SNAPSHOT + 2.5.1-1.5.11-SNAPSHOT @@ -33,7 +33,7 @@ org.bytedeco mkl-platform-redist - 2024.2-1.5.11-SNAPSHOT + 2025.0-1.5.11-SNAPSHOT diff --git a/scipy/README.md b/scipy/README.md index 67f4961b97e..73afe1937fb 
100644 --- a/scipy/README.md +++ b/scipy/README.md @@ -55,7 +55,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic org.bytedeco mkl-platform-redist - 2024.2-1.5.11-SNAPSHOT + 2025.0-1.5.11-SNAPSHOT diff --git a/scipy/samples/pom.xml b/scipy/samples/pom.xml index bec4cdfad9d..8a80ba6b478 100644 --- a/scipy/samples/pom.xml +++ b/scipy/samples/pom.xml @@ -19,7 +19,7 @@ org.bytedeco mkl-platform-redist - 2024.2-1.5.11-SNAPSHOT + 2025.0-1.5.11-SNAPSHOT diff --git a/tritonserver/README.md b/tritonserver/README.md index 3c88de03088..6fa8505c6f7 100644 --- a/tritonserver/README.md +++ b/tritonserver/README.md @@ -23,7 +23,7 @@ Introduction ------------ This directory contains the JavaCPP Presets module for: - * Triton Inference Server 2.50.0 https://github.com/triton-inference-server/server + * Triton Inference Server 2.51.0 https://github.com/triton-inference-server/server Please refer to the parent README.md file for more detailed information about the JavaCPP Presets. @@ -51,9 +51,9 @@ This sample intends to show how to call the Java-mapped C API of Triton to execu 1. Get the source code of Triton Inference Server to prepare the model repository: ```bash - $ wget https://github.com/triton-inference-server/server/archive/refs/tags/v2.50.0.tar.gz - $ tar zxvf v2.50.0.tar.gz - $ cd server-2.50.0/docs/examples/model_repository + $ wget https://github.com/triton-inference-server/server/archive/refs/tags/v2.51.0.tar.gz + $ tar zxvf v2.51.0.tar.gz + $ cd server-2.51.0/docs/examples/model_repository $ mkdir models $ cd models; cp -a ../simple . ``` @@ -61,7 +61,7 @@ Now, this `models` directory will be our model repository. 2. Start the Docker container to run the sample (assuming we are under the `models` directory created above): ```bash - $ docker run -it --gpus=all -v $(pwd):/workspace nvcr.io/nvidia/tritonserver:24.09-py3 bash + $ docker run -it --gpus=all -v $(pwd):/workspace nvcr.io/nvidia/tritonserver:24.10-py3 bash $ apt update $ apt install -y openjdk-11-jdk $ wget https://archive.apache.org/dist/maven/maven-3/3.8.4/binaries/apache-maven-3.8.4-bin.tar.gz diff --git a/tritonserver/cppbuild.sh b/tritonserver/cppbuild.sh index 611be9089e6..4c8f52ae41f 100755 --- a/tritonserver/cppbuild.sh +++ b/tritonserver/cppbuild.sh @@ -11,9 +11,9 @@ INCLUDE_DEVELOPER_TOOLS_SERVER=${INCLUDE_DEVELOPER_TOOLS_SERVER:=1} if [[ ! -f "/opt/tritonserver/include/triton/developer_tools/generic_server_wrapper.h" ]] && [[ ! 
-f "/opt/tritonserver/lib/libtritondevelopertoolsserver.so" ]] && [[ ${INCLUDE_DEVELOPER_TOOLS_SERVER} -ne 0 ]]; then TOOLS_BRANCH=${TOOLS_BRANCH:="https://github.com/triton-inference-server/developer_tools.git"} - TOOLS_BRANCH_TAG=${TOOLS_BRANCH_TAG:="r24.09"} + TOOLS_BRANCH_TAG=${TOOLS_BRANCH_TAG:="r24.10"} TRITON_CORE_REPO=${TRITON_CORE_REPO:="https://github.com/triton-inference-server/core.git"} - TRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG="r24.09"} + TRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG="r24.10"} TRITON_HOME="/opt/tritonserver" BUILD_HOME="$PWD"/tritonbuild mkdir -p ${BUILD_HOME} && cd ${BUILD_HOME} diff --git a/tritonserver/platform/pom.xml b/tritonserver/platform/pom.xml index 82851eac3d8..82d85d45c6c 100644 --- a/tritonserver/platform/pom.xml +++ b/tritonserver/platform/pom.xml @@ -12,7 +12,7 @@ org.bytedeco tritonserver-platform - 2.50.0-${project.parent.version} + 2.51.0-${project.parent.version} JavaCPP Presets Platform for Triton Inference Server diff --git a/tritonserver/platform/redist/pom.xml b/tritonserver/platform/redist/pom.xml index 638848272ad..8f45eb75249 100644 --- a/tritonserver/platform/redist/pom.xml +++ b/tritonserver/platform/redist/pom.xml @@ -12,7 +12,7 @@ org.bytedeco tritonserver-platform-redist - 2.50.0-${project.parent.version} + 2.51.0-${project.parent.version} JavaCPP Presets Platform Redist for Triton Inference Server diff --git a/tritonserver/pom.xml b/tritonserver/pom.xml index 5c6b326cf94..ba6f246af0f 100644 --- a/tritonserver/pom.xml +++ b/tritonserver/pom.xml @@ -11,7 +11,7 @@ org.bytedeco tritonserver - 2.50.0-${project.parent.version} + 2.51.0-${project.parent.version} JavaCPP Presets for Triton Inference Server diff --git a/tritonserver/samples/simple/pom.xml b/tritonserver/samples/simple/pom.xml index 817cab07f5a..9d71732ad40 100644 --- a/tritonserver/samples/simple/pom.xml +++ b/tritonserver/samples/simple/pom.xml @@ -12,7 +12,7 @@ org.bytedeco tritonserver-platform - 2.50.0-1.5.11-SNAPSHOT + 2.51.0-1.5.11-SNAPSHOT shaded diff --git a/tritonserver/samples/simplecpp/pom.xml b/tritonserver/samples/simplecpp/pom.xml index fa631c00b08..a790be743b0 100644 --- a/tritonserver/samples/simplecpp/pom.xml +++ b/tritonserver/samples/simplecpp/pom.xml @@ -12,7 +12,7 @@ org.bytedeco tritonserver-platform - 2.50.0-1.5.11-SNAPSHOT + 2.51.0-1.5.11-SNAPSHOT shaded diff --git a/tritonserver/samples/unsupported/pom.xml b/tritonserver/samples/unsupported/pom.xml index b3a891ddcdc..b4b253724c5 100644 --- a/tritonserver/samples/unsupported/pom.xml +++ b/tritonserver/samples/unsupported/pom.xml @@ -23,7 +23,7 @@ org.bytedeco tritonserver-platform - 2.50.0-1.5.11-SNAPSHOT + 2.51.0-1.5.11-SNAPSHOT shaded diff --git a/tvm/README.md b/tvm/README.md index 2ae70e56309..04355eb772c 100644 --- a/tvm/README.md +++ b/tvm/README.md @@ -70,7 +70,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic org.bytedeco mkl-platform-redist - 2024.2-1.5.11-SNAPSHOT + 2025.0-1.5.11-SNAPSHOT diff --git a/tvm/cppbuild.sh b/tvm/cppbuild.sh index 3548a565e33..6dfaba124af 100755 --- a/tvm/cppbuild.sh +++ b/tvm/cppbuild.sh @@ -202,7 +202,7 @@ cp -a 3rdparty/dlpack/include/dlpack 3rdparty/dmlc-core/include/dmlc ../include # Adjust the directory structure a bit to facilitate packaging in JAR file mkdir -p ../python -export MODULES=(attr cloudpickle decorator ml_dtypes psutil synr typed_ast tornado typing_extensions tvm) +export MODULES=(attr cloudpickle decorator ml_dtypes packaging psutil synr typed_ast tornado typing_extensions tvm) for MODULE in 
${MODULES[@]}; do mkdir -p ../python/$MODULE.egg-info cp -r $PYTHON_INSTALL_PATH/$MODULE*/$MODULE* ../python/ || true diff --git a/tvm/platform/gpu/pom.xml b/tvm/platform/gpu/pom.xml index c811107d28b..6a803d3b8e4 100644 --- a/tvm/platform/gpu/pom.xml +++ b/tvm/platform/gpu/pom.xml @@ -29,7 +29,7 @@ org.bytedeco llvm-platform - 19.1.2-${project.parent.version} + 19.1.3-${project.parent.version} org.bytedeco diff --git a/tvm/platform/pom.xml b/tvm/platform/pom.xml index 8b3da48192b..f215f56bd77 100644 --- a/tvm/platform/pom.xml +++ b/tvm/platform/pom.xml @@ -28,7 +28,7 @@ org.bytedeco llvm-platform - 19.1.2-${project.parent.version} + 19.1.3-${project.parent.version} org.bytedeco diff --git a/tvm/pom.xml b/tvm/pom.xml index fb3e5ba4f24..dc418aac99d 100644 --- a/tvm/pom.xml +++ b/tvm/pom.xml @@ -23,7 +23,7 @@ org.bytedeco llvm - 19.1.2-${project.parent.version} + 19.1.3-${project.parent.version} org.bytedeco @@ -61,7 +61,7 @@ org.bytedeco llvm-platform - 19.1.2-${project.parent.version} + 19.1.3-${project.parent.version} org.bytedeco diff --git a/tvm/samples/pom.xml b/tvm/samples/pom.xml index 1299336d265..9c59499ccfd 100644 --- a/tvm/samples/pom.xml +++ b/tvm/samples/pom.xml @@ -33,7 +33,7 @@ org.bytedeco mkl-platform-redist - 2024.2-1.5.11-SNAPSHOT + 2025.0-1.5.11-SNAPSHOT
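MKL 2025.0 also adds a PatchVersion field to the MKLVersion struct mapped earlier in this patch. A minimal sketch of reading it through the bindings, assuming MKL_Get_Version stays mapped in mkl_rt as in earlier releases:

```java
import org.bytedeco.mkl.global.mkl_rt.MKLVersion;
import static org.bytedeco.mkl.global.mkl_rt.*;

public class MklVersionSketch {
    public static void main(String[] args) {
        MKLVersion v = new MKLVersion();
        MKL_Get_Version(v); // assumed still mapped as in earlier presets
        System.out.printf("MKL %d.%d update %d patch %d%n",
                v.MajorVersion(), v.MinorVersion(), v.UpdateVersion(), v.PatchVersion());
    }
}
```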