Add support for CUMSUM and TRI for CUDA. (#17584) #1323
name: Release

on:
  workflow_dispatch: # allows manual triggering
    inputs:
      create_release:
        description: 'Create new release'
        required: true
        type: boolean
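  # A manual run can also be started from the command line (a sketch, assuming
  # an authenticated GitHub CLI):
  #   gh workflow run release.yml -f create_release=true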
  push:
    branches:
      - master
    paths: ['.github/workflows/release.yml', '**/CMakeLists.txt', '**/*.cmake', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
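  # github.head_ref is only set for pull_request events; for the push and
  # workflow_dispatch triggers used here the group falls through to the unique
  # run_id, so in practice in-flight release runs are not cancelled.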

env:
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  CMAKE_ARGS: "-DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON"
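  # Common build flags; several of the build jobs below append
  # ${{ env.CMAKE_ARGS }} to their cmake invocation.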

jobs:
  macOS-arm64:
    runs-on: macos-14

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: macOS-latest-cmake-arm64
          evict-old-files: 1d

      - name: Dependencies
        id: depends
        continue-on-error: true
        run: |
          brew update
          brew install curl

      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          cmake -B build \
              -DCMAKE_INSTALL_RPATH='@loader_path' \
              -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
              -DLLAMA_FATAL_WARNINGS=ON \
              -DGGML_METAL_USE_BF16=ON \
              -DGGML_METAL_EMBED_LIBRARY=ON \
              -DGGML_RPC=ON \
              ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Pack artifacts
        id: pack_artifacts
        run: |
          cp LICENSE ./build/bin/
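          # zip -y and tar both keep symlinks as symlinks rather than following
          # them, preserving any shared-library links inside bin/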
          zip -y -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz -C ./build/bin .

      - name: Upload artifacts (zip)
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
          name: llama-bin-macos-arm64.zip

      - name: Upload artifacts (tar)
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz
          name: llama-bin-macos-arm64.tar.gz

  macOS-x64:
    runs-on: macos-15-intel

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: macOS-latest-cmake-x64
          evict-old-files: 1d

      - name: Dependencies
        id: depends
        continue-on-error: true
        run: |
          brew update
          brew install curl

      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          # Metal is disabled due to intermittent failures with Github runners not having a GPU:
          # https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
          cmake -B build \
              -DCMAKE_INSTALL_RPATH='@loader_path' \
              -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
              -DLLAMA_FATAL_WARNINGS=ON \
              -DGGML_METAL=OFF \
              -DGGML_RPC=ON \
              -DCMAKE_OSX_DEPLOYMENT_TARGET=13.3
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Pack artifacts
        id: pack_artifacts
        run: |
          cp LICENSE ./build/bin/
          zip -y -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz -C ./build/bin .

      - name: Upload artifacts (zip)
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
          name: llama-bin-macos-x64.zip

      - name: Upload artifacts (tar)
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz
          name: llama-bin-macos-x64.tar.gz

  ubuntu-22-cpu:
    strategy:
      matrix:
        include:
          - build: 'x64'
            os: ubuntu-22.04
          - build: 's390x'
            os: ubuntu-24.04-s390x
          # GGML_BACKEND_DL and GGML_CPU_ALL_VARIANTS are not currently supported on arm
          # - build: 'arm64'
          #   os: ubuntu-22.04-arm

    runs-on: ${{ matrix.os }}

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: ubuntu-cpu-cmake-${{ matrix.build }}
          evict-old-files: 1d

      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get install build-essential libcurl4-openssl-dev
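
      # GGML_BACKEND_DL builds the backends as runtime-loadable modules and
      # GGML_CPU_ALL_VARIANTS compiles multiple CPU builds (per SIMD level),
      # letting the best supported variant be picked on the user's machine.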
      - name: Build
        id: cmake_build
        run: |
          cmake -B build \
              -DCMAKE_INSTALL_RPATH='$ORIGIN' \
              -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
              -DGGML_BACKEND_DL=ON \
              -DGGML_NATIVE=OFF \
              -DGGML_CPU_ALL_VARIANTS=ON \
              -DLLAMA_FATAL_WARNINGS=ON \
              ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release -j $(nproc)

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Pack artifacts
        id: pack_artifacts
        run: |
          cp LICENSE ./build/bin/
          zip -y -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip ./build/bin/*
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.tar.gz -C ./build/bin .

      - name: Upload artifacts (zip)
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip
          name: llama-bin-ubuntu-${{ matrix.build }}.zip

      - name: Upload artifacts (tar)
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.tar.gz
          name: llama-bin-ubuntu-${{ matrix.build }}.tar.gz

  ubuntu-22-vulkan:
    runs-on: ubuntu-22.04

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: ubuntu-22-cmake-vulkan
          evict-old-files: 1d

      - name: Dependencies
        id: depends
        run: |
          wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
          sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
          sudo apt-get update -y
          sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libcurl4-openssl-dev

      - name: Build
        id: cmake_build
        run: |
          cmake -B build \
              -DCMAKE_INSTALL_RPATH='$ORIGIN' \
              -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
              -DGGML_BACKEND_DL=ON \
              -DGGML_NATIVE=OFF \
              -DGGML_CPU_ALL_VARIANTS=ON \
              -DGGML_VULKAN=ON \
              ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release -j $(nproc)

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Pack artifacts
        id: pack_artifacts
        run: |
          cp LICENSE ./build/bin/
          zip -y -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip ./build/bin/*
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz -C ./build/bin .

      - name: Upload artifacts (zip)
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip
          name: llama-bin-ubuntu-vulkan-x64.zip

      - name: Upload artifacts (tar)
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz
          name: llama-bin-ubuntu-vulkan-x64.tar.gz

  windows-cpu:
    runs-on: windows-2025

    strategy:
      matrix:
        include:
          - arch: 'x64'
          - arch: 'arm64'

    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: windows-latest-cmake-cpu-${{ matrix.arch }}
          variant: ccache
          evict-old-files: 1d

      - name: Install Ninja
        run: |
          choco install ninja

      - name: libCURL
        id: get_libcurl
        uses: ./.github/actions/windows-setup-curl
        with:
          architecture: ${{ matrix.arch == 'x64' && 'win64' || 'win64a' }}
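
      # For arm64 the build cross-compiles: vcvarsall is initialized with the
      # amd64_arm64 toolset (x64 host tools targeting arm64) plus an LLVM
      # toolchain file.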
      - name: Build
        shell: cmd
        env:
          CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'x64' && 'x64' || 'amd64_arm64' }}
          cmake -S . -B build -G "Ninja Multi-Config" ^
              -D CMAKE_TOOLCHAIN_FILE=cmake/${{ matrix.arch }}-windows-llvm.cmake ^
              -DGGML_NATIVE=OFF ^
              -DGGML_BACKEND_DL=ON ^
              -DGGML_CPU_ALL_VARIANTS=${{ matrix.arch == 'x64' && 'ON' || 'OFF' }} ^
              -DGGML_OPENMP=ON ^
              -DCURL_LIBRARY="%CURL_PATH%/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="%CURL_PATH%/include" ^
              ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release

      - name: Pack artifacts
        id: pack_artifacts
        env:
          CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
        run: |
          Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\
          Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.44.35112\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\
          7z a -snl llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          path: llama-bin-win-cpu-${{ matrix.arch }}.zip
          name: llama-bin-win-cpu-${{ matrix.arch }}.zip
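
  # This job builds only the backend DLL (--target ggml-vulkan / ggml-opencl);
  # the release job below merges the matching CPU zip into each backend zip so
  # the published Windows packages are self-contained.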
  windows:
    runs-on: windows-2025

    env:
      OPENBLAS_VERSION: 0.3.23
      VULKAN_VERSION: 1.4.313.2

    strategy:
      matrix:
        include:
          - backend: 'vulkan'
            arch: 'x64'
            defines: '-DGGML_VULKAN=ON'
            target: 'ggml-vulkan'
          - backend: 'opencl-adreno'
            arch: 'arm64'
            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
            target: 'ggml-opencl'

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: windows-latest-cmake-${{ matrix.backend }}-${{ matrix.arch }}
          variant: ccache
          evict-old-files: 1d

      - name: Install Vulkan SDK
        id: get_vulkan
        if: ${{ matrix.backend == 'vulkan' }}
        run: |
          curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe"
          & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
          Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
          Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"

      - name: Install Ninja
        id: install_ninja
        run: |
          choco install ninja

      - name: Install OpenCL Headers and Libs
        id: install_opencl
        if: ${{ matrix.backend == 'opencl-adreno' && matrix.arch == 'arm64' }}
        run: |
          git clone https://github.com/KhronosGroup/OpenCL-Headers
          cd OpenCL-Headers
          cmake -B build `
              -DBUILD_TESTING=OFF `
              -DOPENCL_HEADERS_BUILD_TESTING=OFF `
              -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF `
              -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
          cmake --build build --target install
          git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
          cd OpenCL-ICD-Loader
          cmake -B build-arm64-release `
              -A arm64 `
              -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" `
              -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
          cmake --build build-arm64-release --target install --config release

      - name: Build
        id: cmake_build
        run: |
          cmake -S . -B build ${{ matrix.defines }} -DGGML_NATIVE=OFF -DGGML_CPU=OFF -DGGML_BACKEND_DL=ON -DLLAMA_CURL=OFF
          cmake --build build --config Release --target ${{ matrix.target }}

      - name: Pack artifacts
        id: pack_artifacts
        run: |
          7z a -snl llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\${{ matrix.target }}.dll

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          path: llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip
          name: llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip

  windows-cuda:
    runs-on: windows-2022

    strategy:
      matrix:
        cuda: ['12.4']

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Install ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: windows-cuda-${{ matrix.cuda }}
          variant: ccache
          evict-old-files: 1d

      - name: Install Cuda Toolkit
        uses: ./.github/actions/windows-setup-cuda
        with:
          cuda_version: ${{ matrix.cuda }}

      - name: Install Ninja
        id: install_ninja
        run: |
          choco install ninja

      - name: Build
        id: cmake_build
        shell: cmd
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
          cmake -S . -B build -G "Ninja Multi-Config" ^
              -DGGML_BACKEND_DL=ON ^
              -DGGML_NATIVE=OFF ^
              -DGGML_CPU=OFF ^
              -DGGML_CUDA=ON ^
              -DLLAMA_CURL=OFF
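          rem leave one logical processor free, which likely helps keep the runner responsive while nvcc runs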
          set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
          cmake --build build --config Release -j %NINJA_JOBS% --target ggml-cuda

      - name: Pack artifacts
        id: pack_artifacts
        run: |
          7z a -snl llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\ggml-cuda.dll

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          path: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
          name: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip

      - name: Copy and pack Cuda runtime
        run: |
          echo "Cuda install location: ${{ env.CUDA_PATH }}"
          $dst='.\build\bin\cudart\'
          robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          7z a cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip $dst\*

      - name: Upload Cuda runtime
        uses: actions/upload-artifact@v4
        with:
          path: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
          name: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip

  windows-sycl:
    runs-on: windows-2022

    defaults:
      run:
        shell: bash

    env:
      WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/24751ead-ddc5-4479-b9e6-f9fe2ff8b9f2/intel-deep-learning-essentials-2025.2.1.25_offline.exe
      WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
      ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: windows-latest-cmake-sycl
          variant: ccache
          evict-old-files: 1d

      - name: Install
        run: |
          scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL

      - name: Build
        id: cmake_build
        shell: cmd
        run: |
          call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force
          cmake -G "Ninja" -B build ^
              -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx ^
              -DCMAKE_BUILD_TYPE=Release ^
              -DGGML_BACKEND_DL=ON -DBUILD_SHARED_LIBS=ON ^
              -DGGML_CPU=OFF -DGGML_SYCL=ON ^
              -DLLAMA_CURL=OFF
          cmake --build build --target ggml-sycl -j

      - name: Build the release package
        id: pack_artifacts
        run: |
          echo "cp oneAPI runtime DLL files in ${{ env.ONEAPI_ROOT }} to ./build/bin"
          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero_v2.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl-ls.exe" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/tcm/latest/bin/tcm.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/tcm/latest/bin/libhwloc-15.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/umf/latest/bin/umf.dll" ./build/bin
          echo "cp oneAPI runtime DLL files to ./build/bin done"
          7z a -snl llama-bin-win-sycl-x64.zip ./build/bin/*

      - name: Upload the release package
        uses: actions/upload-artifact@v4
        with:
          path: llama-bin-win-sycl-x64.zip
          name: llama-bin-win-sycl-x64.zip

  windows-hip:
    runs-on: windows-2022

    env:
      HIPSDK_INSTALLER_VERSION: "25.Q3"

    strategy:
      matrix:
        include:
          - name: "radeon"
            gpu_targets: "gfx1151;gfx1200;gfx1201;gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032"

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Grab rocWMMA package
        id: grab_rocwmma
        run: |
          curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/7.0.1/pool/main/r/rocwmma-dev/rocwmma-dev_2.0.0.70001-42~24.04_amd64.deb"
          7z x rocwmma.deb
          7z x data.tar

      - name: Cache ROCm Installation
        id: cache-rocm
        uses: actions/cache@v4
        with:
          path: C:\Program Files\AMD\ROCm
          key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: windows-latest-cmake-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64
          evict-old-files: 1d

      - name: Install ROCm
        if: steps.cache-rocm.outputs.cache-hit != 'true'
        id: depends
        run: |
          $ErrorActionPreference = "Stop"
          write-host "Downloading AMD HIP SDK Installer"
          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ env.HIPSDK_INSTALLER_VERSION }}-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
          write-host "Installing AMD HIP SDK"
          $proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru
          $completed = $proc.WaitForExit(600000)
          if (-not $completed) {
            Write-Error "ROCm installation timed out after 10 minutes. Killing the process"
            $proc.Kill()
            exit 1
          }
          if ($proc.ExitCode -ne 0) {
            Write-Error "ROCm installation failed with exit code $($proc.ExitCode)"
            exit 1
          }
          write-host "Completed AMD HIP SDK installation"

      - name: Verify ROCm
        id: verify
        run: |
          # Find and test ROCm installation
          $clangPath = Get-ChildItem 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Select-Object -First 1
          if (-not $clangPath) {
            Write-Error "ROCm installation not found"
            exit 1
          }
          & $clangPath.FullName --version

      - name: Build
        id: cmake_build
        run: |
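          # Locate the versioned ROCm install by finding clang.exe and taking
          # its grandparent directory (...\ROCm\<version>)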
          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
          $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
          cmake -G "Unix Makefiles" -B build -S . `
              -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
              -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
              -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-7.0.1/include/ -Wno-ignored-attributes -Wno-nested-anon-types" `
              -DCMAKE_BUILD_TYPE=Release `
              -DGGML_BACKEND_DL=ON `
              -DGGML_NATIVE=OFF `
              -DGGML_CPU=OFF `
              -DAMDGPU_TARGETS="${{ matrix.gpu_targets }}" `
              -DGGML_HIP_ROCWMMA_FATTN=ON `
              -DGGML_HIP=ON `
              -DLLAMA_CURL=OFF
          cmake --build build --target ggml-hip -j ${env:NUMBER_OF_PROCESSORS}
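          # Bundle the rocBLAS / hipBLASLt device libraries next to the backend
          # DLL so the package works without a local ROCm install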
| md "build\bin\rocblas\library\" | |
| md "build\bin\hipblaslt\library" | |
| cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\" | |
| cp "${env:HIP_PATH}\bin\hipblaslt.dll" "build\bin\" | |
| cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\" | |
| cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\" | |
| cp "${env:HIP_PATH}\bin\hipblaslt\library\*" "build\bin\hipblaslt\library\" | |
| - name: Pack artifacts | |
| id: pack_artifacts | |
| run: | | |
| 7z a -snl llama-bin-win-hip-${{ matrix.name }}-x64.zip .\build\bin\* | |
| - name: Upload artifacts | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| path: llama-bin-win-hip-${{ matrix.name }}-x64.zip | |
| name: llama-bin-win-hip-${{ matrix.name }}-x64.zip | |
| ios-xcode-build: | |
| runs-on: macos-15 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| fetch-depth: 0 | |
| - name: Setup Xcode | |
| run: | | |
| sudo xcode-select -s /Applications/Xcode_16.4.app | |
| - name: Build | |
| id: cmake_build | |
| run: | | |
| sysctl -a | |
| cmake -B build -G Xcode \ | |
| -DGGML_METAL_USE_BF16=ON \ | |
| -DGGML_METAL_EMBED_LIBRARY=ON \ | |
| -DLLAMA_CURL=OFF \ | |
| -DLLAMA_BUILD_EXAMPLES=OFF \ | |
| -DLLAMA_BUILD_TOOLS=OFF \ | |
| -DLLAMA_BUILD_TESTS=OFF \ | |
| -DLLAMA_BUILD_SERVER=OFF \ | |
| -DCMAKE_SYSTEM_NAME=iOS \ | |
| -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ | |
| -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml | |
| cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO | |
| - name: xcodebuild for swift package | |
| id: xcodebuild | |
| run: | | |
| ./build-xcframework.sh | |
| - name: Build Xcode project | |
| run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build | |
| - name: Determine tag name | |
| id: tag | |
| uses: ./.github/actions/get-tag-name | |
| - name: Pack artifacts | |
| id: pack_artifacts | |
| run: | | |
| zip -y -r llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework | |
| tar -czvf llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz -C build-apple llama.xcframework | |
| - name: Upload artifacts (zip) | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| path: llama-${{ steps.tag.outputs.name }}-xcframework.zip | |
| name: llama-${{ steps.tag.outputs.name }}-xcframework.zip | |
| - name: Upload artifacts (tar) | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| path: llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz | |
| name: llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz | |
| release: | |
| if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} | |
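    # Runs for every push to master, or when a manual dispatch sets create_release=true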

    # Fine-grained permission
    # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
    permissions:
      contents: write # for creating release

    runs-on: ubuntu-latest

    needs:
      - windows
      - windows-cpu
      - windows-cuda
      - windows-sycl
      - windows-hip
      - ubuntu-22-cpu
      - ubuntu-22-vulkan
      - macOS-arm64
      - macOS-x64
      - ios-xcode-build

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Download artifacts
        id: download-artifact
        uses: actions/download-artifact@v4
        with:
          path: ./artifact
          merge-multiple: true

      - name: Move artifacts
        id: move_artifacts
        run: |
          mkdir -p release

          echo "Adding CPU backend files to existing zips..."
          for arch in x64 arm64; do
            cpu_zip="artifact/llama-bin-win-cpu-${arch}.zip"
            temp_dir=$(mktemp -d)

            echo "Extracting CPU backend for $arch..."
            unzip "$cpu_zip" -d "$temp_dir"

            echo "Adding CPU files to $arch zips..."
            for target_zip in artifact/llama-bin-win-*-${arch}.zip; do
              if [[ "$target_zip" == "$cpu_zip" ]]; then
                continue
              fi
              echo "Adding CPU backend to $(basename "$target_zip")"
              realpath_target_zip=$(realpath "$target_zip")
              (cd "$temp_dir" && zip -r "$realpath_target_zip" .)
            done

            rm -rf "$temp_dir"
          done

          echo "Renaming and moving zips to release..."
          for zip_file in artifact/llama-bin-win-*.zip; do
            base_name=$(basename "$zip_file" .zip)
            zip_name="llama-${{ steps.tag.outputs.name }}-${base_name#llama-}.zip"
            echo "Moving $zip_file to release/$zip_name"
            mv "$zip_file" "release/$zip_name"
          done

          echo "Moving other artifacts..."
          mv -v artifact/*.zip release
          mv -v artifact/*.tar.gz release

      - name: Create release
        id: create_release
        uses: ggml-org/action-create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ steps.tag.outputs.name }}
          body: |
            > [!WARNING]
            > **Release Format Update**: Linux releases will soon use .tar.gz archives instead of .zip. Please make the necessary changes to your deployment scripts.

            <details open>

            ${{ github.event.head_commit.message }}

            </details>

            **macOS/iOS:**
            - [macOS Apple Silicon (arm64)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz)
            - [macOS Intel (x64)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz)
            - [iOS XCFramework](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz)

            **Linux:**
            - [Ubuntu x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.tar.gz)
            - [Ubuntu x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz)
            - [Ubuntu s390x (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-s390x.tar.gz)

            **Windows:**
            - [Windows x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-x64.zip)
            - [Windows arm64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-arm64.zip)
            - [Windows x64 (CUDA)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-12.4-x64.zip)
            - [Windows x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-vulkan-x64.zip)
            - [Windows x64 (SYCL)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip)
            - [Windows x64 (HIP)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-hip-radeon-x64.zip)

      - name: Upload release
        id: upload_release
        uses: actions/github-script@v3
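        # github-script@v3 exposes the Octokit REST client as github.repos.*;
        # from v5 on, the equivalent call is github.rest.repos.uploadReleaseAsset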
        with:
          github-token: ${{secrets.GITHUB_TOKEN}}
          script: |
            const path = require('path');
            const fs = require('fs');

            const release_id = '${{ steps.create_release.outputs.id }}';

            // upload every packaged artifact (.zip / .tar.gz) as a release asset
            for (let file of fs.readdirSync('./release')) {
              if (path.extname(file) === '.zip' || file.endsWith('.tar.gz')) {
                console.log('uploadReleaseAsset', file);
                await github.repos.uploadReleaseAsset({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  release_id: release_id,
                  name: file,
                  data: fs.readFileSync(`./release/${file}`)
                });
              }
            }