Skip to content

Commit 4a554ac

Browse files
committed
Revert "ggml : remove OpenCL (ggml-org#7735)"
This reverts commit 554c247.
1 parent 554c247 commit 4a554ac

21 files changed

+2639
-29
lines changed

.github/workflows/build.yml

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,8 @@ jobs:
688688

689689
env:
690690
OPENBLAS_VERSION: 0.3.23
691+
OPENCL_VERSION: 2023.04.17
692+
CLBLAST_VERSION: 1.6.0
691693
SDE_VERSION: 9.33.0-2024-01-07
692694
VULKAN_VERSION: 1.3.261.1
693695

@@ -704,6 +706,8 @@ jobs:
704706
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
705707
- build: 'avx512-x64'
706708
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
709+
- build: 'clblast-x64'
710+
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
707711
- build: 'openblas-x64'
708712
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
709713
- build: 'kompute-x64'
@@ -728,6 +732,27 @@ jobs:
728732
run: |
729733
git submodule update --init kompute
730734
735+
- name: Download OpenCL SDK
736+
id: get_opencl
737+
if: ${{ matrix.build == 'clblast-x64' }}
738+
run: |
739+
curl.exe -o $env:RUNNER_TEMP/opencl.zip -L "https://github.com/KhronosGroup/OpenCL-SDK/releases/download/v${env:OPENCL_VERSION}/OpenCL-SDK-v${env:OPENCL_VERSION}-Win-x64.zip"
740+
mkdir $env:RUNNER_TEMP/opencl
741+
tar.exe -xvf $env:RUNNER_TEMP/opencl.zip --strip-components=1 -C $env:RUNNER_TEMP/opencl
742+
743+
- name: Download CLBlast
744+
id: get_clblast
745+
if: ${{ matrix.build == 'clblast-x64' }}
746+
run: |
747+
curl.exe -o $env:RUNNER_TEMP/clblast.7z -L "https://github.com/CNugteren/CLBlast/releases/download/${env:CLBLAST_VERSION}/CLBlast-${env:CLBLAST_VERSION}-windows-x64.7z"
748+
curl.exe -o $env:RUNNER_TEMP/CLBlast.LICENSE.txt -L "https://github.com/CNugteren/CLBlast/raw/${env:CLBLAST_VERSION}/LICENSE"
749+
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/clblast.7z
750+
rename-item $env:RUNNER_TEMP/CLBlast-${env:CLBLAST_VERSION}-windows-x64 clblast
751+
foreach ($f in (gci -Recurse -Path "$env:RUNNER_TEMP/clblast" -Filter '*.cmake')) {
752+
$txt = Get-Content -Path $f -Raw
753+
$txt.Replace('C:/vcpkg/packages/opencl_x64-windows/', "$($env:RUNNER_TEMP.Replace('\','/'))/opencl/") | Set-Content -Path $f -Encoding UTF8
754+
}
755+
731756
- name: Download OpenBLAS
732757
id: get_openblas
733758
if: ${{ matrix.build == 'openblas-x64' }}
@@ -761,6 +786,13 @@ jobs:
761786
cmake -S . -B build ${{ matrix.defines }}
762787
cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
763788
789+
- name: Add clblast.dll
790+
id: add_clblast_dll
791+
if: ${{ matrix.build == 'clblast-x64' }}
792+
run: |
793+
cp $env:RUNNER_TEMP/clblast/lib/clblast.dll ./build/bin/Release
794+
cp $env:RUNNER_TEMP/CLBlast.LICENSE.txt ./build/bin/Release/CLBlast-${env:CLBLAST_VERSION}.txt
795+
764796
- name: Add libopenblas.dll
765797
id: add_libopenblas_dll
766798
if: ${{ matrix.build == 'openblas-x64' }}
@@ -784,7 +816,7 @@ jobs:
784816
- name: Test
785817
id: cmake_test
786818
# not all machines have native AVX-512
787-
if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
819+
if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'clblast-x64' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
788820
run: |
789821
cd build
790822
ctest -L main -C Release --verbose --timeout 900
@@ -1039,7 +1071,7 @@ jobs:
10391071
# hypervisor: 'qemu'
10401072
# run: |
10411073
# sudo pkg update
1042-
# sudo pkg install -y gmake automake autoconf pkgconf llvm15 openblas
1074+
# sudo pkg install -y gmake automake autoconf pkgconf llvm15 clinfo clover opencl clblast openblas
10431075
# gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j `sysctl -n hw.ncpu`
10441076

10451077
release:

CMakeLists.txt

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ option(LLAMA_CUDA_FA_ALL_QUANTS "llama: compile all quants for Flas
111111
option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
112112
option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)
113113
option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF)
114+
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
114115
option(LLAMA_VULKAN "llama: use Vulkan" OFF)
115116
option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF)
116117
option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF)
@@ -501,6 +502,22 @@ if (LLAMA_RPC)
501502
set(GGML_SOURCES_RPC ggml-rpc.cpp)
502503
endif()
503504

505+
if (LLAMA_CLBLAST)
506+
find_package(CLBlast)
507+
if (CLBlast_FOUND)
508+
message(STATUS "CLBlast found")
509+
510+
set(GGML_HEADERS_OPENCL ggml-opencl.h)
511+
set(GGML_SOURCES_OPENCL ggml-opencl.cpp)
512+
513+
add_compile_definitions(GGML_USE_CLBLAST)
514+
515+
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} clblast)
516+
else()
517+
message(WARNING "CLBlast not found")
518+
endif()
519+
endif()
520+
504521
if (LLAMA_VULKAN)
505522
find_package(Vulkan)
506523
if (Vulkan_FOUND)
@@ -1248,6 +1265,7 @@ add_library(ggml OBJECT
12481265
ggml-quants.c
12491266
ggml-quants.h
12501267
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
1268+
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
12511269
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
12521270
${GGML_SOURCES_RPC} ${GGML_HEADERS_RPC}
12531271
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
@@ -1335,9 +1353,8 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake
13351353
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama)
13361354

13371355
set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h"
1338-
"${GGML_HEADERS_CUDA}"
1339-
"${GGML_HEADERS_METAL}"
1340-
"${GGML_HEADERS_EXTRA}")
1356+
"${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}"
1357+
"${GGML_HEADERS_METAL}" "${GGML_HEADERS_EXTRA}")
13411358

13421359
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
13431360
install(TARGETS ggml PUBLIC_HEADER)

Makefile

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,23 @@ ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h
547547
$(NVCC_COMPILE)
548548
endif # LLAMA_CUDA
549549

550+
ifdef LLAMA_CLBLAST
551+
MK_CPPFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags-only-I clblast OpenCL)
552+
MK_CFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
553+
MK_CXXFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
554+
555+
# Mac provides OpenCL as a framework
556+
ifeq ($(UNAME_S),Darwin)
557+
MK_LDFLAGS += -lclblast -framework OpenCL
558+
else
559+
MK_LDFLAGS += $(shell pkg-config --libs clblast OpenCL)
560+
endif
561+
OBJS += ggml-opencl.o
562+
563+
ggml-opencl.o: ggml-opencl.cpp ggml-opencl.h
564+
$(CXX) $(CXXFLAGS) -c $< -o $@
565+
endif # LLAMA_CLBLAST
566+
550567
ifdef LLAMA_VULKAN
551568
MK_CPPFLAGS += -DGGML_USE_VULKAN
552569
MK_LDFLAGS += -lvulkan

README-sycl.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ The llama.cpp SYCL backend is designed to support **Intel GPU** firstly. Based o
2929

3030
When targeting **Intel CPU**, it is recommended to use llama.cpp for [Intel oneMKL](README.md#intel-onemkl) backend.
3131

32-
It has the similar design of other llama.cpp BLAS-based paths such as *OpenBLAS, cuBLAS, etc..*. In beginning work, the oneAPI's [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) open-source migration tool (Commercial release [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) was used for this purpose.
32+
It has a similar design to other llama.cpp BLAS-based paths such as *OpenBLAS, cuBLAS, CLBlast, etc.*. In the beginning of this work, the oneAPI [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) open-source migration tool (Commercial release [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) was used for this purpose.
3333

3434
## News
3535

README.md

Lines changed: 113 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ variety of hardware - locally and in the cloud.
7777
- AVX, AVX2 and AVX512 support for x86 architectures
7878
- 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory use
7979
- Custom CUDA kernels for running LLMs on NVIDIA GPUs (support for AMD GPUs via HIP)
80-
- Vulkan and SYCL backend support
80+
- Vulkan, SYCL, and (partial) OpenCL backend support
8181
- CPU+GPU hybrid inference to partially accelerate models larger than the total VRAM capacity
8282

8383
Since its [inception](https://github.com/ggerganov/llama.cpp/issues/33#issuecomment-1465108022), the project has
@@ -371,11 +371,16 @@ In order to build llama.cpp you have four different options.
371371
3. Install compilation dependencies.
372372

373373
```bash
374-
sudo pkg install gmake automake autoconf pkgconf llvm15 openblas
374+
sudo pkg install gmake automake autoconf pkgconf llvm15 clinfo clover \
375+
opencl clblast openblas
375376
376377
gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j4
377378
```
378379

380+
**Notes:** With these packages you can build llama.cpp with OpenBLAS and
381+
CLBlast support to use OpenCL GPU acceleration on FreeBSD. Please read
382+
the instructions below to use and activate these options.
383+
379384
### Homebrew
380385

381386
On Mac and Linux, the homebrew package manager can be used via
@@ -394,7 +399,7 @@ argument.
394399

395400
### BLAS Build
396401

397-
Building the program with BLAS support may lead to some performance improvements in prompt processing using batch sizes higher than 32 (the default is 512). Support with CPU-only BLAS implementations doesn't affect the normal generation performance. We may see generation performance improvements with GPU-involved BLAS implementations, e.g. cuBLAS, hipBLAS. There are currently several different BLAS implementations available for build and use:
402+
Building the program with BLAS support may lead to some performance improvements in prompt processing using batch sizes higher than 32 (the default is 512). Support with CPU-only BLAS implementations doesn't affect the normal generation performance. We may see generation performance improvements with GPU-involved BLAS implementations, e.g. cuBLAS, hipBLAS and CLBlast. There are currently several different BLAS implementations available for build and use:
398403
399404
- #### Accelerate Framework:
400405
@@ -548,6 +553,111 @@ Building the program with BLAS support may lead to some performance improvements
548553
| LLAMA_CUDA_MMV_Y | Positive integer | 1 | Block size in y direction for the HIP mul mat vec kernels. Increasing this value can improve performance on fast GPUs. Power of 2 recommended. Does not affect k-quants. |
549554
| LLAMA_CUDA_KQUANTS_ITER | 1 or 2 | 2 | Number of values processed per iteration and per HIP thread for Q2_K and Q6_K quantization formats. Setting this value to 1 can improve performance for slow GPUs. |
550555

556+
- #### CLBlast
557+
558+
OpenCL acceleration is provided by the matrix multiplication kernels from the [CLBlast](https://github.com/CNugteren/CLBlast) project and custom kernels for ggml that can generate tokens on the GPU.
559+
560+
You will need the [OpenCL SDK](https://github.com/KhronosGroup/OpenCL-SDK).
561+
- For Ubuntu, Debian, and Fedora the packages `opencl-headers`, `ocl-icd` may be needed.
562+
563+
- For Windows, a pre-built SDK is available on the [OpenCL Releases](https://github.com/KhronosGroup/OpenCL-SDK/releases) page.
564+
565+
- <details>
566+
<summary>Installing the OpenCL SDK from source</summary>
567+
568+
```sh
569+
git clone --recurse-submodules https://github.com/KhronosGroup/OpenCL-SDK.git
570+
cd OpenCL-SDK
571+
cmake -B build -DBUILD_DOCS=OFF \
572+
-DBUILD_EXAMPLES=OFF \
573+
-DBUILD_TESTING=OFF \
574+
-DOPENCL_SDK_BUILD_SAMPLES=OFF \
575+
-DOPENCL_SDK_TEST_SAMPLES=OFF
576+
cmake --build build
577+
cmake --install build --prefix /some/path
578+
```
579+
</details>
580+
581+
##### Installing CLBlast
582+
583+
Pre-built CLBlast binaries may be found on the [CLBlast Releases](https://github.com/CNugteren/CLBlast/releases) page. For Unix variants, it may also be found in your operating system's packages.
584+
585+
Linux packaging:
586+
Fedora Linux:
587+
```bash
588+
sudo dnf install clblast
589+
```
590+
591+
Alternatively, they may be built from source.
592+
593+
- <details>
594+
<summary>Windows:</summary>
595+
596+
```cmd
597+
set OPENCL_SDK_ROOT="C:/OpenCL-SDK-v2023.04.17-Win-x64"
598+
git clone https://github.com/CNugteren/CLBlast.git
599+
cd CLBlast
600+
cmake -B build -DBUILD_SHARED_LIBS=OFF -DOVERRIDE_MSVC_FLAGS_TO_MT=OFF -DTUNERS=OFF -DOPENCL_ROOT=%OPENCL_SDK_ROOT% -G "Visual Studio 17 2022" -A x64
601+
cmake --build build --config Release
602+
cmake --install build --prefix C:/CLBlast
603+
```
604+
605+
(note: `--config Release` at build time is the default and only relevant for Visual Studio builds - or multi-config Ninja builds)
606+
607+
- <details>
608+
<summary>Unix:</summary>
609+
610+
```sh
611+
git clone https://github.com/CNugteren/CLBlast.git
612+
cd CLBlast
613+
cmake -B build -DBUILD_SHARED_LIBS=OFF -DTUNERS=OFF
614+
cmake --build build --config Release
615+
cmake --install build --prefix /some/path
616+
```
617+
618+
Where `/some/path` is where the built library will be installed (default is `/usr/local`).
619+
</details>
620+
621+
##### Building Llama with CLBlast
622+
623+
- Build with make:
624+
```sh
625+
make LLAMA_CLBLAST=1
626+
```
627+
- CMake (Unix):
628+
```sh
629+
cmake -B build -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
630+
cmake --build build --config Release
631+
```
632+
- CMake (Windows):
633+
```cmd
634+
set CL_BLAST_CMAKE_PKG="C:/CLBlast/lib/cmake/CLBlast"
635+
git clone https://github.com/ggerganov/llama.cpp
636+
cd llama.cpp
637+
cmake -B build -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=%CL_BLAST_CMAKE_PKG% -G "Visual Studio 17 2022" -A x64
638+
cmake --build build --config Release
639+
cmake --install build --prefix C:/LlamaCPP
640+
```
641+
642+
##### Running Llama with CLBlast
643+
644+
The CLBlast build supports `--gpu-layers|-ngl` like the CUDA version does.
645+
646+
To select the correct platform (driver) and device (GPU), you can use the environment variables `GGML_OPENCL_PLATFORM` and `GGML_OPENCL_DEVICE`.
647+
The selection can be a number (starting from 0) or a text string to search:
648+
649+
```sh
650+
GGML_OPENCL_PLATFORM=1 ./main ...
651+
GGML_OPENCL_DEVICE=2 ./main ...
652+
GGML_OPENCL_PLATFORM=Intel ./main ...
653+
GGML_OPENCL_PLATFORM=AMD GGML_OPENCL_DEVICE=1 ./main ...
654+
```
655+
656+
The default behavior is to find the first GPU device, but when it is an integrated GPU on a laptop, for instance, the selectors are useful.
657+
Using the variables it is possible to select a CPU-based driver as well, if so desired.
658+
659+
You can get a list of platforms and devices from the `clinfo -l` command, etc.
660+
551661
- #### Vulkan
552662
553663
**With docker**:

common/common.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2844,6 +2844,7 @@ void yaml_dump_non_result_info(FILE * stream, const gpt_params & params, const l
28442844
fprintf(stream, "cpu_has_avx512_vnni: %s\n", ggml_cpu_has_avx512_vnni() ? "true" : "false");
28452845
fprintf(stream, "cpu_has_cuda: %s\n", ggml_cpu_has_cuda() ? "true" : "false");
28462846
fprintf(stream, "cpu_has_vulkan: %s\n", ggml_cpu_has_vulkan() ? "true" : "false");
2847+
fprintf(stream, "cpu_has_clblast: %s\n", ggml_cpu_has_clblast() ? "true" : "false");
28472848
fprintf(stream, "cpu_has_kompute: %s\n", ggml_cpu_has_kompute() ? "true" : "false");
28482849
fprintf(stream, "cpu_has_fma: %s\n", ggml_cpu_has_fma() ? "true" : "false");
28492850
fprintf(stream, "cpu_has_gpublas: %s\n", ggml_cpu_has_gpublas() ? "true" : "false");

0 commit comments

Comments
 (0)