Skip to content

Commit 02fe919

Browse files
Green-Sky authored and arthw committed
CUDA: compress mode option and default to size (ggml-org#12029)
cuda 12.8 added the option to specify stronger compression for binaries, so we now default to "size".
1 parent 2c1f2eb commit 02fe919

File tree

2 files changed

+12
-0
lines changed

2 files changed

+12
-0
lines changed

ggml/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,9 @@ option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM"
155155
# CUDA backend feature toggles: FlashAttention kernels and CUDA graphs.
option(
    GGML_CUDA_FA
    "ggml: compile ggml FlashAttention CUDA kernels"
    ON
)
option(
    GGML_CUDA_FA_ALL_QUANTS
    "ggml: compile all quants for FlashAttention"
    OFF
)
# The initial value is taken from GGML_CUDA_GRAPHS_DEFAULT, which is set outside this excerpt.
option(
    GGML_CUDA_GRAPHS
    "ggml: use CUDA graphs (llama.cpp only)"
    ${GGML_CUDA_GRAPHS_DEFAULT}
)
158+
# Compression mode for the CUDA binaries; per its help text it requires CUDA 12.8+.
set(GGML_CUDA_COMPRESSION_MODE
    "size"
    CACHE STRING "ggml: cuda link binary compression mode; requires cuda 12.8+"
)
# Choices listed for this cache entry (shown as a drop-down by cmake-gui / ccmake).
set_property(
    CACHE GGML_CUDA_COMPRESSION_MODE
    PROPERTY STRINGS none speed balance size
)
158161

159162
# HIP backend toggles; both are off unless the user enables them.
option(
    GGML_HIP
    "ggml: use HIP"
    OFF
)
option(
    GGML_HIP_GRAPHS
    "ggml: use HIP graph, experimental, slow"
    OFF
)

ggml/src/ggml-cuda/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,15 @@ if (CUDAToolkit_FOUND)
102102

103103
# Seed the CUDA flag list with fast-math; further flags are appended to CUDA_FLAGS below.
set(CUDA_FLAGS "-use_fast_math")
104104

105+
# Pass the requested binary compression mode to nvcc; the option exists from CUDA 12.8 onwards.
# GGML_CUDA_COMPRESSION_MODE options are:
# - none (not recommended)
# - speed (nvcc's default)
# - balance
# - size
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8")
    # An empty mode would expand to a bare "-compress-mode=", which is not a usable flag.
    # Skip the flag in that case so nvcc falls back to its own default.
    if (NOT "${GGML_CUDA_COMPRESSION_MODE}" STREQUAL "")
        list(APPEND CUDA_FLAGS "-compress-mode=${GGML_CUDA_COMPRESSION_MODE}")
    endif()
endif()
113+
105114
# When fatal warnings are requested, add "-Werror all-warnings" to the CUDA flags.
if (GGML_FATAL_WARNINGS)
    list(APPEND CUDA_FLAGS "-Werror" "all-warnings")
endif()

0 commit comments

Comments
 (0)