File tree Expand file tree Collapse file tree 2 files changed +12
-0
lines changed Expand file tree Collapse file tree 2 files changed +12
-0
lines changed Original file line number Diff line number Diff line change @@ -155,6 +155,9 @@ option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM"
155
155
# CUDA backend feature switches.
# - GGML_CUDA_FA:            build the FlashAttention CUDA kernels (on by default)
# - GGML_CUDA_FA_ALL_QUANTS: build FlashAttention for all quants (off by default)
# - GGML_CUDA_GRAPHS:        use CUDA graphs; default comes from GGML_CUDA_GRAPHS_DEFAULT
option(GGML_CUDA_FA
       "ggml: compile ggml FlashAttention CUDA kernels"
       ON)
option(GGML_CUDA_FA_ALL_QUANTS
       "ggml: compile all quants for FlashAttention"
       OFF)
option(GGML_CUDA_GRAPHS
       "ggml: use CUDA graphs (llama.cpp only)"
       ${GGML_CUDA_GRAPHS_DEFAULT})
158
# Cache entry selecting the compression mode for CUDA binaries (default: "size").
# The STRINGS property publishes the selectable values to CMake front-ends.
set(GGML_CUDA_COMPRESSION_MODE
    "size"
    CACHE STRING "ggml: cuda link binary compression mode; requires cuda 12.8+")
set_property(
    CACHE GGML_CUDA_COMPRESSION_MODE
    PROPERTY STRINGS none speed balance size)
158
161
159
162
# HIP backend switches; both are disabled unless the user turns them on.
option(GGML_HIP
       "ggml: use HIP"
       OFF)
option(GGML_HIP_GRAPHS
       "ggml: use HIP graph, experimental, slow"
       OFF)
Original file line number Diff line number Diff line change @@ -102,6 +102,15 @@ if (CUDAToolkit_FOUND)
102
102
103
103
# Seed the CUDA flag list; the conditional sections below append to it.
set(CUDA_FLAGS "-use_fast_math")
104
104
105
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8")
    # -compress-mode is only passed for CUDA toolkits >= 12.8.
    # Options are:
    # - none    (not recommended)
    # - speed   (nvcc's default)
    # - balance
    # - size
    #
    # CMake does not restrict a cache STRING entry to its advertised choices,
    # so reject empty or unknown values here at configure time instead of
    # handing nvcc a malformed flag that only fails once compilation starts.
    set(GGML_CUDA_COMPRESSION_MODES_KNOWN none speed balance size)
    list(FIND GGML_CUDA_COMPRESSION_MODES_KNOWN "${GGML_CUDA_COMPRESSION_MODE}" GGML_CUDA_COMPRESSION_MODE_INDEX)
    if (GGML_CUDA_COMPRESSION_MODE_INDEX EQUAL -1)
        message(FATAL_ERROR
            "GGML_CUDA_COMPRESSION_MODE must be one of: none, speed, balance, size "
            "(got '${GGML_CUDA_COMPRESSION_MODE}')")
    endif()

    # Quoted so the flag always stays a single list element.
    list(APPEND CUDA_FLAGS "-compress-mode=${GGML_CUDA_COMPRESSION_MODE}")
endif()
113
+
105
114
if (GGML_FATAL_WARNINGS)
    # Turn every nvcc warning into an error. "all-warnings" is the value of
    # -Werror and must be a single token; splitting it into "all" and
    # "-warnings" hands nvcc two arguments it does not understand.
    list(APPEND CUDA_FLAGS -Werror all-warnings)
endif()
You can’t perform that action at this time.
0 commit comments