File tree (expand / collapse): 2 files changed, +5 -4 lines changed
lines changed Original file line number Diff line number Diff line change @@ -199,6 +199,9 @@ struct common_params_speculative {
199 199   float p_split = 0.1f; // speculative decoding split probability
200 200   float p_min = 0.75f; // minimum speculative decoding probability (greedy)
201 201
    202 + ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
    203 + ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V
    204 +
202 205   struct cpu_params cpuparams;
203 206   struct cpu_params cpuparams_batch;
204 207
Original file line number | Diff line number | Diff line change
@@ -1969,10 +1969,8 @@ struct server_context {
1969 1969   params_dft.n_ctx = params_base.speculative.n_ctx == 0 ? params_base.n_ctx / params_base.n_parallel : params_base.speculative.n_ctx;
1970 1970   params_dft.n_gpu_layers = params_base.speculative.n_gpu_layers;
1971 1971   params_dft.n_parallel = 1;
1972      -
1973      - // force F16 KV cache for the draft model for extra performance
1974      - params_dft.cache_type_k = GGML_TYPE_F16;
1975      - params_dft.cache_type_v = GGML_TYPE_F16;
     1972 + params_dft.cache_type_k = params_base.speculative.cache_type_k;
     1973 + params_dft.cache_type_v = params_base.speculative.cache_type_v;
1976 1974
1977 1975   llama_init_dft = common_init_from_params(params_dft);
1978 1976
You can’t perform that action at this time.
0 commit comments