1 file changed, 6 insertions(+), 5 deletions(-)
@@ -5164,11 +5164,12 @@ static int llama_decode_internal(
 
     // If all tensors can be run on the GPU then using more than 1 thread is detrimental.
     const bool full_offload_supported =
-        model.arch == LLM_ARCH_LLAMA    ||
-        model.arch == LLM_ARCH_BAICHUAN ||
-        model.arch == LLM_ARCH_FALCON   ||
-        model.arch == LLM_ARCH_REFACT   ||
-        model.arch == LLM_ARCH_MPT;
+        model.arch == LLM_ARCH_LLAMA    ||
+        model.arch == LLM_ARCH_BAICHUAN ||
+        model.arch == LLM_ARCH_FALCON   ||
+        model.arch == LLM_ARCH_REFACT   ||
+        model.arch == LLM_ARCH_MPT      ||
+        model.arch == LLM_ARCH_STARCODER;
 
     const bool fully_offloaded = model.n_gpu_layers >= (int) hparams.n_layer + 3;
     if (ggml_cpu_has_cublas() && full_offload_supported && fully_offloaded) {
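For context: this check decides whether decoding should fall back to a single CPU thread. When the build has cuBLAS, the architecture's graph can run entirely on the GPU, and every repeating layer plus the 3 non-repeating tensors is offloaded, extra CPU threads only add overhead (per the comment in the hunk); the change extends that fast path to StarCoder models. Below is a minimal standalone sketch of the gating pattern. The struct fields, the ggml_cpu_has_cublas() stub, the pick_n_threads helper, and the return-1 body are assumptions for illustration, since the real body of the if lies outside the hunk.

#include <cstdio>

// Sketch only: these types are stand-ins for the real llama.cpp definitions.
enum llm_arch { LLM_ARCH_LLAMA, LLM_ARCH_MPT, LLM_ARCH_STARCODER };

struct model_t   { llm_arch arch; int n_gpu_layers; };
struct hparams_t { int n_layer; };

// Stand-in for the real ggml build-capability query.
static bool ggml_cpu_has_cublas() { return true; }

// Hypothetical helper; in llama.cpp this logic is inlined in llama_decode_internal.
static int pick_n_threads(const model_t & model, const hparams_t & hparams, int n_threads) {
    // Architectures whose graphs run fully on the GPU when offloaded
    // (abbreviated list; the diff above has the complete one).
    const bool full_offload_supported =
        model.arch == LLM_ARCH_LLAMA ||
        model.arch == LLM_ARCH_MPT   ||
        model.arch == LLM_ARCH_STARCODER;

    // Mirrors the diff: n_layer repeating blocks plus 3 non-repeating tensors.
    const bool fully_offloaded = model.n_gpu_layers >= hparams.n_layer + 3;

    if (ggml_cpu_has_cublas() && full_offload_supported && fully_offloaded) {
        return 1; // assumed body: more threads are detrimental when fully offloaded
    }
    return n_threads;
}

int main() {
    const model_t   model   = { LLM_ARCH_STARCODER, 43 };
    const hparams_t hparams = { 40 };
    // 40 layers + 3 extras are offloaded, so this prints 1 instead of 8.
    printf("n_threads = %d\n", pick_n_threads(model, hparams, 8));
    return 0;
}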