Skip to content

Commit

Permalink
llama : mark LLM_ARCH_STARCODER as full offload supported (ggerganov#…
Browse files — browse the repository at this point in the history
  • Loading branch information
wsxiaoys authored Nov 5, 2023
1 parent c41ea36 commit 3d48f42
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5164,11 +5164,12 @@ static int llama_decode_internal(

// If all tensors can be run on the GPU then using more than 1 thread is detrimental.
const bool full_offload_supported =
-        model.arch == LLM_ARCH_LLAMA ||
-        model.arch == LLM_ARCH_BAICHUAN ||
-        model.arch == LLM_ARCH_FALCON ||
-        model.arch == LLM_ARCH_REFACT ||
-        model.arch == LLM_ARCH_MPT;
+        model.arch == LLM_ARCH_LLAMA ||
+        model.arch == LLM_ARCH_BAICHUAN ||
+        model.arch == LLM_ARCH_FALCON ||
+        model.arch == LLM_ARCH_REFACT ||
+        model.arch == LLM_ARCH_MPT ||
+        model.arch == LLM_ARCH_STARCODER;

const bool fully_offloaded = model.n_gpu_layers >= (int) hparams.n_layer + 3;
if (ggml_cpu_has_cublas() && full_offload_supported && fully_offloaded) {
Expand Down

0 comments on commit 3d48f42

Please sign in to comment.