
llama : fix falcon arch for tied output embeddings (ggerganov#4978)
* falcon arch fix for tied output embeddings

* Update llama.cpp

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Update llama.cpp

* Update llama.cpp

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Update llama.cpp

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
cmp-nct and ggerganov authored Jan 18, 2024
1 parent 9b6ea42 commit 57e2a7a
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion llama.cpp
@@ -3438,7 +3438,12 @@ static bool llm_load_tensors(
                 {
                     model.output_norm   = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
                     model.output_norm_b = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd});
-                    model.output        = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab});
+                    if (gguf_find_tensor(ml.ctx_gguf, tn(LLM_TENSOR_OUTPUT, "weight").c_str()) >= 0) {
+                        model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab});
+                    } else {
+                        model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}); // needs to be on GPU
+                        ml.n_created--; // artificial tensor
+                    }
                 }
 
                 for (int i = 0; i < n_layer; ++i) {
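For context, the change makes the Falcon loader tolerate GGUF files that omit a dedicated output.weight (models with tied input/output embeddings): if the tensor is absent, token_embd.weight is reused as the output projection and the created-tensor count is adjusted. Below is a minimal standalone C++ sketch of that fallback pattern; the tensor and model_file types are hypothetical stand-ins for illustration only, not the llama.cpp API.

```cpp
#include <cstdio>
#include <map>
#include <string>
#include <vector>

// Hypothetical stand-in for a loaded tensor.
struct tensor {
    std::vector<float> data;
};

// Hypothetical stand-in for a parsed model file.
struct model_file {
    std::map<std::string, tensor> tensors;

    // Returns nullptr when the tensor is absent, mirroring gguf_find_tensor() < 0.
    const tensor * find(const std::string & name) const {
        auto it = tensors.find(name);
        return it == tensors.end() ? nullptr : &it->second;
    }
};

int main() {
    model_file mf;
    mf.tensors["token_embd.weight"] = tensor{std::vector<float>(8, 0.1f)};
    // No "output.weight" entry: the embeddings are tied.

    const tensor * output = mf.find("output.weight");
    if (output == nullptr) {
        // Fall back to the token embeddings, as the Falcon fix above does.
        output = mf.find("token_embd.weight");
        std::printf("output.weight missing, reusing token_embd.weight\n");
    }
    std::printf("output tensor has %zu elements\n", output->data.size());
    return 0;
}
```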
