Skip to content

Commit 398ede5

Browse files
pculliton and slaren authored
Adding Gemma 2 2B configs (#8784)
* Adding Gemma 2 2B configs: updates to Q scaling and Gemma 2 model sizes to match the v2 2B model.
* Update src/llama.cpp
Co-authored-by: slaren <slarengh@gmail.com>
---------
Co-authored-by: slaren <slarengh@gmail.com>
1 parent 44d28dd commit 398ede5

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

src/llama.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4969,6 +4969,7 @@ static void llm_load_hparams(
49694969
hparams.attn_soft_cap = true;
49704970

49714971
switch (hparams.n_layer) {
4972+
case 26: model.type = e_model::MODEL_2B; break;
49724973
case 42: model.type = e_model::MODEL_9B; break;
49734974
case 46: model.type = e_model::MODEL_27B; break;
49744975
default: model.type = e_model::MODEL_UNKNOWN;
@@ -11736,6 +11737,7 @@ struct llm_build_context {
1173611737

1173711738
// ref: https://github.com/google/gemma_pytorch/commit/03e657582d17cb5a8617ebf333c1c16f3694670e
1173811739
switch (model.type) {
11740+
case e_model::MODEL_2B:
1173911741
case e_model::MODEL_9B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
1174011742
case e_model::MODEL_27B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd / n_head))); break;
1174111743
default: GGML_ABORT("fatal error");

0 commit comments

Comments (0)