@@ -225,6 +225,7 @@ enum llm_arch {
     LLM_ARCH_COMMAND_R,
     LLM_ARCH_DBRX,
     LLM_ARCH_OLMO,
+    LLM_ARCH_GRANITE,
     LLM_ARCH_UNKNOWN,
 };
 
@@ -261,6 +262,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_COMMAND_R,       "command-r"  },
     { LLM_ARCH_DBRX,            "dbrx"       },
     { LLM_ARCH_OLMO,            "olmo"       },
+    { LLM_ARCH_GRANITE,         "granite"    },
     { LLM_ARCH_UNKNOWN,         "(unknown)"  },
 };
 
@@ -1036,6 +1038,32 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
             { LLM_TENSOR_FFN_UP,         "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_GRANITE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,     "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,    "output_norm" },
+            { LLM_TENSOR_OUTPUT,         "output" },
+            { LLM_TENSOR_ROPE_FREQS,     "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM,      "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,         "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,         "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,         "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,       "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD,  "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_GATE_INP,   "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM,       "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,       "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,       "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,         "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_EXP,   "blk.%d.ffn_gate.%d" },
+            { LLM_TENSOR_FFN_DOWN_EXP,   "blk.%d.ffn_down.%d" },
+            { LLM_TENSOR_FFN_UP_EXP,     "blk.%d.ffn_up.%d" },
+            { LLM_TENSOR_FFN_GATE_EXPS,  "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,  "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,    "blk.%d.ffn_up_exps" },
+        },
+    },
     {
         LLM_ARCH_UNKNOWN,
         {
@@ -4288,6 +4316,18 @@ static void llm_load_hparams(
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
+        case LLM_ARCH_GRANITE:
+            {
+                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
+
+                switch (hparams.n_layer) {
+                    case 32: model.type = e_model::MODEL_3B; break;
+                    case 36: model.type = e_model::MODEL_8B; break;
+                    case 52: model.type = e_model::MODEL_20B; break;
+                    case 88: model.type = e_model::MODEL_34B; break;
+                    default: model.type = e_model::MODEL_UNKNOWN;
+                }
+            } break;
         default: (void)0;
     }
 
@@ -4397,6 +4437,9 @@ static void llm_load_vocab(
     } else {
         if (tokenizer_model == "gpt2") {
             vocab.type = LLAMA_VOCAB_TYPE_BPE;
+            if (model.arch == LLM_ARCH_LLAMA) {
+                vocab.add_space_prefix = false;
+            }
         } else {
             LLAMA_LOG_WARN("%s: unknown tokenizer: '%s'", __func__, tokenizer_model.c_str());
             LLAMA_LOG_WARN("%s: using default tokenizer: 'llama'", __func__);
@@ -4967,6 +5010,7 @@ static bool llm_load_tensors(
         case LLM_ARCH_LLAMA:
         case LLM_ARCH_REFACT:
         case LLM_ARCH_MINICPM:
+        case LLM_ARCH_GRANITE:
             {
                 model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
 
@@ -10668,6 +10712,7 @@ static struct ggml_cgraph * llama_build_graph(
 
     switch (model.arch) {
         case LLM_ARCH_LLAMA:
+        case LLM_ARCH_GRANITE:
             {
                 result = llm.build_llama();
             } break;
@@ -15811,6 +15856,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
         case LLM_ARCH_PHI3:
         case LLM_ARCH_GEMMA:
         case LLM_ARCH_STARCODER2:
+        case LLM_ARCH_GRANITE:
             return LLAMA_ROPE_TYPE_NEOX;
 
         // all model arches should be listed explicitly here