@@ -15,26 +15,26 @@ adds support for the Solar Pro architecture
  7 files changed, 248 insertions(+)

 diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp
-index 1105139f..d9d5ec65 100644
+index dbf9774..eb6be95 100644
 --- a/src/llama-arch.cpp
 +++ b/src/llama-arch.cpp
-@@ -75,6 +75,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
+@@ -77,6 +77,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
  { LLM_ARCH_GRANITE_MOE, "granitemoe" },
  { LLM_ARCH_GRANITE_HYBRID, "granitehybrid" },
  { LLM_ARCH_CHAMELEON, "chameleon" },
 + { LLM_ARCH_SOLAR, "solar" },
  { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
  { LLM_ARCH_PLM, "plm" },
  { LLM_ARCH_BAILINGMOE, "bailingmoe" },
-@@ -153,6 +154,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
+@@ -159,6 +160,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
  { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
  { LLM_KV_ATTENTION_SLIDING_WINDOW, "%s.attention.sliding_window" },
  { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
 + { LLM_KV_ATTENTION_BLOCK_SKIP_CONNECTION, "%s.attention.block_skip_connection" },
  { LLM_KV_ATTENTION_KEY_LENGTH_MLA, "%s.attention.key_length_mla" },
  { LLM_KV_ATTENTION_VALUE_LENGTH_MLA, "%s.attention.value_length_mla" },

-@@ -1697,6 +1699,24 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
+@@ -1755,6 +1757,24 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
  { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
  },
  },
@@ -59,7 +59,7 @@ index 1105139f..d9d5ec65 100644
  {
  LLM_ARCH_WAVTOKENIZER_DEC,
  {
-@@ -1981,6 +2001,7 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
+@@ -2123,6 +2143,7 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
  {LLM_TENSOR_LAUREL_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
  // this tensor is loaded for T5, but never used
  {LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_NONE}},
@@ -68,26 +68,26 @@ index 1105139f..d9d5ec65 100644
  {LLM_TENSOR_POS_NET_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
  {LLM_TENSOR_POS_NET_NORM1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
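Aside (not part of the patch): each "%s" template registered in LLM_KV_NAMES is expanded with the architecture name from LLM_ARCH_NAMES, so the key added above is looked up as solar.attention.block_skip_connection in a Solar GGUF file. Below is a minimal self-contained sketch of that expansion; make_kv_key is a hypothetical stand-in for llama.cpp's internal formatting helper, not a function from the patch.

```cpp
// Sketch only: shows how an arch-prefixed GGUF key is formed from the
// templates registered in llama-arch.cpp. The map entries are copied from
// the patch; make_kv_key is a hypothetical stand-in for the real helper.
#include <cstdio>
#include <map>
#include <string>

enum llm_arch { LLM_ARCH_SOLAR };
enum llm_kv   { LLM_KV_ATTENTION_BLOCK_SKIP_CONNECTION };

static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
    { LLM_ARCH_SOLAR, "solar" },
};

static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
    { LLM_KV_ATTENTION_BLOCK_SKIP_CONNECTION, "%s.attention.block_skip_connection" },
};

// hypothetical helper: expand the "%s" template with the architecture name
static std::string make_kv_key(llm_arch arch, llm_kv kv) {
    char buf[256];
    std::snprintf(buf, sizeof(buf), LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
    return buf;
}

int main() {
    // prints: solar.attention.block_skip_connection
    std::printf("%s\n", make_kv_key(LLM_ARCH_SOLAR, LLM_KV_ATTENTION_BLOCK_SKIP_CONNECTION).c_str());
}
```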
 diff --git a/src/llama-arch.h b/src/llama-arch.h
-index a9dd188a..2cb0fd95 100644
+index 8267a8d..2983556 100644
 --- a/src/llama-arch.h
 +++ b/src/llama-arch.h
-@@ -79,6 +79,7 @@ enum llm_arch {
+@@ -81,6 +81,7 @@ enum llm_arch {
  LLM_ARCH_GRANITE_MOE,
  LLM_ARCH_GRANITE_HYBRID,
  LLM_ARCH_CHAMELEON,
 + LLM_ARCH_SOLAR,
  LLM_ARCH_WAVTOKENIZER_DEC,
  LLM_ARCH_PLM,
  LLM_ARCH_BAILINGMOE,
-@@ -157,6 +158,7 @@ enum llm_kv {
+@@ -163,6 +164,7 @@ enum llm_kv {
  LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
  LLM_KV_ATTENTION_SLIDING_WINDOW,
  LLM_KV_ATTENTION_SCALE,
 + LLM_KV_ATTENTION_BLOCK_SKIP_CONNECTION,
  LLM_KV_ATTENTION_KEY_LENGTH_MLA,
  LLM_KV_ATTENTION_VALUE_LENGTH_MLA,

-@@ -380,6 +382,7 @@ enum llm_tensor {
+@@ -388,6 +390,7 @@ enum llm_tensor {
  LLM_TENSOR_ENC_OUTPUT_NORM,
  LLM_TENSOR_CLS,
  LLM_TENSOR_CLS_OUT,
@@ -96,10 +96,10 @@ index a9dd188a..2cb0fd95 100644
  LLM_TENSOR_CONVNEXT_DW,
  LLM_TENSOR_CONVNEXT_NORM,
 diff --git a/src/llama-hparams.cpp b/src/llama-hparams.cpp
-index 86c814d5..f1c965b8 100644
+index 7a06368..35fc054 100644
 --- a/src/llama-hparams.cpp
 +++ b/src/llama-hparams.cpp
-@@ -95,6 +95,14 @@ uint32_t llama_hparams::n_pos_per_embd() const {
+@@ -146,6 +146,14 @@ uint32_t llama_hparams::n_pos_per_embd() const {
  return rope_type == LLAMA_ROPE_TYPE_MROPE ? 4 : 1;
  }

@@ -115,10 +115,10 @@ index 86c814d5..f1c965b8 100644
  if (il < n_layer) {
  return swa_layers[il];
 diff --git a/src/llama-hparams.h b/src/llama-hparams.h
-index 476d0a5e..906fa185 100644
+index 8b7e2a1..d5f673e 100644
 --- a/src/llama-hparams.h
 +++ b/src/llama-hparams.h
-@@ -59,6 +59,8 @@ struct llama_hparams {
+@@ -61,6 +61,8 @@ struct llama_hparams {
  std::array<uint32_t, LLAMA_MAX_LAYERS> n_head_kv_arr;
  std::array<uint32_t, LLAMA_MAX_LAYERS> n_ff_arr;

@@ -127,7 +127,7 @@ index 476d0a5e..906fa185 100644
  uint32_t n_layer_dense_lead = 0;
  uint32_t n_lora_q = 0;
  uint32_t n_lora_kv = 0;
-@@ -201,6 +203,9 @@ struct llama_hparams {
+@@ -218,6 +220,9 @@ struct llama_hparams {

  uint32_t n_pos_per_embd() const;

@@ -138,7 +138,7 @@ index 476d0a5e..906fa185 100644
  };

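The hyperparameter additions themselves are collapsed in this view (two members after n_ff_arr, a three-line declaration near n_pos_per_embd(), and an eight-line definition in llama-hparams.cpp). As a rough sketch of the kind of per-layer state a block-skip-connection key implies, mirroring the existing per-layer arrays such as n_ff_arr and the swa_layers lookup shown above; every name here is illustrative, not taken from the patch.

```cpp
// Illustrative only: the patch's real member and accessor names are not
// visible in this diff view. This just mirrors how llama_hparams stores
// other per-layer data (n_head_arr, n_ff_arr, swa_layers, ...).
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>

constexpr std::size_t LLAMA_MAX_LAYERS = 512;

struct solar_hparams_sketch {
    uint32_t n_layer = 0;

    // hypothetical: which layers receive a block skip connection, filled
    // from the "solar.attention.block_skip_connection" metadata at load time
    std::array<bool, LLAMA_MAX_LAYERS> bskcn_arr = {};

    // hypothetical accessor, analogous in spirit to the swa_layers lookup
    bool has_bskcn(uint32_t il) const {
        assert(il < n_layer);
        return bskcn_arr[il];
    }
};

int main() {
    solar_hparams_sketch hp;
    hp.n_layer       = 48;
    hp.bskcn_arr[20] = true;  // pretend layer 20 receives a skip connection
    std::printf("layer 20 has skip connection: %d\n", hp.has_bskcn(20));
}
```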
 diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp
-index bd9e6da8..99ea20df 100644
+index bd9e6da..99ea20d 100644
 --- a/src/llama-model-loader.cpp
 +++ b/src/llama-model-loader.cpp
 @@ -464,6 +464,7 @@ namespace GGUFMeta {
@@ -150,10 +150,10 @@ index bd9e6da8..99ea20df 100644
  llama_model_loader::llama_model_loader(
  const std::string & fname,
 diff --git a/src/llama-model.cpp b/src/llama-model.cpp
-index 8fc025af..35d7a4df 100644
+index e3aa9e6..20a7060 100644
 --- a/src/llama-model.cpp
 +++ b/src/llama-model.cpp
-@@ -1567,6 +1567,21 @@ void llama_model::load_hparams(llama_model_loader & ml) {
+@@ -1648,6 +1648,21 @@ void llama_model::load_hparams(llama_model_loader & ml) {
  default: type = LLM_TYPE_UNKNOWN;
  }
  } break;
@@ -175,7 +175,7 @@ index 8fc025af..35d7a4df 100644
  case LLM_ARCH_WAVTOKENIZER_DEC:
  {
  ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
-@@ -4325,6 +4340,34 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
+@@ -4555,6 +4570,34 @@ bool llama_model::load_tensors(llama_model_loader & ml) {

  layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);

@@ -210,12 +210,12 @@ index 8fc025af..35d7a4df 100644
  layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0);
  layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, 0);
  layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0);
-@@ -14369,6 +14412,165 @@ struct llm_build_granite_hybrid : public llm_graph_context_mamba {
+@@ -14925,6 +14968,165 @@ struct llm_build_granite_hybrid : public llm_graph_context_mamba {
  }
  };

 +struct llm_build_solar : public llm_graph_context {
-+ llm_build_solar(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) {
++ llm_build_solar(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
 + const int64_t n_embd_head = hparams.n_embd_head_v;
 + GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
 + GGML_ASSERT(n_embd_head == hparams.n_rot);
@@ -314,7 +314,7 @@ index 8fc025af..35d7a4df 100644
 + cb(Kcur, "Kcur", il);
 + cb(Vcur, "Vcur", il);
 +
-+ cur = build_attn(inp_attn, gf,
++ cur = build_attn(inp_attn,
 + model.layers[il].wo, model.layers[il].bo,
 + Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il);
 + cb(cur, "attn_out", il);
@@ -376,18 +376,18 @@ index 8fc025af..35d7a4df 100644
  // ref: https://github.com/facebookresearch/chameleon
  // based on the original build_llama() function, changes:
  // * qk-norm
-@@ -16225,6 +16427,10 @@ llm_graph_result_ptr llama_model::build_graph(
+@@ -17582,6 +17784,10 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
  {
- llm = std::make_unique<llm_build_chameleon>(*this, params, gf);
+ llm = std::make_unique<llm_build_chameleon>(*this, params);
  } break;
 + case LLM_ARCH_SOLAR:
 + {
-+ llm = std::make_unique<llm_build_solar>(*this, params, gf);
++ llm = std::make_unique<llm_build_solar>(*this, params);
 + } break;
  case LLM_ARCH_WAVTOKENIZER_DEC:
  {
- llm = std::make_unique<llm_build_wavtokenizer_dec>(*this, params, gf);
-@@ -16412,6 +16618,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
+ llm = std::make_unique<llm_build_wavtokenizer_dec>(*this, params);
+@@ -17785,6 +17991,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
  case LLM_ARCH_GRANITE_MOE:
  case LLM_ARCH_GRANITE_HYBRID:
  case LLM_ARCH_CHAMELEON:
@@ -396,18 +396,18 @@ index 8fc025af..35d7a4df 100644
  case LLM_ARCH_NEO_BERT:
  case LLM_ARCH_SMOLLM3:
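The body of llm_build_solar is mostly collapsed above. As a rough illustration of what a block skip connection computes, namely a weighted blend of the current hidden state with an activation captured at an earlier block, here is a plain-C++ placeholder; the real patch expresses this with ggml graph ops inside llm_build_solar, and the layer indices, weights, and blend formula below are assumptions, not values taken from the diff.

```cpp
// Placeholder sketch of a block skip connection: blend the running hidden
// state with an activation captured at an earlier layer. Not the patch's
// ggml implementation; layer choices and mixing weights are illustrative.
#include <cstddef>
#include <cstdio>
#include <vector>

using vec = std::vector<float>;

// out[i] = w_cur * cur[i] + w_skip * skip[i]
static vec blend(const vec & cur, const vec & skip, float w_cur, float w_skip) {
    vec out(cur.size());
    for (std::size_t i = 0; i < cur.size(); ++i) {
        out[i] = w_cur * cur[i] + w_skip * skip[i];
    }
    return out;
}

int main() {
    const int n_layer  = 8;
    const int skip_src = 2;   // layer whose output is saved (illustrative)
    const int skip_dst = 6;   // layer whose input receives the blend (illustrative)

    vec h(4, 1.0f);           // stand-in hidden state
    vec saved;

    for (int il = 0; il < n_layer; ++il) {
        if (il == skip_src) {
            saved = h;        // capture the activation to reuse later
        }
        if (il == skip_dst && !saved.empty()) {
            h = blend(h, saved, 0.5f, 0.5f);  // placeholder mixing weights
        }
        for (float & x : h) {
            x += 0.1f;        // stand-in for the real transformer block
        }
    }
    std::printf("h[0] = %f\n", h[0]);
}
```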
 diff --git a/src/llama-model.h b/src/llama-model.h
-index 431efbd5..05a9adfa 100644
+index 094e238..2692cf8 100644
 --- a/src/llama-model.h
 +++ b/src/llama-model.h
-@@ -67,6 +67,7 @@ enum llm_type {
+@@ -70,6 +70,7 @@ enum llm_type {
  LLM_TYPE_15B,
  LLM_TYPE_16B,
  LLM_TYPE_20B,
 + LLM_TYPE_22B,
  LLM_TYPE_27B,
  LLM_TYPE_30B,
  LLM_TYPE_32B,
-@@ -338,6 +339,8 @@ struct llama_layer {
+@@ -349,6 +350,8 @@ struct llama_layer {
  struct ggml_tensor * laurel_r = nullptr;
  struct ggml_tensor * laurel_post_norm = nullptr;
