
Commit 39bca8b

totally DID NOT miss those!
1 parent 5cb0ce3 commit 39bca8b

File tree

src/models/bailingmoe2.cpp
src/models/deepseek2.cpp
src/models/grovemoe.cpp
src/models/lfm2.cpp
src/models/llada.cpp

5 files changed: +62 -25 lines

src/models/bailingmoe2.cpp

Lines changed: 21 additions & 9 deletions
@@ -75,21 +75,33 @@ llm_build_bailingmoe2::llm_build_bailingmoe2(const llama_model & model, const ll
         cb(cur, "ffn_norm", il);

         if (static_cast<uint32_t>(il) < hparams.n_layer_dense_lead) {
-            cur = build_ffn(cur, model.layers[il].ffn_up, NULL, NULL, model.layers[il].ffn_gate, NULL, NULL,
-                            model.layers[il].ffn_down, NULL, NULL, NULL, LLM_FFN_SILU, LLM_FFN_PAR, il);
+            cur = build_ffn(cur,
+                    model.layers[il].ffn_up,   NULL, NULL,
+                    model.layers[il].ffn_gate, NULL, NULL,
+                    model.layers[il].ffn_down, NULL, NULL,
+                    NULL, LLM_FFN_SILU, LLM_FFN_PAR, il);
             cb(cur, "ffn_out", il);
         } else {
-            ggml_tensor * moe_out = build_moe_ffn(
-                cur, model.layers[il].ffn_gate_inp, model.layers[il].ffn_up_exps, model.layers[il].ffn_gate_exps,
-                model.layers[il].ffn_down_exps, model.layers[il].ffn_exp_probs_b, n_expert, n_expert_used, LLM_FFN_SILU,
-                hparams.expert_weights_norm, true, hparams.expert_weights_scale,
-                (llama_expert_gating_func_type) hparams.expert_gating_func, il);
+            ggml_tensor * moe_out = build_moe_ffn(cur,
+                    model.layers[il].ffn_gate_inp,
+                    model.layers[il].ffn_up_exps,
+                    model.layers[il].ffn_gate_exps,
+                    model.layers[il].ffn_down_exps,
+                    model.layers[il].ffn_exp_probs_b,
+                    n_expert, n_expert_used,
+                    LLM_FFN_SILU, hparams.expert_weights_norm,
+                    true, hparams.expert_weights_scale,
+                    (llama_expert_gating_func_type) hparams.expert_gating_func,
+                    il);
             cb(moe_out, "ffn_moe_out", il);

             {
                 ggml_tensor * ffn_shexp =
-                    build_ffn(cur, model.layers[il].ffn_up_shexp, NULL, NULL, model.layers[il].ffn_gate_shexp, NULL,
-                              NULL, model.layers[il].ffn_down_shexp, NULL, NULL, NULL, LLM_FFN_SILU, LLM_FFN_PAR, il);
+                    build_ffn(cur,
+                            model.layers[il].ffn_up_shexp,   NULL, NULL,
+                            model.layers[il].ffn_gate_shexp, NULL, NULL,
+                            model.layers[il].ffn_down_shexp, NULL, NULL,
+                            NULL, LLM_FFN_SILU, LLM_FFN_PAR, il);
                 cb(ffn_shexp, "ffn_shexp", il);

                 cur = ggml_add(ctx0, moe_out, ffn_shexp);
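The reflow above puts one weight group per line, which makes the role of each NULL visible: every projection slot is followed by optional bias and scale slots that these models do not use. A hedged sketch of the dense call with those roles annotated (the comment names are illustrative, inferred from the call sites in this commit; the authoritative build_ffn declaration lives in src/llama-graph.h):

    // Sketch only: parameter roles inferred from the call sites above,
    // not copied from the build_ffn declaration.
    cur = build_ffn(cur,
            model.layers[il].ffn_up,   /*up_bias*/   NULL, /*up_scale*/   NULL,
            model.layers[il].ffn_gate, /*gate_bias*/ NULL, /*gate_scale*/ NULL,
            model.layers[il].ffn_down, /*down_bias*/ NULL, /*down_scale*/ NULL,
            /*act_scales*/ NULL,
            LLM_FFN_SILU,   // activation applied on the gate branch
            LLM_FFN_PAR,    // parallel (gated) FFN: down(silu(gate(x)) * up(x))
            il);            // layer index, used by the cb() debug callback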

src/models/deepseek2.cpp

Lines changed: 2 additions & 1 deletion
@@ -200,7 +200,8 @@ llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_gr
             // FFN shared expert
             {
                 ggml_tensor * ffn_shexp =
-                    build_ffn(cur, model.layers[il].ffn_up_shexp, NULL, NULL,
+                    build_ffn(cur,
+                            model.layers[il].ffn_up_shexp, NULL, NULL,
                             model.layers[il].ffn_gate_shexp, NULL, NULL,
                             model.layers[il].ffn_down_shexp, NULL, NULL,
                             NULL, LLM_FFN_SILU, LLM_FFN_PAR, il);
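As in the bailingmoe2 hunk above, this shared-expert FFN is an always-on dense branch computed from the same normed input as the routed experts, and the two paths are then summed. A minimal sketch of that combination step, reusing the names visible in these diffs:

    // moe_out:   output of build_moe_ffn (routed experts)
    // ffn_shexp: output of the dense shared-expert build_ffn
    cur = ggml_add(ctx0, moe_out, ffn_shexp);  // routed + shared expert paths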

src/models/grovemoe.cpp

Lines changed: 24 additions & 7 deletions
@@ -82,17 +82,34 @@ llm_build_grovemoe::llm_build_grovemoe(const llama_model & model, const llm_grap
         cb(probs, "ffn_moe_logits", il);

         ggml_tensor * moe_out =
-            build_moe_ffn(cur, nullptr, model.layers[il].ffn_up_exps, model.layers[il].ffn_gate_exps,
-                          model.layers[il].ffn_down_exps, nullptr, n_expert, n_expert_used, LLM_FFN_SILU, true, false,
-                          0.0, LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX, il, probs);
+            build_moe_ffn(cur,
+                    nullptr,
+                    model.layers[il].ffn_up_exps,
+                    model.layers[il].ffn_gate_exps,
+                    model.layers[il].ffn_down_exps,
+                    nullptr,
+                    n_expert, n_expert_used,
+                    LLM_FFN_SILU, true,
+                    false, 0.0,
+                    LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX,
+                    il,
+                    probs);
         cb(moe_out, "ffn_moe_out", il);
         cur = moe_out;

         // TODO: Only do the expert selection and weights once
-        moe_out = build_moe_ffn(cur, nullptr, model.layers[il].ffn_up_chexps, model.layers[il].ffn_gate_chexps,
-                                model.layers[il].ffn_down_chexps, nullptr, n_chunk_expert,
-                                n_expert_used > n_chunk_expert ? n_chunk_expert : n_expert_used, LLM_FFN_SILU, true,
-                                false, 0.0, LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX, il, probs);
+        moe_out = build_moe_ffn(cur,
+                nullptr,
+                model.layers[il].ffn_up_chexps,
+                model.layers[il].ffn_gate_chexps,
+                model.layers[il].ffn_down_chexps,
+                nullptr,
+                n_chunk_expert, n_expert_used > n_chunk_expert ? n_chunk_expert : n_expert_used,
+                LLM_FFN_SILU, true,
+                false, 0.0,
+                LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX,
+                il,
+                probs);
         cb(moe_out, "ffn_adj_moe_out", il);

         cur = ggml_add(ctx0, cur, ggml_scale(ctx0, moe_out, hparams.expert_group_scale));
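Two things the reflow makes easier to read here. First, the flag block after the activation follows the same order as the bailingmoe2 call above (normalize expert weights, optionally scale them, then the scale value and gating function); grovemoe passes a softmax gate and reuses the externally computed probs for both calls. Second, the chunk-expert call caps the number of routed experts at n_chunk_expert; the ternary is a hand-written minimum. A sketch of an equivalent clamp, with the integer type as an assumption for illustration rather than taken from the source:

    #include <algorithm>  // std::min
    #include <cstdint>

    // Equivalent to: n_expert_used > n_chunk_expert ? n_chunk_expert : n_expert_used
    // The explicit template argument is an assumption to cover differing integer types.
    const int64_t n_used_chunk = std::min<int64_t>(n_expert_used, n_chunk_expert);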

src/models/lfm2.cpp

Lines changed: 10 additions & 6 deletions
@@ -53,18 +53,22 @@ llm_build_lfm2::llm_build_lfm2(const llama_model & model, const llm_graph_params
 }

 ggml_tensor * llm_build_lfm2::build_moe_feed_forward(ggml_tensor * cur, int il) const {
-    return build_moe_ffn(cur, model.layers[il].ffn_gate_inp, model.layers[il].ffn_up_exps,
-                         model.layers[il].ffn_gate_exps, model.layers[il].ffn_down_exps,
-                         model.layers[il].ffn_exp_probs_b, n_expert, n_expert_used, LLM_FFN_SILU, true, false, 0.0,
-                         static_cast<llama_expert_gating_func_type>(hparams.expert_gating_func), il);
+    return build_moe_ffn(cur,
+            model.layers[il].ffn_gate_inp, model.layers[il].ffn_up_exps,
+            model.layers[il].ffn_gate_exps, model.layers[il].ffn_down_exps,
+            model.layers[il].ffn_exp_probs_b, n_expert, n_expert_used, LLM_FFN_SILU, true, false, 0.0,
+            static_cast<llama_expert_gating_func_type>(hparams.expert_gating_func), il);
 }

 ggml_tensor * llm_build_lfm2::build_dense_feed_forward(ggml_tensor * cur, int il) const {
     GGML_ASSERT(!model.layers[il].ffn_up_b);
     GGML_ASSERT(!model.layers[il].ffn_gate_b);
     GGML_ASSERT(!model.layers[il].ffn_down_b);
-    return build_ffn(cur, model.layers[il].ffn_up, NULL, NULL, model.layers[il].ffn_gate, NULL, NULL,
-                     model.layers[il].ffn_down, NULL, NULL, NULL, LLM_FFN_SILU, LLM_FFN_PAR, il);
+    return build_ffn(cur,
+            model.layers[il].ffn_up, NULL, NULL,
+            model.layers[il].ffn_gate, NULL, NULL,
+            model.layers[il].ffn_down, NULL, NULL,
+            NULL, LLM_FFN_SILU, LLM_FFN_PAR, il);
 }

 ggml_tensor * llm_build_lfm2::build_attn_block(ggml_tensor * cur,
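The GGML_ASSERTs above document why NULL is passed for every bias slot: LFM2's dense FFN carries no bias tensors. For readers who want to see what the LLM_FFN_SILU + LLM_FFN_PAR combination roughly computes, here is a hedged expansion in plain ggml ops (a sketch of the math only, not the helper's actual implementation, which also handles biases, scales, and callbacks):

    // down( silu(gate(x)) * up(x) ), with no biases or scales
    ggml_tensor * up   = ggml_mul_mat(ctx0, model.layers[il].ffn_up,   cur);
    ggml_tensor * gate = ggml_mul_mat(ctx0, model.layers[il].ffn_gate, cur);
    gate = ggml_silu(ctx0, gate);                               // SiLU on the gate branch
    cur  = ggml_mul(ctx0, gate, up);                            // elementwise gating
    cur  = ggml_mul_mat(ctx0, model.layers[il].ffn_down, cur);  // project back to n_embd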

src/models/llada.cpp

Lines changed: 5 additions & 2 deletions
@@ -69,8 +69,11 @@ llm_build_llada::llm_build_llada(const llama_model & model, const llm_graph_para
         cur = build_norm(ffn_inp, model.layers[il].ffn_norm, NULL, LLM_NORM_RMS, il);
         cb(cur, "ffn_norm", il);

-        cur = build_ffn(cur, model.layers[il].ffn_up, NULL, NULL, model.layers[il].ffn_gate, NULL, NULL,
-                        model.layers[il].ffn_down, NULL, NULL, NULL, LLM_FFN_SILU, LLM_FFN_PAR, il);
+        cur = build_ffn(cur,
+                model.layers[il].ffn_up, NULL, NULL,
+                model.layers[il].ffn_gate, NULL, NULL,
+                model.layers[il].ffn_down, NULL, NULL,
+                NULL, LLM_FFN_SILU, LLM_FFN_PAR, il);
         cb(cur, "ffn_out", il);

         cur = ggml_add(ctx0, cur, ffn_inp);
