@@ -75,21 +75,33 @@ llm_build_bailingmoe2::llm_build_bailingmoe2(const llama_model & model, const ll
7575 cb (cur, " ffn_norm" , il);
7676
7777 if (static_cast <uint32_t >(il) < hparams.n_layer_dense_lead ) {
78- cur = build_ffn (cur, model.layers [il].ffn_up , NULL , NULL , model.layers [il].ffn_gate , NULL , NULL ,
79- model.layers [il].ffn_down , NULL , NULL , NULL , LLM_FFN_SILU, LLM_FFN_PAR, il);
78+ cur = build_ffn (cur,
79+ model.layers [il].ffn_up , NULL , NULL ,
80+ model.layers [il].ffn_gate , NULL , NULL ,
81+ model.layers [il].ffn_down , NULL , NULL ,
82+ NULL , LLM_FFN_SILU, LLM_FFN_PAR, il);
8083 cb (cur, " ffn_out" , il);
8184 } else {
82- ggml_tensor * moe_out = build_moe_ffn (
83- cur, model.layers [il].ffn_gate_inp , model.layers [il].ffn_up_exps , model.layers [il].ffn_gate_exps ,
84- model.layers [il].ffn_down_exps , model.layers [il].ffn_exp_probs_b , n_expert, n_expert_used, LLM_FFN_SILU,
85- hparams.expert_weights_norm , true , hparams.expert_weights_scale ,
86- (llama_expert_gating_func_type) hparams.expert_gating_func , il);
85+ ggml_tensor * moe_out = build_moe_ffn (cur,
86+ model.layers [il].ffn_gate_inp ,
87+ model.layers [il].ffn_up_exps ,
88+ model.layers [il].ffn_gate_exps ,
89+ model.layers [il].ffn_down_exps ,
90+ model.layers [il].ffn_exp_probs_b ,
91+ n_expert, n_expert_used,
92+ LLM_FFN_SILU, hparams.expert_weights_norm ,
93+ true , hparams.expert_weights_scale ,
94+ (llama_expert_gating_func_type) hparams.expert_gating_func ,
95+ il);
8796 cb (moe_out, " ffn_moe_out" , il);
8897
8998 {
9099 ggml_tensor * ffn_shexp =
91- build_ffn (cur, model.layers [il].ffn_up_shexp , NULL , NULL , model.layers [il].ffn_gate_shexp , NULL ,
92- NULL , model.layers [il].ffn_down_shexp , NULL , NULL , NULL , LLM_FFN_SILU, LLM_FFN_PAR, il);
100+ build_ffn (cur,
101+ model.layers [il].ffn_up_shexp , NULL , NULL ,
102+ model.layers [il].ffn_gate_shexp , NULL , NULL ,
103+ model.layers [il].ffn_down_shexp , NULL , NULL ,
104+ NULL , LLM_FFN_SILU, LLM_FFN_PAR, il);
93105 cb (ffn_shexp, " ffn_shexp" , il);
94106
95107 cur = ggml_add (ctx0, moe_out, ffn_shexp);
0 commit comments