
Commit 4de7fb8

CISC authored and pwilkin committed
convert : enable expert group selection for all models with it (ggml-org#16691)
1 parent f515093 commit 4de7fb8

File tree: 2 files changed, +8 −4 lines changed


convert_hf_to_gguf.py

Lines changed: 6 additions & 2 deletions
@@ -742,6 +742,12 @@ def set_gguf_parameters(self):
         if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None:
             self.gguf_writer.add_expert_used_count(n_experts_used)
             logger.info(f"gguf: experts used count = {n_experts_used}")
+        if (n_expert_groups := self.hparams.get("n_group")) is not None:
+            self.gguf_writer.add_expert_group_count(n_expert_groups)
+            logger.info(f"gguf: expert groups count = {n_expert_groups}")
+        if (n_group_used := self.hparams.get("topk_group")) is not None:
+            self.gguf_writer.add_expert_group_used_count(n_group_used)
+            logger.info(f"gguf: expert groups used count = {n_group_used}")

         if (head_dim := self.hparams.get("head_dim")) is not None:
             self.gguf_writer.add_key_length(head_dim)
@@ -8233,8 +8239,6 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_expert_weights_scale(hparams["routed_scaling_factor"])
         self.gguf_writer.add_expert_count(hparams["num_experts"])
         self.gguf_writer.add_expert_shared_count(hparams["num_shared_experts"])
-        self.gguf_writer.add_expert_group_count(hparams["n_group"])
-        self.gguf_writer.add_expert_group_used_count(hparams["topk_group"])
         self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])

         if hparams["score_function"] == "sigmoid":
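
For context, the effect of the moved logic can be sketched outside the converter. The snippet below is a minimal illustration, not part of the commit: it assumes a plain dict standing in for self.hparams and a gguf.GGUFWriter exposing the helper methods named in the diff; the hparams values, output path, and arch string are hypothetical.

    # Minimal sketch: expert-group metadata is now written for any model whose
    # config exposes "n_group"/"topk_group", not just the one architecture-specific
    # converter that previously emitted it. All values below are hypothetical.
    import gguf

    hparams = {
        "num_experts_per_tok": 8,  # experts routed per token
        "n_group": 8,              # number of expert groups
        "topk_group": 4,           # groups actually selected per token
    }

    writer = gguf.GGUFWriter("model.gguf", arch="deepseek2")

    if (n_experts_used := hparams.get("num_experts_per_tok")) is not None:
        writer.add_expert_used_count(n_experts_used)

    # New in this commit: group metadata is emitted generically from the base class.
    if (n_expert_groups := hparams.get("n_group")) is not None:
        writer.add_expert_group_count(n_expert_groups)

    if (n_group_used := hparams.get("topk_group")) is not None:
        writer.add_expert_group_used_count(n_group_used)

The second hunk simply drops the now-redundant architecture-specific calls, since the base class handles them.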

src/llama-model.cpp

Lines changed: 2 additions & 2 deletions
@@ -6371,6 +6371,8 @@ void llama_model::print_info() const {
         LLAMA_LOG_INFO("%s: n_ff = %s\n", __func__, print_f([&](uint32_t il) { return hparams.n_ff(il); }, hparams.n_layer).c_str());
         LLAMA_LOG_INFO("%s: n_expert = %u\n", __func__, hparams.n_expert);
         LLAMA_LOG_INFO("%s: n_expert_used = %u\n", __func__, hparams.n_expert_used);
+        LLAMA_LOG_INFO("%s: n_expert_groups = %d\n", __func__, hparams.n_expert_groups);
+        LLAMA_LOG_INFO("%s: n_group_used = %d\n", __func__, hparams.n_group_used);
         LLAMA_LOG_INFO("%s: causal attn = %d\n", __func__, hparams.causal_attn);
         LLAMA_LOG_INFO("%s: pooling type = %d\n", __func__, hparams.pooling_type);
         LLAMA_LOG_INFO("%s: rope type = %d\n", __func__, hparams.rope_type);
@@ -6471,8 +6473,6 @@ void llama_model::print_info() const {
         LLAMA_LOG_INFO("%s: n_ff_exp = %d\n", __func__, hparams.n_ff_exp);
         LLAMA_LOG_INFO("%s: n_ff_shexp = %d\n", __func__, hparams.n_ff_shexp);
         LLAMA_LOG_INFO("%s: n_expert_shared = %d\n", __func__, hparams.n_expert_shared);
-        LLAMA_LOG_INFO("%s: n_expert_groups = %d\n", __func__, hparams.n_expert_groups);
-        LLAMA_LOG_INFO("%s: n_group_used = %d\n", __func__, hparams.n_group_used);
         LLAMA_LOG_INFO("%s: expert_weights_scale = %.1f\n", __func__, hparams.expert_weights_scale);
         LLAMA_LOG_INFO("%s: expert_weights_norm = %d\n", __func__, hparams.expert_weights_norm);
         LLAMA_LOG_INFO("%s: expert_gating_func = %s\n", __func__, llama_expert_gating_func_name((llama_expert_gating_func_type) hparams.expert_gating_func));
