Skip to content

Commit 854b09f

Browse files
pwilkin and CISC authored
convert : move experts permutation from Qwen2MoeModel to Qwen3VLMoeTextModel (ggml-org#19445)
* Add special case for Qwen3VLMoe
* Fix down path, remove arrows and checkmarks
* ws
* Moved to Qwen3VL
* Update convert_hf_to_gguf.py
  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update convert_hf_to_gguf.py
  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update convert_hf_to_gguf.py
  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
---------
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
1 parent 66d403c commit 854b09f

1 file changed

Lines changed: 43 additions & 22 deletions

File tree

convert_hf_to_gguf.py

Lines changed: 43 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -4109,37 +4109,29 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
41094109
# Expected GGML ne: {n_embd, n_ff_exp, n_expert} for gate/up, {n_ff_exp, n_embd, n_expert} for down
41104110
if name.endswith("mlp.experts.down_proj") or name.endswith("mlp.experts.down_proj.weight"):
41114111
mapped = f"{name}.weight" if not name.endswith(".weight") else name
4112-
# Input: (n_expert=128, n_ff_exp=768, n_embd=2048)
4113-
# Want GGML ne: {n_ff_exp, n_embd, n_expert} = {768, 2048, 128}
4114-
# Need PyTorch: (128, 2048, 768) [reversed of GGML]
4115-
# So: permute(0, 2, 1): (128, 768, 2048) -> (128, 2048, 768)
4116-
permuted = data_torch.permute(0, 2, 1).contiguous()
4117-
yield from super().modify_tensors(permuted, mapped, bid)
4112+
# HF: [n_expert, n_embd, n_ff] -> GGML: {n_ff, n_embd, n_expert}
4113+
yield from super().modify_tensors(data_torch, mapped, bid)
41184114
return
41194115

41204116
if name.endswith("mlp.experts.gate_up_proj") or name.endswith("mlp.experts.gate_up_proj.weight"):
4121-
if data_torch.ndim < 3 or data_torch.shape[-1] % 2 != 0:
4117+
if data_torch.ndim < 3 or data_torch.shape[-2] % 2 != 0:
41224118
raise ValueError(f"Unexpected gate_up_proj shape for {name}: {tuple(data_torch.shape)}")
4123-
split_dim = data_torch.shape[-1] // 2
4124-
gate = data_torch[..., :split_dim].contiguous()
4125-
up = data_torch[..., split_dim:].contiguous()
4126-
# Input gate/up: (n_expert=128, n_embd=2048, n_ff_exp=768)
4127-
# Want GGML ne: {n_embd, n_ff_exp, n_expert} = {2048, 768, 128}
4128-
# Need PyTorch: (128, 768, 2048) [reversed of GGML]
4129-
# So: permute(0, 2, 1): (128, 2048, 768) -> (128, 768, 2048)
4130-
base_name = name.removesuffix(".weight")
4131-
base = base_name.rsplit('.', 1)[0]
4132-
mapped_gate = f"{base}.gate_proj.weight"
4133-
mapped_up = f"{base}.up_proj.weight"
4134-
perm_gate = gate.permute(0, 2, 1).contiguous()
4135-
perm_up = up.permute(0, 2, 1).contiguous()
4136-
yield from super().modify_tensors(perm_gate, mapped_gate, bid)
4137-
yield from super().modify_tensors(perm_up, mapped_up, bid)
4119+
# HF: [n_expert, 2*n_ff, n_embd] -> split on dim=-2
4120+
n_ff = data_torch.shape[-2] // 2
4121+
gate = data_torch[..., :n_ff, :].contiguous()
4122+
up = data_torch[..., n_ff:, :].contiguous()
4123+
# gate/up: [n_expert, n_ff, n_embd] -> GGML: {n_embd, n_ff, n_expert}
4124+
base_name = name.removesuffix(".weight").removesuffix(".gate_up_proj")
4125+
mapped_gate = f"{base_name}.gate_proj.weight"
4126+
mapped_up = f"{base_name}.up_proj.weight"
4127+
yield from super().modify_tensors(gate, mapped_gate, bid)
4128+
yield from super().modify_tensors(up, mapped_up, bid)
41384129
return
41394130

41404131
if name.startswith("mlp") or name.startswith("vision_model") or name.startswith("model.vision_tower") or name.startswith("model.multi_modal_projector") or name.startswith("model.visual"):
41414132
# skip visual tensors
41424133
return
4134+
41434135
if name.find("experts") != -1:
41444136
n_experts = self.hparams["num_experts"]
41454137
assert bid is not None
@@ -4535,6 +4527,35 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
45354527
if name.startswith("model.visual."):
45364528
return
45374529

4530+
# Qwen3VL has transposed packed tensors, so we treat it differently from general Qwen2MoE packed tensors
4531+
if name.endswith("mlp.experts.down_proj") or name.endswith("mlp.experts.down_proj.weight"):
4532+
name = name.replace("language_model.", "")
4533+
mapped = f"{name}.weight" if not name.endswith(".weight") else name
4534+
permuted = data_torch.permute(0, 2, 1).contiguous()
4535+
yield from ModelBase.modify_tensors(self, permuted, mapped, bid)
4536+
return
4537+
4538+
if name.endswith("mlp.experts.gate_up_proj") or name.endswith("mlp.experts.gate_up_proj.weight"):
4539+
name = name.replace("language_model.", "")
4540+
if data_torch.ndim < 3 or data_torch.shape[-1] % 2 != 0:
4541+
raise ValueError(f"Unexpected gate_up_proj shape for {name}: {tuple(data_torch.shape)}")
4542+
split_dim = data_torch.shape[-1] // 2
4543+
gate = data_torch[..., :split_dim].contiguous()
4544+
up = data_torch[..., split_dim:].contiguous()
4545+
# Input gate/up: (n_expert=128, n_embd=2048, n_ff_exp=768)
4546+
# Want GGML ne: {n_embd, n_ff_exp, n_expert} = {2048, 768, 128}
4547+
# Need PyTorch: (128, 768, 2048) [reversed of GGML]
4548+
# So: permute(0, 2, 1): (128, 2048, 768) -> (128, 768, 2048)
4549+
base_name = name.removesuffix(".weight")
4550+
base = base_name.rsplit('.', 1)[0]
4551+
mapped_gate = f"{base}.gate_proj.weight"
4552+
mapped_up = f"{base}.up_proj.weight"
4553+
perm_gate = gate.permute(0, 2, 1).contiguous()
4554+
perm_up = up.permute(0, 2, 1).contiguous()
4555+
yield from ModelBase.modify_tensors(self, perm_gate, mapped_gate, bid)
4556+
yield from ModelBase.modify_tensors(self, perm_up, mapped_up, bid)
4557+
return
4558+
45384559
yield from super().modify_tensors(data_torch, name, bid)
45394560

45404561

0 commit comments

Comments
 (0)