Commit 3326dc4

llama : support running Mamba-Codestral-7B-v0.1

1 parent 40c0685

2 files changed: +5 −1 lines

convert_hf_to_gguf.py

Lines changed: 4 additions & 0 deletions
@@ -2843,6 +2843,10 @@ def set_gguf_parameters(self):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid # unused

+        if name.startswith("model.backbone") or name.startswith("model.lm_head"):
+            # map Mamba-Codestral-7B-v0.1 tensor names to the names used by Mamba-2
+            name = name.removeprefix("model.")
+
         if name.endswith(".dt_bias"):
             name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
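
For context: the added branch strips the leading "model." prefix so tensors from the Mamba-Codestral-7B-v0.1 checkpoint line up with the backbone.* / lm_head.* names the existing Mamba-2 conversion path already expects. A minimal sketch of the remapping (the example tensor names are hypothetical, chosen only to illustrate the prefix handling):

# Hypothetical checkpoint tensor names, for illustration only.
names = [
    "model.backbone.embeddings.weight",
    "model.backbone.layers.0.mixer.dt_bias",
    "model.lm_head.weight",
]
for name in names:
    if name.startswith("model.backbone") or name.startswith("model.lm_head"):
        name = name.removeprefix("model.")  # drop the extra "model." wrapper
    print(name)
# -> backbone.embeddings.weight
# -> backbone.layers.0.mixer.dt_bias
# -> lm_head.weight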

src/llama.cpp

Lines changed: 1 addition & 1 deletion
@@ -9383,7 +9383,7 @@ static struct ggml_tensor * llm_build_mamba2(
        // grouped RMS norm
        y = ggml_reshape_4d(ctx, y, d_inner / n_group, n_group, n_seq_tokens, n_seqs);
        y = llm_build_norm(ctx, y, hparams,
-               model.layers[il].ssm_norm, NULL,
+               ggml_reshape_2d(ctx, model.layers[il].ssm_norm, d_inner / n_group, n_group), NULL,
                LLM_NORM_RMS, cb, il);
        y = ggml_reshape_3d(ctx, y, d_inner, n_seq_tokens, n_seqs);
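
For context: y has just been reshaped so that each of the n_group groups sits in its own slice, and the grouped RMS norm normalizes within each group. Reshaping model.layers[il].ssm_norm to the same (d_inner / n_group, n_group) layout lets each group be scaled by its own slice of the norm weights, rather than passing the weight as a single flat d_inner vector, which presumably no longer matched the grouped layout of y when n_group is greater than 1. A rough per-token sketch of a grouped RMS norm in Python/NumPy (the function name, eps value, and row-major layout are assumptions for illustration, not taken from ggml):

import numpy as np

def grouped_rms_norm(y, weight, n_group, eps=1e-6):
    # y, weight: flat vectors of length d_inner for a single token.
    d_inner = y.shape[-1]
    yg = y.reshape(n_group, d_inner // n_group)   # one row per group
    wg = weight.reshape(n_group, d_inner // n_group)
    rms = np.sqrt(np.mean(yg * yg, axis=-1, keepdims=True) + eps)
    return (yg / rms * wg).reshape(d_inner)       # scale each group by its own weights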
