
Commit 3678b83

llama : support GEGLU for jina-bert-v2 (ggml-org#14090)
1 parent 652b70e commit 3678b83
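For context, GEGLU here is the gated-GELU feed-forward variant from https://arxiv.org/pdf/2002.05202.pdf (referenced in build_ffn below): a single fused projection of width 2*n_ff is split in half, GELU is applied to one half, and the result is multiplied elementwise with the other half before the down projection. A minimal NumPy sketch of the assumed formulation (the tanh GELU approximation and the gate-first ordering are illustrative assumptions, not taken from this commit):

import numpy as np

def gelu(x):
    # tanh approximation of GELU (assumed here for illustration)
    return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * x ** 3)))

def geglu_ffn(x, w_up_fused, w_down):
    # x: (n_tokens, n_embd), w_up_fused: (n_embd, 2*n_ff), w_down: (n_ff, n_embd)
    h = x @ w_up_fused                       # fused projection, width 2*n_ff
    n_ff = h.shape[-1] // 2
    gated = gelu(h[:, :n_ff]) * h[:, n_ff:]  # GELU(gate half) * up half
    return gated @ w_down

rng = np.random.default_rng(0)
x = rng.standard_normal((3, 8))
print(geglu_ffn(x, rng.standard_normal((8, 32)), rng.standard_normal((16, 8))).shape)  # (3, 8)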

File tree

4 files changed: +9 -33 lines changed


convert_hf_to_gguf.py

Lines changed: 0 additions & 27 deletions

@@ -4798,25 +4798,6 @@ def prepare_tensors(self):
 class JinaBertV2Model(BertModel):
     model_arch = gguf.MODEL_ARCH.JINA_BERT_V2

-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.intermediate_size = self.hparams["intermediate_size"]
-
-    def get_tensors(self):
-        for name, data in super().get_tensors():
-            if 'gated_layer' in name:
-                d1 = data[:self.intermediate_size, :]
-                name1 = name.replace('gated_layers', 'gated_layers_w')
-                name1 = name1.replace('up_gated_layer', 'gated_layers_v')
-                d2 = data[self.intermediate_size:, :]
-                name2 = name.replace('gated_layers', 'gated_layers_v')
-                name2 = name2.replace('up_gated_layer', 'gated_layers_w')
-                yield name1, d1
-                yield name2, d2
-                continue
-
-            yield name, data
-
     def set_vocab(self):
         tokenizer_class = 'BertTokenizer'
         with open(self.dir_model / "tokenizer_config.json", "r", encoding="utf-8") as f:
@@ -4832,14 +4813,6 @@ def set_vocab(self):
         self.gguf_writer.add_add_bos_token(True)
         self.gguf_writer.add_add_eos_token(True)

-    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        # if name starts with "bert.", remove the prefix
-        # e.g. https://huggingface.co/jinaai/jina-reranker-v1-tiny-en
-        if name.startswith("bert."):
-            name = name[5:]
-
-        return super().modify_tensors(data_torch, name, bid)
-

 @ModelBase.register("OpenELMForCausalLM")
 class OpenELMModel(TextModel):
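The removed get_tensors override used to slice the fused jina-bert-v2 gated_layers weight into separate gate/up tensors at conversion time. With the runtime GEGLU path, the fused tensor can be stored as-is, because slicing the weight before the matmul and slicing the matmul output afterwards are equivalent. A small NumPy sketch of that equivalence (shapes are illustrative, not taken from the model):

import numpy as np

rng = np.random.default_rng(0)
n_embd, n_ff, n_tokens = 8, 16, 3

w_fused = rng.standard_normal((2 * n_ff, n_embd))   # fused gated_layers weight, halves stacked row-wise
x = rng.standard_normal((n_tokens, n_embd))

# Old converter behaviour: split the weight rows into two n_ff halves up front.
w_a, w_b = w_fused[:n_ff], w_fused[n_ff:]
split_first = (x @ w_a.T, x @ w_b.T)

# New behaviour: keep the fused weight; the GEGLU path splits the projection at runtime.
h = x @ w_fused.T
split_after = (h[:, :n_ff], h[:, n_ff:])

assert all(np.allclose(a, b) for a, b in zip(split_first, split_after))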

gguf-py/gguf/tensor_mapping.py

Lines changed: 4 additions & 2 deletions

@@ -333,7 +333,9 @@ class TensorNameMap:
             "encoder.layers.{bid}.mlp.fc11",          # nomic-bert
             "encoder.layers.{bid}.mlp.fc1",           # nomic-bert-moe
             "model.layers.{bid}.mlp.c_fc",            # starcoder2
-            "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
+            "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2 (split up/gate, no longer used)
+            "encoder.layer.{bid}.mlp.gated_layers",   # jina-bert-v2 (GEGLU)
+            "encoder.layer.{bid}.mlp.up_gated_layer", # jina-v2-code (GEGLU)
             "model.layers.{bid}.residual_mlp.w3",     # arctic
             "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
             "transformer.h.{bid}.mlp.c_fc_1",         # exaone
@@ -370,7 +372,7 @@ class TensorNameMap:
             "model.layers.layers.{bid}.mlp.gate_proj", # plamo
             "model.layers.{bid}.feed_forward.w1",      # internlm2
             "encoder.layers.{bid}.mlp.fc12",           # nomic-bert
-            "encoder.layer.{bid}.mlp.gated_layers_w",  # jina-bert-v2
+            "encoder.layer.{bid}.mlp.gated_layers_w",  # jina-bert-v2 (split up/gate, no longer used)
             "transformer.h.{bid}.mlp.linear_1",        # refact
             "model.layers.{bid}.residual_mlp.w1",      # arctic
             "transformer.h.{bid}.mlp.c_fc_0",          # exaone

src/llama-graph.cpp

Lines changed: 2 additions & 1 deletion

@@ -650,6 +650,7 @@ ggml_tensor * llm_graph_context::build_ffn(
             {
                 // Project to 4h. If using swiglu double the output width, see https://arxiv.org/pdf/2002.05202.pdf
                 int64_t split_point = cur->ne[0] / 2;
+                // TODO: these conts should not be needed, see https://github.com/ggml-org/llama.cpp/pull/14090#discussion_r2137437217
                 ggml_tensor * x0 = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, split_point, cur->ne[1], cur->nb[1], 0));
                 ggml_tensor * x1 = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, split_point, cur->ne[1], cur->nb[1], split_point * ggml_element_size(cur)));

@@ -663,7 +664,7 @@ ggml_tensor * llm_graph_context::build_ffn(
             {
                 // Split into two equal parts
                 int64_t split_point = cur->ne[0] / 2;
-                // TODO: these conts should not be needed
+                // TODO: these conts should not be needed, see https://github.com/ggml-org/llama.cpp/pull/14090#discussion_r2137437217
                 ggml_tensor * x0 = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, split_point, cur->ne[1], cur->nb[1], 0));
                 ggml_tensor * x1 = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, split_point, cur->ne[1], cur->nb[1], split_point * ggml_element_size(cur)));
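The two ggml_view_2d calls select the first and second half of dimension 0 (the feature axis) for every row, the second view starting at a byte offset of split_point times the element size. Neither view is contiguous, which is what the ggml_cont wrappers, and the TODO, are about. A NumPy analogy of the split, offered as an illustration rather than ggml code:

import numpy as np

n_tokens, width = 3, 8                  # width plays the role of cur->ne[0] == 2*n_ff
cur = np.arange(n_tokens * width, dtype=np.float32).reshape(n_tokens, width)
split_point = width // 2

x0 = cur[:, :split_point]               # view at byte offset 0
x1 = cur[:, split_point:]               # view at offset split_point * itemsize

# Both slices keep the original row stride, so they are not contiguous in memory;
# ggml_cont makes an explicit contiguous copy before the following ops.
print(x0.flags['C_CONTIGUOUS'], x1.flags['C_CONTIGUOUS'])   # False False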

src/llama-model.cpp

Lines changed: 3 additions & 3 deletions

@@ -2224,8 +2224,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     layer.attn_norm_2   = create_tensor(tn(LLM_TENSOR_ATTN_NORM_2, "weight", i), {n_embd}, TENSOR_NOT_REQUIRED);
                     layer.attn_norm_2_b = create_tensor(tn(LLM_TENSOR_ATTN_NORM_2, "bias", i),   {n_embd}, TENSOR_NOT_REQUIRED);

-                    layer.ffn_up   = create_tensor(tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd, n_ff}, 0);
-                    layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0);
+                    layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, TENSOR_NOT_REQUIRED);
+                    layer.ffn_up   = create_tensor(tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd, layer.ffn_gate ? n_ff : n_ff * 2}, 0);

                     layer.ffn_down   = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0);
                     layer.ffn_down_b = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "bias", i),   {n_embd}, 0);
@@ -6043,7 +6043,7 @@ struct llm_build_bert : public llm_graph_context {
                         model.layers[il].ffn_gate, NULL, NULL,
                         model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
                         NULL,
-                        LLM_FFN_GELU, LLM_FFN_PAR, il);
+                        model.layers[il].ffn_gate ? LLM_FFN_GELU : LLM_FFN_GEGLU, LLM_FFN_PAR, il);
                 cb(cur, "ffn_out", il);
             } else {
                 cur = build_ffn(cur,
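With ffn_gate now optional, the loader accepts both layouts: separate gate/up tensors of width n_ff (files from older converters, parallel GELU) and a single fused up tensor of width 2*n_ff (files from the updated converter, GEGLU). A sketch of that selection rule, offered as an assumption for illustration rather than the actual llama.cpp code:

# Hypothetical shape-based check mirroring the create_tensor logic above.
def pick_ffn_type(tensor_shapes: dict[str, tuple[int, int]],
                  bid: int, n_embd: int, n_ff: int) -> str:
    gate = tensor_shapes.get(f"blk.{bid}.ffn_gate.weight")   # may be absent (TENSOR_NOT_REQUIRED)
    up   = tensor_shapes[f"blk.{bid}.ffn_up.weight"]
    if gate is not None:
        assert gate == (n_embd, n_ff) and up == (n_embd, n_ff)
        return "LLM_FFN_GELU (separate gate/up, parallel)"
    assert up == (n_embd, 2 * n_ff)
    return "LLM_FFN_GEGLU (fused up, split at runtime)"

# A file from the updated converter carries only the fused up tensor:
print(pick_ffn_type({"blk.0.ffn_up.weight": (768, 2 * 3072)},
                    bid=0, n_embd=768, n_ff=3072))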
