Commit d9c7b61

make n_embd_v_gqa_* dependent on layer
1 parent 3241b3d

File tree: 1 file changed (+2, −2)

src/llama.cpp

Lines changed: 2 additions & 2 deletions
@@ -14608,8 +14608,6 @@ static int llama_decode_internal(
 
         const struct llama_hparams & hparams = model.hparams;
         const int64_t kv_head = kv_self.head;
-        const int64_t n_embd_k_gqa = hparams.n_embd_k_gqa();
-        const int64_t n_embd_v_gqa = hparams.n_embd_v_gqa();
 
         for (int i = 0; i < gf->n_nodes; i++) {
             ggml_tensor * node = gf->nodes[i];
@@ -14619,6 +14617,7 @@ static int llama_decode_internal(
                 const char* k_prefix = "k_cache_view-";
                 if (strncmp(node->src[1]->name, k_prefix, strlen(k_prefix)) == 0) {
                     int il = atoi(node->src[1]->name + strlen(k_prefix)); // Layer index from name
+                    const int64_t n_embd_k_gqa = hparams.n_embd_k_gqa(il);
                     ggml_tensor * tmp_tensor = kv_self.k_l[il];
                     size_t tmp_offset = (ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa))*kv_head;
                     node->src[1]->data = static_cast<char*>(tmp_tensor->data) + tmp_offset;
@@ -14628,6 +14627,7 @@ static int llama_decode_internal(
                 const char* v_prefix = "v_cache_view-";
                 if (strncmp(node->src[1]->name, v_prefix, strlen(v_prefix)) == 0) {
                     int il = atoi(node->src[1]->name + strlen(v_prefix)); // Layer index from name
+                    const int64_t n_embd_v_gqa = hparams.n_embd_v_gqa(il);
                     ggml_tensor * tmp_tensor = kv_self.v_l[il];
                     size_t tmp_offset;
                     if (cparams.flash_attn) {
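For context on why the layer argument matters: per the commit message, n_embd_k_gqa and n_embd_v_gqa depend on the layer's KV head count, so for models whose KV head count varies across layers, the old zero-argument calls computed a single row size up front and applied it to every layer's cache offset. Below is a minimal, self-contained sketch of the offset arithmetic; the struct, field names, and numbers are illustrative assumptions, not the llama.cpp definitions.

    // Hypothetical sketch: per-layer KV-cache row offsets when the
    // number of KV heads varies by layer. Not the llama.cpp code.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct hparams_sketch {
        int64_t n_embd_head_k = 128;        // size of one K head (assumed)
        std::vector<int32_t> n_head_kv_arr; // KV heads per layer (may vary)

        // Mirrors the layer-dependent accessor used in the diff above.
        int64_t n_embd_k_gqa(int il) const {
            return n_embd_head_k * n_head_kv_arr[il];
        }
    };

    int main() {
        hparams_sketch hp;
        hp.n_head_kv_arr = {8, 8, 4, 4};    // e.g. fewer KV heads in deeper layers

        const int64_t kv_head   = 10;       // current write position in the cache
        const size_t  elem_size = 2;        // e.g. F16 cells; stands in for ggml_row_size

        for (int il = 0; il < (int) hp.n_head_kv_arr.size(); ++il) {
            // Per-layer row size; a zero-argument form would use layer 0 everywhere.
            size_t row_size = elem_size * hp.n_embd_k_gqa(il);
            size_t offset   = row_size * kv_head;
            printf("layer %d: row_size=%zu offset=%zu\n", il, row_size, offset);
        }
        return 0;
    }

With the hunks above, each layer's offset is computed from its own row size; before the change, layer 0's row size would also have been applied to layers whose KV head count differs.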
