
Commit f21d5cd

fix: Add missing padding to n_ctx for hybrid cache construction

Branch: GraniteFour
Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>

1 parent: 88f20c3

1 file changed: +5 -1

src/llama-model.cpp

Lines changed: 5 additions & 1 deletion
@@ -13227,13 +13227,17 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
             std::max((uint32_t) 1, cparams.n_seq_max),
             cparams.n_seq_max);
     } else if (llm_arch_is_hybrid_recurrent(arch)) {
+        const auto padding = llama_kv_cache_unified::get_padding(cparams);
+
+        cparams.n_ctx = GGML_PAD(cparams.n_ctx, padding);
+
         res = new llama_kv_cache_hybrid_recurrent(
             /* model             */ *this,
             /* attn_type_k       */ params.type_k,
             /* attn_type_v       */ params.type_v,
             /* attn_v_trans      */ !cparams.flash_attn,
             /* attn_kv_size      */ cparams.n_ctx,
-            /* attn_n_pad        */ llama_kv_cache_unified::get_padding(cparams),
+            /* attn_n_pad        */ padding,
             /* attn_n_swa        */ hparams.n_swa,
             /* attn_swa_type     */ hparams.swa_type,
             /* recurrent_type_k  */ GGML_TYPE_F32,
