@@ -280,7 +280,7 @@ llama_context::llama_context(
280280 }
281281
282282 // reserve worst-case graph
283- if (!hparams.vocab_only && memory ) {
283+ if (!hparams.vocab_only ) {
284284 const uint32_t n_seqs = cparams.kv_unified ? 1 : cparams.n_seq_max ;
285285 const uint32_t n_tokens = std::min (cparams.n_ctx , cparams.n_ubatch );
286286
@@ -292,11 +292,13 @@ llama_context::llama_context(
292292 int n_splits_tg = -1 ;
293293 int n_nodes_tg = -1 ;
294294
295- // simulate full KV cache
296-
297- const auto mctx = memory->init_full ();
298- if (!mctx) {
299- throw std::runtime_error (" failed to initialize KV cache" );
295+ llama_memory_context_ptr mctx;
296+ if (memory) {
297+ LLAMA_LOG_DEBUG (" %s: reserving full memory module\n " , __func__);
298+ mctx = memory->init_full ();
299+ if (!mctx) {
300+ throw std::runtime_error (" failed to initialize memory module" );
301+ }
300302 }
301303
302304 cross.v_embd .clear ();
@@ -1056,7 +1058,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
10561058 const auto * res = process_ubatch (ubatch, LLM_GRAPH_TYPE_DECODER, mctx.get (), status);
10571059
10581060 if (!res) {
1059- // the last ubatch failed or was aborted -> remove all positions of that ubatch from the KV cache
1061+ // the last ubatch failed or was aborted -> remove all positions of that ubatch from the memory module
10601062 llama_pos pos_min[LLAMA_MAX_SEQ];
10611063 for (int s = 0 ; s < LLAMA_MAX_SEQ; ++s) {
10621064 pos_min[s] = std::numeric_limits<llama_pos>::max ();
@@ -1073,7 +1075,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
10731075 continue ;
10741076 }
10751077
1076- LLAMA_LOG_WARN (" %s: removing KV cache entries for seq_id = %d, pos = [%d, +inf)\n " , __func__, s, pos_min[s]);
1078+ LLAMA_LOG_WARN (" %s: removing memory module entries for seq_id = %d, pos = [%d, +inf)\n " , __func__, s, pos_min[s]);
10771079
10781080 memory->seq_rm (s, pos_min[s], -1 );
10791081 }
@@ -1857,7 +1859,7 @@ size_t llama_context::state_write_data(llama_io_write_i & io) {
18571859 }
18581860
18591861 if (memory != nullptr ) {
1860- LLAMA_LOG_DEBUG (" %s: - writing KV self \n " , __func__);
1862+ LLAMA_LOG_DEBUG (" %s: - writing memory module \n " , __func__);
18611863 memory->state_write (io);
18621864 }
18631865
@@ -1943,7 +1945,7 @@ size_t llama_context::state_read_data(llama_io_read_i & io) {
19431945 }
19441946
19451947 if (memory) {
1946- LLAMA_LOG_DEBUG (" %s: - reading KV self \n " , __func__);
1948+ LLAMA_LOG_DEBUG (" %s: - reading memory module \n " , __func__);
19471949
19481950 memory->state_read (io);
19491951 }
0 commit comments