Skip to content

memory : correctly handle failure in apply() #14438

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/llama-kv-cache-unified-iswa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ bool llama_kv_cache_unified_iswa_context::next() {
}

bool llama_kv_cache_unified_iswa_context::apply() {
assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
assert(!llama_memory_status_is_fail(status));

bool res = true;

Expand Down
2 changes: 1 addition & 1 deletion src/llama-kv-cache-unified.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1776,7 +1776,7 @@ bool llama_kv_cache_unified_context::next() {
}

bool llama_kv_cache_unified_context::apply() {
assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
assert(!llama_memory_status_is_fail(status));

// no ubatches -> this is a KV cache update
if (ubatches.empty()) {
Expand Down
2 changes: 1 addition & 1 deletion src/llama-memory-hybrid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ bool llama_memory_hybrid_context::next() {
}

bool llama_memory_hybrid_context::apply() {
assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
assert(!llama_memory_status_is_fail(status));

bool res = true;

Expand Down
10 changes: 9 additions & 1 deletion src/llama-memory-recurrent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1071,7 +1071,15 @@ bool llama_memory_recurrent_context::next() {
}

bool llama_memory_recurrent_context::apply() {
assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
assert(!llama_memory_status_is_fail(status));

// no ubatches -> this is an update
if (ubatches.empty()) {
// recurrent cache never performs updates
assert(status == LLAMA_MEMORY_STATUS_NO_UPDATE);

return true;
}

mem->find_slot(ubatches[i_next]);

Expand Down
17 changes: 17 additions & 0 deletions src/llama-memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,20 @@ llama_memory_status llama_memory_status_combine(llama_memory_status s0, llama_me
// if either status has an update, then the combined status has an update
return has_update ? LLAMA_MEMORY_STATUS_SUCCESS : LLAMA_MEMORY_STATUS_NO_UPDATE;
}

// Check whether a memory status represents a failure.
// Only the two FAILED_* states count as failures; SUCCESS and NO_UPDATE
// (and any unknown value, as a conservative fallback) do not.
bool llama_memory_status_is_fail(llama_memory_status status) {
    const bool is_prepare_fail = (status == LLAMA_MEMORY_STATUS_FAILED_PREPARE);
    const bool is_compute_fail = (status == LLAMA_MEMORY_STATUS_FAILED_COMPUTE);

    return is_prepare_fail || is_compute_fail;
}
3 changes: 3 additions & 0 deletions src/llama-memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ enum llama_memory_status {
// useful for implementing hybrid memory types (e.g. iSWA)
llama_memory_status llama_memory_status_combine(llama_memory_status s0, llama_memory_status s1);

// helper function for checking if a memory status indicates a failure
bool llama_memory_status_is_fail(llama_memory_status status);

// the interface for managing the memory context during batch processing
// this interface is implemented per memory type. see:
// - llama_kv_cache_unified_context
Expand Down
Loading