From a225e350655e13db1f5ca15a334d35def51dbc55 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Fri, 27 Jun 2025 16:15:22 -0600 Subject: [PATCH] fix: Only call apply on child caches in the success state There are conditions where the two child contexts can end up with different status values based on the logic in the init_update constructor for llama_kv_cache_unified_context, which can conditionally set status to either LLAMA_MEMORY_STATUS_SUCCESS or LLAMA_MEMORY_STATUS_NO_UPDATE. See full discussion: https://github.com/ggml-org/llama.cpp/pull/13550#issuecomment-3014200391 Branch: HybridCacheApplyLogic Signed-off-by: Gabe Goodhart --- src/llama-memory-hybrid.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/llama-memory-hybrid.cpp b/src/llama-memory-hybrid.cpp index 15cde98d138a8..3d8b2db46705e 100644 --- a/src/llama-memory-hybrid.cpp +++ b/src/llama-memory-hybrid.cpp @@ -222,8 +222,12 @@ bool llama_memory_hybrid_context::apply() { bool res = true; - res = res & ctx_attn->apply(); - res = res & ctx_recr->apply(); + if (ctx_attn->get_status() == LLAMA_MEMORY_STATUS_SUCCESS) { + res = res & ctx_attn->apply(); + } + if (ctx_recr->get_status() == LLAMA_MEMORY_STATUS_SUCCESS) { + res = res & ctx_recr->apply(); + } return res; }