From a225e350655e13db1f5ca15a334d35def51dbc55 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Fri, 27 Jun 2025 16:15:22 -0600
Subject: [PATCH] fix: Only call apply on child caches in the success state

There are conditions where the two child contexts can end up with
different status values, based on the logic in the init_update constructor
for llama_kv_cache_unified_context, which can conditionally set status to
either LLAMA_MEMORY_STATUS_SUCCESS or LLAMA_MEMORY_STATUS_NO_UPDATE.

See full discussion:
https://github.com/ggml-org/llama.cpp/pull/13550#issuecomment-3014200391

Branch: HybridCacheApplyLogic

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 src/llama-memory-hybrid.cpp | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/llama-memory-hybrid.cpp b/src/llama-memory-hybrid.cpp
index 15cde98d138a8..3d8b2db46705e 100644
--- a/src/llama-memory-hybrid.cpp
+++ b/src/llama-memory-hybrid.cpp
@@ -222,8 +222,12 @@ bool llama_memory_hybrid_context::apply() {
 
     bool res = true;
 
-    res = res & ctx_attn->apply();
-    res = res & ctx_recr->apply();
+    if (ctx_attn->get_status() == LLAMA_MEMORY_STATUS_SUCCESS) {
+        res = res & ctx_attn->apply();
+    }
+    if (ctx_recr->get_status() == LLAMA_MEMORY_STATUS_SUCCESS) {
+        res = res & ctx_recr->apply();
+    }
 
     return res;
 }