From 3b1c814cd1b7c7579ff9855082f41158f6f52e21 Mon Sep 17 00:00:00 2001
From: compilade <113953597+compilade@users.noreply.github.com>
Date: Thu, 28 Mar 2024 08:05:54 -0400
Subject: [PATCH] llama : fix command-r inference when omitting outputs (#6367)

---
 llama.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 892d46fbcfcec..77ec9b7a1935d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -9152,8 +9152,9 @@ struct llm_build_context {
             if (il == n_layer - 1) {
                 // skip computing output for unused tokens
                 struct ggml_tensor * inp_out_ids = build_inp_out_ids();
-                cur  = ggml_get_rows(ctx0,  cur, inp_out_ids);
-                inpL = ggml_get_rows(ctx0, inpL, inp_out_ids);
+                cur     = ggml_get_rows(ctx0,     cur, inp_out_ids);
+                inpL    = ggml_get_rows(ctx0,    inpL, inp_out_ids);
+                ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids);
             }
 
             struct ggml_tensor * attn_out = cur;
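
Note (not part of the patch): the Command-R block adds the attention output, the FFN output, and the layer input together at the end of the layer, so when the last layer drops the rows of unused tokens from cur and inpL, the FFN input has to be reduced to the same rows as well, otherwise the final addition mixes tensors with different numbers of rows. The standalone C++ sketch below illustrates that shape constraint; it is an illustrative assumption modeled loosely on the surrounding build_command_r graph, not llama.cpp code, and get_rows/add here only stand in for ggml_get_rows/ggml_add.

    // sketch: why ffn_inp must also be row-filtered when outputs are omitted
    #include <cassert>
    #include <cstdio>
    #include <vector>

    using rows_t = std::vector<std::vector<float>>; // tensor as a list of token rows

    // stand-in for ggml_get_rows: keep only the requested token rows
    static rows_t get_rows(const rows_t & t, const std::vector<int> & ids) {
        rows_t out;
        for (int id : ids) out.push_back(t[id]);
        return out;
    }

    // stand-in for ggml_add: element-wise sum, requires matching row counts
    static rows_t add(const rows_t & a, const rows_t & b) {
        assert(a.size() == b.size()); // this is what breaks if ffn_inp keeps all rows
        rows_t out = a;
        for (size_t i = 0; i < a.size(); ++i)
            for (size_t j = 0; j < a[i].size(); ++j)
                out[i][j] += b[i][j];
        return out;
    }

    int main() {
        const int n_tokens = 4;
        rows_t inpL(n_tokens, {1.0f}), cur = inpL, ffn_inp = inpL;

        std::vector<int> out_ids = {3}; // only the last token needs logits

        // last layer: every tensor feeding the final sum must drop the unused rows
        cur     = get_rows(cur,     out_ids);
        inpL    = get_rows(inpL,    out_ids);
        ffn_inp = get_rows(ffn_inp, out_ids); // the line the patch adds

        rows_t attn_out = cur;
        rows_t ffn_out  = ffn_inp; // stand-in for the real FFN applied to ffn_inp

        // parallel residual: layer input + FFN output + attention output
        rows_t out = add(add(ffn_out, inpL), attn_out);
        printf("output rows: %zu\n", out.size()); // 1, matching out_ids
        return 0;
    }

Without the added ffn_inp line, ffn_out in this model would still have n_tokens rows while inpL and attn_out have only one, and the addition would fail (in ggml, a broken graph at that point).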