llama : fix command-r inference when omitting outputs (ggerganov#6367)

hodlen · Apr 3, 2024 · a9a64ff
1 parent c61f3cc
commit a9a64ff
Showing 1 changed file with 3 additions and 2 deletions.
diff --git a/llama.cpp b/llama.cpp
@@ -9152,8 +9152,9 @@ struct llm_build_context {
  if (il == n_layer - 1) {
  // skip computing output for unused tokens
  struct ggml_tensor * inp_out_ids = build_inp_out_ids();
- cur = ggml_get_rows(ctx0, cur, inp_out_ids);
- inpL = ggml_get_rows(ctx0, inpL, inp_out_ids);
+ cur = ggml_get_rows(ctx0, cur, inp_out_ids);
+ inpL = ggml_get_rows(ctx0, inpL, inp_out_ids);
+ ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids);
  }
 
  struct ggml_tensor * attn_out = cur;