llama.cpp: Include the changes from ggerganov#6122 to exclude the unused outputs of the last layers.
hxer7963 · Mar 27, 2024 · e4a16f2
1 parent 3c0b830
commit e4a16f2
Showing 1 changed file with 7 additions and 0 deletions.
diff --git a/llama.cpp b/llama.cpp
@@ -6525,6 +6525,13 @@ struct llm_build_context {
  cb(cur, "kqv_out", il);
  }
 
+ if (il == n_layer - 1) {
+ // skip computing output for unused tokens
+ struct ggml_tensor * inp_out_ids = build_inp_out_ids();
+ cur = ggml_get_rows(ctx0, cur, inp_out_ids);
+ inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
+ }
+
  struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA);
  cb(ffn_inp, "ffn_inp", il);