Skip to content

Commit

Permalink
llama.cpp: Include the changes from ggerganov#6122 to exclude the unused outputs of the last layers.
Browse files Browse the repository at this point in the history
  • Loading branch information
root committed Mar 27, 2024
1 parent 3c0b830 commit e4a16f2
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions llama.cpp
Expand Up @@ -6525,6 +6525,13 @@ struct llm_build_context {
cb(cur, "kqv_out", il);
}

if (il == n_layer - 1) {
// skip computing output for unused tokens
struct ggml_tensor * inp_out_ids = build_inp_out_ids();
cur = ggml_get_rows(ctx0, cur, inp_out_ids);
inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
}

struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA);
cb(ffn_inp, "ffn_inp", il);

Expand Down

0 comments on commit e4a16f2

Please sign in to comment.