server : fix kv cache management (#3588)
ggerganov committed Oct 12, 2023
1 parent b8fe4b5 · commit 57dd55e
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions examples/server/server.cpp
@@ -405,13 +405,17 @@ struct llama_server_context
         // compare the evaluated prompt with the new prompt
         n_past = common_part(embd, prompt_tokens);
         embd = prompt_tokens;
+
         if (n_past == num_prompt_tokens)
         {
             // we have to evaluate at least 1 token to generate logits.
             printf("we have to evaluate at least 1 token to generate logits\n");
             n_past--;
         }
 
+        // since #3228 we now have to manually manage the KV cache
+        llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
+
         LOG_VERBOSE("prompt ingested", {
                                            {"n_past", n_past},
                                            {"cached", tokens_to_str(ctx, embd.cbegin(), embd.cbegin() + n_past)},
@@ -461,16 +465,16 @@ struct llama_server_context
         // compare the evaluated prompt with the new prompt
         n_past = common_part(embd, prompt_tokens);
 
-        // since #3228 we now have to manually manage the KV cache
-        llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
-
         embd = prompt_tokens;
         if (n_past == num_prompt_tokens)
        {
             // we have to evaluate at least 1 token to generate logits.
             n_past--;
         }
 
+        // since #3228 we now have to manually manage the KV cache
+        llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
+
         LOG_VERBOSE("prompt ingested", {
                                            {"n_past", n_past},
                                            {"cached", tokens_to_str(ctx, embd.cbegin(), embd.cbegin() + n_past)},
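
The substance of the change: since #3228, the server must evict stale KV cache cells itself, and the eviction has to happen after the "evaluate at least 1 token" adjustment to n_past, not before. Below is a minimal, self-contained sketch of that ordering. A plain std::vector stands in for the per-sequence KV cache, common_part mirrors the server's helper of the same name, and the vector truncation plays the role of llama_kv_cache_seq_rm(ctx, 0, n_past, -1); everything else (token values, main) is illustrative, not llama.cpp API.

#include <cstddef>
#include <cstdio>
#include <vector>

using llama_token = int; // stand-in for llama.cpp's token type

// longest common prefix of the cached tokens and the new prompt,
// mirroring the server's common_part() helper
static size_t common_part(const std::vector<llama_token> & a,
                          const std::vector<llama_token> & b) {
    size_t i = 0;
    while (i < a.size() && i < b.size() && a[i] == b[i]) {
        i++;
    }
    return i;
}

int main() {
    std::vector<llama_token> embd          = {10, 20, 30, 40}; // previously evaluated prompt
    std::vector<llama_token> prompt_tokens = {10, 20, 30, 40}; // same prompt resubmitted

    size_t n_past = common_part(embd, prompt_tokens); // 4: full prefix is reusable
    embd = prompt_tokens;

    if (n_past == prompt_tokens.size()) {
        // we have to evaluate at least 1 token to generate logits,
        // so step back and re-evaluate the last prompt token
        n_past--;
    }

    // truncate only now, after the adjustment: this models
    // llama_kv_cache_seq_rm(ctx, 0, n_past, -1), which drops cells
    // [n_past, inf) for sequence 0. Truncating before the adjustment
    // (the old order in the second hunk) would leave a stale cell for
    // the token that is about to be re-evaluated.
    std::vector<llama_token> kv_cells(embd.begin(), embd.begin() + n_past);

    printf("n_past = %zu, cells kept = %zu, tokens to evaluate = %zu\n",
           n_past, kv_cells.size(), prompt_tokens.size() - n_past);
    return 0;
}

Run against an identical resubmitted prompt, the sketch keeps 3 of 4 cells and re-evaluates 1 token, which matches the state the moved llama_kv_cache_seq_rm call arranges in the server.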
