Skip to content

Commit

Permalink
Update backtrack_encoder.rs
Browse files: browse the repository at this point in the history
  • Loading branch information
aneubeck committed Jul 17, 2024
1 parent 1d0d21b commit e8b7706
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion crates/bpe/src/backtrack_encoder.rs
Original file line number | Diff line number | Diff line change
[Expand Up]
@@ -37,14 +37,18 @@ impl<'a> BacktrackEncoder<'a> {
.map(|last_token| self.bpe.is_valid_token_pair(last_token, token))
.unwrap_or(true)
{
self.bitfield.clear(end_pos);
self.tokens.push(token);
self.pos = end_pos;
// In principle, we could in some cases reuse the leftmost longest match iterator.
// Especially when it has to look ahead, this could save scanning the input multiple times.
// But on average this seems to be slower due to the overhead of storing the iterator as part of the struct.
self.next_token = self.bpe.next_match(&self.text[end_pos..]);
break;
} else if let Some(shorter) = self.bpe.next_prefix(token) {
token = shorter;
} else {
// Clearing the bitfield when we pop tokens saves a little bit of work...
self.bitfield.clear(self.pos);
self.tokens.pop();
self.pos -= last.map(|t| self.bpe.token_len(t)).unwrap_or(0);
self.next_token = last;
Expand Down

0 comments on commit e8b7706

Please sign in to comment.