Skip to content

Commit

Permalink
Use the right tiktoken encoding for comparison
Browse files Browse the repository at this point in the history
  • Loading branch information
hendrikvanantwerpen committed Sep 25, 2024
1 parent aa14609 commit 7bf5093
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -2,4 +2,4 @@ Cargo.lock
 /target/
 /crates/*/target/
 /crates/*/Cargo.lock
-.vscode/
+.vscode/
7 changes: 3 additions & 4 deletions crates/bpe/benches/counting.rs
Original file line number Diff line number Diff line change
@@ -34,11 +34,10 @@ fn counting_benchmark(c: &mut Criterion) {
 }

 fn encoding_benchmark(c: &mut Criterion) {
-    for (name, bpe) in [
-        ("cl100k", BytePairEncoding::cl100k()),
-        ("o200k", BytePairEncoding::o200k()),
+    for (name, bpe, tiktoken) in [
+        ("cl100k", BytePairEncoding::cl100k(), tiktoken_rs::cl100k_base().unwrap()),
+        ("o200k", BytePairEncoding::o200k(), tiktoken_rs::o200k_base().unwrap()),
     ] {
-        let tiktoken = tiktoken_rs::cl100k_base().unwrap();
         let text = create_test_string(&bpe, 20000);
         let input = text.as_bytes();

Expand Down
2 changes: 2 additions & 0 deletions criterion.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
+# save report in this directory, even if a custom target directory is set
+criterion_home = "./target/criterion"

0 comments on commit 7bf5093

Please sign in to comment.