-
Notifications
You must be signed in to change notification settings - Fork 3
/
bench.rs
124 lines (112 loc) · 4.38 KB
/
bench.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use std::env;
use std::fs;
use memchr::memmem;
use stringzilla::StringZilla;
fn configure_bench() -> Criterion {
Criterion::default()
.sample_size(1000) // Test this many needles.
.warm_up_time(std::time::Duration::from_secs(10)) // Let the CPU frequencies settle.
.measurement_time(std::time::Duration::from_secs(120)) // Actual measurement time.
}
fn benchmarks(c: &mut Criterion) {
// Get the haystack path from the environment variable.
let haystack_path =
env::var("HAYSTACK_PATH").expect("HAYSTACK_PATH environment variable not set");
let haystack_content = fs::read_to_string(&haystack_path).expect("Could not read haystack");
// Tokenize the haystack content by white space.
let needles: Vec<&str> = haystack_content.split_whitespace().collect();
if needles.is_empty() {
panic!("No tokens found in the haystack.");
}
let haystack = haystack_content.as_bytes();
let haystack_length = haystack.len();
// Benchmarks for forward search
let mut g = c.benchmark_group("search-forward");
g.throughput(Throughput::Bytes(haystack_length as u64));
perform_forward_benchmarks(&mut g, &needles, haystack);
g.finish();
// Benchmarks for reverse search
let mut g = c.benchmark_group("search-reverse");
g.throughput(Throughput::Bytes(haystack_length as u64));
perform_reverse_benchmarks(&mut g, &needles, haystack);
g.finish();
}
fn perform_forward_benchmarks(
g: &mut criterion::BenchmarkGroup<'_, criterion::measurement::WallTime>,
needles: &[&str],
haystack: &[u8],
) {
// Benchmark for StringZilla forward search
let mut token_index: usize = 0;
g.bench_function("stringzilla::find", |b| {
b.iter(|| {
let token = needles[token_index];
let token_bytes = token.as_bytes();
let mut pos: usize = 0;
while let Some(found) = (&haystack[pos..]).sz_find(token_bytes) {
pos += found + token_bytes.len();
}
token_index = (token_index + 1) % needles.len();
})
});
// Benchmark for memchr (forward search)
let mut token_index: usize = 0; // Reset token index for the next benchmark
g.bench_function("memmem::find", |b| {
b.iter(|| {
let token = needles[token_index];
let token_bytes = token.as_bytes();
let mut pos: usize = 0;
while let Some(found) = memmem::find(&haystack[pos..], token_bytes) {
pos += found + token_bytes.len();
}
token_index = (token_index + 1) % needles.len();
})
});
}
fn perform_reverse_benchmarks(
g: &mut criterion::BenchmarkGroup<'_, criterion::measurement::WallTime>,
needles: &[&str],
haystack: &[u8],
) {
// Benchmark for StringZilla reverse search
let mut token_index: usize = 0;
g.bench_function("stringzilla::rfind", |b| {
b.iter(|| {
let token = needles[token_index];
let token_bytes = token.as_bytes();
let mut pos: Option<usize> = Some(haystack.len());
while let Some(end) = pos {
if let Some(found) = (&haystack[..end]).sz_rfind(token_bytes) {
pos = Some(found); // Update position to the start of the found token for the next search.
} else {
break; // No more occurrences found.
}
}
token_index = (token_index + 1) % needles.len();
})
});
// Benchmark for memchr reverse search
let mut token_index: usize = 0;
g.bench_function("memmem::rfind", |b| {
b.iter(|| {
let token = needles[token_index];
let token_bytes = token.as_bytes();
let mut pos: Option<usize> = Some(haystack.len());
while let Some(end) = pos {
if let Some(found) = memmem::rfind(&haystack[..end], token_bytes) {
pos = Some(found); // Update position to the start of the found token for the next search.
} else {
break; // No more occurrences found.
}
}
token_index = (token_index + 1) % needles.len();
})
});
}
criterion_group! {
name = sz_bench;
config = configure_bench();
targets = benchmarks
}
criterion_main!(sz_bench);