Skip to content

Commit e069bfd

Browse files
authored
Merge pull request #122 from fjall-rs/2.8.0
2.8.0
2 parents 0991f34 + a1a6e0f commit e069bfd

40 files changed

+1001
-345
lines changed

.gitignore

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,5 @@ Cargo.lock
1414
*.pdb
1515

1616
.lsm.data
17-
.data
18-
/old_*
1917
.test*
20-
.block_index_test
2118
.bench

Cargo.toml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
name = "lsm-tree"
33
description = "A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs)"
44
license = "MIT OR Apache-2.0"
5-
version = "2.7.6"
5+
version = "2.8.0"
66
edition = "2021"
77
rust-version = "1.75.0"
88
readme = "README.md"
@@ -37,7 +37,7 @@ quick_cache = { version = "0.6.5", default-features = false, features = [] }
3737
rustc-hash = "2.0.0"
3838
self_cell = "1.0.4"
3939
tempfile = "3.12.0"
40-
value-log = { version = "=1.7.3", default-features = false, features = [] }
40+
value-log = { version = "~1.8", default-features = false, features = [] }
4141
varint-rs = "2.2.0"
4242
xxhash-rust = { version = "0.8.12", features = ["xxh3"] }
4343

@@ -101,3 +101,9 @@ name = "fd_table"
101101
harness = false
102102
path = "benches/fd_table.rs"
103103
required-features = []
104+
105+
[[bench]]
106+
name = "partition_point"
107+
harness = false
108+
path = "benches/partition_point.rs"
109+
required-features = []

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs) in Rus
1919
This is the most feature-rich LSM-tree implementation in Rust! It features:
2020

2121
- Thread-safe BTreeMap-like API
22-
- 100% safe & stable Rust
22+
- [99.9% safe](./UNSAFE.md) & stable Rust
2323
- Block-based tables with compression support
2424
- Range & prefix searching with forward and reverse iteration
2525
- Size-tiered, (concurrent) Leveled and FIFO compaction

UNSAFE.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Unsafe usage
2+
3+
Currently, the project itself uses only **one** `unsafe` block (ignoring dependencies, which are tested separately):
4+
5+
- https://github.com/fjall-rs/lsm-tree/blob/2d8686e873369bd9c4ff2b562ed988c1cea38331/src/binary_search.rs#L23-L25

benches/partition_point.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
use criterion::{criterion_group, criterion_main, Criterion};
2+
use lsm_tree::binary_search::partition_point;
3+
use rand::Rng;
4+
5+
fn bench_partition_point(c: &mut Criterion) {
6+
let mut group = c.benchmark_group("partition_point");
7+
8+
let mut rng = rand::rng();
9+
10+
for item_count in [10, 100, 1_000, 10_000, 100_000, 1_000_000] {
11+
let items = (0..item_count).collect::<Vec<_>>();
12+
13+
group.bench_function(format!("native {item_count}"), |b| {
14+
b.iter(|| {
15+
let needle = rng.random_range(0..item_count);
16+
items.partition_point(|&x| x <= needle)
17+
})
18+
});
19+
20+
group.bench_function(format!("rewrite {item_count}"), |b| {
21+
b.iter(|| {
22+
let needle = rng.random_range(0..item_count);
23+
partition_point(&items, |&x| x <= needle)
24+
})
25+
});
26+
}
27+
}
28+
29+
criterion_group!(benches, bench_partition_point);
30+
criterion_main!(benches);

fuzz/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
corpus
2+
artifacts

fuzz/Cargo.toml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
[package]
2+
name = "lsm-tree-fuzz"
3+
version = "0.0.0"
4+
publish = false
5+
edition = "2021"
6+
7+
[package.metadata]
8+
cargo-fuzz = true
9+
10+
[dependencies]
11+
libfuzzer-sys = "0.4"
12+
lsm-tree = { path = ".." }
13+
14+
[[bin]]
15+
name = "partition_point"
16+
path = "fuzz_targets/partition_point.rs"
17+
test = false
18+
doc = false
19+
bench = false

fuzz/fuzz_targets/partition_point.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#![no_main]
2+
use libfuzzer_sys::{
3+
arbitrary::{Arbitrary, Unstructured},
4+
fuzz_target,
5+
};
6+
use lsm_tree::binary_search::partition_point;
7+
8+
// Differential fuzz target: the crate's `partition_point` must agree with
// the standard library's `slice::partition_point` on every sorted input.
fuzz_target!(|data: &[u8]| {
    let mut unstructured = Unstructured::new(data);

    // Inputs that cannot be decoded into a haystack are skipped.
    if let Ok(mut items) = <Vec<u8> as Arbitrary>::arbitrary(&mut unstructured) {
        // Sorting makes the haystack partitioned with respect to `x < needle`,
        // which both implementations require.
        items.sort();
        items.dedup();

        // Skip (instead of panicking) if no needle can be decoded, so that a
        // decoding failure is never reported as a fuzzing crash.
        let Ok(needle) = <u8 as Arbitrary>::arbitrary(&mut unstructured) else {
            return;
        };

        let idx = partition_point(&items, |&x| x < needle);
        let std_pp_idx = items.partition_point(|&x| x < needle);
        assert_eq!(std_pp_idx, idx);
    }
});

src/abstract.rs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,22 @@ pub type RangeItem = crate::Result<KvPair>;
1818
#[allow(clippy::module_name_repetitions)]
1919
#[enum_dispatch]
2020
pub trait AbstractTree {
21+
/// Ingests a sorted stream of key-value pairs into the tree.
22+
///
23+
/// Can only be called on a fresh, empty tree.
24+
///
25+
/// # Errors
26+
///
27+
/// Will return `Err` if an IO error occurs.
28+
///
29+
/// # Panics
30+
///
31+
/// Panics if the tree is **not** initially empty.
32+
///
33+
/// Will panic if the input iterator is not sorted in ascending order.
34+
#[doc(hidden)]
35+
fn ingest(&self, iter: impl Iterator<Item = (UserKey, UserValue)>) -> crate::Result<()>;
36+
2137
/// Performs major compaction, blocking the caller until it's done.
2238
///
2339
/// # Errors
@@ -478,7 +494,6 @@ pub trait AbstractTree {
478494
fn snapshot(&self, seqno: SeqNo) -> Snapshot;
479495

480496
/// Opens a snapshot of this partition with a given sequence number
481-
#[must_use]
482497
fn snapshot_at(&self, seqno: SeqNo) -> Snapshot {
483498
self.snapshot(seqno)
484499
}

src/binary_search.rs

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
// Copyright (c) 2024-present, fjall-rs
2+
// This source code is licensed under both the Apache 2.0 and MIT License
3+
// (found in the LICENSE-* files in the repository)
4+
5+
/// Returns the index of the partition point according to the given predicate
/// (the index of the first element of the second partition).
///
/// The slice is expected to be partitioned by `pred`: every element for which
/// `pred` returns `true` must come before every element for which it returns
/// `false`. An empty slice yields `0`; if `pred` holds for every element, the
/// result is `slice.len()`.
///
/// This seems to be faster than std's `partition_point`: <https://github.com/rust-lang/rust/issues/138796>
pub fn partition_point<T, F>(slice: &[T], pred: F) -> usize
where
    F: Fn(&T) -> bool,
{
    // Half-open search window `[left, right)`; an empty slice never enters the loop.
    let mut left = 0;
    let mut right = slice.len();

    while left < right {
        // Overflow-safe midpoint: `left + right` may exceed `usize::MAX` for
        // slices of zero-sized elements, whose length is not capped at `isize::MAX`.
        let mid = left + (right - left) / 2;

        // SAFETY: `left <= mid < right <= slice.len()` holds on every iteration,
        // so `mid` is always a valid index into `slice`.
        // See also https://github.com/rust-lang/rust/blob/ebf0cf75d368c035f4c7e7246d203bd469ee4a51/library/core/src/slice/mod.rs#L2834-L2836
        #[warn(unsafe_code)]
        let item = unsafe { slice.get_unchecked(mid) };

        if pred(item) {
            // `mid` belongs to the first partition; continue to its right.
            left = mid + 1;
        } else {
            // `mid` belongs to the second partition; it may be its first element.
            right = mid;
        }
    }

    left
}
}
36+
37+
#[cfg(test)]
38+
mod tests {
39+
use super::partition_point;
40+
use test_log::test;
41+
42+
#[test]
43+
fn binary_search_first() {
44+
let items = [1, 2, 3, 4, 5];
45+
let idx = partition_point(&items, |&x| x < 1);
46+
assert_eq!(0, idx);
47+
48+
let std_pp_idx = items.partition_point(|&x| x < 1);
49+
assert_eq!(std_pp_idx, idx);
50+
}
51+
52+
#[test]
53+
fn binary_search_last() {
54+
let items = [1, 2, 3, 4, 5];
55+
let idx = partition_point(&items, |&x| x < 5);
56+
assert_eq!(4, idx);
57+
58+
let std_pp_idx = items.partition_point(|&x| x < 5);
59+
assert_eq!(std_pp_idx, idx);
60+
}
61+
62+
#[test]
63+
fn binary_search_middle() {
64+
let items = [1, 2, 3, 4, 5];
65+
let idx = partition_point(&items, |&x| x < 3);
66+
assert_eq!(2, idx);
67+
68+
let std_pp_idx = items.partition_point(|&x| x < 3);
69+
assert_eq!(std_pp_idx, idx);
70+
}
71+
72+
#[test]
73+
fn binary_search_none() {
74+
let items = [1, 2, 3, 4, 5];
75+
let idx = partition_point(&items, |&x| x < 10);
76+
assert_eq!(5, idx);
77+
78+
let std_pp_idx = items.partition_point(|&x| x < 10);
79+
assert_eq!(std_pp_idx, idx);
80+
}
81+
82+
#[test]
83+
fn binary_search_empty() {
84+
let items: [i32; 0] = [];
85+
let idx = partition_point(&items, |&x| x < 10);
86+
assert_eq!(0, idx);
87+
88+
let std_pp_idx = items.partition_point(|&x| x < 10);
89+
assert_eq!(std_pp_idx, idx);
90+
}
91+
}

0 commit comments

Comments
 (0)