Skip to content

Commit d466a07

Browse files
authored
Merge pull request #61 from fjall-rs/2.1.0
2.1.0
2 parents 659d444 + aeca336 commit d466a07

38 files changed

+1066
-401
lines changed

Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
name = "lsm-tree"
33
description = "A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs)"
44
license = "MIT OR Apache-2.0"
5-
version = "2.0.2"
5+
version = "2.1.0"
66
edition = "2021"
77
rust-version = "1.74.0"
88
readme = "README.md"
@@ -34,10 +34,11 @@ lz4_flex = { version = "0.11.3", optional = true }
3434
miniz_oxide = { version = "0.8.0", optional = true }
3535
path-absolutize = "3.1.1"
3636
quick_cache = { version = "0.6.5", default-features = false, features = [] }
37+
rustc-hash = "2.0.0"
3738
self_cell = "1.0.4"
3839
smallvec = { version = "1.13.2" }
3940
tempfile = "3.12.0"
40-
value-log = "1.0.0"
41+
value-log = "1.1.0"
4142
varint-rs = "2.2.0"
4243
xxhash-rust = { version = "0.8.12", features = ["xxh3"] }
4344

benches/block.rs

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
use criterion::{criterion_group, criterion_main, Criterion};
22
use lsm_tree::{
3+
coding::Encode,
34
segment::{
45
block::{header::Header as BlockHeader, ItemSize},
56
meta::CompressionType,
67
value_block::ValueBlock,
78
},
8-
serde::Serializable,
99
Checksum, InternalValue,
1010
};
1111
use std::io::Write;
@@ -99,13 +99,56 @@ fn value_block_find(c: &mut Criterion) {
9999
}
100100
}
101101

102+
fn encode_block(c: &mut Criterion) {
103+
let mut group = c.benchmark_group("Encode block");
104+
105+
for comp_type in [
106+
CompressionType::None,
107+
CompressionType::Lz4,
108+
CompressionType::Miniz(3),
109+
] {
110+
for block_size in [1, 4, 8, 16, 32, 64, 128] {
111+
let block_size = block_size * 1_024;
112+
113+
let mut size = 0;
114+
115+
let mut items = vec![];
116+
117+
for x in 0u64.. {
118+
let value = InternalValue::from_components(
119+
x.to_be_bytes(),
120+
x.to_string().repeat(50).as_bytes(),
121+
63,
122+
lsm_tree::ValueType::Value,
123+
);
124+
125+
size += value.size();
126+
127+
items.push(value);
128+
129+
if size >= block_size {
130+
break;
131+
}
132+
}
133+
134+
group.bench_function(format!("{block_size} KiB [{comp_type}]"), |b| {
135+
b.iter(|| {
136+
// Serialize block
137+
let (mut header, data) =
138+
ValueBlock::to_bytes_compressed(&items, 0, comp_type).unwrap();
139+
});
140+
});
141+
}
142+
}
143+
}
144+
102145
fn load_value_block_from_disk(c: &mut Criterion) {
103146
let mut group = c.benchmark_group("Load block from disk");
104147

105148
for comp_type in [
106-
CompressionType::None,
149+
//CompressionType::None,
107150
CompressionType::Lz4,
108-
CompressionType::Miniz(6),
151+
//CompressionType::Miniz(3),
109152
] {
110153
for block_size in [1, 4, 8, 16, 32, 64, 128] {
111154
let block_size = block_size * 1_024;
@@ -133,7 +176,6 @@ fn load_value_block_from_disk(c: &mut Criterion) {
133176

134177
// Serialize block
135178
let (mut header, data) = ValueBlock::to_bytes_compressed(&items, 0, comp_type).unwrap();
136-
header.checksum = Checksum::from_bytes(&data);
137179

138180
let mut file = tempfile::tempfile().unwrap();
139181
header.encode_into(&mut file).unwrap();
@@ -156,5 +198,10 @@ fn load_value_block_from_disk(c: &mut Criterion) {
156198
}
157199
}
158200

159-
criterion_group!(benches, value_block_find, load_value_block_from_disk,);
201+
criterion_group!(
202+
benches,
203+
encode_block,
204+
value_block_find,
205+
load_value_block_from_disk,
206+
);
160207
criterion_main!(benches);

benches/level_manifest.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ fn iterate_segments(c: &mut Criterion) {
66
group.sample_size(10);
77

88
for segment_count in [0, 1, 5, 10, 100, 500, 1_000, 2_000, 4_000] {
9-
group.bench_function(&format!("iterate {segment_count} segments"), |b| {
9+
group.bench_function(format!("iterate {segment_count} segments"), |b| {
1010
let folder = tempfile::tempdir_in(".bench").unwrap();
1111
let tree = Config::new(folder).data_block_size(1_024).open().unwrap();
1212

@@ -30,7 +30,7 @@ fn find_segment(c: &mut Criterion) {
3030

3131
for segment_count in [1u64, 5, 10, 100, 500, 1_000, 2_000, 4_000] {
3232
group.bench_function(
33-
&format!("find segment in {segment_count} segments - binary search"),
33+
format!("find segment in {segment_count} segments - binary search"),
3434
|b| {
3535
let folder = tempfile::tempdir_in(".bench").unwrap();
3636
let tree = Config::new(folder).data_block_size(1_024).open().unwrap();
@@ -49,14 +49,16 @@ fn find_segment(c: &mut Criterion) {
4949
.levels
5050
.first()
5151
.expect("should exist")
52+
.as_disjoint()
53+
.expect("should be disjoint")
5254
.get_segment_containing_key(key)
5355
.expect("should exist")
5456
});
5557
},
5658
);
5759

5860
group.bench_function(
59-
&format!("find segment in {segment_count} segments - linear search"),
61+
format!("find segment in {segment_count} segments - linear search"),
6062
|b| {
6163
let folder = tempfile::tempdir().unwrap();
6264
let tree = Config::new(folder).data_block_size(1_024).open().unwrap();

benches/tli.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
use criterion::{criterion_group, criterion_main, Criterion};
2-
use lsm_tree::segment::{block_index::BlockIndex, value_block::CachePolicy};
2+
use lsm_tree::segment::{
3+
block_index::BlockIndex, value_block::BlockOffset, value_block::CachePolicy,
4+
};
35

46
fn tli_find_item(c: &mut Criterion) {
57
use lsm_tree::segment::block_index::{
@@ -15,7 +17,7 @@ fn tli_find_item(c: &mut Criterion) {
1517
for x in 0..item_count {
1618
items.push(KeyedBlockHandle {
1719
end_key: x.to_be_bytes().into(),
18-
offset: x,
20+
offset: BlockOffset(x),
1921
});
2022
}
2123

src/abstract.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ pub type RangeItem = crate::Result<KvPair>;
1919
#[allow(clippy::module_name_repetitions)]
2020
#[enum_dispatch]
2121
pub trait AbstractTree {
22+
/// Gets the memory usage of all bloom filters in the tree.
23+
#[cfg(feature = "bloom")]
24+
fn bloom_filter_size(&self) -> usize;
25+
2226
/* /// Imports data from a flat file (see [`Tree::export`]),
2327
/// blocking the caller until it is done.
2428
///

src/blob_tree/mod.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,11 @@ impl BlobTree {
163163
}
164164

165165
impl AbstractTree for BlobTree {
166+
#[cfg(feature = "bloom")]
167+
fn bloom_filter_size(&self) -> usize {
168+
self.index.bloom_filter_size()
169+
}
170+
166171
fn sealed_memtable_count(&self) -> usize {
167172
self.index.sealed_memtable_count()
168173
}
@@ -231,7 +236,6 @@ impl AbstractTree for BlobTree {
231236
segment_id,
232237
data_block_size: self.index.config.data_block_size,
233238
index_block_size: self.index.config.index_block_size,
234-
evict_tombstones: false,
235239
folder: lsm_segment_folder,
236240
})?
237241
.use_compression(self.index.config.compression);

src/block_cache.rs

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
use crate::either::Either::{self, Left, Right};
66
use crate::segment::id::GlobalSegmentId;
7+
use crate::segment::value_block::BlockOffset;
78
use crate::segment::{block_index::IndexBlock, value_block::ValueBlock};
89
use quick_cache::Weighter;
910
use quick_cache::{sync::Cache, Equivalent};
@@ -13,16 +14,16 @@ type Item = Either<Arc<ValueBlock>, Arc<IndexBlock>>;
1314

1415
// (Segment ID, Block offset)
1516
#[derive(Eq, std::hash::Hash, PartialEq)]
16-
struct CacheKey(GlobalSegmentId, u64);
17+
struct CacheKey(GlobalSegmentId, BlockOffset);
1718

18-
impl Equivalent<CacheKey> for (GlobalSegmentId, u64) {
19+
impl Equivalent<CacheKey> for (GlobalSegmentId, BlockOffset) {
1920
fn equivalent(&self, key: &CacheKey) -> bool {
2021
self.0 == key.0 && self.1 == key.1
2122
}
2223
}
2324

24-
impl From<(GlobalSegmentId, u64)> for CacheKey {
25-
fn from((gid, bid): (GlobalSegmentId, u64)) -> Self {
25+
impl From<(GlobalSegmentId, BlockOffset)> for CacheKey {
26+
fn from((gid, bid): (GlobalSegmentId, BlockOffset)) -> Self {
2627
Self(gid, bid)
2728
}
2829
}
@@ -65,7 +66,11 @@ impl Weighter<CacheKey, Item> for BlockWeighter {
6566
/// # Ok::<(), lsm_tree::Error>(())
6667
/// ```
6768
pub struct BlockCache {
68-
data: Cache<CacheKey, Item, BlockWeighter, xxhash_rust::xxh3::Xxh3Builder>,
69+
// NOTE: rustc_hash performed best: https://fjall-rs.github.io/post/fjall-2-1
70+
/// Concurrent cache implementation
71+
data: Cache<CacheKey, Item, BlockWeighter, rustc_hash::FxBuildHasher>,
72+
73+
/// Capacity in bytes
6974
capacity: u64,
7075
}
7176

@@ -75,14 +80,17 @@ impl BlockCache {
7580
pub fn with_capacity_bytes(bytes: u64) -> Self {
7681
use quick_cache::sync::DefaultLifecycle;
7782

83+
#[allow(clippy::default_trait_access)]
84+
let quick_cache = Cache::with(
85+
1_000_000,
86+
bytes,
87+
BlockWeighter,
88+
Default::default(),
89+
DefaultLifecycle::default(),
90+
);
91+
7892
Self {
79-
data: Cache::with(
80-
1_000_000,
81-
bytes,
82-
BlockWeighter,
83-
xxhash_rust::xxh3::Xxh3Builder::new(),
84-
DefaultLifecycle::default(),
85-
),
93+
data: quick_cache,
8694
capacity: bytes,
8795
}
8896
}
@@ -115,7 +123,7 @@ impl BlockCache {
115123
pub fn insert_disk_block(
116124
&self,
117125
segment_id: GlobalSegmentId,
118-
offset: u64,
126+
offset: BlockOffset,
119127
value: Arc<ValueBlock>,
120128
) {
121129
if self.capacity > 0 {
@@ -127,7 +135,7 @@ impl BlockCache {
127135
pub fn insert_index_block(
128136
&self,
129137
segment_id: GlobalSegmentId,
130-
offset: u64,
138+
offset: BlockOffset,
131139
value: Arc<IndexBlock>,
132140
) {
133141
if self.capacity > 0 {
@@ -140,7 +148,7 @@ impl BlockCache {
140148
pub fn get_disk_block(
141149
&self,
142150
segment_id: GlobalSegmentId,
143-
offset: u64,
151+
offset: BlockOffset,
144152
) -> Option<Arc<ValueBlock>> {
145153
let key = (segment_id, offset);
146154
let item = self.data.get(&key)?;
@@ -152,7 +160,7 @@ impl BlockCache {
152160
pub fn get_index_block(
153161
&self,
154162
segment_id: GlobalSegmentId,
155-
offset: u64,
163+
offset: BlockOffset,
156164
) -> Option<Arc<IndexBlock>> {
157165
let key = (segment_id, offset);
158166
let item = self.data.get(&key)?;

0 commit comments

Comments
 (0)