Skip to content

Commit

Permalink
perf: hash sharing
Browse files Browse the repository at this point in the history
  • Loading branch information
marvin-j97 committed Jun 8, 2024
1 parent f465f86 commit 7f8e128
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 21 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "lsm-tree"
description = "A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs)"
license = "MIT OR Apache-2.0"
version = "1.1.1"
version = "1.2.0"
edition = "2021"
rust-version = "1.74.0"
readme = "README.md"
Expand Down
20 changes: 15 additions & 5 deletions src/bloom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use std::io::{Read, Write};

pub const BLOOM_HEADER_MAGIC: &[u8] = &[b'F', b'J', b'L', b'L', b'S', b'B', b'F', b'1'];

/// A pair of 64-bit hashes derived from a single key.
///
/// Produced by `BloomFilter::get_hash` and consumed by
/// `BloomFilter::contains_hash` and `BloomFilter::set_with_hash`, so a key
/// can be hashed once and the result shared between several filter operations.
pub type CompositeHash = (u64, u64);

/// A standard bloom filter
///
/// Allows buffering the key hashes before actual filter construction
Expand Down Expand Up @@ -127,12 +129,12 @@ impl BloomFilter {
}
}

/// Returns `true` if the item may be contained.
/// Returns `true` if the hash may be contained.
///
/// Will never have a false negative.
#[must_use]
pub fn contains(&self, key: &[u8]) -> bool {
let (mut h1, mut h2) = Self::get_hash(key);
pub fn contains_hash(&self, hash: CompositeHash) -> bool {
let (mut h1, mut h2) = hash;

for i in 0..(self.k as u64) {
let idx = h1 % (self.m as u64);
Expand All @@ -150,8 +152,16 @@ impl BloomFilter {
true
}

/* /// Returns `true` if the item may be contained.
///
/// Will never have a false negative.
#[must_use]
pub fn contains(&self, key: &[u8]) -> bool {
self.contains_hash(Self::get_hash(key))
} */

/// Adds the key to the filter
pub fn set_with_hash(&mut self, (mut h1, mut h2): (u64, u64)) {
pub fn set_with_hash(&mut self, (mut h1, mut h2): CompositeHash) {
for i in 0..(self.k as u64) {
let idx = h1 % (self.m as u64);

Expand All @@ -169,7 +179,7 @@ impl BloomFilter {

/// Gets the hash of a key
#[must_use]
pub fn get_hash(key: &[u8]) -> (u64, u64) {
pub fn get_hash(key: &[u8]) -> CompositeHash {
let mut hasher = SeaHasher::default();
hasher.write(key);
let h1 = hasher.finish();
Expand Down
72 changes: 59 additions & 13 deletions src/segment/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use crate::{
use std::{ops::Bound, path::Path, sync::Arc};

#[cfg(feature = "bloom")]
use crate::bloom::BloomFilter;
use crate::bloom::{BloomFilter, CompositeHash};

/// Disk segment (a.k.a. `SSTable`, `SST`, `sorted string table`) that is located on disk
///
Expand Down Expand Up @@ -198,18 +198,13 @@ impl Segment {
self.bloom_filter.len()
}

/// Retrieves an item from the segment.
///
/// # Errors
///
/// Will return `Err` if an IO error occurs.
pub fn get<K: AsRef<[u8]>>(
#[cfg(feature = "bloom")]
pub fn get_with_hash<K: AsRef<[u8]>>(
&self,
key: K,
seqno: Option<SeqNo>,
hash: CompositeHash,
) -> crate::Result<Option<Value>> {
use value_block::{CachePolicy, ValueBlock};

if let Some(seqno) = seqno {
if self.metadata.seqnos.0 >= seqno {
return Ok(None);
Expand All @@ -220,15 +215,28 @@ impl Segment {
return Ok(None);
}

let key = key.as_ref();

#[cfg(feature = "bloom")]
{
if !self.bloom_filter.contains(key) {
/* let start = std::time::Instant::now(); */
let probe = self.bloom_filter.contains_hash(hash);
/* eprintln!("probe in {}ns", start.elapsed().as_nanos()); */

if !probe {
return Ok(None);
}
}

self.point_read(key, seqno)
}

fn point_read<K: AsRef<[u8]>>(
&self,
key: K,
seqno: Option<SeqNo>,
) -> crate::Result<Option<Value>> {
use value_block::{CachePolicy, ValueBlock};

let key = key.as_ref();

let Some(first_block_handle) = self
.block_index
.get_lowest_data_block_handle_containing_item(key.as_ref(), CachePolicy::Write)?
Expand Down Expand Up @@ -307,6 +315,44 @@ impl Segment {
Ok(None)
}

/// Retrieves an item from the segment.
///
/// The lookup short-circuits with `Ok(None)`, in this order, when:
///
/// 1. a `seqno` is given and the first entry of the segment's seqno range
///    is greater than or equal to it,
/// 2. the key lies outside the segment's key range,
/// 3. (only with the `bloom` feature) the bloom filter reports the key as absent.
///
/// Otherwise the actual point read is performed.
///
/// With the `bloom` feature enabled, callers should use
/// `Segment::get_with_hash` instead, so the key hash is computed once and
/// shared across segments rather than being recomputed here on every call.
///
/// # Errors
///
/// Will return `Err` if an IO error occurs.
///
/// # Panics
///
/// In debug builds with the `bloom` feature enabled, this always panics once
/// the seqno and key range checks have passed, to flag call sites that should
/// be migrated to `Segment::get_with_hash`.
pub fn get<K: AsRef<[u8]>>(
    &self,
    key: K,
    seqno: Option<SeqNo>,
) -> crate::Result<Option<Value>> {
    if let Some(seqno) = seqno {
        if self.metadata.seqnos.0 >= seqno {
            return Ok(None);
        }
    }

    if !self.metadata.key_range.contains_key(&key) {
        return Ok(None);
    }

    let key = key.as_ref();

    #[cfg(feature = "bloom")]
    {
        // Deliberate guard: reaching this path means the caller did not use hash sharing.
        debug_assert!(false, "Use Segment::get_with_hash instead");

        // NOTE: `BloomFilter::contains` is commented out in the bloom module,
        // so hash the key here and probe through `contains_hash`
        // (this is exactly what `contains` used to do).
        let hash = BloomFilter::get_hash(key);

        if !self.bloom_filter.contains_hash(hash) {
            return Ok(None);
        }
    }

    self.point_read(key, seqno)
}

/// Creates an iterator over the `Segment`.
///
/// # Errors
Expand Down
18 changes: 16 additions & 2 deletions src/tree/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -428,14 +428,23 @@ impl Tree {
}
drop(memtable_lock);

// NOTE: Create key hash for hash sharing
#[cfg(feature = "bloom")]
let key_hash = crate::bloom::BloomFilter::get_hash(key.as_ref());

// Now look in segments... this may involve disk I/O
let level_manifest = self.levels.read().expect("lock is poisoned");

for level in &level_manifest.levels {
// NOTE: Based on benchmarking, binary search is only worth it after ~5 segments
if level.is_disjoint && level.len() > 5 {
if let Some(segment) = level.get_segment_containing_key(&key) {
if let Some(item) = segment.get(&key, seqno)? {
#[cfg(not(feature = "bloom"))]
let maybe_item = segment.get(&key, seqno)?;
#[cfg(feature = "bloom")]
let maybe_item = segment.get_with_hash(&key, seqno, key_hash)?;

if let Some(item) = maybe_item {
if evict_tombstone {
return Ok(ignore_tombstone_value(item));
}
Expand All @@ -444,7 +453,12 @@ impl Tree {
}
} else {
for segment in &level.segments {
if let Some(item) = segment.get(&key, seqno)? {
#[cfg(not(feature = "bloom"))]
let maybe_item = segment.get(&key, seqno)?;
#[cfg(feature = "bloom")]
let maybe_item = segment.get_with_hash(&key, seqno, key_hash)?;

if let Some(item) = maybe_item {
if evict_tombstone {
return Ok(ignore_tombstone_value(item));
}
Expand Down

0 comments on commit 7f8e128

Please sign in to comment.