Skip to content

Commit d56721c

Browse files
committed
feat: two_merge_iterator done
1 parent ae0716b commit d56721c

File tree

4 files changed

+99
-4
lines changed

4 files changed

+99
-4
lines changed

src/iterator.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
pub(crate) mod merge_iterator;
2+
mod two_merge_iterator;
23

34
pub trait StorageIterator {
45
// type KeyType<'a>: PartialEq + Eq + PartialOrd + Ord where Self: 'a;

src/iterator/merge_iterator.rs

+21-3
Original file line numberDiff line numberDiff line change
@@ -74,26 +74,44 @@ impl<T: StorageIterator> MergeIterator<T> {
7474
}
7575

7676
impl<T: StorageIterator> StorageIterator for MergeIterator<T> {
77+
// the right way to think about this is just:
78+
// keys in iter1: a, c, d;
79+
// keys in iter2: a, b, c;
80+
// keys in iter3: b, c, d;
81+
// since iter1 is the most recent mem_table iterator, we take `a` from iter1, detect that the next
82+
// iterator (iter2) holds an equal key, call .next() on iter2 to skip it, then call .next() on the current iterator
83+
// the situation will turn into this:
84+
// keys in iter1: c, d;
85+
// keys in iter2: b, c;
86+
// keys in iter3: b, c, d;
87+
// but the binary heap will keep the order for you, so the real one is like this:
88+
// keys in iter2: b, c;
89+
// keys in iter3: b, c, d;
90+
// keys in iter1: c, d;
91+
// then we can take b
7792
fn next(&mut self) -> anyhow::Result<()> {
7893
let current = self.current.as_mut().unwrap();
79-
// Pop the item out of the heap if they have the same value.
94+
// Loop over the heap: advance every iterator that sits on the same key as the current one, popping it on error or once it is no longer valid.
8095
while let Some(mut inner_iter) = self.iters.peek_mut() {
8196
debug_assert!(
8297
inner_iter.1.key() >= current.1.key(),
8398
"heap invariant violated"
8499
);
85100
if inner_iter.1.key() == current.1.key() {
86-
// Case 1: an error occurred when calling `next`.
101+
// call next in the mem_table iterator that is coming up since they have the
102+
// same key
87103
if let e @ Err(_) = inner_iter.1.next() {
88104
PeekMut::pop(inner_iter);
89105
return e;
90106
}
91107

92-
// Case 2: iter is no longer valid.
108+
// deal with it when iter is no longer valid.
93109
if !inner_iter.1.is_valid() {
94110
PeekMut::pop(inner_iter);
95111
}
96112
} else {
113+
// break the loop when something with bigger key appears
114+
// it may come from a different mem_table iterator
97115
break;
98116
}
99117
}

src/iterator/two_merge_iterator.rs

+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
use crate::iterator::StorageIterator;
2+
use anyhow::Result;
3+
/// Merges two [`StorageIterator`]s into a single iterator.
///
/// At any moment the entry exposed is the one with the smaller key of the two
/// underlying iterators. When both sit on an equal key, `b`'s entry is skipped
/// so that `a`'s entry is the one exposed.
pub struct TwoMergeIterator<A, B>
where
    A: StorageIterator,
    B: StorageIterator,
{
    /// First underlying iterator; wins whenever both iterators hold the same key.
    a: A,
    /// Second underlying iterator; advanced past any key that `a` currently holds.
    b: B,
    /// Cached selection: `true` when the current entry is read from `a`, `false` for `b`.
    choose_a: bool,
}
8+
9+
impl<A: StorageIterator, B: StorageIterator> TwoMergeIterator<A, B> {
    /// Decides which side supplies the current entry.
    ///
    /// Yields `false` when `a` is not valid, `true` when `a` is valid but `b`
    /// is not, and otherwise whether `a`'s key is strictly smaller than `b`'s.
    fn choose_a(a: &A, b: &B) -> bool {
        // Short-circuiting keeps the same probing order as a chain of guards:
        // `a` validity first, then `b` validity, and keys only when both are valid.
        a.is_valid() && (!b.is_valid() || a.key() < b.key())
    }

    /// Advances `b` by one entry when both iterators are valid and sit on the
    /// same key, so the duplicate held by `b` is never exposed.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `b.next()`.
    fn skip_b(&mut self) -> Result<()> {
        let same_key = self.a.is_valid() && self.b.is_valid() && self.b.key() == self.a.key();
        if same_key {
            self.b.next()?;
        }
        Ok(())
    }

    /// Builds a merged iterator over `a` and `b`.
    ///
    /// Before returning, a key duplicated at the head of `b` is skipped and the
    /// initial side selection is computed.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while skipping the duplicated head of `b`.
    pub fn create(a: A, b: B) -> Result<Self> {
        // `choose_a` starts as a placeholder; it is overwritten right after the skip.
        let mut merged = Self {
            a,
            b,
            choose_a: false,
        };
        merged.skip_b()?;
        merged.choose_a = Self::choose_a(&merged.a, &merged.b);
        Ok(merged)
    }
}
40+
41+
impl<A: StorageIterator, B: StorageIterator> StorageIterator for TwoMergeIterator<A, B> {
    /// Key of the currently selected side.
    fn key(&self) -> &[u8] {
        if !self.choose_a {
            return self.b.key();
        }
        self.a.key()
    }

    /// Value of the currently selected side.
    fn value(&self) -> &[u8] {
        if !self.choose_a {
            return self.b.value();
        }
        self.a.value()
    }

    /// Validity of the currently selected side.
    fn is_valid(&self) -> bool {
        if !self.choose_a {
            return self.b.is_valid();
        }
        self.a.is_valid()
    }

    /// Moves past the current entry, then re-establishes the merge state:
    /// skips a key in `b` that duplicates `a`'s key and recomputes which side
    /// is selected.
    ///
    /// # Errors
    ///
    /// Propagates any error from advancing either underlying iterator.
    fn next(&mut self) -> Result<()> {
        // Advance only the side that produced the entry being consumed.
        let advanced = if self.choose_a {
            self.a.next()
        } else {
            self.b.next()
        };
        advanced?;

        self.skip_b()?;
        self.choose_a = Self::choose_a(&self.a, &self.b);
        Ok(())
    }
}

src/mem_table.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ impl MemTable {
7777
// 2. the reason I use Arc to wrap the original skipmap is that we need the ownership to move one thing
7878
// into another thread, also, it is efficient to clone Arc.
7979
// 3. the reason I use self-reference is that the Rust compiler cannot make sure the skipmap always
80-
// exist when I try to use the iterator that points to it. (Note we can also use 'a here, but it can be
80+
// exists when I try to use the iterator that points to it. (Note we can also use 'a here, but it can
8181
// become quite complicated)
8282
type SkipMapRangeIter<'a> = crossbeam_skiplist::map::Range<'a, Bytes, (Bound<Bytes>, Bound<Bytes>), Bytes, Bytes>;
8383

0 commit comments

Comments
 (0)