Skip to content

Commit

Permalink
Better adaptive behaviour on from_iter
Browse files Browse the repository at this point in the history
  • Loading branch information
Firstyear committed Apr 10, 2021
1 parent 5509de3 commit 3b8fbdc
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 69 deletions.
27 changes: 19 additions & 8 deletions benches/bench_idl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ mod idl_simple;
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};

use idl_simple::IDLSimple;
use idlset::IDLBitRange;
use idlset::v2::IDLBitRange as IDLBitRangeV2;
use idlset::IDLBitRange;
use std::iter::FromIterator;

// Trying to make these work with trait bounds is literally too hard
Expand Down Expand Up @@ -35,11 +35,17 @@ struct V2Duplex(IDLBitRangeV2, IDLBitRangeV2);

impl std::fmt::Display for V2Duplex {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{} {} - {} {}", self.0.len(), self.0.is_compressed(), self.1.len(), self.1.is_compressed())
write!(
f,
"{} {} - {} {}",
self.0.len(),
self.0.is_compressed(),
self.1.len(),
self.1.is_compressed()
)
}
}


struct Triplex(Vec<u64>, Vec<u64>, Vec<u64>);

struct STriplex(IDLSimple, IDLSimple, IDLSimple);
Expand Down Expand Up @@ -77,9 +83,12 @@ impl std::fmt::Display for V2Triplex {
write!(
f,
"{} {} -- {} {} -- {} {}",
self.0.len(), self.0.is_compressed(),
self.1.len(), self.1.is_compressed(),
self.2.len(), self.2.is_compressed(),
self.0.len(),
self.0.is_compressed(),
self.1.len(),
self.1.is_compressed(),
self.2.len(),
self.2.is_compressed(),
)
}
}
Expand Down Expand Up @@ -119,8 +128,10 @@ impl std::fmt::Display for V2Complex {
write!(
f,
"{} {} -- {} {} -- [{}]",
self.0.len(), self.0.is_compressed(),
self.1.len(), self.1.is_compressed(),
self.0.len(),
self.0.is_compressed(),
self.1.len(),
self.1.is_compressed(),
self.2.len()
)
}
Expand Down
116 changes: 55 additions & 61 deletions src/v2.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,17 @@

use std::iter::FromIterator;
use std::cmp::Ordering;
use std::ops::{BitOr, BitAnd};
use smallvec::SmallVec;
use std::cmp::Ordering;
use std::iter::FromIterator;
use std::ops::{BitAnd, BitOr};
use std::slice;

/// Default number of IDL ranges to keep in stack before we spill into heap. As many
/// operations in a system like kanidm are either single item indexes (think equality)
/// or very large indexes (think pres, class), we can keep this small.
// was 5
///
/// A sparse alloc of 2 keeps the comp vs sparse variants equal size in the non-overflow
/// case. Larger means we are losing space in the comp case.
const DEFAULT_SPARSE_ALLOC: usize = 2;
// const DEFAULT_COMP_ALLOC: usize = 2;
// const DEFAULT_SPARSE_ALLOC: usize = 5 + 8;
// const DEFAULT_COMP_ALLOC: usize = 2 + 4;


// Minimum average number of ids per range required before the compressed form is preferred.
const AVG_RANGE_COMP_REQ: usize = 8;

/// The core representation of sets of integers in compressed format.
#[derive(Serialize, Deserialize, Debug, Clone)]
Expand Down Expand Up @@ -52,7 +48,7 @@ enum IDLState {

#[derive(Serialize, Deserialize, PartialEq, Debug, Clone)]
pub struct IDLBitRange {
state: IDLState
state: IDLState,
}

impl IDLRange {
Expand All @@ -70,9 +66,9 @@ impl IDLRange {
}

impl Default for IDLBitRange {
fn default() -> Self {
fn default() -> Self {
IDLBitRange {
state: IDLState::Sparse(SmallVec::new())
state: IDLState::Sparse(SmallVec::new()),
}
}
}
Expand All @@ -84,7 +80,7 @@ impl IDLBitRange {

pub fn from_u64(id: u64) -> Self {
IDLBitRange {
state: IDLState::Sparse(smallvec![id])
state: IDLState::Sparse(smallvec![id]),
}
}

Expand All @@ -109,29 +105,22 @@ impl IDLBitRange {
pub fn len(&self) -> usize {
match &self.state {
IDLState::Sparse(list) => list.len(),
IDLState::Compressed(list) =>
list
.iter()
.fold(0, |acc, i| (i.mask.count_ones() as usize) + acc),
IDLState::Compressed(list) => list
.iter()
.fold(0, |acc, i| (i.mask.count_ones() as usize) + acc),
}
}

pub fn sum(&self) -> u64 {
match &self.state {
IDLState::Sparse(list) => list.iter().fold(0, |acc, x| x + acc),
IDLState::Compressed(list) =>
IDLBitRangeIter::new(&list)
.fold(0, |acc, x| x + acc),
IDLState::Compressed(list) => IDLBitRangeIter::new(&list).fold(0, |acc, x| x + acc),
}
}

pub fn contains(&self, id: u64) -> bool {
match &self.state {
IDLState::Sparse(list) => {
list.as_slice()
.binary_search(&id)
.is_ok()
}
IDLState::Sparse(list) => list.as_slice().binary_search(&id).is_ok(),
IDLState::Compressed(list) => {
let bvalue: u64 = id % 64;
let range: u64 = id - bvalue;
Expand All @@ -144,18 +133,11 @@ impl IDLBitRange {
} else {
false
}

}
}
}

pub unsafe fn push_id(&mut self, id: u64) {
if let IDLState::Sparse(list) = &self.state {
if list.len() >= DEFAULT_SPARSE_ALLOC {
self.compress()
}
};

match &mut self.state {
IDLState::Sparse(list) => {
list.push(id);
Expand All @@ -179,12 +161,6 @@ impl IDLBitRange {
}

pub fn insert_id(&mut self, id: u64) {
if let IDLState::Sparse(list) = &self.state {
if list.len() >= DEFAULT_SPARSE_ALLOC {
self.compress()
}
};

match &mut self.state {
IDLState::Sparse(list) => {
let r = list.binary_search(&id);
Expand Down Expand Up @@ -260,14 +236,22 @@ impl IDLBitRange {
let mut prev_state = IDLState::Compressed(Vec::new());
std::mem::swap(&mut prev_state, &mut self.state);
match prev_state {
IDLState::Sparse(list) => {
list.into_iter().for_each(|i|
unsafe { self.push_id(i); })
}
IDLState::Sparse(list) => list.into_iter().for_each(|i| unsafe {
self.push_id(i);
}),
IDLState::Compressed(_) => panic!("Unexpected state!"),
}
}

/// Decide whether this compressed set is dense enough to be worth keeping in
/// compressed form.
///
/// Returns `true` when the average number of ids stored per range (total ids
/// divided by number of ranges, using integer division) is at least
/// `AVG_RANGE_COMP_REQ`. An empty compressed set has no ranges and therefore
/// returns `false`.
///
/// # Panics
///
/// Panics if called while the set is in the sparse state; callers must only
/// invoke this on a set that is already compressed.
fn should_compress(&self) -> bool {
    if let IDLState::Compressed(list) = &self.state {
        // With zero ranges the average below would divide by zero (e.g. when
        // collecting from an empty iterator), so an empty set never compresses.
        // num values / num ranges == avg range bits.
        !list.is_empty() && (self.len() / list.len()) >= AVG_RANGE_COMP_REQ
    } else {
        unreachable!();
    }
}

#[inline(always)]
fn bitand_inner(&self, rhs: &Self) -> Self {
match (&self.state, &rhs.state) {
Expand Down Expand Up @@ -297,11 +281,11 @@ impl IDLBitRange {
}

IDLBitRange {
state: IDLState::Sparse(nlist)
state: IDLState::Sparse(nlist),
}
}
(IDLState::Sparse(sparselist), IDLState::Compressed(list)) |
(IDLState::Compressed(list), IDLState::Sparse(sparselist)) => {
(IDLState::Sparse(sparselist), IDLState::Compressed(list))
| (IDLState::Compressed(list), IDLState::Sparse(sparselist)) => {
let mut nlist = SmallVec::new();

sparselist.iter().for_each(|id| {
Expand All @@ -318,7 +302,7 @@ impl IDLBitRange {
});

IDLBitRange {
state: IDLState::Sparse(nlist)
state: IDLState::Sparse(nlist),
}
}
(IDLState::Compressed(list1), IDLState::Compressed(list2)) => {
Expand Down Expand Up @@ -351,14 +335,13 @@ impl IDLBitRange {
IDLBitRange::new()
} else {
IDLBitRange {
state: IDLState::Compressed(nlist)
state: IDLState::Compressed(nlist),
}
}
}
}
}


#[inline(always)]
fn bitor_inner(&self, rhs: &Self) -> Self {
match (&self.state, &rhs.state) {
Expand Down Expand Up @@ -401,11 +384,11 @@ impl IDLBitRange {
}

IDLBitRange {
state: IDLState::Sparse(nlist)
state: IDLState::Sparse(nlist),
}
}
(IDLState::Sparse(sparselist), IDLState::Compressed(list)) |
(IDLState::Compressed(list), IDLState::Sparse(sparselist)) => {
(IDLState::Sparse(sparselist), IDLState::Compressed(list))
| (IDLState::Compressed(list), IDLState::Sparse(sparselist)) => {
// Duplicate the compressed set.
let mut list = list.clone();

Expand All @@ -429,7 +412,7 @@ impl IDLBitRange {
});

IDLBitRange {
state: IDLState::Compressed(list)
state: IDLState::Compressed(list),
}
}
(IDLState::Compressed(list1), IDLState::Compressed(list2)) => {
Expand Down Expand Up @@ -479,7 +462,7 @@ impl IDLBitRange {
rnextrange = riter.next();
}
IDLBitRange {
state: IDLState::Compressed(nlist)
state: IDLState::Compressed(nlist),
}
}
} // end match
Expand All @@ -494,24 +477,34 @@ impl FromIterator<u64> for IDLBitRange {
let iter = iter.into_iter();
let (lower_bound, _) = iter.size_hint();

let mut new = IDLBitRange {
state: IDLState::Sparse(SmallVec::with_capacity(lower_bound))
let mut new_sparse = IDLBitRange {
state: IDLState::Sparse(SmallVec::with_capacity(lower_bound)),
};
let mut new_comp = IDLBitRange {
state: IDLState::Compressed(Vec::with_capacity((lower_bound / AVG_RANGE_COMP_REQ) + 1)),
};

let mut max_seen = 0;
iter.for_each(|i| {
if i >= max_seen {
// if we have a sorted list, we can take a fast append path.
unsafe {
new.push_id(i);
new_sparse.push_id(i);
new_comp.push_id(i);
}
max_seen = i;
} else {
// if not, we have to bst each time to get the right place.
new.insert_id(i);
new_sparse.insert_id(i);
new_comp.insert_id(i);
}
});
new

if new_comp.should_compress() {
new_comp
} else {
new_sparse
}
}
}

Expand Down Expand Up @@ -941,11 +934,12 @@ mod tests {
2, 3, 8, 35, 64, 128, 130, 150, 152, 180, 256, 800, 900,
]);
let idl_b = IDLBitRange::from_iter(1..1024);
let idl_expect = IDLBitRange::from_iter(vec![
let mut idl_expect = IDLBitRange::from_iter(vec![
2, 3, 8, 35, 64, 128, 130, 150, 152, 180, 256, 800, 900,
]);

idl_a.compress();
idl_expect.compress();

let idl_result = idl_a & idl_b;
assert_eq!(idl_result, idl_expect);
Expand Down

0 comments on commit 3b8fbdc

Please sign in to comment.