Skip to content

Commit

Permalink
perf(expr): further optimize performance (#744)
Browse files Browse the repository at this point in the history
* optimize to string array

Signed-off-by: Runji Wang <[email protected]>

* optimize bitvec

Signed-off-by: Runji Wang <[email protected]>

* avoid zip_eq for performance

Signed-off-by: Runji Wang <[email protected]>

* array: add `is_null` and `get_raw`

Signed-off-by: Runji Wang <[email protected]>

* add bench for array filter

Signed-off-by: Runji Wang <[email protected]>

* optimize filter -30%

Signed-off-by: Runji Wang <[email protected]>

* optimize filter from bool array

Signed-off-by: Runji Wang <[email protected]>

* clear null data

Signed-off-by: Runji Wang <[email protected]>

* fix cardinality error

Signed-off-by: Runji Wang <[email protected]>

* remove array iterator

Signed-off-by: Runji Wang <[email protected]>

* introduce non-null iterator

Signed-off-by: Runji Wang <[email protected]>

* optimize bitmap &&

Signed-off-by: Runji Wang <[email protected]>

* optimize BitVec operations

Signed-off-by: Runji Wang <[email protected]>

* fix clippy and test

Signed-off-by: Runji Wang <[email protected]>

Signed-off-by: Runji Wang <[email protected]>
  • Loading branch information
wangrunji0408 committed Dec 20, 2022
1 parent c4b8054 commit 0265c50
Show file tree
Hide file tree
Showing 20 changed files with 287 additions and 251 deletions.
42 changes: 35 additions & 7 deletions benches/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,17 @@ fn ops(c: &mut Criterion) {
}

for_all_size(c, "and(bool,bool)", |b, &size| {
let a1: ArrayImpl = (0..size).map(|i| i % 2 == 0).collect::<BoolArray>().into();
let a2: ArrayImpl = a1.clone();
let a1: ArrayImpl = make_bool_array(size);
let a2: ArrayImpl = make_bool_array(size);
b.iter(|| a1.and(&a2));
});
for_all_size(c, "or(bool,bool)", |b, &size| {
let a1: ArrayImpl = make_bool_array(size);
let a2: ArrayImpl = make_bool_array(size);
b.iter(|| a1.or(&a2));
});
for_all_size(c, "not(bool)", |b, &size| {
let a1: ArrayImpl = (0..size).map(|i| i % 2 == 0).collect::<BoolArray>().into();
let a1: ArrayImpl = make_bool_array(size);
b.iter(|| a1.not());
});
}
Expand Down Expand Up @@ -85,9 +90,24 @@ fn cast(c: &mut Criterion) {
let a1 = make_f64_array(size);
b.iter(|| a1.cast(&DataTypeKind::Decimal(None, None)))
});
for_all_size(c, "cast(i32->string)", |b, &size| {
for ty in ["i32", "f64", "decimal"] {
for_all_size(c, format!("cast({ty}->string)"), |b, &size| {
let a1 = match ty {
"i32" => make_i32_array(size),
"f64" => make_f64_array(size),
"decimal" => make_decimal_array(size),
_ => unreachable!(),
};
b.iter(|| a1.cast(&DataTypeKind::String))
});
}
}

fn filter(c: &mut Criterion) {
for_all_size(c, "filter(i32)", |b, &size| {
let a1 = make_i32_array(size);
b.iter(|| a1.cast(&DataTypeKind::String))
let ArrayImpl::Bool(a2) = make_bool_array(size) else { unreachable!() };
b.iter(|| a1.filter(a2.true_array()))
});
}

Expand Down Expand Up @@ -155,6 +175,14 @@ fn function(c: &mut Criterion) {
}
}

/// Builds a boolean benchmark array with `size` elements.
///
/// The bitmap returned by `make_valid_bitmap(size)` is used both as the array's mask and
/// to shape the data: a slot whose mask bit is set stores `i % 2 == 0`, and every other
/// slot stores `false`.
fn make_bool_array(size: usize) -> ArrayImpl {
    let validity = make_valid_bitmap(size);
    // The mask is cloned because it is consumed once while generating the values and
    // once more when it is handed to `from_data`.
    let values = (0..size as i32)
        .zip(validity.clone())
        .map(|(idx, is_set)| is_set && idx % 2 == 0);
    BoolArray::from_data(values, validity).into()
}

fn make_i32_array(size: usize) -> ArrayImpl {
let mask = make_valid_bitmap(size);
let iter = (0..size as i32)
Expand Down Expand Up @@ -199,11 +227,11 @@ fn for_all_size(
) {
let mut group = c.benchmark_group(name);
group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic));
for size in [1, 16, 256, 4096, 65536] {
for size in [1, 16, 256, 4096] {
group.bench_with_input(BenchmarkId::from_parameter(size), &size, &mut f);
}
group.finish();
}

criterion_group!(benches, function, ops, agg, cast);
criterion_group!(benches, function, ops, agg, cast, filter);
criterion_main!(benches);
13 changes: 6 additions & 7 deletions src/array/data_chunk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,14 @@ impl DataChunk {
}

/// Filter elements and create a new chunk.
pub fn filter(&self, visibility: impl Iterator<Item = bool> + Clone) -> Self {
let arrays = self
.arrays
.iter()
.map(|a| a.filter(visibility.clone()))
.collect();
pub fn filter(&self, visibility: &[bool]) -> Self {
let arrays: Arc<[ArrayImpl]> = self.arrays.iter().map(|a| a.filter(visibility)).collect();
DataChunk {
cardinality: match arrays.first() {
Some(a) => a.len(),
None => visibility.iter().filter(|b| **b).count(),
},
arrays,
cardinality: visibility.filter(|b| *b).count(),
}
}

Expand Down
6 changes: 2 additions & 4 deletions src/array/data_chunk_builder.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
use std::iter::IntoIterator;

use itertools::Itertools;

use super::{ArrayBuilderImpl, DataChunk};
use crate::types::{ConvertError, DataType, DataValue};

Expand Down Expand Up @@ -37,7 +35,7 @@ impl DataChunkBuilder {
pub fn push_row(&mut self, row: impl IntoIterator<Item = DataValue>) -> Option<DataChunk> {
self.array_builders
.iter_mut()
.zip_eq(row)
.zip(row)
.for_each(|(builder, v)| builder.push(&v));
self.size += 1;
if self.size == self.capacity {
Expand All @@ -60,7 +58,7 @@ impl DataChunkBuilder {
&mut self,
row: impl IntoIterator<Item = &'a str>,
) -> Result<Option<DataChunk>, ConvertError> {
for (builder, r) in self.array_builders.iter_mut().zip_eq(row) {
for (builder, r) in self.array_builders.iter_mut().zip(row) {
builder.push_str(r)?
}

Expand Down
93 changes: 0 additions & 93 deletions src/array/iterator.rs

This file was deleted.

61 changes: 32 additions & 29 deletions src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

use std::convert::TryFrom;
use std::fmt::Debug;
use std::iter::TrustedLen;
use std::ops::{Bound, RangeBounds};
use std::sync::Arc;

Expand All @@ -17,14 +16,12 @@ use crate::types::{

mod data_chunk;
mod data_chunk_builder;
mod iterator;
pub mod ops;
mod primitive_array;
mod utf8_array;

pub use self::data_chunk::*;
pub use self::data_chunk_builder::*;
pub use self::iterator::ArrayIter;
pub use self::primitive_array::*;
pub use self::utf8_array::*;

Expand Down Expand Up @@ -101,50 +98,56 @@ pub trait Array: Sized + Send + Sync + 'static {
/// Type of element in the array.
type Item: ToOwned + ?Sized;

type RawIter<'a>: Iterator<Item = &'a Self::Item> + TrustedLen;
/// Returns true if the value at `idx` is null.
fn is_null(&self, idx: usize) -> bool;

/// Retrieve a reference to value.
fn get(&self, idx: usize) -> Option<&Self::Item>;

fn get_unchecked(&self, idx: usize) -> &Self::Item;
/// Returns the raw value at `idx` regardless of null.
fn get_raw(&self, idx: usize) -> &Self::Item;

/// Number of items of array.
fn len(&self) -> usize;

/// Retrieve a reference to the value at `idx`.
///
/// Returns `None` when `is_null(idx)` reports the slot as null; otherwise returns the
/// raw value wrapped in `Some`.
fn get(&self, idx: usize) -> Option<&Self::Item> {
    // `then` runs the closure lazily, so `get_raw` is only called for non-null slots.
    (!self.is_null(idx)).then(|| self.get_raw(idx))
}

fn filter(&self, p: &[bool]) -> Self;

/// Get iterator of current array.
fn iter(&self) -> ArrayIter<'_, Self> {
ArrayIter::new(self)
fn iter(&self) -> impl DoubleEndedIterator<Item = Option<&Self::Item>> {
(0..self.len()).map(|i| self.get(i))
}

/// Get an iterator over the raw values of every slot.
///
/// Each index in `0..len()` is passed to `get_raw`, so null slots are yielded as well.
fn raw_iter(&self) -> impl DoubleEndedIterator<Item = &Self::Item> {
    let len = self.len();
    (0..len).map(|pos| self.get_raw(pos))
}

/// Get an iterator over the values of the non-null slots only.
///
/// Indices for which `is_null` returns true are skipped; the remaining ones are yielded
/// in index order via `get_raw`.
fn nonnull_iter(&self) -> impl DoubleEndedIterator<Item = &Self::Item> {
    (0..self.len()).filter_map(|pos| {
        if self.is_null(pos) {
            None
        } else {
            Some(self.get_raw(pos))
        }
    })
}

/// Check if `Array` is empty.
///
/// Returns `true` exactly when `len()` is zero.
fn is_empty(&self) -> bool {
self.len() == 0
}

fn raw_iter(&self) -> Self::RawIter<'_>;
}

/// An extension trait for [`Array`].
pub trait ArrayExt: Array {
/// Filter the elements and return a new array.
fn filter(&self, visibility: impl Iterator<Item = bool>) -> Self;

/// Return a slice of self for the provided range.
fn slice(&self, range: impl RangeBounds<usize>) -> Self;
}

impl<A: Array> ArrayExt for A {
/// Filter the elements and return a new array.
fn filter(&self, visibility: impl Iterator<Item = bool>) -> Self {
let mut builder = Self::Builder::with_capacity(self.len());
for (a, visible) in self.iter().zip(visibility) {
if visible {
builder.push(a);
}
}
builder.finish()
}

/// Return a slice of self for the provided range.
fn slice(&self, range: impl RangeBounds<usize>) -> Self {
let len = self.len();
Expand Down Expand Up @@ -547,11 +550,11 @@ macro_rules! impl_array {
}

/// Filter the elements and return a new array.
pub fn filter(&self, visibility: impl Iterator<Item = bool>) -> Self {
pub fn filter(&self, visibility: &[bool]) -> Self {
match self {
Self::Null(a) => Self::Null(a.filter(visibility).into()),
Self::Null(a) => Self::Null(a.filter(&visibility).into()),
$(
Self::$Abc(a) => Self::$Abc(a.filter(visibility).into()),
Self::$Abc(a) => Self::$Abc(a.filter(&visibility).into()),
)*
}
}
Expand Down

0 comments on commit 0265c50

Please sign in to comment.