Skip to content

Commit f378d21

Browse files
authored
perf: optimize writing non-null primitive value (#5460)
* avoid using arrow builder

  Signed-off-by: Ruihang Xia <[email protected]>

* optimize from_vec

  Signed-off-by: Ruihang Xia <[email protected]>

---------

Signed-off-by: Ruihang Xia <[email protected]>
1 parent 5b6279f commit f378d21

File tree

2 files changed

+41
-20
lines changed

2 files changed

+41
-20
lines changed

src/datatypes/src/vectors/primitive.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,12 @@ impl<T: LogicalPrimitiveType> PrimitiveVector<T> {
8080
}
8181
}
8282

83-
pub fn from_vec(array: Vec<T::Native>) -> Self {
84-
Self {
85-
array: PrimitiveArray::from_iter_values(array),
86-
}
83+
pub fn from_vec(vector: Vec<T::Native>) -> Self {
84+
let mutable_buffer = arrow::buffer::MutableBuffer::from(vector);
85+
let mut primitive_builder =
86+
PrimitiveBuilder::<T::ArrowPrimitive>::new_from_buffer(mutable_buffer, None);
87+
let array = primitive_builder.finish();
88+
Self { array }
8789
}
8890

8991
pub fn from_iter_values<I: IntoIterator<Item = T::Native>>(iter: I) -> Self {

src/mito2/src/memtable/time_series.rs

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,12 @@ use common_time::Timestamp;
2626
use datatypes::arrow;
2727
use datatypes::arrow::array::ArrayRef;
2828
use datatypes::data_type::{ConcreteDataType, DataType};
29-
use datatypes::prelude::{MutableVector, ScalarVectorBuilder, Vector, VectorRef};
29+
use datatypes::prelude::{MutableVector, Vector, VectorRef};
30+
use datatypes::types::TimestampType;
3031
use datatypes::value::{Value, ValueRef};
3132
use datatypes::vectors::{
32-
Helper, UInt64Vector, UInt64VectorBuilder, UInt8Vector, UInt8VectorBuilder,
33+
Helper, TimestampMicrosecondVector, TimestampMillisecondVector, TimestampNanosecondVector,
34+
TimestampSecondVector, UInt64Vector, UInt8Vector,
3335
};
3436
use snafu::{ensure, ResultExt};
3537
use store_api::metadata::RegionMetadataRef;
@@ -691,22 +693,23 @@ impl Series {
691693

692694
/// `ValueBuilder` holds all the vector builders for field columns.
693695
struct ValueBuilder {
694-
timestamp: Box<dyn MutableVector>,
695-
sequence: UInt64VectorBuilder,
696-
op_type: UInt8VectorBuilder,
696+
timestamp: Vec<i64>,
697+
timestamp_type: ConcreteDataType,
698+
sequence: Vec<u64>,
699+
op_type: Vec<u8>,
697700
fields: Vec<Option<Box<dyn MutableVector>>>,
698701
field_types: Vec<ConcreteDataType>,
699702
}
700703

701704
impl ValueBuilder {
702705
fn new(region_metadata: &RegionMetadataRef, capacity: usize) -> Self {
703-
let timestamp = region_metadata
706+
let timestamp_type = region_metadata
704707
.time_index_column()
705708
.column_schema
706709
.data_type
707-
.create_mutable_vector(capacity);
708-
let sequence = UInt64VectorBuilder::with_capacity(capacity);
709-
let op_type = UInt8VectorBuilder::with_capacity(capacity);
710+
.clone();
711+
let sequence = Vec::with_capacity(capacity);
712+
let op_type = Vec::with_capacity(capacity);
710713

711714
let field_types = region_metadata
712715
.field_columns()
@@ -715,7 +718,8 @@ impl ValueBuilder {
715718
let fields = (0..field_types.len()).map(|_| None).collect();
716719

717720
Self {
718-
timestamp,
721+
timestamp: Vec::with_capacity(capacity),
722+
timestamp_type,
719723
sequence,
720724
op_type,
721725
fields,
@@ -727,9 +731,10 @@ impl ValueBuilder {
727731
/// We don't need primary keys since they've already been encoded.
728732
fn push(&mut self, ts: ValueRef, sequence: u64, op_type: u8, fields: Vec<ValueRef>) {
729733
debug_assert_eq!(fields.len(), self.fields.len());
730-
self.timestamp.push_value_ref(ts);
731-
self.sequence.push_value_ref(ValueRef::UInt64(sequence));
732-
self.op_type.push_value_ref(ValueRef::UInt8(op_type));
734+
self.timestamp
735+
.push(ts.as_timestamp().unwrap().unwrap().value());
736+
self.sequence.push(sequence);
737+
self.op_type.push(op_type);
733738
let num_rows = self.timestamp.len();
734739
for (idx, field_value) in fields.into_iter().enumerate() {
735740
if !field_value.is_null() || self.fields[idx].is_some() {
@@ -844,9 +849,23 @@ impl From<ValueBuilder> for Values {
844849
}
845850
})
846851
.collect::<Vec<_>>();
847-
let sequence = Arc::new(value.sequence.finish());
848-
let op_type = Arc::new(value.op_type.finish());
849-
let timestamp = value.timestamp.to_vector();
852+
let sequence = Arc::new(UInt64Vector::from_vec(value.sequence));
853+
let op_type = Arc::new(UInt8Vector::from_vec(value.op_type));
854+
let timestamp: VectorRef = match value.timestamp_type {
855+
ConcreteDataType::Timestamp(TimestampType::Second(_)) => {
856+
Arc::new(TimestampSecondVector::from_vec(value.timestamp))
857+
}
858+
ConcreteDataType::Timestamp(TimestampType::Millisecond(_)) => {
859+
Arc::new(TimestampMillisecondVector::from_vec(value.timestamp))
860+
}
861+
ConcreteDataType::Timestamp(TimestampType::Microsecond(_)) => {
862+
Arc::new(TimestampMicrosecondVector::from_vec(value.timestamp))
863+
}
864+
ConcreteDataType::Timestamp(TimestampType::Nanosecond(_)) => {
865+
Arc::new(TimestampNanosecondVector::from_vec(value.timestamp))
866+
}
867+
_ => unreachable!(),
868+
};
850869

851870
if cfg!(debug_assertions) {
852871
debug_assert_eq!(timestamp.len(), sequence.len());

0 commit comments

Comments
 (0)