feat: replace DensePrimaryKeyCodec with Arc<dyn PrimaryKeyCodec> (#5408)

* feat: use `PrimaryKeyCodec` trait object

* feat: introduce `RewritePrimaryKey`

* chore: apply suggestions from CR

* fix: fix clippy

* chore: add comments
WenyXu authored Jan 23, 2025
1 parent 35b635f commit 05f2167
Showing 20 changed files with 636 additions and 209 deletions.
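The change is mechanical across call sites: instead of constructing `DensePrimaryKeyCodec` directly, each memtable path asks a factory for an `Arc<dyn PrimaryKeyCodec>` chosen by the region's primary-key encoding. Below is a minimal sketch of that pattern with stand-in types; the real trait and factory live in `src/mito2/src/row_converter.rs`, which this diff does not show, so the field layouts and constructor details here are assumptions.

```rust
// Sketch only: stand-ins for the real mito2/store-api types so this
// compiles on its own. Field layouts and constructors are assumed.
use std::sync::Arc;

enum PrimaryKeyEncoding {
    Dense,
    Sparse,
}

struct RegionMetadata {
    primary_key_encoding: PrimaryKeyEncoding,
}

trait PrimaryKeyCodec {
    /// `None` when the codec has no fixed field count (sparse encoding).
    fn num_fields(&self) -> Option<usize>;
}

struct DensePrimaryKeyCodec {
    fields: usize,
}

struct SparsePrimaryKeyCodec;

impl PrimaryKeyCodec for DensePrimaryKeyCodec {
    fn num_fields(&self) -> Option<usize> {
        Some(self.fields)
    }
}

impl PrimaryKeyCodec for SparsePrimaryKeyCodec {
    fn num_fields(&self) -> Option<usize> {
        None
    }
}

/// Shape of the factory every call site in this diff switches to:
/// pick the codec once, based on region metadata, and share it.
fn build_primary_key_codec(metadata: &RegionMetadata) -> Arc<dyn PrimaryKeyCodec> {
    match metadata.primary_key_encoding {
        // The real code derives the field list from the metadata's
        // primary-key columns; `fields: 0` is a placeholder.
        PrimaryKeyEncoding::Dense => Arc::new(DensePrimaryKeyCodec { fields: 0 }),
        PrimaryKeyEncoding::Sparse => Arc::new(SparsePrimaryKeyCodec),
    }
}
```

This is why `partition_tree.rs` below can drop its match on `PrimaryKeyEncoding`: the dispatch moves into the factory, so `PartitionTreeMemtableBuilder::build` no longer needs a `todo!()` arm for sparse regions.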
4 changes: 2 additions & 2 deletions src/mito2/src/memtable/bulk/context.rs
@@ -22,7 +22,7 @@ use store_api::metadata::RegionMetadataRef;
 use store_api::storage::ColumnId;
 use table::predicate::Predicate;
 
-use crate::row_converter::DensePrimaryKeyCodec;
+use crate::row_converter::{build_primary_key_codec, DensePrimaryKeyCodec};
 use crate::sst::parquet::file_range::RangeBase;
 use crate::sst::parquet::format::ReadFormat;
 use crate::sst::parquet::reader::SimpleFilterContext;
@@ -41,7 +41,7 @@ impl BulkIterContext {
         projection: &Option<&[ColumnId]>,
         predicate: Option<Predicate>,
     ) -> Self {
-        let codec = DensePrimaryKeyCodec::new(&region_metadata);
+        let codec = build_primary_key_codec(&region_metadata);
 
         let simple_filters = predicate
             .as_ref()
2 changes: 1 addition & 1 deletion src/mito2/src/memtable/bulk/part.rs
@@ -562,7 +562,7 @@ mod tests {
         let batch_values = batches
             .into_iter()
             .map(|b| {
-                let pk_values = pk_encoder.decode_dense(b.primary_key()).unwrap();
+                let pk_values = pk_encoder.decode(b.primary_key()).unwrap().into_dense();
                 let timestamps = b
                     .timestamps()
                     .as_any()
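`decode_dense(..)` becomes `decode(..)` plus `into_dense()`: a trait-object codec cannot promise dense output, so it returns a composite value that the caller narrows. A plausible shape for that return type is sketched below; the real `CompositeValues` lives in mito2's `row_converter` module and is not shown in this diff, so the variant payloads are assumptions.

```rust
// Stand-ins for datatypes::value::Value and store_api::storage::ColumnId.
type Value = String;
type ColumnId = u32;

/// Assumed shape: decoded primary-key values, tagged with the encoding
/// that produced them.
enum CompositeValues {
    Dense(Vec<(ColumnId, Value)>),
    Sparse(Vec<(ColumnId, Value)>),
}

impl CompositeValues {
    /// Drops the column ids and keeps positional values, as the dense
    /// test code above expects. Panics on sparse input, which is why
    /// only dense-only call sites use it.
    fn into_dense(self) -> Vec<Value> {
        match self {
            CompositeValues::Dense(values) => {
                values.into_iter().map(|(_, value)| value).collect()
            }
            CompositeValues::Sparse(_) => panic!("expected dense primary key values"),
        }
    }
}
```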
33 changes: 12 additions & 21 deletions src/mito2/src/memtable/partition_tree.rs
@@ -31,9 +31,8 @@ use std::sync::atomic::{AtomicI64, AtomicU64, AtomicUsize, Ordering};
 use std::sync::Arc;
 
 use common_base::readable_size::ReadableSize;
-pub(crate) use primary_key_filter::DensePrimaryKeyFilter;
+pub(crate) use primary_key_filter::{DensePrimaryKeyFilter, SparsePrimaryKeyFilter};
 use serde::{Deserialize, Serialize};
-use store_api::codec::PrimaryKeyEncoding;
 use store_api::metadata::RegionMetadataRef;
 use store_api::storage::{ColumnId, SequenceNumber};
 use table::predicate::Predicate;
@@ -48,7 +47,7 @@ use crate::memtable::{
     MemtableId, MemtableRange, MemtableRangeContext, MemtableRanges, MemtableRef, MemtableStats,
 };
 use crate::region::options::MergeMode;
-use crate::row_converter::{DensePrimaryKeyCodec, PrimaryKeyCodec};
+use crate::row_converter::{build_primary_key_codec, PrimaryKeyCodec};
 
 /// Use `1/DICTIONARY_SIZE_FACTOR` of OS memory as dictionary size.
 pub(crate) const DICTIONARY_SIZE_FACTOR: u64 = 8;
@@ -330,22 +329,14 @@ impl PartitionTreeMemtableBuilder {
 
 impl MemtableBuilder for PartitionTreeMemtableBuilder {
     fn build(&self, id: MemtableId, metadata: &RegionMetadataRef) -> MemtableRef {
-        match metadata.primary_key_encoding {
-            PrimaryKeyEncoding::Dense => {
-                let codec = Arc::new(DensePrimaryKeyCodec::new(metadata));
-                Arc::new(PartitionTreeMemtable::new(
-                    id,
-                    codec,
-                    metadata.clone(),
-                    self.write_buffer_manager.clone(),
-                    &self.config,
-                ))
-            }
-            PrimaryKeyEncoding::Sparse => {
-                //TODO(weny): Implement sparse primary key encoding.
-                todo!()
-            }
-        }
+        let codec = build_primary_key_codec(metadata);
+        Arc::new(PartitionTreeMemtable::new(
+            id,
+            codec,
+            metadata.clone(),
+            self.write_buffer_manager.clone(),
+            &self.config,
+        ))
     }
 }
 
@@ -382,7 +373,7 @@ mod tests {
    use store_api::storage::RegionId;

    use super::*;
-    use crate::row_converter::{DensePrimaryKeyCodec, PrimaryKeyCodecExt};
+    use crate::row_converter::DensePrimaryKeyCodec;
    use crate::test_util::memtable_util::{
        self, collect_iter_timestamps, region_metadata_to_row_schema,
    };
@@ -794,7 +785,7 @@
 
         let mut reader = new_memtable.iter(None, None, None).unwrap();
         let batch = reader.next().unwrap().unwrap();
-        let pk = codec.decode(batch.primary_key()).unwrap();
+        let pk = codec.decode(batch.primary_key()).unwrap().into_dense();
         if let Value::String(s) = &pk[2] {
             assert_eq!("10min", s.as_utf8());
         } else {
31 changes: 17 additions & 14 deletions src/mito2/src/memtable/partition_tree/tree.rs
@@ -96,6 +96,21 @@ impl PartitionTree {
         }
     }
 
+    fn verify_primary_key_length(&self, kv: &KeyValue) -> Result<()> {
+        // The sparse primary key codec does not have a fixed number of fields.
+        if let Some(expected_num_fields) = self.row_codec.num_fields() {
+            ensure!(
+                expected_num_fields == kv.num_primary_keys(),
+                PrimaryKeyLengthMismatchSnafu {
+                    expect: expected_num_fields,
+                    actual: kv.num_primary_keys(),
+                }
+            );
+        }
+        // TODO(weny): verify the primary key length for sparse primary key codec.
+        Ok(())
+    }
+
     // TODO(yingwen): The size computed from values is inaccurate.
     /// Write key-values into the tree.
     ///
@@ -110,13 +125,7 @@
         let has_pk = !self.metadata.primary_key.is_empty();
 
         for kv in kvs.iter() {
-            ensure!(
-                kv.num_primary_keys() == self.row_codec.num_fields(),
-                PrimaryKeyLengthMismatchSnafu {
-                    expect: self.row_codec.num_fields(),
-                    actual: kv.num_primary_keys(),
-                }
-            );
+            self.verify_primary_key_length(&kv)?;
             // Safety: timestamp of kv must be both present and a valid timestamp value.
             let ts = kv.timestamp().as_timestamp().unwrap().unwrap().value();
             metrics.min_ts = metrics.min_ts.min(ts);
@@ -161,13 +170,7 @@
     ) -> Result<()> {
         let has_pk = !self.metadata.primary_key.is_empty();
 
-        ensure!(
-            kv.num_primary_keys() == self.row_codec.num_fields(),
-            PrimaryKeyLengthMismatchSnafu {
-                expect: self.row_codec.num_fields(),
-                actual: kv.num_primary_keys(),
-            }
-        );
+        self.verify_primary_key_length(&kv)?;
         // Safety: timestamp of kv must be both present and a valid timestamp value.
         let ts = kv.timestamp().as_timestamp().unwrap().unwrap().value();
         metrics.min_ts = metrics.min_ts.min(ts);
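The new `verify_primary_key_length` centralizes the arity check that both write paths previously inlined, and it only fires when the codec reports a fixed field count; with a sparse codec, `num_fields()` evidently returns `None` and the check is skipped (the in-tree TODO covers that case). A compact model of that logic, under the assumption that the trait method now returns `Option<usize>`:

```rust
/// Assumed check: a dense codec knows its arity, a sparse codec does not.
fn verify_primary_key_length(
    expected_num_fields: Option<usize>,
    actual: usize,
) -> Result<(), String> {
    if let Some(expected) = expected_num_fields {
        if expected != actual {
            return Err(format!(
                "primary key length mismatch: expect {expected}, actual {actual}"
            ));
        }
    }
    // Sparse codec: no fixed arity, so the length check is deferred (see TODO).
    Ok(())
}

fn main() {
    assert!(verify_primary_key_length(Some(3), 3).is_ok());
    assert!(verify_primary_key_length(Some(3), 2).is_err());
    assert!(verify_primary_key_length(None, 2).is_ok()); // sparse: skipped
}
```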
16 changes: 11 additions & 5 deletions src/mito2/src/memtable/time_series.rs
@@ -51,7 +51,7 @@ use crate::metrics::{READ_ROWS_TOTAL, READ_STAGE_ELAPSED};
 use crate::read::dedup::LastNonNullIter;
 use crate::read::{Batch, BatchBuilder, BatchColumn};
 use crate::region::options::MergeMode;
-use crate::row_converter::{DensePrimaryKeyCodec, PrimaryKeyCodec, PrimaryKeyCodecExt};
+use crate::row_converter::{DensePrimaryKeyCodec, PrimaryKeyCodecExt};
 
 /// Initial vector builder capacity.
 const INITIAL_BUILDER_CAPACITY: usize = 0;
@@ -146,12 +146,13 @@ impl TimeSeriesMemtable {
 
     fn write_key_value(&self, kv: KeyValue, stats: &mut WriteMetrics) -> Result<()> {
         ensure!(
-            kv.num_primary_keys() == self.row_codec.num_fields(),
+            self.row_codec.num_fields() == kv.num_primary_keys(),
             PrimaryKeyLengthMismatchSnafu {
                 expect: self.row_codec.num_fields(),
-                actual: kv.num_primary_keys()
+                actual: kv.num_primary_keys(),
             }
         );
+
         let primary_key_encoded = self.row_codec.encode(kv.primary_keys())?;
         let fields = kv.fields().collect::<Vec<_>>();
 
@@ -585,7 +586,7 @@ fn prune_primary_key(
     let pk_values = if let Some(pk_values) = series.pk_cache.as_ref() {
         pk_values
     } else {
-        let pk_values = codec.decode(pk);
+        let pk_values = codec.decode_dense_without_column_id(pk);
        if let Err(e) = pk_values {
            error!(e; "Failed to decode primary key");
            return true;
@@ -1176,7 +1177,12 @@ mod tests {
         let row_codec = Arc::new(DensePrimaryKeyCodec::with_fields(
             schema
                 .primary_key_columns()
-                .map(|c| SortField::new(c.column_schema.data_type.clone()))
+                .map(|c| {
+                    (
+                        c.column_id,
+                        SortField::new(c.column_schema.data_type.clone()),
+                    )
+                })
                 .collect(),
         ));
         let set = Arc::new(SeriesSet::new(schema.clone(), row_codec));
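Two smaller API shifts show up in this file: `with_fields` now takes `(ColumnId, SortField)` pairs instead of bare `SortField`s, presumably so the dense codec can attach a column id to each decoded value, and the `pk_cache` fill path in `prune_primary_key` switches to `decode_dense_without_column_id`, which keeps returning plain positional values. A hedged sketch of that constructor surface, with stand-in types:

```rust
type ColumnId = u32;

/// Stand-in for the real SortField: a per-column sort/type descriptor.
struct SortField {
    data_type: String,
}

impl SortField {
    fn new(data_type: String) -> Self {
        Self { data_type }
    }
}

/// Assumed constructor shape after this change: every encoded field
/// carries the id of the column it came from.
struct DensePrimaryKeyCodec {
    fields: Vec<(ColumnId, SortField)>,
}

impl DensePrimaryKeyCodec {
    fn with_fields(fields: Vec<(ColumnId, SortField)>) -> Self {
        Self { fields }
    }
}

fn main() {
    // Mirrors the updated test above: (column_id, SortField) pairs.
    let codec = DensePrimaryKeyCodec::with_fields(vec![
        (1, SortField::new("string".to_string())),
        (2, SortField::new("timestamp".to_string())),
    ]);
    assert_eq!(codec.fields.len(), 2);
}
```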
11 changes: 6 additions & 5 deletions src/mito2/src/read.rs
@@ -40,7 +40,7 @@ use datatypes::arrow::compute::SortOptions;
 use datatypes::arrow::row::{RowConverter, SortField};
 use datatypes::prelude::{ConcreteDataType, DataType, ScalarVector};
 use datatypes::types::TimestampType;
-use datatypes::value::{Value, ValueRef};
+use datatypes::value::ValueRef;
 use datatypes::vectors::{
     BooleanVector, Helper, TimestampMicrosecondVector, TimestampMillisecondVector,
     TimestampNanosecondVector, TimestampSecondVector, UInt32Vector, UInt64Vector, UInt8Vector,
@@ -58,6 +58,7 @@ use crate::error::{
 use crate::memtable::BoxedBatchIterator;
 use crate::metrics::{READ_BATCHES_RETURN, READ_ROWS_RETURN, READ_STAGE_ELAPSED};
 use crate::read::prune::PruneReader;
+use crate::row_converter::CompositeValues;
 
 /// Storage internal representation of a batch of rows for a primary key (time series).
 ///
@@ -68,7 +69,7 @@ pub struct Batch {
     /// Primary key encoded in a comparable form.
     primary_key: Vec<u8>,
     /// Possibly decoded `primary_key` values. Some places would decode it in advance.
-    pk_values: Option<Vec<Value>>,
+    pk_values: Option<CompositeValues>,
     /// Timestamps of rows, should be sorted and not null.
     timestamps: VectorRef,
     /// Sequences of rows
@@ -114,12 +115,12 @@ impl Batch {
     }
 
     /// Returns possibly decoded primary-key values.
-    pub fn pk_values(&self) -> Option<&[Value]> {
-        self.pk_values.as_deref()
+    pub fn pk_values(&self) -> Option<&CompositeValues> {
+        self.pk_values.as_ref()
     }
 
     /// Sets possibly decoded primary-key values.
-    pub fn set_pk_values(&mut self, pk_values: Vec<Value>) {
+    pub fn set_pk_values(&mut self, pk_values: CompositeValues) {
         self.pk_values = Some(pk_values);
     }
 
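With `Batch` caching `CompositeValues` instead of `Vec<Value>`, `pk_values()` hands back `Option<&CompositeValues>`, so consumers that used to index a slice now resolve values through the enum. A self-contained sketch of a consumer, using the same assumed variant payloads as the sketch after `part.rs` above:

```rust
type ColumnId = u32;
type Value = String;

// Assumed variant payloads; the real enum lives in mito2's row_converter.
enum CompositeValues {
    Dense(Vec<(ColumnId, Value)>),
    Sparse(Vec<(ColumnId, Value)>),
}

/// Looks up one primary-key column in a decoded batch key. The old
/// `Option<&[Value]>` accessor forced positional indexing; the new shape
/// lets callers resolve by column id regardless of encoding.
fn pk_value_by_column(pk: &CompositeValues, column_id: ColumnId) -> Option<&Value> {
    let pairs = match pk {
        CompositeValues::Dense(pairs) | CompositeValues::Sparse(pairs) => pairs,
    };
    pairs.iter().find(|(id, _)| *id == column_id).map(|(_, value)| value)
}

fn main() {
    let pk = CompositeValues::Dense(vec![
        (1, "host-1".to_string()),
        (2, "dc-1".to_string()),
    ]);
    assert_eq!(pk_value_by_column(&pk, 2).map(String::as_str), Some("dc-1"));
}
```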