Skip to content

Commit c85e2f8

Browse files
sui-indexer-alt-framework: separate trailing_init_watermark from init_watermark
1 parent 3bd8597 commit c85e2f8

File tree

10 files changed

+224
-163
lines changed

10 files changed

+224
-163
lines changed

crates/sui-analytics-indexer/src/store/mod.rs

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ use std::sync::RwLock;
2323
use std::time::Duration;
2424
use std::time::Instant;
2525

26-
use anyhow::Result;
2726
use async_trait::async_trait;
2827
use object_store::PutPayload;
2928
use object_store::path::Path as ObjectPath;
@@ -33,8 +32,6 @@ use sui_indexer_alt_framework::store::Connection;
3332
use sui_indexer_alt_framework::store::Store;
3433
use sui_indexer_alt_framework::store::TransactionalStore;
3534
use sui_indexer_alt_framework_store_traits::CommitterWatermark;
36-
use sui_indexer_alt_framework_store_traits::InitWatermark;
37-
use sui_indexer_alt_framework_store_traits::init_with_committer_watermark;
3835
use sui_types::base_types::EpochId;
3936
use tokio::sync::mpsc;
4037
use tracing::debug;
@@ -287,7 +284,7 @@ impl AnalyticsStore {
287284
&self,
288285
first_checkpoint: Option<u64>,
289286
last_checkpoint: Option<u64>,
290-
) -> Result<(Option<u64>, Option<u64>)> {
287+
) -> anyhow::Result<(Option<u64>, Option<u64>)> {
291288
match &self.mode {
292289
StoreMode::Live(_) => Ok((first_checkpoint, last_checkpoint)),
293290
StoreMode::Migration(store) => {
@@ -533,7 +530,7 @@ impl<'a> AnalyticsConnection<'a> {
533530
pub async fn commit_batch<P: Processor>(
534531
&mut self,
535532
batch_from_framework: &[CheckpointRows],
536-
) -> Result<usize> {
533+
) -> anyhow::Result<usize> {
537534
let pipeline = P::NAME;
538535
let pipeline_config = self.pipeline_config(pipeline);
539536

@@ -623,18 +620,6 @@ impl TransactionalStore for AnalyticsStore {
623620

624621
#[async_trait]
625622
impl Connection for AnalyticsConnection<'_> {
626-
/// Initialize watermark.
627-
///
628-
/// In live mode: Watermarks are derived from file names, so just delegates to `committer_watermark`.
629-
/// In migration mode: Delegates to `MigrationStore::init_watermark`.
630-
async fn init_watermark(
631-
&mut self,
632-
pipeline_task: &str,
633-
init_watermark: InitWatermark,
634-
) -> anyhow::Result<InitWatermark> {
635-
init_with_committer_watermark(self, pipeline_task, init_watermark).await
636-
}
637-
638623
/// Determine the watermark.
639624
///
640625
/// In live mode: scans file names in the object store.

crates/sui-indexer-alt-consistent-store/src/store/mod.rs

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,7 @@ use prometheus::Registry;
1313
use scoped_futures::ScopedBoxFuture;
1414
use sui_indexer_alt_framework::service::Service;
1515
use sui_indexer_alt_framework::store::CommitterWatermark;
16-
use sui_indexer_alt_framework::store::InitWatermark;
1716
use sui_indexer_alt_framework::store::Store as _;
18-
use sui_indexer_alt_framework::store::init_with_committer_watermark;
1917
use sui_indexer_alt_framework::store::{self};
2018

2119
use crate::db::Db;
@@ -167,14 +165,6 @@ impl<S: Send + Sync + 'static> store::TransactionalStore for Store<S> {
167165

168166
#[async_trait]
169167
impl<S: Send + Sync> store::Connection for Connection<'_, S> {
170-
async fn init_watermark(
171-
&mut self,
172-
pipeline_task: &str,
173-
init_watermark: InitWatermark,
174-
) -> anyhow::Result<InitWatermark> {
175-
init_with_committer_watermark(self, pipeline_task, init_watermark).await
176-
}
177-
178168
async fn committer_watermark(
179169
&mut self,
180170
pipeline_task: &str,

crates/sui-indexer-alt-framework-store-traits/src/lib.rs

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,21 @@ pub trait Connection: Send {
1515
/// Returns the `InitWatermark` based on the existing watermark if it exists.
1616
/// Otherwise, initializes a new watermark record with `InitWatermark` and returns
1717
/// the value passed in.
18+
///
19+
/// The default implementation does not perform initialization and delegates to `committer_watermark`.
1820
async fn init_watermark(
1921
&mut self,
2022
pipeline_task: &str,
21-
init_watermark: InitWatermark,
22-
) -> anyhow::Result<InitWatermark>;
23+
_init_watermark: InitWatermark,
24+
) -> anyhow::Result<InitWatermark> {
25+
let checkpoint_hi_inclusive = self
26+
.committer_watermark(pipeline_task)
27+
.await?
28+
.map(|w| w.checkpoint_hi_inclusive);
29+
Ok(InitWatermark {
30+
checkpoint_hi_inclusive,
31+
})
32+
}
2333

2434
/// Given a `pipeline_task` representing either a pipeline name or a pipeline with an associated
2535
/// task (formatted as `{pipeline}{Store::DELIMITER}{task}`), return the committer watermark
@@ -45,6 +55,15 @@ pub trait Connection: Send {
4555
/// the pruning subsystem.
4656
#[async_trait]
4757
pub trait TrailingConnection: Connection {
58+
/// Returns the `InitWatermark` based on the existing watermark if it exists.
59+
/// Otherwise, initializes a new watermark record with `InitWatermark` and returns
60+
/// the value passed in.
61+
async fn trailing_init_watermark(
62+
&mut self,
63+
pipeline_task: &str,
64+
trailing_init_watermark: TrailingInitWatermark,
65+
) -> anyhow::Result<TrailingInitWatermark>;
66+
4867
/// Given a pipeline, return the reader watermark from the database. This is used by the indexer
4968
/// to determine the new `reader_lo` or inclusive lower bound of available data.
5069
async fn reader_watermark(
@@ -120,12 +139,18 @@ pub trait TransactionalStore: Store {
120139
&'r mut Self::Connection<'_>,
121140
) -> ScopedBoxFuture<'a, 'r, anyhow::Result<R>>;
122141
}
123-
124142
/// Used during watermark initialization to set and return state.
125143
#[derive(Default, Debug, Clone, Copy, PartialEq)]
126144
pub struct InitWatermark {
127145
/// Calculated by the framework as `default_next_checkpoint.checked_sub(1)`.
128146
pub checkpoint_hi_inclusive: Option<u64>,
147+
}
148+
149+
/// Used during watermark initialization to set and return state.
150+
#[derive(Default, Debug, Clone, Copy, PartialEq)]
151+
pub struct TrailingInitWatermark {
152+
/// Calculated by the framework as `default_next_checkpoint.checked_sub(1)`.
153+
pub checkpoint_hi_inclusive: Option<u64>,
129154
/// Calculated by the framework as `default_next_checkpoint`.
130155
pub reader_lo: u64,
131156
}
@@ -212,23 +237,6 @@ impl PrunerWatermark {
212237
}
213238
}
214239

215-
/// A utility function for connections that do not have special initialization logic. These
216-
/// connections delegate initialization to `Connection::committer_watermark`.
217-
pub async fn init_with_committer_watermark(
218-
connection: &mut impl Connection,
219-
pipeline_task: &str,
220-
init_watermark: InitWatermark,
221-
) -> anyhow::Result<InitWatermark> {
222-
let checkpoint_hi_inclusive = connection
223-
.committer_watermark(pipeline_task)
224-
.await?
225-
.map(|w| w.checkpoint_hi_inclusive);
226-
Ok(InitWatermark {
227-
checkpoint_hi_inclusive,
228-
..init_watermark
229-
})
230-
}
231-
232240
/// Check that the pipeline name does not contain the store's delimiter, and construct the string
233241
/// used for tracking a pipeline's watermarks in the store. This is either the pipeline name itself,
234242
/// or `{pipeline}{Store::DELIMITER}{task}` if a task name is provided.

crates/sui-indexer-alt-framework/src/lib.rs

Lines changed: 69 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@ use ingestion::IngestionService;
1414
use ingestion::ingestion_client::IngestionClient;
1515
use metrics::IndexerMetrics;
1616
use prometheus::Registry;
17-
use sui_indexer_alt_framework_store_traits::Connection;
18-
use sui_indexer_alt_framework_store_traits::InitWatermark;
1917
use sui_indexer_alt_framework_store_traits::Store;
2018
use sui_indexer_alt_framework_store_traits::TrailingConnection;
19+
use sui_indexer_alt_framework_store_traits::TrailingInitWatermark;
2120
use sui_indexer_alt_framework_store_traits::TransactionalStore;
2221
use sui_indexer_alt_framework_store_traits::pipeline_task;
22+
use sui_indexer_alt_framework_store_traits::{Connection, InitWatermark};
2323
use tracing::info;
2424

2525
use crate::metrics::IngestionMetrics;
@@ -294,7 +294,13 @@ impl<S: Store> Indexer<S> {
294294
where
295295
H: concurrent::Handler<Store = S> + Send + Sync + 'static,
296296
{
297-
let Some(next_checkpoint) = self.add_pipeline::<H>().await? else {
297+
let default_next_checkpoint = self.default_next_checkpoint;
298+
let Some(next_checkpoint) = self
299+
.add_pipeline::<H>(async |pt, conn| {
300+
Self::next_checkpoint(default_next_checkpoint, pt, conn).await
301+
})
302+
.await?
303+
else {
298304
return Ok(());
299305
};
300306

@@ -345,6 +351,26 @@ impl<S: Store> Indexer<S> {
345351
Ok(service)
346352
}
347353

354+
fn next_checkpoint<'c>(
355+
default_next_checkpoint: u64,
356+
pipeline_task: String,
357+
mut conn: S::Connection<'c>,
358+
) -> impl Future<Output = anyhow::Result<u64>> + 'c {
359+
async move {
360+
let InitWatermark {
361+
checkpoint_hi_inclusive,
362+
} = conn
363+
.init_watermark(
364+
&pipeline_task,
365+
InitWatermark {
366+
checkpoint_hi_inclusive: default_next_checkpoint.checked_sub(1),
367+
},
368+
)
369+
.await?;
370+
Ok(checkpoint_hi_inclusive.map_or(0, |c| c + 1))
371+
}
372+
}
373+
348374
/// Determine the checkpoint for the pipeline to resume processing from. This is either the
349375
/// checkpoint after its watermark, or if that doesn't exist, then the provided
350376
/// [Self::first_checkpoint], and if that is not set, then 0 (genesis).
@@ -353,7 +379,10 @@ impl<S: Store> Indexer<S> {
353379
/// calculated above.
354380
///
355381
/// Returns `Ok(None)` if the pipeline is disabled.
356-
async fn add_pipeline<P: Processor + 'static>(&mut self) -> Result<Option<u64>> {
382+
async fn add_pipeline<P: Processor + 'static>(
383+
&mut self,
384+
next_checkpoint_fn: impl for<'c> AsyncFnOnce(String, S::Connection<'c>) -> anyhow::Result<u64>,
385+
) -> Result<Option<u64>> {
357386
ensure!(
358387
self.added_pipelines.insert(P::NAME),
359388
"Pipeline {:?} already added",
@@ -367,7 +396,7 @@ impl<S: Store> Indexer<S> {
367396
return Ok(None);
368397
}
369398

370-
let mut conn = self
399+
let conn = self
371400
.store
372401
.connect()
373402
.await
@@ -376,21 +405,9 @@ impl<S: Store> Indexer<S> {
376405
let pipeline_task =
377406
pipeline_task::<S>(P::NAME, self.task.as_ref().map(|t| t.task.as_str()))?;
378407

379-
let InitWatermark {
380-
checkpoint_hi_inclusive,
381-
reader_lo,
382-
} = conn
383-
.init_watermark(
384-
&pipeline_task,
385-
InitWatermark {
386-
checkpoint_hi_inclusive: self.default_next_checkpoint.checked_sub(1),
387-
reader_lo: self.default_next_checkpoint,
388-
},
389-
)
408+
let next_checkpoint = next_checkpoint_fn(pipeline_task.clone(), conn)
390409
.await
391-
.with_context(|| format!("Failed to init watermark for {pipeline_task}"))?;
392-
393-
let next_checkpoint = checkpoint_hi_inclusive.map_or(reader_lo, |c| c + 1);
410+
.with_context(|| format!("Failed to calculate next_checkpoint for {pipeline_task}"))?;
394411

395412
self.first_ingestion_checkpoint = next_checkpoint.min(self.first_ingestion_checkpoint);
396413

@@ -412,7 +429,25 @@ where
412429
where
413430
H: concurrent::Handler<Store = S> + Send + Sync + 'static,
414431
{
415-
let Some(next_checkpoint) = self.add_pipeline::<H>().await? else {
432+
let default_next_checkpoint = self.default_next_checkpoint;
433+
let Some(next_checkpoint) = self
434+
.add_pipeline::<H>(async |pipeline_task, mut conn| {
435+
let TrailingInitWatermark {
436+
checkpoint_hi_inclusive,
437+
reader_lo,
438+
} = conn
439+
.trailing_init_watermark(
440+
&pipeline_task,
441+
TrailingInitWatermark {
442+
checkpoint_hi_inclusive: default_next_checkpoint.checked_sub(1),
443+
reader_lo: default_next_checkpoint,
444+
},
445+
)
446+
.await?;
447+
Ok(checkpoint_hi_inclusive.map_or(reader_lo, |c| c + 1))
448+
})
449+
.await?
450+
else {
416451
return Ok(());
417452
};
418453

@@ -449,10 +484,6 @@ impl<T: TransactionalStore> Indexer<T> {
449484
where
450485
H: sequential::Handler<Store = T> + Send + Sync + 'static,
451486
{
452-
let Some(next_checkpoint) = self.add_pipeline::<H>().await? else {
453-
return Ok(());
454-
};
455-
456487
if self.task.is_some() {
457488
bail!(
458489
"Sequential pipelines do not support pipeline tasks. \
@@ -461,6 +492,16 @@ impl<T: TransactionalStore> Indexer<T> {
461492
);
462493
}
463494

495+
let default_next_checkpoint = self.default_next_checkpoint;
496+
let Some(next_checkpoint) = self
497+
.add_pipeline::<H>(async |pt, conn| {
498+
Self::next_checkpoint(default_next_checkpoint, pt, conn).await
499+
})
500+
.await?
501+
else {
502+
return Ok(());
503+
};
504+
464505
// Track the minimum next_checkpoint across all sequential pipelines
465506
self.next_sequential_checkpoint = Some(
466507
self.next_sequential_checkpoint
@@ -740,7 +781,7 @@ mod tests {
740781

741782
let mut conn = store.connect().await.unwrap();
742783

743-
conn.init_watermark(A::NAME, InitWatermark::default())
784+
conn.trailing_init_watermark(A::NAME, TrailingInitWatermark::default())
744785
.await
745786
.unwrap();
746787
conn.set_committer_watermark(
@@ -753,7 +794,7 @@ mod tests {
753794
.await
754795
.unwrap();
755796

756-
conn.init_watermark(B::NAME, InitWatermark::default())
797+
conn.trailing_init_watermark(B::NAME, TrailingInitWatermark::default())
757798
.await
758799
.unwrap();
759800
conn.set_committer_watermark(
@@ -766,7 +807,7 @@ mod tests {
766807
.await
767808
.unwrap();
768809

769-
conn.init_watermark(C::NAME, InitWatermark::default())
810+
conn.trailing_init_watermark(C::NAME, TrailingInitWatermark::default())
770811
.await
771812
.unwrap();
772813
conn.set_committer_watermark(
@@ -779,7 +820,7 @@ mod tests {
779820
.await
780821
.unwrap();
781822

782-
conn.init_watermark(D::NAME, InitWatermark::default())
823+
conn.trailing_init_watermark(D::NAME, TrailingInitWatermark::default())
783824
.await
784825
.unwrap();
785826
conn.set_committer_watermark(

0 commit comments

Comments
 (0)