Merged
Changes from 9 commits
@@ -61,6 +61,13 @@ object PageLimit {
)
}

/** Limit with no constraints. Must not be used in production; use only for testing. */
case class UnboundLimit private (limit: Int) extends Limit
Contributor:
Maybe rather do // TODO (#767): make configurable on Limit.MaxPage so you don't need this?

Contributor Author (@isegall-da), Dec 18, 2025:
Fair. I half-did #767 by making it an argument, but not fully configurable (e.g. by an app config), so this tackles only the use case of overriding the max per usage in code.

Contributor:
Nice, thanks

object UnboundLimit {
def apply(limit: Int): UnboundLimit = new UnboundLimit(limit)
}
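
For illustration, a minimal sketch of the alternative discussed in the thread above, i.e. making the maximum page size an argument rather than introducing UnboundLimit. PageLimitSketch, DefaultMaxPageSize and the default of 1000 are assumptions for this sketch, not the real Limit API:

object PageLimitSketch {
  val DefaultMaxPageSize: Int = 1000 // assumed default, for illustration only

  final case class PageLimit(limit: Int)

  // A caller (e.g. a test) can raise the maximum per usage instead of needing UnboundLimit.
  def tryCreate(limit: Int, maxPageSize: Int = DefaultMaxPageSize): PageLimit = {
    require(limit > 0 && limit <= maxPageSize, s"limit must be in 1..$maxPageSize, got $limit")
    PageLimit(limit)
  }
}

Under those assumptions, a test could call PageLimitSketch.tryCreate(100000, maxPageSize = Int.MaxValue) instead of using a dedicated unbounded type.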

trait LimitHelpers { _: NamedLogging =>

protected final def applyLimit[CC[_], C](
@@ -71,8 +78,6 @@
traceContext: TraceContext
): C = {
limit match {
case PageLimit(limit) =>
result.take(limit.intValue())
case HardLimit(limit) =>
val resultSize = result.size
if (resultSize > limit) {
@@ -86,6 +91,8 @@
} else {
result
}
case _ =>
result.take(limit.limit.intValue())
}
}

@@ -95,8 +102,6 @@
result: C & scala.collection.IterableOps[?, CC, C],
): C = {
limit match {
case PageLimit(limit) =>
result.take(limit.intValue())
case HardLimit(limit) =>
val resultSize = result.size
if (resultSize > limit) {
@@ -108,13 +113,15 @@
} else {
result
}
case _ =>
result.take(limit.limit.intValue())
}
}

protected def sqlLimit(limit: Limit): Int = {
limit match {
case HardLimit(limit) => limit + 1
case PageLimit(limit) => limit
case _ => limit.limit
}
}

@@ -139,6 +139,10 @@ abstract class StoreTest extends AsyncWordSpec with BaseTest {
LfContractId.assertFromString("00" + f"$cIdCounter%064x").coid
}

protected def resetCIdCounter() = {
cIdCounter = 0
}

protected def time(n: Long): CantonTimestamp = CantonTimestamp.ofEpochSecond(n)

private def schedule(
@@ -0,0 +1,111 @@
// Copyright (c) 2024 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package org.lfdecentralizedtrust.splice.scan.store.bulk

import scala.concurrent.ExecutionContext
import com.digitalasset.canton.data.CantonTimestamp
import com.digitalasset.canton.logging.{NamedLoggerFactory, NamedLogging}
import com.digitalasset.canton.tracing.TraceContext
import org.apache.pekko.actor.ActorSystem
import org.apache.pekko.stream.OverflowStrategy
import org.apache.pekko.stream.scaladsl.{Sink, Source}
import org.apache.pekko.util.ByteString
import org.lfdecentralizedtrust.splice.scan.admin.http.CompactJsonScanHttpEncodings
import org.lfdecentralizedtrust.splice.scan.store.AcsSnapshotStore
import org.lfdecentralizedtrust.splice.store.HardLimit

import scala.concurrent.Future
import io.circe.syntax.*
import java.nio.ByteBuffer

case class BulkStorageConfig(
dbReadChunkSize: Int,
maxFileSize: Long,
)

object BulkStorageConfigs {
val bulkStorageConfigV1 = BulkStorageConfig(
1000,
(64 * 1024 * 1024).toLong,
Contributor:
nitpick but you can also do 64L * 1024 * 1024

Contributor Author:
done

)
val bulkStorageTestConfig = BulkStorageConfig(
Contributor:
This can be defined in a test instead of here?

1000,
50000L,
)
}

sealed trait Position
Contributor:
nitpick: Maybe nice to put these in an object

Contributor Author:
done

case object Start extends Position
case object End extends Position
final case class Index(value: Long) extends Position

class AcsSnapshotBulkStorage(
val config: BulkStorageConfig,
val acsSnapshotStore: AcsSnapshotStore,
val s3Connection: S3BucketConnection,
override val loggerFactory: NamedLoggerFactory,
)(implicit actorSystem: ActorSystem, tc: TraceContext, ec: ExecutionContext)
extends NamedLogging {

def getAcsSnapshotChunk(
migrationId: Long,
timestamp: CantonTimestamp,
after: Option[Long],
): Future[(Position, ByteString)] = {
for {
snapshot <- acsSnapshotStore.queryAcsSnapshot(
migrationId,
snapshot = timestamp,
after,
limit = HardLimit.tryCreate(config.dbReadChunkSize),
Seq.empty,
Contributor:
nit: since you do limit =, maybe it's nicer to do partyIds = Seq.empty, templates = Seq.empty for clarity

Contributor Author:
Fixed (removed the limit =)

Contributor:
I think it makes sense to default partyIds: Seq[PartyId] = Seq.empty, templates: Seq[PackageQualifiedName] = Seq.empty on def queryAcsSnapshot(...), which is nice and clean, but it's not necessary in this PR.

Seq.empty,
)
} yield {
val encoded = snapshot.createdEventsInPage.map(event =>
CompactJsonScanHttpEncodings.javaToHttpCreatedEvent(event.eventId, event.event)
)
val contractsStr = encoded.map(_.asJson.noSpacesSortKeys).mkString("\n") + "\n"
val contractsBytes = ByteString(contractsStr)
Contributor:
contractsStr.getBytes(StandardCharsets.UTF_8)

logger.debug(
s"Read ${encoded.length} contracts from ACS, to a bytestring of size ${contractsBytes.length} bytes"
)
(snapshot.afterToken.fold(End: Position)(Index(_)), contractsBytes)
}

}
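
A rough sketch of the defaulting suggested in the thread on the queryAcsSnapshot call above. The parameter names follow that thread, but the surrounding types here are abstract stand-ins rather than the real store API:

import scala.concurrent.Future
import com.digitalasset.canton.data.CantonTimestamp

trait AcsSnapshotQueriesSketch {
  // Stand-ins for the real types; import paths are intentionally left out.
  type PartyId
  type PackageQualifiedName
  type Limit
  type QueryAcsSnapshotResult

  // Defaulting the filter arguments keeps call sites like getAcsSnapshotChunk clean.
  def queryAcsSnapshot(
      migrationId: Long,
      snapshot: CantonTimestamp,
      after: Option[Long],
      limit: Limit,
      partyIds: Seq[PartyId] = Seq.empty,
      templates: Seq[PackageQualifiedName] = Seq.empty,
  ): Future[QueryAcsSnapshotResult]
}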

def dumpAcsSnapshot(migrationId: Long, timestamp: CantonTimestamp): Future[Unit] = {

@SuppressWarnings(Array("org.wartremover.warts.Var"))
var idx = 0
Contributor:
This needs to be a volatile member, or an AtomicInteger/AtomicReference: you are mutating it concurrently from a future (from a thread on the ExecutionContext that runs the stream).

Contributor Author:
Hmm.. I originally had it as an AtomicInteger, but I'm mutating it from a future with parallelism 1, so I convinced myself that it can be a var. But you're probably right that it's better to just be prudent and revert to that.

Just for my education: is my reasoning that parallelism 1 is good enough sound, or am I still missing something?

Contributor:
The 1 does not matter: you are still accessing the var from that one other thread, which isn't allowed in principle. I shoot myself in the foot every time I try to be too clever or loose with these rules. Also, code changes, and the next person doesn't see the special case, or starts copying a bad way of doing things.
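
As a sketch of how the shared counter could be avoided entirely (rather than made atomic), zipWithIndex can carry the object index inside the stream. zstdChunks and writeChunk below are stand-ins for the ZstdGroupedWeight output and for s3Connection.writeFullObject, not names from this PR:

import java.nio.ByteBuffer
import scala.concurrent.{ExecutionContext, Future}
import org.apache.pekko.stream.Materializer
import org.apache.pekko.stream.scaladsl.{Sink, Source}
import org.apache.pekko.util.ByteString

def writeAllChunks(
    zstdChunks: Source[ByteString, Any],
    writeChunk: (String, ByteBuffer) => Unit,
)(implicit mat: Materializer, ec: ExecutionContext): Future[Unit] =
  zstdChunks.zipWithIndex
    .mapAsync(1) { case (zstdObj, idx) =>
      // The index travels with the element, so no var/volatile/AtomicInteger is needed.
      Future(writeChunk(s"snapshot_$idx.zstd", ByteBuffer.wrap(zstdObj.toArrayUnsafe())))
    }
    .runWith(Sink.ignore)
    .map(_ => ())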


Source
.unfoldAsync(Start: Position) {
case Start => getAcsSnapshotChunk(migrationId, timestamp, None).map(Some(_))
case Index(i) => getAcsSnapshotChunk(migrationId, timestamp, Some(i)).map(Some(_))
case End => Future.successful(None)
}
.via(ZstdGroupedWeight(config.maxFileSize))
// Add a buffer so that the next object continues accumulating while we write the previous one
.buffer(
1,
OverflowStrategy.backpressure,
)
.mapAsync(1) { zstdObj =>
val objectKey = s"snapshot_$idx.zstd"
Future {
// TODO(#3429): For now, we accumulate the full object in memory, then write it as a whole.
// Consider streaming it to S3 instead. Need to make sure that it then handles crashes correctly,
// i.e. that until we tell S3 that we're done writing, if we stop, then S3 throws away the
// partially written object.
// TODO(#3429): Error handling
val _ = s3Connection.writeFullObject(objectKey, ByteBuffer.wrap(zstdObj.toArrayUnsafe()))
idx += 1
}
}
.runWith(Sink.ignore)

}.map(_ => ())
}
@@ -0,0 +1,73 @@
// Copyright (c) 2024 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package org.lfdecentralizedtrust.splice.scan.store.bulk

import com.digitalasset.canton.data.CantonTimestamp
import com.digitalasset.canton.logging.{NamedLoggerFactory, NamedLogging}
import com.digitalasset.canton.tracing.TraceContext
import org.apache.pekko.stream.{Attributes, Outlet, SourceShape}
import org.apache.pekko.stream.stage.{AsyncCallback, GraphStage, GraphStageLogic, OutHandler}
import org.lfdecentralizedtrust.splice.scan.store.AcsSnapshotStore
import org.lfdecentralizedtrust.splice.scan.store.AcsSnapshotStore.QueryAcsSnapshotResult
import org.lfdecentralizedtrust.splice.store.PageLimit
import org.lfdecentralizedtrust.splice.store.events.SpliceCreatedEvent

import java.util.concurrent.atomic.AtomicReference
import scala.concurrent.ExecutionContext
import scala.util.{Failure, Success}

case class AcsSnapshotSource(
Contributor:
I don't think you need to write your own GraphStage for this; you can do it with unfoldAsync (or statefulMapConcat, but unfoldAsync is probably easier).

Contributor Author:
Argh, sorry, this is an unused leftover. I indeed already reimplemented it with unfoldAsync.

acsSnapshotStore: AcsSnapshotStore,
timestamp: CantonTimestamp,
migrationId: Long,
override val loggerFactory: NamedLoggerFactory,
)(implicit tc: TraceContext, ec: ExecutionContext)
extends GraphStage[SourceShape[Vector[SpliceCreatedEvent]]]
with NamedLogging {
val out: Outlet[Vector[SpliceCreatedEvent]] = Outlet("AcsSnapshotSource")
override def shape: SourceShape[Vector[SpliceCreatedEvent]] = SourceShape(out)

val numUpdatesPerQuery = 1000

override def createLogic(inheritedAttributes: Attributes): GraphStageLogic = {
new GraphStageLogic(shape) with OutHandler {
val token = new AtomicReference[Option[Long]](None)

val asyncCallback: AsyncCallback[QueryAcsSnapshotResult] = getAsyncCallback {
case result: QueryAcsSnapshotResult =>
if (result.createdEventsInPage.isEmpty) {
complete(out)
token.set(None)
} else {
push(out, result.createdEventsInPage)
token.set(result.afterToken)
}
case _ =>
logger.error("asyncCallback unexpectedly called with an error")
}

val failureCallback: AsyncCallback[Throwable] = getAsyncCallback { ex =>
fail(out, ex)
}

override def onPull(): Unit = {
acsSnapshotStore
.queryAcsSnapshot(
migrationId,
timestamp,
token.get(),
PageLimit.tryCreate(numUpdatesPerQuery),
Seq.empty,
Seq.empty,
)
.onComplete {
case Success(value) => asyncCallback.invoke(value)
case Failure(exception) => failureCallback.invoke(exception)
}
}
setHandler(out, this)
}
}

}
@@ -0,0 +1,68 @@
// Copyright (c) 2024 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package org.lfdecentralizedtrust.splice.scan.store.bulk

import com.digitalasset.canton.logging.{NamedLoggerFactory, NamedLogging}
import com.digitalasset.canton.tracing.TraceContext
import software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCredentialsProvider}
import software.amazon.awssdk.core.sync.RequestBody
import software.amazon.awssdk.regions.Region
import software.amazon.awssdk.services.s3.model.{GetObjectRequest, PutObjectRequest}
import software.amazon.awssdk.services.s3.{S3Client, S3Configuration}

import java.net.URI
import java.nio.ByteBuffer

case class S3Config(
endpoint: URI,
bucketName: String,
region: Region,
credentials: AwsBasicCredentials,
)

class S3BucketConnection(
val s3Client: S3Client,
val bucketName: String,
val loggerFactory: NamedLoggerFactory,
) extends NamedLogging {
// Reads the full content of an s3 object into a ByteBuffer.
// Use only for testing, when the object size is known to be small
def readFullObject(key: String): ByteBuffer = {
val obj = s3Client.getObject(GetObjectRequest.builder().bucket(bucketName).key(key).build())
val bytes = obj.readAllBytes()
val ret = ByteBuffer.allocateDirect(bytes.length)
Contributor (@ray-roestenburg-da), Dec 18, 2025:
It doesn't need to be a direct buffer, right? (I mean as part of readFullObject.)

Contributor Author:
Fixed (as part of moving to the asyncClient)

ret.put(bytes)
}

// Writes a full object from memory into an s3 object
def writeFullObject(key: String, content: ByteBuffer)(implicit tc: TraceContext) = {
Contributor:
Would be nice to add the return type here since it's a public def. Is this a blocking call?

Contributor Author:
The reason I omitted the return type is that I didn't find anything useful in it, so I don't actually care about returning it. Would it be cleaner to add a () at the end to make that explicit?

Indeed, it is blocking, hence the Future { } wrapper at the call site. Added that to the comment.

Contributor:
yes it's easier to read with ()

logger.debug(s"Writing ${content.array().length} bytes to S3 object $key")
val putObj: PutObjectRequest = PutObjectRequest
.builder()
.bucket(bucketName)
.key(key)
.build()
s3Client.putObject(
Contributor:
Have you looked at S3AsyncClient? You can convert a Java CompletableFuture to a Scala Future with FutureConverters, and I read that it is non-blocking, based on Netty. Worth a try.

Contributor Author:
Nice, adopted that indeed.

Contributor:
Thanks!

putObj,
RequestBody.fromBytes(content.array()),
)
}
}

object S3BucketConnection {
def apply(s3Config: S3Config, bucketName: String, loggerFactory: NamedLoggerFactory) = {
Contributor:
Suggested change:
- def apply(s3Config: S3Config, bucketName: String, loggerFactory: NamedLoggerFactory) = {
+ def apply(s3Config: S3Config, bucketName: String, loggerFactory: NamedLoggerFactory): S3BucketConnection = {

new S3BucketConnection(
S3Client
.builder()
.endpointOverride(s3Config.endpoint)
.region(s3Config.region)
.credentialsProvider(StaticCredentialsProvider.create(s3Config.credentials))
// TODO(#3429): mockS3 and GCS support only path style access. Do we need to make this configurable?
Contributor:
Only if some other S3-compatible storage does not work with this; we could investigate that, or wait until someone asks for it. In any case, if we make it configurable, it should default to this and you would then need to explicitly turn it off, IMHO.

Contributor Author:
Yup, that's what I meant (check if it's needed), and that's the plan.

.serviceConfiguration(S3Configuration.builder().pathStyleAccessEnabled(true).build())
.build(),
bucketName,
loggerFactory,
)
}
}
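
For reference, a minimal sketch of the S3AsyncClient approach adopted in the thread above, converting the returned CompletableFuture to a Scala Future via FutureConverters. writeFullObjectAsync is a hypothetical name for illustration, not the method that ended up in the PR:

import scala.concurrent.{ExecutionContext, Future}
import scala.jdk.FutureConverters.*
import software.amazon.awssdk.core.async.AsyncRequestBody
import software.amazon.awssdk.services.s3.S3AsyncClient
import software.amazon.awssdk.services.s3.model.PutObjectRequest

def writeFullObjectAsync(
    s3Client: S3AsyncClient,
    bucketName: String,
    key: String,
    content: Array[Byte],
)(implicit ec: ExecutionContext): Future[Unit] = {
  val putObj = PutObjectRequest.builder().bucket(bucketName).key(key).build()
  s3Client
    .putObject(putObj, AsyncRequestBody.fromBytes(content)) // CompletableFuture[PutObjectResponse]
    .asScala // non-blocking conversion via scala.jdk.FutureConverters
    .map(_ => ())
}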