File tree Expand file tree Collapse file tree 2 files changed +22
-1
lines changed Expand file tree Collapse file tree 2 files changed +22
-1
lines changed Original file line number Diff line number Diff line change 10
10
11
11
#include " datalake/local_parquet_file_writer.h"
12
12
13
+ #include " base/units.h"
13
14
#include " base/vlog.h"
14
15
#include " datalake/logger.h"
15
16
@@ -175,7 +176,17 @@ local_parquet_file_writer_factory::local_parquet_file_writer_factory(
175
176
176
177
ss::future<result<std::unique_ptr<parquet_file_writer>, writer_error>>
177
178
local_parquet_file_writer_factory::create_writer (
178
- const iceberg::struct_type& schema, ss::abort_source&) {
179
+ const iceberg::struct_type& schema, ss::abort_source& as) {
180
+ // There is a per-writer cost associated, which includes things like
181
+ // local path strings and some in-memory data structures holding
182
+ // the writer instances. This is in place to avoid an explosion of
183
+ // writer instances, e.g. partition_by(offset), which creates a
184
+ // writer per offset.
185
+ // Additionally, another per-writer contributor is the buffer used
186
+ // in the output stream, which defaults to 8_KiB and is only released
187
+ // on output stream close().
188
+ static constexpr size_t WRITER_RESERVATION_OVERHEAD = 10_KiB;
189
+ co_await _mem_tracker.reserve_bytes (WRITER_RESERVATION_OVERHEAD, as);
179
190
auto writer = std::make_unique<local_parquet_file_writer>(
180
191
create_filename (), _writer_factory, _mem_tracker);
181
192
Original file line number Diff line number Diff line change @@ -485,6 +485,16 @@ class partition_translation_context : public translation_context {
485
485
486
486
ss::future<> flush () final {
487
487
if (_in_progress_translation) {
488
+ // Note: The flush here *does not* fully release memory associated
489
+ // with the underlying file output stream because Seastar only
490
+ // releases the stream buffer on stream close(). The default buffer size is
491
+ // 8KiB, which means up to 8KiB per writer can still be buffered even
492
+ // after flush, which is not accounted for in reservations. This could be
493
+ // an issue if there is an explosion of file writer instances. We
494
+ // try to factor in a 10KiB overhead per writer when it is created, but
495
+ // it will be released as soon as the flush is called. An
496
+ // improvement could be to account for the fixed reservation cost
497
+ // across flush calls and only release on finish.
488
498
vlog (datalake_log.trace , " [{}] flushing writers" , _ntp);
489
499
return _in_progress_translation->flush ()
490
500
.then_wrapped ([](auto result_f) {
You can’t perform that action at this time.
0 commit comments