@@ -137,33 +137,6 @@ partition_translator::checkpoint_translation_result(
137
137
});
138
138
}
139
139
140
- ss::future<>
141
- partition_translator::translate_when_notified (kafka::offset begin_offset) {
142
- co_await _ready_to_translate.wait (
143
- [this ] { return _inflight_translation_state.has_value (); });
144
-
145
- auto & as = _inflight_translation_state->as ;
146
- auto reader = co_await _data_source->make_log_reader (
147
- begin_offset, datalake_priority (), as);
148
- if (!reader) {
149
- co_return ;
150
- }
151
- vlog (_logger.trace , " starting translation from offset: {}" , begin_offset);
152
- ss::timer<scheduling::clock > cancellation_timer;
153
- cancellation_timer.set_callback (
154
- [&as] { as.request_abort_ex (translator_time_quota_exceeded_error{}); });
155
-
156
- auto translation_f
157
- = _translation_ctx
158
- ->translate_now (
159
- std::move (reader.value ()), _inflight_translation_state->as )
160
- .finally ([this ] { return _translation_ctx->flush (); });
161
- cancellation_timer.arm (_inflight_translation_state->translate_for );
162
- co_await std::move (translation_f).finally ([&cancellation_timer] {
163
- cancellation_timer.cancel ();
164
- });
165
- }
166
-
167
140
bool partition_translator::should_finish_inflight_translation () const {
168
141
auto bytes_flushed_pending_upload = _translation_ctx->flushed_bytes ();
169
142
auto lag_window_ended = _lag_tracking->should_finish_inflight_translation ();
@@ -250,7 +223,7 @@ partition_translator::fetch_translation_offsets(retry_chain_node& rcn) {
250
223
co_return offsets;
251
224
}
252
225
253
- ss::future<> partition_translator::run_one_translation_iteration (
226
+ ss::future<bool > partition_translator::run_one_translation_iteration (
254
227
kafka::offset begin_offset) {
255
228
_lag_tracking->notify_new_data_for_translation (begin_offset);
256
229
// Notify the scheduler that there is some data to translate
@@ -259,14 +232,16 @@ ss::future<> partition_translator::run_one_translation_iteration(
259
232
// time slice (i.e. scheduled in), then translate until the time
260
233
// slice expires or we run out of data
261
234
std::exception_ptr ex = nullptr ;
235
+ bool should_discard_result = false ;
236
+ bool force_flush_after_iteration = false ;
262
237
try {
263
238
co_await _ready_to_translate.wait (
264
239
[this ] { return _inflight_translation_state.has_value (); });
265
240
auto & as = _inflight_translation_state->as ;
266
241
auto reader = co_await _data_source->make_log_reader (
267
242
begin_offset, datalake_priority (), as);
268
243
if (!reader) {
269
- co_return ;
244
+ co_return force_flush_after_iteration ;
270
245
}
271
246
vlog (
272
247
_logger.trace , " starting translation from offset: {}" , begin_offset);
@@ -282,13 +257,22 @@ ss::future<> partition_translator::run_one_translation_iteration(
282
257
co_await std::move (translation_f).finally ([&cancellation_timer] {
283
258
cancellation_timer.cancel ();
284
259
});
285
- } catch (...) {
286
- ex = std::current_exception ();
260
+ _inflight_translation_state-> as . check ();
261
+ } catch ( const translator_out_of_memory_error&) {
287
262
vlog (
288
263
_logger.warn ,
289
- " Translation attempt failed: {}, discarding state to reset "
290
- " translation" ,
291
- ex);
264
+ " Translation attempt ran into OOM, result will be flushed "
265
+ " immediately" );
266
+ force_flush_after_iteration = true ;
267
+ } catch (const translator_time_quota_exceeded_error&) {
268
+ vlog (
269
+ _logger.debug ,
270
+ " Translation attempt exceeded scheduler time limit quota" );
271
+ } catch (...) {
272
+ // unknown exception or shutdown exception.
273
+ should_discard_result = true ;
274
+ ex = std::current_exception ();
275
+ vlog (_logger.warn , " Translation attempt ran into an exception: {}" , ex);
292
276
}
293
277
// inflight_translation_state tracks a single scheduled chunk of
294
278
// work, so we reset it to nullopt for the next time we're scheduled
@@ -297,10 +281,13 @@ ss::future<> partition_translator::run_one_translation_iteration(
297
281
// Let the scheduler know we are done
298
282
_scheduler->notify_done (id ());
299
283
300
- if (ex ) {
284
+ if (should_discard_result ) {
301
285
co_await _translation_ctx->discard ();
286
+ }
287
+ if (ex) {
302
288
std::rethrow_exception (ex);
303
289
}
290
+ co_return force_flush_after_iteration;
304
291
}
305
292
306
293
ss::future<bool > partition_translator::finish_inflight_translation (
@@ -404,6 +391,7 @@ ss::future<> partition_translator::translate_until_stopped() {
404
391
if (!offsets) {
405
392
continue ;
406
393
}
394
+ auto finish_immediately = false ;
407
395
if (offsets->next_translation_begin_offset ) {
408
396
// new data is available to translate
409
397
auto translate_f = co_await ss::coroutine::as_future (
@@ -413,8 +401,9 @@ ss::future<> partition_translator::translate_until_stopped() {
413
401
translate_f.ignore_ready_future ();
414
402
continue ;
415
403
}
404
+ finish_immediately = translate_f.get ();
416
405
}
417
- if (should_finish_inflight_translation ()) {
406
+ if (finish_immediately || should_finish_inflight_translation ()) {
418
407
auto success = co_await finish_inflight_translation (
419
408
offsets->coordinator_lto , rcn);
420
409
if (!success) {
@@ -513,6 +502,7 @@ void partition_translator::stop_translation() {
513
502
if (_gate.is_closed () || !_inflight_translation_state) {
514
503
return ;
515
504
}
505
+
516
506
// Currently only preempted on OOM error, if the policy changes
517
507
// to preempt on other errors, should be updated accordingly.
518
508
_inflight_translation_state->as .request_abort_ex (
0 commit comments