Skip to content

Commit 807aa83

Browse files
author
Michael Wilkerson-Barker
committed
Updated PR 7542 with changes from master (and PR 7649)
1 parent 25212bc commit 807aa83

File tree

8 files changed

+100
-47
lines changed

8 files changed

+100
-47
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
* Refactor `sync::Session` to eliminate the bind() step of session creation ([#7609](https://github.com/realm/realm-core/pull/7609)).
2323
* Add ScopeExitFail which only calls the handler if exiting the scope via an uncaught exception ([#7609](https://github.com/realm/realm-core/pull/7609)).
2424
* Add the originating error and server requests action that caused a client reset to occur to the client reset tracking metadata storage. ([PR #7649](https://github.com/realm/realm-core/pull/7649))
25+
* Fix client reset failure during sync migration due to previous incomplete client reset. ([PR #7542](https://github.com/realm/realm-core/pull/7542), since v13.11.0)
2526

2627
----------------------------------------------
2728

src/realm/sync/client.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,6 +1067,11 @@ SyncClientHookAction SessionImpl::call_debug_hook(SyncClientHookEvent event, con
10671067
return call_debug_hook(data);
10681068
}
10691069

1070+
SyncClientHookAction SessionImpl::call_debug_hook(SyncClientHookEvent event)
1071+
{
1072+
return call_debug_hook(event, m_progress, m_last_sent_flx_query_version, DownloadBatchState::SteadyState, 0);
1073+
}
1074+
10701075
bool SessionImpl::is_steady_state_download_message(DownloadBatchState batch_state, int64_t query_version)
10711076
{
10721077
// Should never be called if session is not active

src/realm/sync/config.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,11 @@ enum class SyncClientHookEvent {
132132
SessionActivating,
133133
SessionSuspended,
134134
BindMessageSent,
135+
IdentMessageReceived,
136+
IdentMessageSent,
137+
ClientResetMergeStarting,
138+
ClientResetMergeComplete,
139+
ClientResetMergeFailed,
135140
BootstrapBatchAboutToProcess,
136141
};
137142

src/realm/sync/noinst/client_impl_base.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1727,8 +1727,7 @@ void Session::activate()
17271727
reset_protocol_state();
17281728
m_state = Active;
17291729

1730-
call_debug_hook(SyncClientHookEvent::SessionActivating, m_progress, m_last_sent_flx_query_version,
1731-
DownloadBatchState::SteadyState, 0);
1730+
call_debug_hook(SyncClientHookEvent::SessionActivating);
17321731

17331732
REALM_ASSERT(!m_suspended);
17341733
m_conn.one_more_active_unsuspended_session(); // Throws
@@ -1946,8 +1945,7 @@ void Session::send_bind_message()
19461945
m_conn.initiate_write_message(out, this); // Throws
19471946

19481947
m_bind_message_sent = true;
1949-
call_debug_hook(SyncClientHookEvent::BindMessageSent, m_progress, m_last_sent_flx_query_version,
1950-
DownloadBatchState::SteadyState, 0);
1948+
call_debug_hook(SyncClientHookEvent::BindMessageSent);
19511949

19521950
// Ready to send the IDENT message if the file identifier pair is already
19531951
// available.
@@ -1994,6 +1992,7 @@ void Session::send_ident_message()
19941992
m_conn.initiate_write_message(out, this); // Throws
19951993

19961994
m_ident_message_sent = true;
1995+
call_debug_hook(SyncClientHookEvent::IdentMessageSent);
19971996

19981997
// Other messages may be waiting to be sent
19991998
enlist_to_send(); // Throws
@@ -2270,9 +2269,12 @@ bool Session::client_reset_if_needed()
22702269
auto on_flx_version_complete = [this](int64_t version) {
22712270
this->on_flx_sync_version_complete(version);
22722271
};
2272+
call_debug_hook(SyncClientHookEvent::ClientResetMergeStarting);
22732273
bool did_reset =
22742274
client_reset::perform_client_reset(logger, *get_db(), std::move(*client_reset_config), m_client_file_ident,
22752275
get_flx_subscription_store(), on_flx_version_complete);
2276+
2277+
call_debug_hook(SyncClientHookEvent::ClientResetMergeComplete);
22762278
if (!did_reset) {
22772279
return false;
22782280
}
@@ -2335,6 +2337,7 @@ Status Session::receive_ident_message(SaltedFileIdent client_file_ident)
23352337
}
23362338

23372339
m_client_file_ident = client_file_ident;
2340+
call_debug_hook(SyncClientHookEvent::IdentMessageReceived);
23382341

23392342
if (REALM_UNLIKELY(get_client().is_dry_run())) {
23402343
// Ready to send the IDENT message
@@ -2351,8 +2354,9 @@ Status Session::receive_ident_message(SaltedFileIdent client_file_ident)
23512354
catch (const std::exception& e) {
23522355
auto err_msg = util::format("A fatal error occurred during client reset: '%1'", e.what());
23532356
logger.error(err_msg.c_str());
2354-
SessionErrorInfo err_info(Status{ErrorCodes::AutoClientResetFailed, err_msg}, IsFatal{true});
2355-
suspend(err_info);
2357+
ProtocolErrorInfo prot_info = {ErrorCodes::AutoClientResetFailed, err_msg, IsFatal{true}};
2358+
call_debug_hook(SyncClientHookEvent::ClientResetMergeFailed, prot_info);
2359+
suspend({prot_info});
23562360
return Status::OK();
23572361
}
23582362
if (!did_client_reset) {

src/realm/sync/noinst/client_impl_base.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1180,6 +1180,7 @@ class ClientImpl::Session {
11801180
size_t);
11811181
SyncClientHookAction call_debug_hook(SyncClientHookEvent event, const ProtocolErrorInfo&);
11821182
SyncClientHookAction call_debug_hook(const SyncClientHookData& data);
1183+
SyncClientHookAction call_debug_hook(SyncClientHookEvent event);
11831184

11841185
bool is_steady_state_download_message(DownloadBatchState batch_state, int64_t query_version);
11851186

src/realm/sync/noinst/client_reset.cpp

Lines changed: 43 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -416,38 +416,49 @@ ClientResyncMode reset_precheck_guard(const TransactionRef& wt_local, ClientResy
416416
{
417417
if (auto previous_reset = sync::PendingResetStore::has_pending_reset(wt_local)) {
418418
logger.info(util::LogCategory::reset, "Found a previous %1", *previous_reset);
419-
switch (previous_reset->mode) {
420-
case ClientResyncMode::Manual:
421-
REALM_UNREACHABLE();
422-
case ClientResyncMode::DiscardLocal:
423-
throw ClientResetFailed(util::format("A previous '%1' mode reset from %2 did not succeed, "
424-
"giving up on '%3' mode to prevent a cycle",
425-
previous_reset->mode, previous_reset->time, mode));
426-
case ClientResyncMode::Recover:
427-
switch (mode) {
428-
case ClientResyncMode::Recover:
429-
throw ClientResetFailed(util::format("A previous '%1' mode reset from %2 did not succeed, "
430-
"giving up on '%3' mode to prevent a cycle",
431-
previous_reset->mode, previous_reset->time, mode));
432-
case ClientResyncMode::RecoverOrDiscard:
433-
mode = ClientResyncMode::DiscardLocal;
434-
logger.info(util::LogCategory::reset,
435-
"A previous '%1' mode reset from %2 downgrades this mode ('%3') to DiscardLocal",
436-
previous_reset->mode, previous_reset->time, mode);
437-
sync::PendingResetStore::clear_pending_reset(wt_local);
438-
break;
439-
case ClientResyncMode::DiscardLocal:
440-
sync::PendingResetStore::clear_pending_reset(wt_local);
441-
// previous mode Recover and this mode is Discard, this is not a cycle yet
442-
break;
443-
case ClientResyncMode::Manual:
444-
REALM_UNREACHABLE();
445-
}
446-
break;
447-
case ClientResyncMode::RecoverOrDiscard:
448-
throw ClientResetFailed(util::format("Unexpected previous '%1' mode reset from %2 did not "
449-
"succeed, giving up on '%3' mode to prevent a cycle",
450-
previous_reset->mode, previous_reset->time, mode));
419+
if (action != previous_reset->action) {
420+
// IF a different client reset is being performed, cler the pending client reset and start over.
421+
logger.info(util::LogCategory::reset,
422+
"New '%1' client reset of type: '%2' is incompatible - clearing previous reset", action,
423+
mode);
424+
sync::PendingResetStore::clear_pending_reset(wt_local);
425+
}
426+
else {
427+
switch (previous_reset->mode) {
428+
case ClientResyncMode::Manual:
429+
REALM_UNREACHABLE();
430+
case ClientResyncMode::DiscardLocal:
431+
throw ClientResetFailed(util::format("A previous '%1' mode reset from %2 did not succeed, "
432+
"giving up on '%3' mode to prevent a cycle",
433+
previous_reset->mode, previous_reset->time, mode));
434+
case ClientResyncMode::Recover:
435+
switch (mode) {
436+
case ClientResyncMode::Recover:
437+
throw ClientResetFailed(
438+
util::format("A previous '%1' mode reset from %2 did not succeed, "
439+
"giving up on '%3' mode to prevent a cycle",
440+
previous_reset->mode, previous_reset->time, mode));
441+
case ClientResyncMode::RecoverOrDiscard:
442+
mode = ClientResyncMode::DiscardLocal;
443+
logger.info(
444+
util::LogCategory::reset,
445+
"A previous '%1' mode reset from %2 downgrades this mode ('%3') to DiscardLocal",
446+
previous_reset->mode, previous_reset->time, mode);
447+
sync::PendingResetStore::clear_pending_reset(wt_local);
448+
break;
449+
case ClientResyncMode::DiscardLocal:
450+
sync::PendingResetStore::clear_pending_reset(wt_local);
451+
// previous mode Recover and this mode is Discard, this is not a cycle yet
452+
break;
453+
case ClientResyncMode::Manual:
454+
REALM_UNREACHABLE();
455+
}
456+
break;
457+
case ClientResyncMode::RecoverOrDiscard:
458+
throw ClientResetFailed(util::format("Unexpected previous '%1' mode reset from %2 did not "
459+
"succeed, giving up on '%3' mode to prevent a cycle",
460+
previous_reset->mode, previous_reset->time, mode));
461+
}
451462
}
452463
}
453464
if (action == PendingReset::Action::ClientResetNoRecovery) {

test/object-store/sync/flx_migration.cpp

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,8 @@ TEST_CASE("Test client migration and rollback", "[sync][flx][flx migration][baas
320320

321321
TEST_CASE("Test client migration and rollback with recovery", "[sync][flx][flx migration][baas]") {
322322
auto logger_ptr = util::Logger::get_default_logger();
323+
enum TestState { idle, wait_for_merge, merge_complete, rollback_complete };
324+
TestingStateMachine<TestState> test_state(TestState::idle);
323325

324326
const std::string partition = "migration-test";
325327
const Schema mig_schema{
@@ -331,6 +333,20 @@ TEST_CASE("Test client migration and rollback with recovery", "[sync][flx][flx m
331333
SyncTestFile config(session.app()->current_user(), partition, server_app_config.schema);
332334
config.sync_config->client_resync_mode = ClientResyncMode::Recover;
333335
config.schema_version = 0;
336+
config.sync_config->on_sync_client_event_hook = [&](std::weak_ptr<SyncSession>, const SyncClientHookData& data) {
337+
test_state.transition_with([data](TestState cur_state) -> std::optional<TestState> {
338+
if (data.event == SyncClientHookEvent::ClientResetMergeComplete &&
339+
cur_state == TestState::wait_for_merge) {
340+
return TestState::merge_complete;
341+
}
342+
return std::nullopt;
343+
});
344+
if (test_state.get() == TestState::merge_complete) {
345+
// Wait for the FLX->PBS rollback to complete before continuing
346+
test_state.wait_for(TestState::rollback_complete, std::chrono::seconds(25));
347+
}
348+
return SyncClientHookAction::NoAction;
349+
};
334350

335351
// Fill some objects
336352
auto objects = fill_test_data(config); // 5 objects starting at 1 with no partition value set
@@ -437,15 +453,16 @@ TEST_CASE("Test client migration and rollback with recovery", "[sync][flx][flx m
437453
REALM_ASSERT(result.get_value() == sync::SubscriptionSet::State::Superseded);
438454
}
439455

456+
test_state.transition_to(TestState::wait_for_merge);
457+
440458
// Migrate back to FLX - and keep the realm session open
441459
trigger_server_migration(session.app_session(), MigrateToFLX, logger_ptr);
442460

443-
// wait for the subscription store to initialize after downloading
444-
timed_wait_for(
445-
[&outer_realm]() {
446-
return outer_realm->sync_session() && outer_realm->sync_session()->get_flx_subscription_store();
447-
},
448-
std::chrono::seconds(180));
461+
// Cancel any connect waits (since sync session is still active) and try to connect now
462+
outer_realm->sync_session()->handle_reconnect();
463+
464+
// wait for the fresh realm to download and merge with the current local realm
465+
test_state.wait_for(TestState::merge_complete, std::chrono::seconds(180));
449466

450467
// Verify data has been sync'ed and there is only 1 subscription for the Object table
451468
{
@@ -460,9 +477,18 @@ TEST_CASE("Test client migration and rollback with recovery", "[sync][flx][flx m
460477
REQUIRE(active_subs.find("flx_migrated_Object"));
461478
}
462479

463-
// Roll back to PBS once again - and keep the realm session open
480+
// Roll back to PBS once again before the client reset is complete and keep the realm session open
481+
// NOTE: the realm session is blocked in the hook callback until the rollback is complete
464482
trigger_server_migration(session.app_session(), RollbackToPBS, logger_ptr);
465483

484+
// Release the realm session; will reconnect and perform the rollback to PBS client reset
485+
test_state.transition_to(TestState::rollback_complete);
486+
487+
// Cancel any connect waits (since sync session is still active) and try to connect now
488+
outer_realm->sync_session()->handle_reconnect();
489+
490+
// During the rollback client reset, the previous migrate to flx client reset operation is still
491+
// tracked, but will be removed since the new rollback server requests action is incompatible.
466492
REQUIRE(!wait_for_upload(*outer_realm));
467493
REQUIRE(!wait_for_download(*outer_realm));
468494

test/object-store/util/test_utils.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,10 @@ class TestingStateMachine {
6868
m_cv.notify_one();
6969
}
7070

71-
void wait_for(E target)
71+
bool wait_for(E target, std::chrono::milliseconds period = std::chrono::seconds(15))
7272
{
7373
std::unique_lock lock{m_mutex};
74-
m_cv.wait(lock, [&] {
74+
return m_cv.wait_for(lock, period, [&] {
7575
return m_cur_state == target;
7676
});
7777
}

0 commit comments

Comments
 (0)