Skip to content

Commit

Permalink
Fixes for avoidance of hosts taking backup in PRS & ERS (#17300)
Browse files Browse the repository at this point in the history
Signed-off-by: Eduardo J. Ortega U. <[email protected]>
  • Loading branch information
ejortegau authored Dec 9, 2024
1 parent cb66920 commit 3cdda35
Show file tree
Hide file tree
Showing 12 changed files with 993 additions and 1,120 deletions.
194 changes: 92 additions & 102 deletions go/vt/proto/replicationdata/replicationdata.pb.go

Large diffs are not rendered by default.

34 changes: 0 additions & 34 deletions go/vt/proto/replicationdata/replicationdata_vtproto.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1,610 changes: 795 additions & 815 deletions go/vt/proto/tabletmanagerdata/tabletmanagerdata.pb.go

Large diffs are not rendered by default.

68 changes: 0 additions & 68 deletions go/vt/proto/tabletmanagerdata/tabletmanagerdata_vtproto.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 14 additions & 3 deletions go/vt/vtctl/reparentutil/replication.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,6 @@ func stopReplicationAndBuildStatusMaps(
logger.Infof("getting replication position from %v", alias)

stopReplicationStatus, err := tmc.StopReplicationAndGetStatus(groupCtx, tabletInfo.Tablet, replicationdatapb.StopReplicationMode_IOTHREADONLY)
m.Lock()
res.tabletsBackupState[alias] = stopReplicationStatus.GetBackupRunning()
m.Unlock()
if err != nil {
sqlErr, isSQLErr := sqlerror.NewSQLErrorFromError(err).(*sqlerror.SQLError)
if isSQLErr && sqlErr != nil && sqlErr.Number() == sqlerror.ERNotReplica {
Expand All @@ -242,6 +239,20 @@ func stopReplicationAndBuildStatusMaps(
err = vterrors.Wrapf(err, "error when getting replication status for alias %v: %v", alias, err)
}
} else {
isTakingBackup := false

// Prefer the most up-to-date information regarding whether the tablet is taking a backup from the After
// replication status, but fall back to the Before status if After is nil.
if stopReplicationStatus.After != nil {
isTakingBackup = stopReplicationStatus.After.BackupRunning
} else if stopReplicationStatus.Before != nil {
isTakingBackup = stopReplicationStatus.Before.BackupRunning
}

m.Lock()
res.tabletsBackupState[alias] = isTakingBackup
m.Unlock()

var sqlThreadRunning bool
// Check if the sql thread was running for the tablet
sqlThreadRunning, err = SQLThreadWasRunning(stopReplicationStatus)
Expand Down
82 changes: 82 additions & 0 deletions go/vt/vtctl/reparentutil/replication_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ func Test_stopReplicationAndBuildStatusMaps(t *testing.T) {
waitForAllTablets bool
expectedStatusMap map[string]*replicationdatapb.StopReplicationStatus
expectedPrimaryStatusMap map[string]*replicationdatapb.PrimaryStatus
expectedTakingBackup map[string]bool
expectedTabletsReachable []*topodatapb.Tablet
shouldErr bool
}{
Expand Down Expand Up @@ -339,6 +340,7 @@ func Test_stopReplicationAndBuildStatusMaps(t *testing.T) {
After: &replicationdatapb.Status{Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429101:1-9"},
},
},
expectedTakingBackup: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false},
expectedPrimaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{},
expectedTabletsReachable: []*topodatapb.Tablet{{
Type: topodatapb.TabletType_REPLICA,
Expand Down Expand Up @@ -407,6 +409,7 @@ func Test_stopReplicationAndBuildStatusMaps(t *testing.T) {
After: &replicationdatapb.Status{Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429101:1-9"},
},
},
expectedTakingBackup: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false},
expectedPrimaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{},
expectedTabletsReachable: []*topodatapb.Tablet{{
Type: topodatapb.TabletType_REPLICA,
Expand Down Expand Up @@ -491,6 +494,7 @@ func Test_stopReplicationAndBuildStatusMaps(t *testing.T) {
After: &replicationdatapb.Status{Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429101:1-9"},
},
},
expectedTakingBackup: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false},
expectedPrimaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{},
expectedTabletsReachable: []*topodatapb.Tablet{{
Type: topodatapb.TabletType_REPLICA,
Expand Down Expand Up @@ -585,6 +589,7 @@ func Test_stopReplicationAndBuildStatusMaps(t *testing.T) {
After: &replicationdatapb.Status{Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429101:1-9"},
},
},
expectedTakingBackup: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false},
expectedPrimaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{},
expectedTabletsReachable: []*topodatapb.Tablet{{
Type: topodatapb.TabletType_REPLICA,
Expand Down Expand Up @@ -678,6 +683,7 @@ func Test_stopReplicationAndBuildStatusMaps(t *testing.T) {
After: &replicationdatapb.Status{Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429101:1-9"},
},
},
expectedTakingBackup: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false},
expectedPrimaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{},
expectedTabletsReachable: []*topodatapb.Tablet{{
Type: topodatapb.TabletType_REPLICA,
Expand Down Expand Up @@ -750,6 +756,7 @@ func Test_stopReplicationAndBuildStatusMaps(t *testing.T) {
Uid: 101,
},
}},
expectedTakingBackup: map[string]bool{"zone1-0000000101": false},
expectedPrimaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{},
shouldErr: false,
},
Expand Down Expand Up @@ -811,6 +818,7 @@ func Test_stopReplicationAndBuildStatusMaps(t *testing.T) {
After: &replicationdatapb.Status{Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429101:1-9"},
},
},
expectedTakingBackup: map[string]bool{"zone1-0000000101": false},
expectedPrimaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{
"zone1-0000000100": {
Position: "primary-position-100",
Expand Down Expand Up @@ -885,6 +893,7 @@ func Test_stopReplicationAndBuildStatusMaps(t *testing.T) {
After: &replicationdatapb.Status{Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429101:1-9"},
},
},
expectedTakingBackup: map[string]bool{"zone1-0000000101": false},
expectedPrimaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{}, // zone1-0000000100 fails to demote, so does not appear
expectedTabletsReachable: []*topodatapb.Tablet{{
Type: topodatapb.TabletType_REPLICA,
Expand Down Expand Up @@ -1008,6 +1017,7 @@ func Test_stopReplicationAndBuildStatusMaps(t *testing.T) {
Uid: 101,
},
}},
expectedTakingBackup: map[string]bool{"zone1-0000000101": false},
expectedPrimaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{},
shouldErr: false,
},
Expand Down Expand Up @@ -1057,6 +1067,7 @@ func Test_stopReplicationAndBuildStatusMaps(t *testing.T) {
After: &replicationdatapb.Status{Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429101:1-9"},
},
},
expectedTakingBackup: map[string]bool{"zone1-0000000101": false},
expectedPrimaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{},
expectedTabletsReachable: []*topodatapb.Tablet{{
Type: topodatapb.TabletType_REPLICA,
Expand Down Expand Up @@ -1252,8 +1263,78 @@ func Test_stopReplicationAndBuildStatusMaps(t *testing.T) {
},
}},
stopReplicasTimeout: time.Minute,
expectedTakingBackup: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false, "zone1-0000000102": false},
expectedPrimaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{},
shouldErr: false,
}, {
name: "Handle nil replication status After. No segfaulting when determining backup status, and fall back to Before status",
durability: "none",
tmc: &stopReplicationAndBuildStatusMapsTestTMClient{
stopReplicationAndGetStatusResults: map[string]*struct {
StopStatus *replicationdatapb.StopReplicationStatus
Err error
}{
"zone1-0000000100": {
StopStatus: &replicationdatapb.StopReplicationStatus{
Before: &replicationdatapb.Status{Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429100:1-5", IoState: int32(replication.ReplicationStateRunning), SqlState: int32(replication.ReplicationStateRunning), BackupRunning: true},
After: nil,
},
},
"zone1-0000000101": {
StopStatus: &replicationdatapb.StopReplicationStatus{
Before: &replicationdatapb.Status{Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429101:1-5", IoState: int32(replication.ReplicationStateRunning), SqlState: int32(replication.ReplicationStateRunning), BackupRunning: true},
After: nil,
},
},
},
},
tabletMap: map[string]*topo.TabletInfo{
"zone1-0000000100": {
Tablet: &topodatapb.Tablet{
Type: topodatapb.TabletType_REPLICA,
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 100,
},
},
},
"zone1-0000000101": {
Tablet: &topodatapb.Tablet{
Type: topodatapb.TabletType_REPLICA,
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 101,
},
},
},
},
ignoredTablets: sets.New[string](),
expectedStatusMap: map[string]*replicationdatapb.StopReplicationStatus{
"zone1-0000000100": {
Before: &replicationdatapb.Status{Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429100:1-5", IoState: int32(replication.ReplicationStateRunning), SqlState: int32(replication.ReplicationStateRunning), BackupRunning: true},
After: nil,
},
"zone1-0000000101": {
Before: &replicationdatapb.Status{Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429101:1-5", IoState: int32(replication.ReplicationStateRunning), SqlState: int32(replication.ReplicationStateRunning), BackupRunning: true},
After: nil,
},
},
expectedTakingBackup: map[string]bool{"zone1-0000000100": true, "zone1-0000000101": true},
expectedPrimaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{},
expectedTabletsReachable: []*topodatapb.Tablet{{
Type: topodatapb.TabletType_REPLICA,
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 100,
},
}, {
Type: topodatapb.TabletType_REPLICA,
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 101,
},
}},
shouldErr: false,
},
}

Expand All @@ -1279,6 +1360,7 @@ func Test_stopReplicationAndBuildStatusMaps(t *testing.T) {
for idx, tablet := range res.reachableTablets {
assert.True(t, topoproto.IsTabletInList(tablet, tt.expectedTabletsReachable), "TabletsReached[%d] not found - %s", idx, topoproto.TabletAliasString(tablet.Alias))
}
assert.Equal(t, tt.expectedTakingBackup, res.tabletsBackupState)
})
}
}
Expand Down
Loading

0 comments on commit 3cdda35

Please sign in to comment.