Skip to content

Commit c81a447

Browse files
authored
DAOS-11134 control: Add leadership check to doGroupUpdate() (#9737) (#9870)
In rare circumstances, this method could be called after leadership is lost but before the worker loop is exited. Add a final check for leadership before invoking the group update dRPC in order to avoid trying to make the group update on a non-leader replica.

Also fixes a bug where a sync group update could be followed by an unnecessary async group update.

Signed-off-by: Michael MacDonald <[email protected]>
1 parent b27457a commit c81a447

File tree

2 files changed

+10
-2
lines changed

2 files changed

+10
-2
lines changed

src/control/server/mgmt_system.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,8 @@ func (svc *mgmtSvc) joinLoop(parent context.Context) {
 					svc.log.Errorf("sync GroupUpdate failed: %s", err)
 					continue
 				}
+				groupUpdateNeeded = false
 			}
-			groupUpdateNeeded = false
 		case <-groupUpdateTimer.C:
 			if !groupUpdateNeeded {
 				continue
@@ -364,6 +364,11 @@ func (svc *mgmtSvc) doGroupUpdate(ctx context.Context, forced bool) error {
 		rankSet.Add(rank)
 	}
 
+	// Final check to make sure we're still leader.
+	if err := svc.sysdb.CheckLeader(); err != nil {
+		return err
+	}
+
 	svc.log.Debugf("group update request: version: %d, ranks: %s", req.MapVersion, rankSet)
 	dResp, err := svc.harness.CallDrpc(ctx, drpc.MethodGroupUpdate, req)
 	if err != nil {

src/control/system/raft.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,10 @@ func (db *Database) submitRaftUpdate(data []byte) error {
 	// signal some callers to retry the operation on the
 	// new leader.
 	if IsRaftLeadershipError(err) {
-		return ErrRaftUnavail
+		return &ErrNotLeader{
+			LeaderHint: db.leaderHint(),
+			Replicas:   db.cfg.stringReplicas(db.getReplica()),
+		}
 	}
 
 	return err

0 commit comments

Comments
 (0)