From d8c887a179bd07545056eaa8ea28ee9669adb5fd Mon Sep 17 00:00:00 2001 From: Arthur Schreiber Date: Fri, 31 Jan 2025 14:49:38 +0000 Subject: [PATCH] Track the last time we got a heartbeat response. Signed-off-by: Arthur Schreiber --- go/vt/vttablet/tabletserver/repltracker/reader.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/go/vt/vttablet/tabletserver/repltracker/reader.go b/go/vt/vttablet/tabletserver/repltracker/reader.go index b50e5e4b2c7..b8fb0f56d2e 100644 --- a/go/vt/vttablet/tabletserver/repltracker/reader.go +++ b/go/vt/vttablet/tabletserver/repltracker/reader.go @@ -60,6 +60,7 @@ type heartbeatReader struct { lagMu sync.Mutex lastKnownLag time.Duration + lastKnownTime time.Time lastKnownError error } @@ -105,6 +106,7 @@ func (r *heartbeatReader) Open() { r.pool.Open(r.env.Config().DB.AppWithDB(), r.env.Config().DB.DbaWithDB(), r.env.Config().DB.AppDebugWithDB()) r.ticks.Start(func() { r.readHeartbeat() }) r.isOpen = true + r.lastKnownTime = r.now() } // Close cancels the watchHeartbeat periodic ticker and closes the db pool. @@ -130,9 +132,16 @@ func (r *heartbeatReader) Close() { func (r *heartbeatReader) Status() (time.Duration, error) { r.lagMu.Lock() defer r.lagMu.Unlock() + if r.lastKnownError != nil { return 0, r.lastKnownError } + + // Return an error if we didn't receive a heartbeat for more than twice the heartbeat interval + if r.now().Sub(r.lastKnownTime) > 2*r.interval { + return 0, fmt.Errorf("no heartbeat received in over 2x the heartbeat interval") + } + return r.lastKnownLag, nil } @@ -162,6 +171,7 @@ func (r *heartbeatReader) readHeartbeat() { reads.Add(1) r.lagMu.Lock() + r.lastKnownTime = r.now() r.lastKnownLag = lag r.lastKnownError = nil r.lagMu.Unlock()