From 7da7d81a1e8620ce9e8ec669b86afd735ed8d220 Mon Sep 17 00:00:00 2001 From: Arthur Schreiber Date: Fri, 31 Jan 2025 14:49:38 +0000 Subject: [PATCH] Track the last time we got a heartbeat response. --- go/vt/vttablet/tabletserver/repltracker/reader.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/go/vt/vttablet/tabletserver/repltracker/reader.go b/go/vt/vttablet/tabletserver/repltracker/reader.go index 694778d1119..804c8ed3f60 100644 --- a/go/vt/vttablet/tabletserver/repltracker/reader.go +++ b/go/vt/vttablet/tabletserver/repltracker/reader.go @@ -61,6 +61,7 @@ type heartbeatReader struct { lagMu sync.Mutex lastKnownLag time.Duration + lastKnownTime time.Time lastKnownError error } @@ -106,6 +107,7 @@ func (r *heartbeatReader) Open() { r.pool.Open(r.env.Config().DB.AppWithDB(), r.env.Config().DB.DbaWithDB(), r.env.Config().DB.AppDebugWithDB()) r.ticks.Start(func() { r.readHeartbeat() }) r.isOpen = true + r.lastKnownTime = r.now() } // Close cancels the watchHeartbeat periodic ticker and closes the db pool. @@ -131,6 +133,12 @@ func (r *heartbeatReader) Close() { func (r *heartbeatReader) Status() (time.Duration, error) { r.lagMu.Lock() defer r.lagMu.Unlock() + + // Return an error if we didn't receive a heartbeat for more than twice the heartbeat interval + if r.now().Sub(r.lastKnownTime) > 2*r.interval { + return 0, fmt.Errorf("no heartbeat received in over 2x the heartbeat interval") + } + if r.lastKnownError != nil { return 0, r.lastKnownError } @@ -163,6 +171,7 @@ func (r *heartbeatReader) readHeartbeat() { reads.Add(1) r.lagMu.Lock() + r.lastKnownTime = r.now() r.lastKnownLag = lag r.lastKnownError = nil r.lagMu.Unlock()