Skip to content

Commit dfdb37d

Browse files
Fix config drift metric persistence across MCD pod restarts
1 parent 78cd089 commit dfdb37d

File tree

1 file changed

+28
-1
lines changed

1 file changed

+28
-1
lines changed

pkg/daemon/daemon.go

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1303,7 +1303,34 @@ func (dn *Daemon) getCurrentConfigFromNode() (*onDiskConfig, error) {
13031303
}
13041304

13051305
func (dn *Daemon) startConfigDriftMonitor() {
1306-
mcdConfigDrift.Set(0)
1306+
// Initialize the config drift metric based on the node's current state.
1307+
// If the node is Degraded due to config drift, set the metric to current time
1308+
// to indicate ongoing drift. Otherwise, clear the metric.
1309+
// This ensures the metric persists across MCD pod restarts.
1310+
// See: https://issues.redhat.com/browse/OCPBUGS-XXXXX
1311+
state, err := getNodeAnnotationExt(dn.node, constants.MachineConfigDaemonStateAnnotationKey, true)
1312+
if err != nil {
1313+
klog.Warningf("Could not get node state when initializing config drift metric: %v", err)
1314+
mcdConfigDrift.Set(0)
1315+
} else if state == constants.MachineConfigDaemonStateDegraded {
1316+
// Check if degraded due to config drift by examining the reason annotation
1317+
reason, err := getNodeAnnotationExt(dn.node, constants.MachineConfigDaemonReasonAnnotationKey, true)
1318+
if err != nil {
1319+
klog.Warningf("Could not get node reason when initializing config drift metric: %v", err)
1320+
mcdConfigDrift.Set(0)
1321+
} else if strings.Contains(reason, "content mismatch") || strings.Contains(reason, "mode mismatch") {
1322+
// Node is degraded due to config drift - set metric to indicate ongoing drift
1323+
mcdConfigDrift.SetToCurrentTime()
1324+
klog.Infof("Config drift metric initialized: node is degraded due to config drift")
1325+
} else {
1326+
// Node is degraded but not due to config drift
1327+
mcdConfigDrift.Set(0)
1328+
}
1329+
} else {
1330+
// Node is not degraded - clear the metric
1331+
mcdConfigDrift.Set(0)
1332+
}
1333+
13071334
// Even though the Config Drift Monitor object ensures that only a single
13081335
// Config Drift Watcher is running at any given time, other things, such as
13091336
// emitting Kube events on startup, should only occur if we weren't

0 commit comments

Comments
 (0)