Skip to content

Commit ebcb209

Browse files
Fix config drift metric persistence across MCD restarts
1 parent 1881dab commit ebcb209

File tree

1 file changed

+28
-1
lines changed

1 file changed

+28
-1
lines changed

pkg/daemon/daemon.go

Lines changed: 28 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1523,7 +1523,31 @@ func (dn *Daemon) getCurrentConfigFromNode() (*onDiskConfig, error) {
15231523
}
15241524

15251525
func (dn *Daemon) startConfigDriftMonitor() {
1526-
mcdConfigDrift.Set(0)
1526+
// Initialize the config drift metric based on the node's current state.
1527+
// If the node is Degraded due to config drift, set the metric to current time to indicate ongoing drift. Otherwise, clear the metric.
1528+
state, err := getNodeAnnotationExt(dn.node, constants.MachineConfigDaemonStateAnnotationKey, true)
1529+
if err != nil {
1530+
klog.Warningf("Could not get node state when initializing config drift metric: %v", err)
1531+
mcdConfigDrift.Set(0)
1532+
} else if state == constants.MachineConfigDaemonStateDegraded {
1533+
// Check if degraded due to config drift by examining the reason annotation
1534+
reason, err := getNodeAnnotationExt(dn.node, constants.MachineConfigDaemonReasonAnnotationKey, true)
1535+
if err != nil {
1536+
klog.Warningf("Could not get node reason when initializing config drift metric: %v", err)
1537+
mcdConfigDrift.Set(0)
1538+
} else if strings.Contains(reason, "content mismatch") || strings.Contains(reason, "mode mismatch") {
1539+
// Node is degraded due to config drift - set metric to indicate ongoing drift
1540+
mcdConfigDrift.SetToCurrentTime()
1541+
klog.Infof("Config drift metric initialized: node is degraded due to config drift")
1542+
} else {
1543+
// Node is degraded but not due to config drift
1544+
mcdConfigDrift.Set(0)
1545+
}
1546+
} else {
1547+
// Node is not degraded - clear the metric
1548+
mcdConfigDrift.Set(0)
1549+
}
1550+
15271551
// Even though the Config Drift Monitor object ensures that only a single
15281552
// Config Drift Watcher is running at any given time, other things, such as
15291553
// emitting Kube events on startup, should only occur if we weren't
@@ -2291,6 +2315,9 @@ func (dn *Daemon) checkStateOnFirstRun() error {
22912315

22922316
if err := dn.validateOnDiskStateOrImage(state.currentConfig, state.currentImage); err != nil {
22932317
dn.nodeWriter.Eventf(corev1.EventTypeWarning, "OnDiskStateValidationFailed", err.Error())
2318+
// Start the config drift monitor even when there's pre-existing drift
2319+
// so the metric gets initialized correctly on MCD restart
2320+
dn.startConfigDriftMonitor()
22942321
return err
22952322
}
22962323

0 commit comments

Comments
 (0)