Skip to content

Commit 68b168a

Browse files
fixes config drift metric persistence
1 parent 78cd089 commit 68b168a

File tree

1 file changed

+28
-1
lines changed

1 file changed

+28
-1
lines changed

pkg/daemon/daemon.go

Lines changed: 28 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1303,7 +1303,31 @@ func (dn *Daemon) getCurrentConfigFromNode() (*onDiskConfig, error) {
13031303
}
13041304

13051305
func (dn *Daemon) startConfigDriftMonitor() {
1306-
mcdConfigDrift.Set(0)
1306+
// Initialize the config drift metric based on the node's current state.
1307+
// If the node is Degraded due to config drift, set the metric to current time to indicate ongoing drift. Otherwise, clear the metric.
1308+
state, err := getNodeAnnotationExt(dn.node, constants.MachineConfigDaemonStateAnnotationKey, true)
1309+
if err != nil {
1310+
klog.Warningf("Could not get node state when initializing config drift metric: %v", err)
1311+
mcdConfigDrift.Set(0)
1312+
} else if state == constants.MachineConfigDaemonStateDegraded {
1313+
// Check if degraded due to config drift by examining the reason annotation
1314+
reason, err := getNodeAnnotationExt(dn.node, constants.MachineConfigDaemonReasonAnnotationKey, true)
1315+
if err != nil {
1316+
klog.Warningf("Could not get node reason when initializing config drift metric: %v", err)
1317+
mcdConfigDrift.Set(0)
1318+
} else if strings.Contains(reason, "content mismatch") || strings.Contains(reason, "mode mismatch") {
1319+
// Node is degraded due to config drift - set metric to indicate ongoing drift
1320+
mcdConfigDrift.SetToCurrentTime()
1321+
klog.Infof("Config drift metric initialized: node is degraded due to config drift")
1322+
} else {
1323+
// Node is degraded but not due to config drift
1324+
mcdConfigDrift.Set(0)
1325+
}
1326+
} else {
1327+
// Node is not degraded - clear the metric
1328+
mcdConfigDrift.Set(0)
1329+
}
1330+
13071331
// Even though the Config Drift Monitor object ensures that only a single
13081332
// Config Drift Watcher is running at any given time, other things, such as
13091333
// emitting Kube events on startup, should only occur if we weren't
@@ -1983,6 +2007,9 @@ func (dn *Daemon) checkStateOnFirstRun() error {
19832007

19842008
if err := dn.validateOnDiskStateOrImage(state.currentConfig, state.currentImage); err != nil {
19852009
dn.nodeWriter.Eventf(corev1.EventTypeWarning, "OnDiskStateValidationFailed", err.Error())
2010+
// Start the config drift monitor even when there's pre-existing drift
2011+
// so the metric gets initialized correctly on MCD restart
2012+
dn.startConfigDriftMonitor()
19862013
return err
19872014
}
19882015

0 commit comments

Comments
 (0)