Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/machine-config-controller/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ func createControllers(ctx *ctrlcommon.ControllerContext) []ctrlcommon.Controlle
ctx.OCLInformerFactory.Machineconfiguration().V1().MachineOSBuilds(),
ctx.InformerFactory.Machineconfiguration().V1().MachineConfigNodes(),
ctx.ConfigInformerFactory.Config().V1().Schedulers(),
ctx.OperatorInformerFactory.Operator().V1().MachineConfigurations(),
ctx.ClientBuilder.KubeClientOrDie("node-update-controller"),
ctx.ClientBuilder.MachineConfigClientOrDie("node-update-controller"),
ctx.FeatureGatesHandler,
Expand Down
13 changes: 12 additions & 1 deletion install/0000_90_machine-config_01_prometheus-rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,18 @@ spec:
annotations:
summary: "Triggers when nodes in a pool have overlapping labels such as master, worker, and a custom label therefore a choice must be made as to which is honored."
description: "Node {{ $labels.exported_node }} has triggered a pool alert due to a label change. For more details check MachineConfigController pod logs: oc logs -f -n {{ $labels.namespace }} machine-config-controller-xxxxx -c machine-config-controller"
runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/machine-config-operator/MachineConfigControllerPoolAlert.md
runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/machine-config-operator/MachineConfigControllerPoolAlert.md
- name: mcc-boot-image-skew-enforcement-none
rules:
- alert: MCCBootImageSkewEnforcementNone
expr: |
mcc_boot_image_skew_enforcement_none == 1
labels:
namespace: openshift-machine-config-operator
severity: info
annotations:
summary: "Boot image skew enforcement is disabled. Scaling operations may not be successful."
description: "Boot image skew enforcement mode is set to None. When scaling up, new nodes may be provisioned with older boot images that could introduce compatibility issues. Consider manually updating boot images to match the cluster version. Please refer to docs at [TODO-INSERTLINK] for additional details."
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
Expand Down
10 changes: 10 additions & 0 deletions pkg/controller/common/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,15 @@ var (
Help: "state of OS image override",
}, []string{"pool"})

// MCCBootImageSkewEnforcementNone indicates whether boot image skew enforcement is disabled.
// It is set to 1 when the enforcement mode is "None" and to 0 otherwise. A value of 1
// indicates that scaling operations may not be successful.
MCCBootImageSkewEnforcementNone = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "mcc_boot_image_skew_enforcement_none",
Help: "Set to 1 when boot image skew enforcement mode is None, indicating scaling may not be successful as bootimages are out of date",
})

// MCCDrainErr logs failed drain
MCCDrainErr = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Expand Down Expand Up @@ -94,6 +103,7 @@ func RegisterMCCMetrics() error {
MCCUpdatedMachineCount,
MCCDegradedMachineCount,
MCCUnavailableMachineCount,
MCCBootImageSkewEnforcementNone,
})

if err != nil {
Expand Down
83 changes: 82 additions & 1 deletion pkg/controller/node/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,15 @@ import (
configv1 "github.com/openshift/api/config/v1"
features "github.com/openshift/api/features"
mcfgv1 "github.com/openshift/api/machineconfiguration/v1"
opv1 "github.com/openshift/api/operator/v1"

cligoinformersv1 "github.com/openshift/client-go/config/informers/externalversions/config/v1"
cligolistersv1 "github.com/openshift/client-go/config/listers/config/v1"
mcfgclientset "github.com/openshift/client-go/machineconfiguration/clientset/versioned"
"github.com/openshift/client-go/machineconfiguration/clientset/versioned/scheme"
mcfginformersv1 "github.com/openshift/client-go/machineconfiguration/informers/externalversions/machineconfiguration/v1"
mcopinformersv1 "github.com/openshift/client-go/operator/informers/externalversions/operator/v1"
mcoplistersv1 "github.com/openshift/client-go/operator/listers/operator/v1"

mcfglistersv1 "github.com/openshift/client-go/machineconfiguration/listers/machineconfiguration/v1"
"github.com/openshift/library-go/pkg/operator/v1helpers"
Expand Down Expand Up @@ -108,6 +111,9 @@ type Controller struct {
schedulerList cligolistersv1.SchedulerLister
schedulerListerSynced cache.InformerSynced

mcopLister mcoplistersv1.MachineConfigurationLister
mcopListerSynced cache.InformerSynced

queue workqueue.TypedRateLimitingInterface[string]

fgHandler ctrlcommon.FeatureGatesHandler
Expand All @@ -127,6 +133,7 @@ func New(
mosbInformer mcfginformersv1.MachineOSBuildInformer,
mcnInformer mcfginformersv1.MachineConfigNodeInformer,
schedulerInformer cligoinformersv1.SchedulerInformer,
mcopInformer mcopinformersv1.MachineConfigurationInformer,
kubeClient clientset.Interface,
mcfgClient mcfgclientset.Interface,
fgHandler ctrlcommon.FeatureGatesHandler,
Expand All @@ -141,6 +148,7 @@ func New(
podInformer,
mcnInformer,
schedulerInformer,
mcopInformer,
kubeClient,
mcfgClient,
defaultUpdateDelay,
Expand All @@ -158,6 +166,7 @@ func NewWithCustomUpdateDelay(
mosbInformer mcfginformersv1.MachineOSBuildInformer,
mcnInformer mcfginformersv1.MachineConfigNodeInformer,
schedulerInformer cligoinformersv1.SchedulerInformer,
mcopInformer mcopinformersv1.MachineConfigurationInformer,
kubeClient clientset.Interface,
mcfgClient mcfgclientset.Interface,
updateDelay time.Duration,
Expand All @@ -173,6 +182,7 @@ func NewWithCustomUpdateDelay(
podInformer,
mcnInformer,
schedulerInformer,
mcopInformer,
kubeClient,
mcfgClient,
updateDelay,
Expand All @@ -191,6 +201,7 @@ func newController(
podInformer coreinformersv1.PodInformer,
mcnInformer mcfginformersv1.MachineConfigNodeInformer,
schedulerInformer cligoinformersv1.SchedulerInformer,
mcopInformer mcopinformersv1.MachineConfigurationInformer,
kubeClient clientset.Interface,
mcfgClient mcfgclientset.Interface,
updateDelay time.Duration,
Expand Down Expand Up @@ -240,6 +251,11 @@ func newController(
UpdateFunc: ctrl.updateMachineConfigNode,
DeleteFunc: ctrl.deleteMachineConfigNode,
})
mcopInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: ctrl.addMachineConfiguration,
UpdateFunc: ctrl.updateMachineConfiguration,
DeleteFunc: ctrl.deleteMachineConfiguration,
})

ctrl.syncHandler = ctrl.syncMachineConfigPool
ctrl.enqueueMachineConfigPool = ctrl.enqueueDefault
Expand All @@ -263,6 +279,9 @@ func newController(
ctrl.schedulerList = schedulerInformer.Lister()
ctrl.schedulerListerSynced = schedulerInformer.Informer().HasSynced

ctrl.mcopLister = mcopInformer.Lister()
ctrl.mcopListerSynced = mcopInformer.Informer().HasSynced

return ctrl
}

Expand All @@ -271,7 +290,7 @@ func (ctrl *Controller) Run(workers int, stopCh <-chan struct{}) {
defer utilruntime.HandleCrash()
defer ctrl.queue.ShutDown()

if !cache.WaitForCacheSync(stopCh, ctrl.ccListerSynced, ctrl.mcListerSynced, ctrl.mcpListerSynced, ctrl.moscListerSynced, ctrl.mosbListerSynced, ctrl.nodeListerSynced, ctrl.schedulerListerSynced) {
if !cache.WaitForCacheSync(stopCh, ctrl.ccListerSynced, ctrl.mcListerSynced, ctrl.mcpListerSynced, ctrl.moscListerSynced, ctrl.mosbListerSynced, ctrl.nodeListerSynced, ctrl.schedulerListerSynced, ctrl.mcopListerSynced) {
return
}

Expand Down Expand Up @@ -1849,3 +1868,65 @@ func (ctrl *Controller) syncMetrics() error {
}
return nil
}

// addMachineConfiguration is the add-event handler registered on the
// MachineConfiguration informer. When the BootImageSkewEnforcement feature gate
// is enabled, the added object is passed to syncBootImageSkewEnforcementMetric;
// when the gate handler is missing or the gate is off, the event is ignored.
func (ctrl *Controller) addMachineConfiguration(obj any) {
	// The nil check must come first so Enabled is never called on a nil handler.
	gateEnabled := ctrl.fgHandler != nil && ctrl.fgHandler.Enabled(features.FeatureGateBootImageSkewEnforcement)
	if gateEnabled {
		ctrl.syncBootImageSkewEnforcementMetric(obj)
	}
}

// updateMachineConfiguration is the update-event handler registered on the
// MachineConfiguration informer. It refreshes the boot image skew enforcement
// metric only when Status.BootImageSkewEnforcementStatus.Mode differs between
// the old and the current object. Events are ignored when the feature gate
// handler is missing, the BootImageSkewEnforcement gate is off, or either
// argument is not a *opv1.MachineConfiguration.
func (ctrl *Controller) updateMachineConfiguration(old, cur any) {
	if ctrl.fgHandler == nil {
		return
	}
	if !ctrl.fgHandler.Enabled(features.FeatureGateBootImageSkewEnforcement) {
		return
	}

	previous, previousOK := old.(*opv1.MachineConfiguration)
	if !previousOK {
		return
	}
	current, currentOK := cur.(*opv1.MachineConfiguration)
	if !currentOK {
		return
	}

	// An unchanged mode means the metric already holds the right value.
	previousMode := previous.Status.BootImageSkewEnforcementStatus.Mode
	currentMode := current.Status.BootImageSkewEnforcementStatus.Mode
	if previousMode != currentMode {
		ctrl.syncBootImageSkewEnforcementMetric(cur)
	}
}

// deleteMachineConfiguration is the delete-event handler registered on the
// MachineConfiguration informer. The deleted object itself is not inspected:
// when the BootImageSkewEnforcement feature gate is enabled, the
// mcc_boot_image_skew_enforcement_none gauge is reset to 0.
func (ctrl *Controller) deleteMachineConfiguration(_ any) {
	if ctrl.fgHandler != nil && ctrl.fgHandler.Enabled(features.FeatureGateBootImageSkewEnforcement) {
		// With the MachineConfiguration gone, clear the gauge.
		ctrlcommon.MCCBootImageSkewEnforcementNone.Set(0)
	}
}

// syncBootImageSkewEnforcementMetric sets the mcc_boot_image_skew_enforcement_none
// gauge from the BootImageSkewEnforcementStatus mode of the given object.
// The gauge becomes 1 when the mode is "None" (scaling operations may not be
// successful) and 0 for any other mode. If obj is not a
// *opv1.MachineConfiguration, a warning is logged and the gauge is left as is.
func (ctrl *Controller) syncBootImageSkewEnforcementMetric(obj any) {
	machineConfiguration, isMachineConfiguration := obj.(*opv1.MachineConfiguration)
	if !isMachineConfiguration {
		klog.Warningf("Expected MachineConfiguration object, got %T", obj)
		return
	}

	// Work out the gauge value first so the metric is written in one place.
	gaugeValue := 0.0
	if machineConfiguration.Status.BootImageSkewEnforcementStatus.Mode == opv1.BootImageSkewEnforcementModeStatusNone {
		gaugeValue = 1
	}
	ctrlcommon.MCCBootImageSkewEnforcementNone.Set(gaugeValue)
}
7 changes: 6 additions & 1 deletion pkg/controller/node/node_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ import (
configv1informer "github.com/openshift/client-go/config/informers/externalversions"
"github.com/openshift/client-go/machineconfiguration/clientset/versioned/fake"
informers "github.com/openshift/client-go/machineconfiguration/informers/externalversions"
fakeoperatorclient "github.com/openshift/client-go/operator/clientset/versioned/fake"
operatorinformer "github.com/openshift/client-go/operator/informers/externalversions"
"github.com/openshift/machine-config-operator/pkg/constants"
ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
daemonconsts "github.com/openshift/machine-config-operator/pkg/daemon/constants"
Expand Down Expand Up @@ -102,17 +104,20 @@ func (f *fixture) newControllerWithStopChan(stopCh <-chan struct{}) *Controller
f.client = fake.NewSimpleClientset(f.objects...)
f.kubeclient = k8sfake.NewSimpleClientset(f.kubeobjects...)
f.schedulerClient = fakeconfigv1client.NewSimpleClientset(f.schedulerObjects...)
operatorClient := fakeoperatorclient.NewSimpleClientset()

i := informers.NewSharedInformerFactory(f.client, noResyncPeriodFunc())
k8sI := kubeinformers.NewSharedInformerFactory(f.kubeclient, noResyncPeriodFunc())
ci := configv1informer.NewSharedInformerFactory(f.schedulerClient, noResyncPeriodFunc())
oi := operatorinformer.NewSharedInformerFactory(operatorClient, noResyncPeriodFunc())
c := NewWithCustomUpdateDelay(i.Machineconfiguration().V1().ControllerConfigs(), i.Machineconfiguration().V1().MachineConfigs(), i.Machineconfiguration().V1().MachineConfigPools(), k8sI.Core().V1().Nodes(),
k8sI.Core().V1().Pods(), i.Machineconfiguration().V1().MachineOSConfigs(), i.Machineconfiguration().V1().MachineOSBuilds(), i.Machineconfiguration().V1().MachineConfigNodes(), ci.Config().V1().Schedulers(), f.kubeclient, f.client, time.Millisecond, f.fgHandler)
k8sI.Core().V1().Pods(), i.Machineconfiguration().V1().MachineOSConfigs(), i.Machineconfiguration().V1().MachineOSBuilds(), i.Machineconfiguration().V1().MachineConfigNodes(), ci.Config().V1().Schedulers(), oi.Operator().V1().MachineConfigurations(), f.kubeclient, f.client, time.Millisecond, f.fgHandler)

c.ccListerSynced = alwaysReady
c.mcpListerSynced = alwaysReady
c.nodeListerSynced = alwaysReady
c.schedulerListerSynced = alwaysReady
c.mcopListerSynced = alwaysReady
c.eventRecorder = &record.FakeRecorder{}

i.Start(stopCh)
Expand Down
1 change: 1 addition & 0 deletions test/e2e-bootstrap/bootstrap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,7 @@ func createControllers(ctx *ctrlcommon.ControllerContext) []ctrlcommon.Controlle
ctx.InformerFactory.Machineconfiguration().V1().MachineOSBuilds(),
ctx.InformerFactory.Machineconfiguration().V1().MachineConfigNodes(),
ctx.ConfigInformerFactory.Config().V1().Schedulers(),
ctx.OperatorInformerFactory.Operator().V1().MachineConfigurations(),
ctx.ClientBuilder.KubeClientOrDie("node-update-controller"),
ctx.ClientBuilder.MachineConfigClientOrDie("node-update-controller"),
ctx.FeatureGatesHandler,
Expand Down