From 813a86ac88d09c3e88f0b50eda4a64a61ad97812 Mon Sep 17 00:00:00 2001 From: oraz Date: Wed, 13 Mar 2024 17:35:08 +0200 Subject: [PATCH] Emit events after CR update is successful No need for EventReasonEvictingPods event as CR status has better information of maintenance progress indication, and emit an events only after CR update is successful. Signed-off-by: oraz --- controllers/nodemaintenance_controller.go | 20 ++++++++++--------- .../nodemaintenance_controller_test.go | 2 +- pkg/utils/events.go | 12 +++++------ test/e2e/node_maintenance_test.go | 4 ++-- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/controllers/nodemaintenance_controller.go b/controllers/nodemaintenance_controller.go index 2fb8ed0a..a81117e5 100644 --- a/controllers/nodemaintenance_controller.go +++ b/controllers/nodemaintenance_controller.go @@ -50,6 +50,7 @@ const ( maxAllowedErrorToUpdateOwnedLease = 3 waitDurationOnDrainError = 5 * time.Second FixedDurationReconcileLog = "Reconciling with fixed duration" + nodeNotFoundError = "nodes \"%s\" not found" //lease consts LeaseHolderIdentity = "node-maintenance" @@ -122,16 +123,16 @@ func (r *NodeMaintenanceReconciler) Reconcile(ctx context.Context, req ctrl.Requ drainer, err := createDrainer(ctx, r.MgrConfig) if err != nil { return emptyResult, err - // Add finalizer when object is created } if !controllerutil.ContainsFinalizer(nm, v1beta1.NodeMaintenanceFinalizer) && nm.ObjectMeta.DeletionTimestamp.IsZero() { + // Add finalizer when object is created controllerutil.AddFinalizer(nm, v1beta1.NodeMaintenanceFinalizer) - // begin maintenance on adding finalizer - utils.NormalEvent(r.Recorder, nm, utils.EventReasonBeginMaintenance, utils.EventMessageBeginMaintenance) if err := r.Client.Update(ctx, nm); err != nil { return r.onReconcileError(ctx, nm, drainer, err) } + // begin maintenance on adding finalizer + utils.NormalEvent(r.Recorder, nm, utils.EventReasonBeginMaintenance, utils.EventMessageBeginMaintenance) } else if controllerutil.ContainsFinalizer(nm, v1beta1.NodeMaintenanceFinalizer) && !nm.ObjectMeta.DeletionTimestamp.IsZero() { // The object is being deleted r.logger.Info("Deletion timestamp not zero") @@ -146,11 +147,11 @@ func (r *NodeMaintenanceReconciler) Reconcile(ctx context.Context, req ctrl.Requ // Remove finalizer controllerutil.RemoveFinalizer(nm, v1beta1.NodeMaintenanceFinalizer) - // end maintenance on removing finalizer, taints, and node is already uncordoned - utils.NormalEvent(r.Recorder, nm, utils.EventReasonRemovedMaintenance, utils.EventMessageRemovedMaintenance) if err := r.Client.Update(ctx, nm); err != nil { return r.onReconcileError(ctx, nm, drainer, err) } + // end maintenance on removing finalizer, taints, and node is already uncordoned + utils.NormalEvent(r.Recorder, nm, utils.EventReasonRemovedMaintenance, utils.EventMessageRemovedMaintenance) return emptyResult, nil } @@ -203,7 +204,6 @@ func (r *NodeMaintenanceReconciler) Reconcile(ctx context.Context, req ctrl.Requ if nm.Status.Phase != v1beta1.MaintenanceRunning || nm.Status.ErrorOnLeaseCount != 0 { nm.Status.Phase = v1beta1.MaintenanceRunning // Another chance to evict pods - clear ErrorOnLeaseCount and try again to put the node under maintenance - utils.NormalEvent(r.Recorder, nm, utils.EventReasonEvictingPods, utils.EventMessageEvictingPods) nm.Status.ErrorOnLeaseCount = 0 } @@ -227,8 +227,6 @@ func (r *NodeMaintenanceReconciler) Reconcile(ctx context.Context, req ctrl.Requ if err = drain.RunNodeDrain(drainer, nodeName); err != nil { r.logger.Info("Not all pods evicted", "nodeName", nodeName, "error", err) - // maintenance in progress - some pods haven't been evicted - utils.NormalEvent(r.Recorder, nm, utils.EventReasonEvictingPods, utils.EventMessageEvictingPods) waitOnReconcile := waitDurationOnDrainError return r.onReconcileErrorWithRequeue(ctx, nm, drainer, err, &waitOnReconcile) } else if nm.Status.Phase != v1beta1.MaintenanceSucceeded { @@ -380,7 +378,7 @@ func (r *NodeMaintenanceReconciler) stopNodeMaintenanceImp(ctx context.Context, return err } - // end maintenance on removing finalizer - taints have been removed and node was uncordoned + // stop maintenance - remove the added taints and uncordon the node utils.NormalEvent(r.Recorder, node, utils.EventReasonUncordonNode, utils.EventMessageUncordonNode) if err := r.LeaseManager.InvalidateLease(ctx, node); err != nil { return err @@ -461,6 +459,10 @@ func (r *NodeMaintenanceReconciler) onReconcileErrorWithRequeue(ctx context.Cont if updateErr != nil { r.logger.Error(updateErr, "Failed to update NodeMaintenance with \"Failed\" status") } + if nm.Spec.NodeName != "" && err.Error() == fmt.Sprintf(nodeNotFoundError, nm.Spec.NodeName) { + // don't return an error in case of a missing node, as it won't be found in the future. + return ctrl.Result{}, nil + } if duration != nil { r.logger.Info(FixedDurationReconcileLog) return ctrl.Result{RequeueAfter: *duration}, nil diff --git a/controllers/nodemaintenance_controller_test.go b/controllers/nodemaintenance_controller_test.go index ec4737cf..d6f3037d 100644 --- a/controllers/nodemaintenance_controller_test.go +++ b/controllers/nodemaintenance_controller_test.go @@ -250,7 +250,7 @@ var _ = Describe("Node Maintenance", func() { Expect(maintenance.Status.Phase).To(Equal(v1beta1.MaintenanceFailed)) verifyEvent(corev1.EventTypeNormal, utils.EventReasonBeginMaintenance, utils.EventMessageBeginMaintenance) verifyEvent(corev1.EventTypeWarning, utils.EventReasonFailedMaintenance, utils.EventMessageFailedMaintenance) - verifyNoEvent(corev1.EventTypeNormal, utils.EventReasonEvictingPods, utils.EventMessageEvictingPods) + verifyNoEvent(corev1.EventTypeNormal, utils.EventReasonSucceedMaintenance, utils.EventMessageSucceedMaintenance) }) }) }) diff --git a/pkg/utils/events.go b/pkg/utils/events.go index 37966a77..2bdf168d 100644 --- a/pkg/utils/events.go +++ b/pkg/utils/events.go @@ -9,19 +9,17 @@ import ( const ( // events reasons EventReasonBeginMaintenance = "BeginMaintenance" - EventReasonEvictingPods = "EvictingPods" EventReasonFailedMaintenance = "FailedMaintenance" EventReasonSucceedMaintenance = "SucceedMaintenance" EventReasonUncordonNode = "UncordonNode" EventReasonRemovedMaintenance = "RemovedMaintenance" // events messages - EventMessageBeginMaintenance = "Begin maintenance" - EventMessageEvictingPods = "Evicting pods" - EventMessageFailedMaintenance = "Failed maintenance" - EventMessageSucceedMaintenance = "Node maintenance was succeed" - EventMessageUncordonNode = "Uncordon node" - EventMessageRemovedMaintenance = "Removed maintenance" + EventMessageBeginMaintenance = "Begin a node maintenance" + EventMessageFailedMaintenance = "Failed a node maintenance" + EventMessageSucceedMaintenance = "Node maintenance was succeeded" + EventMessageUncordonNode = "Uncordon a node" + EventMessageRemovedMaintenance = "Removed a node maintenance" ) // NormalEvent will record an event with type Normal and fixed message. diff --git a/test/e2e/node_maintenance_test.go b/test/e2e/node_maintenance_test.go index baef613a..f0eb76c8 100644 --- a/test/e2e/node_maintenance_test.go +++ b/test/e2e/node_maintenance_test.go @@ -145,8 +145,8 @@ var _ = Describe("Starting Maintenance", func() { var ( maintenanceNodeName string - nodeMaintenance *nmo.NodeMaintenance - startTime time.Time + nodeMaintenance *nmo.NodeMaintenance + startTime time.Time ) BeforeEach(func() {