Skip to content

Commit d545c84

Browse files
committed
Remove finalizer on NHC Timeout deleted FAR CR
Create three functions to split the reconcile logic into smaller reusable functions: remove the FAR CR finalizer on deletion, remove the FAR taints on deletion, and stop the agent execution when the NHC timeout annotation exists or the FAR CR was deleted
1 parent ce57ab4 commit d545c84

File tree

1 file changed

+73
-49
lines changed

1 file changed

+73
-49
lines changed

controllers/fenceagentsremediation_controller.go

Lines changed: 73 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
commonEvents "github.com/medik8s/common/pkg/events"
2929
commonResources "github.com/medik8s/common/pkg/resources"
3030

31+
corev1 "k8s.io/api/core/v1"
3132
apiErrors "k8s.io/apimachinery/pkg/api/errors"
3233
"k8s.io/apimachinery/pkg/api/meta"
3334
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -132,13 +133,22 @@ func (r *FenceAgentsRemediationReconciler) Reconcile(ctx context.Context, req ct
132133
return emptyResult, err
133134
}
134135

135-
// Check NHC timeout annotation
136+
// Check NHC timeout annotation and stop the agent, since remediation is no longer relevant (most likely because fixed by a different remediator)
136137
if isTimedOutByNHC(far) {
137138
r.Log.Info(utils.EventMessageRemediationStoppedByNHC)
138-
r.Executor.Remove(far.GetUID())
139+
r.stopAgentAndGetCrStatus(far, node.Name)
139140
utils.UpdateConditions(utils.RemediationInterruptedByNHC, far, r.Log)
140141
commonEvents.RemediationStoppedByNHC(r.Recorder, far)
141-
return emptyResult, err
142+
143+
if far.GetDeletionTimestamp() != nil {
144+
if res, err := r.removeFarTaints(far, node); res == emptyResult && err != nil {
145+
return res, err
146+
}
147+
// Removing FAR CR finalizer so NHC deletion of the remediation can be completed
148+
r.Log.Info("Removing finalizer of timed-out remediation deleted by NHC", "remediation name", far.GetName())
149+
return emptyResult, r.removeFarFinalizer(far)
150+
}
151+
return emptyResult, nil
142152
}
143153

144154
// Add finalizer when the CR is created
@@ -158,54 +168,12 @@ func (r *FenceAgentsRemediationReconciler) Reconcile(ctx context.Context, req ct
158168
r.Log.Info("CR's deletion timestamp is not zero, and FAR finalizer exists", "CR Name", req.Name)
159169

160170
if !meta.IsStatusConditionPresentAndEqual(far.Status.Conditions, commonConditions.SucceededType, metav1.ConditionTrue) {
161-
processingCondition := meta.FindStatusCondition(far.Status.Conditions, commonConditions.ProcessingType).Status
162-
fenceAgentActionSucceededCondition := meta.FindStatusCondition(far.Status.Conditions, utils.FenceAgentActionSucceededType).Status
163-
succeededCondition := meta.FindStatusCondition(far.Status.Conditions, commonConditions.SucceededType).Status
164-
r.Log.Info("FAR didn't finish remediate the node ", "CR Name", req.Name, "processing condition", processingCondition,
165-
"fenceAgentActionSucceeded condition", fenceAgentActionSucceededCondition, "succeeded condition", succeededCondition)
166-
r.Executor.Remove(far.GetUID())
171+
r.stopAgentAndGetCrStatus(far, node.Name)
167172
}
168-
169-
// remove out-of-service taint when using OutOfServiceTaint remediation
170-
if far.Spec.RemediationStrategy == v1alpha1.OutOfServiceTaintRemediationStrategy {
171-
r.Log.Info("Removing out-of-service taint", "Fence Agent", far.Spec.Agent, "Node Name", node.Name)
172-
taint := utils.CreateOutOfServiceTaint()
173-
if err := utils.RemoveTaint(r.Client, node.Name, taint); err != nil {
174-
if apiErrors.IsConflict(err) {
175-
r.Log.Error(err, "Failed to remove taint from node due to node update, retrying... ,", "node name", node.Name, "taint key", taint.Key, "taint effect", taint.Effect)
176-
return ctrl.Result{RequeueAfter: time.Second}, nil
177-
} else if !apiErrors.IsNotFound(err) {
178-
r.Log.Error(err, "Failed to remove taint from node,", "node name", node.Name, "taint key", taint.Key, "taint effect", taint.Effect)
179-
return emptyResult, err
180-
}
181-
}
182-
r.Log.Info("out-of-service taint was removed", "Node Name", req.Name)
183-
commonEvents.NormalEvent(r.Recorder, node, utils.EventReasonRemoveOutOfServiceTaint, utils.EventMessageRemoveOutOfServiceTaint)
173+
if res, err := r.removeFarTaints(far, node); res == emptyResult && err != nil {
174+
return res, err
184175
}
185-
186-
// remove node's taints
187-
taint := utils.CreateRemediationTaint()
188-
if err := utils.RemoveTaint(r.Client, node.Name, taint); err != nil {
189-
if apiErrors.IsConflict(err) {
190-
r.Log.Info("Failed to remove taint from node due to node update, retrying... ,", "node name", node.Name, "taint key", taint.Key, "taint effect", taint.Effect)
191-
return ctrl.Result{RequeueAfter: time.Second}, nil
192-
193-
} else if !apiErrors.IsNotFound(err) {
194-
r.Log.Error(err, "Failed to remove taint from node,", "node name", node.Name, "taint key", taint.Key, "taint effect", taint.Effect)
195-
return emptyResult, err
196-
}
197-
}
198-
199-
r.Log.Info("FAR remediation taint was removed", "Node Name", node.Name)
200-
commonEvents.NormalEvent(r.Recorder, node, utils.EventReasonRemoveRemediationTaint, utils.EventMessageRemoveRemediationTaint)
201-
// remove finalizer
202-
controllerutil.RemoveFinalizer(far, v1alpha1.FARFinalizer)
203-
if err := r.Client.Update(context.Background(), far); err != nil {
204-
return emptyResult, fmt.Errorf("failed to remove finalizer from CR - %w", err)
205-
}
206-
r.Log.Info("Finalizer was removed", "CR Name", req.Name)
207-
commonEvents.NormalEvent(r.Recorder, far, utils.EventReasonRemoveFinalizer, utils.EventMessageRemoveFinalizer)
208-
return emptyResult, nil
176+
return emptyResult, r.removeFarFinalizer(far)
209177
}
210178
// Add FAR (medik8s) remediation taint
211179
taintAdded, err := utils.AppendTaint(r.Client, node.Name, utils.CreateRemediationTaint())
@@ -282,6 +250,62 @@ func (r *FenceAgentsRemediationReconciler) Reconcile(ctx context.Context, req ct
282250

283251
return emptyResult, nil
284252
}
253+
func (r *FenceAgentsRemediationReconciler) removeFarTaints(far *v1alpha1.FenceAgentsRemediation, node *corev1.Node) (ctrl.Result, error) {
254+
emptyResult := ctrl.Result{}
255+
// remove out-of-service taint when using OutOfServiceTaint remediation
256+
if far.Spec.RemediationStrategy == v1alpha1.OutOfServiceTaintRemediationStrategy {
257+
r.Log.Info("Removing out-of-service taint", "Fence Agent", far.Spec.Agent, "Node Name", node.Name)
258+
taint := utils.CreateOutOfServiceTaint()
259+
if err := utils.RemoveTaint(r.Client, node.Name, taint); err != nil {
260+
if apiErrors.IsConflict(err) {
261+
r.Log.Error(err, "Failed to remove taint from node due to node update, retrying... ,", "node name", node.Name, "taint key", taint.Key, "taint effect", taint.Effect)
262+
return ctrl.Result{RequeueAfter: time.Second}, nil
263+
} else if !apiErrors.IsNotFound(err) {
264+
r.Log.Error(err, "Failed to remove taint from node,", "node name", node.Name, "taint key", taint.Key, "taint effect", taint.Effect)
265+
return emptyResult, err
266+
}
267+
}
268+
r.Log.Info("out-of-service taint was removed", "Node Name", node.Name)
269+
commonEvents.NormalEvent(r.Recorder, node, utils.EventReasonRemoveOutOfServiceTaint, utils.EventMessageRemoveOutOfServiceTaint)
270+
}
271+
272+
// remove node's taints
273+
taint := utils.CreateRemediationTaint()
274+
if err := utils.RemoveTaint(r.Client, node.Name, taint); err != nil {
275+
if apiErrors.IsConflict(err) {
276+
r.Log.Info("Failed to remove taint from node due to node update, retrying... ,", "node name", node.Name, "taint key", taint.Key, "taint effect", taint.Effect)
277+
return ctrl.Result{RequeueAfter: time.Second}, nil
278+
279+
} else if !apiErrors.IsNotFound(err) {
280+
r.Log.Error(err, "Failed to remove taint from node,", "node name", node.Name, "taint key", taint.Key, "taint effect", taint.Effect)
281+
return emptyResult, err
282+
}
283+
}
284+
285+
r.Log.Info("FAR remediation taint was removed", "Node Name", node.Name)
286+
commonEvents.NormalEvent(r.Recorder, node, utils.EventReasonRemoveRemediationTaint, utils.EventMessageRemoveRemediationTaint)
287+
return emptyResult, nil
288+
}
289+
290+
func (r *FenceAgentsRemediationReconciler) stopAgentAndGetCrStatus(far *v1alpha1.FenceAgentsRemediation, nodeName string) {
291+
processingCondition := meta.FindStatusCondition(far.Status.Conditions, commonConditions.ProcessingType).Status
292+
fenceAgentActionSucceededCondition := meta.FindStatusCondition(far.Status.Conditions, utils.FenceAgentActionSucceededType).Status
293+
succeededCondition := meta.FindStatusCondition(far.Status.Conditions, commonConditions.SucceededType).Status
294+
r.Log.Info("FAR didn't finish to remediate the node", "Node Name", nodeName, "Agent Name", far.Spec.Agent, "processing condition", processingCondition,
295+
"fenceAgentActionSucceeded condition", fenceAgentActionSucceededCondition, "succeeded condition", succeededCondition)
296+
r.Executor.Remove(far.GetUID())
297+
}
298+
299+
// removeFarFinalizer removes FAR finalizer, update CR, and emits an event
300+
func (r *FenceAgentsRemediationReconciler) removeFarFinalizer(far *v1alpha1.FenceAgentsRemediation) error {
301+
controllerutil.RemoveFinalizer(far, v1alpha1.FARFinalizer)
302+
if err := r.Client.Update(context.Background(), far); err != nil {
303+
return fmt.Errorf("failed to remove finalizer from CR - %w", err)
304+
}
305+
r.Log.Info("Finalizer was removed", "CR Name", far.Name)
306+
commonEvents.NormalEvent(r.Recorder, far, utils.EventReasonRemoveFinalizer, utils.EventMessageRemoveFinalizer)
307+
return nil
308+
}
285309

286310
// isTimedOutByNHC checks if NHC set a timeout annotation on the CR
287311
func isTimedOutByNHC(far *v1alpha1.FenceAgentsRemediation) bool {

0 commit comments

Comments
 (0)