Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix orphaned CR handling #360

Merged
merged 2 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions controllers/nodehealthcheck_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ func (r *NodeHealthCheckReconciler) Reconcile(ctx context.Context, req ctrl.Requ
defer func() {
patchErr := r.patchStatus(ctx, log, nhc, nhcOrig)
if patchErr != nil {
log.Error(err, "failed to update status")
log.Error(patchErr, "failed to update status")
}
returnErr = utilerrors.NewAggregate([]error{patchErr, returnErr})
log.Info("reconcile end", "error", returnErr, "requeue", result.Requeue, "requeuAfter", result.RequeueAfter)
Expand Down Expand Up @@ -465,12 +465,14 @@ func (r *NodeHealthCheckReconciler) deleteOrphanedRemediationCRs(nhc *remediatio
}

// check conditions
// for some remediators (e.g. MDR) node deletion is expected. If so, wait until they are succeeded.
// for all other remediators, we can delete the CRs immediately after node deletion
permanentNodeDeletionExpectedCondition := getCondition(&cr, commonconditions.PermanentNodeDeletionExpectedType, log)
permanentNodeDeletionExpected := permanentNodeDeletionExpectedCondition != nil && permanentNodeDeletionExpectedCondition.Status == metav1.ConditionTrue
succeededCondition := getCondition(&cr, commonconditions.SucceededType, log)
succeeded := succeededCondition != nil && succeededCondition.Status == metav1.ConditionTrue
if !permanentNodeDeletionExpected || !succeeded {
// no node name change expected, or not succeeded yet
if permanentNodeDeletionExpected && !succeeded {
// node deletion is expected, but remediation not succeeded yet
return false
}

Expand Down Expand Up @@ -504,13 +506,10 @@ func (r *NodeHealthCheckReconciler) deleteOrphanedRemediationCRs(nhc *remediatio
resources.UpdateStatusNodeHealthy(nodeName, nhc)

if deleted, err := rm.DeleteRemediationCR(&cr, nhc); err != nil {
log.Error(err, "failed to delete remediation CR", "name", cr.GetName())
log.Error(err, "failed to delete orphaned remediation CR", "name", cr.GetName())
return err
} else if deleted {
permanentNodeDeletionExpectedCondition := getCondition(&cr, commonconditions.PermanentNodeDeletionExpectedType, log)
log.Info("deleted orphaned remediation CR", "name", cr.GetName(),
"reason", permanentNodeDeletionExpectedCondition.Reason,
"message", permanentNodeDeletionExpectedCondition.Message)
log.Info("deleted orphaned remediation CR", "name", cr.GetName(), "for deleted node", nodeName)
}

}
Expand Down
33 changes: 24 additions & 9 deletions controllers/nodehealthcheck_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,10 @@ var _ = Describe("Node Health Check CR", func() {
Expect(k8sClient.Update(context.Background(), &item)).To(Succeed())
Expect(k8sClient.Get(context.Background(), client.ObjectKeyFromObject(&item), &item)).To(Succeed())
}
Expect(k8sClient.Delete(context.Background(), &item)).To(Succeed())
err := k8sClient.Delete(context.Background(), &item)
if err != nil {
Expect(errors.IsNotFound(err)).To(BeTrue())
}
}
}

Expand Down Expand Up @@ -1288,15 +1291,15 @@ var _ = Describe("Node Health Check CR", func() {
Expect(k8sClient.Delete(context.Background(), node))
}

markCR := func() *unstructured.Unstructured {
By("marking CR as succeeded and permanent node deletion expected")
markCR := func(succeededStatus metav1.ConditionStatus) *unstructured.Unstructured {
By("marking CR for permanent node deletion expected")
cr := findRemediationCRForNHC("unhealthy-worker-node-1", underTest)
Expect(cr).ToNot(BeNil())

conditions := []interface{}{
map[string]interface{}{
"type": commonconditions.SucceededType,
"status": "True",
"status": string(succeededStatus),
"lastTransitionTime": time.Now().Format(time.RFC3339),
},
map[string]interface{}{
Expand All @@ -1322,18 +1325,30 @@ var _ = Describe("Node Health Check CR", func() {
Expect(underTest.Status.UnhealthyNodes).To(BeEmpty())
}

It("it should delete orphaned CR when CR was updated", func() {
ensureCRNotDeleted := func(cr *unstructured.Unstructured) {
By("ensuring CR is not deleted")
Consistently(func() error {
return k8sClient.Get(context.Background(), client.ObjectKeyFromObject(cr), cr)
}, "10s", "1s").ShouldNot(HaveOccurred())

// get updated NHC
Expect(k8sClient.Get(context.Background(), client.ObjectKeyFromObject(underTest), underTest)).To(Succeed())
Expect(underTest.Status.UnhealthyNodes).ToNot(BeEmpty())
}

It("it should delete orphaned CR when CR with expected node deletion succeeded", func() {
Expect(underTest.Status.UnhealthyNodes).To(HaveLen(1))
cr := markCR(metav1.ConditionFalse)
deleteNode()
time.Sleep(1 * time.Second)
cr := markCR()
ensureCRNotDeleted(cr)
cr = markCR(metav1.ConditionTrue)
expectCRDeletion(cr)
})

It("it should delete orphaned CR when node is deleted", func() {
Expect(underTest.Status.UnhealthyNodes).To(HaveLen(1))
cr := markCR()
time.Sleep(1 * time.Second)
cr := findRemediationCRForNHC("unhealthy-worker-node-1", underTest)
Expect(cr).ToNot(BeNil())
deleteNode()
expectCRDeletion(cr)
})
Expand Down
Loading