Skip to content

Commit

Permalink
Merge pull request #360 from slintes/fix-orphaned
Browse files Browse the repository at this point in the history
Fix orphaned CR handling
  • Loading branch information
openshift-merge-bot[bot] authored Jan 9, 2025
2 parents 7ad40e3 + 0cfed33 commit a4fff24
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 17 deletions.
15 changes: 7 additions & 8 deletions controllers/nodehealthcheck_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ func (r *NodeHealthCheckReconciler) Reconcile(ctx context.Context, req ctrl.Requ
defer func() {
patchErr := r.patchStatus(ctx, log, nhc, nhcOrig)
if patchErr != nil {
log.Error(err, "failed to update status")
log.Error(patchErr, "failed to update status")
}
returnErr = utilerrors.NewAggregate([]error{patchErr, returnErr})
log.Info("reconcile end", "error", returnErr, "requeue", result.Requeue, "requeuAfter", result.RequeueAfter)
Expand Down Expand Up @@ -465,12 +465,14 @@ func (r *NodeHealthCheckReconciler) deleteOrphanedRemediationCRs(nhc *remediatio
}

// check conditions
// for some remediators (e.g. MDR) node deletion is expected. If so, wait until they are succeeded.
// for all other remediators, we can delete the CRs immediately after node deletion
permanentNodeDeletionExpectedCondition := getCondition(&cr, commonconditions.PermanentNodeDeletionExpectedType, log)
permanentNodeDeletionExpected := permanentNodeDeletionExpectedCondition != nil && permanentNodeDeletionExpectedCondition.Status == metav1.ConditionTrue
succeededCondition := getCondition(&cr, commonconditions.SucceededType, log)
succeeded := succeededCondition != nil && succeededCondition.Status == metav1.ConditionTrue
if !permanentNodeDeletionExpected || !succeeded {
// no node name change expected, or not succeeded yet
if permanentNodeDeletionExpected && !succeeded {
// node deletion is expected, but remediation not succeeded yet
return false
}

Expand Down Expand Up @@ -504,13 +506,10 @@ func (r *NodeHealthCheckReconciler) deleteOrphanedRemediationCRs(nhc *remediatio
resources.UpdateStatusNodeHealthy(nodeName, nhc)

if deleted, err := rm.DeleteRemediationCR(&cr, nhc); err != nil {
log.Error(err, "failed to delete remediation CR", "name", cr.GetName())
log.Error(err, "failed to delete orphaned remediation CR", "name", cr.GetName())
return err
} else if deleted {
permanentNodeDeletionExpectedCondition := getCondition(&cr, commonconditions.PermanentNodeDeletionExpectedType, log)
log.Info("deleted orphaned remediation CR", "name", cr.GetName(),
"reason", permanentNodeDeletionExpectedCondition.Reason,
"message", permanentNodeDeletionExpectedCondition.Message)
log.Info("deleted orphaned remediation CR", "name", cr.GetName(), "for deleted node", nodeName)
}

}
Expand Down
33 changes: 24 additions & 9 deletions controllers/nodehealthcheck_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,10 @@ var _ = Describe("Node Health Check CR", func() {
Expect(k8sClient.Update(context.Background(), &item)).To(Succeed())
Expect(k8sClient.Get(context.Background(), client.ObjectKeyFromObject(&item), &item)).To(Succeed())
}
Expect(k8sClient.Delete(context.Background(), &item)).To(Succeed())
err := k8sClient.Delete(context.Background(), &item)
if err != nil {
Expect(errors.IsNotFound(err)).To(BeTrue())
}
}
}

Expand Down Expand Up @@ -1288,15 +1291,15 @@ var _ = Describe("Node Health Check CR", func() {
Expect(k8sClient.Delete(context.Background(), node))
}

markCR := func() *unstructured.Unstructured {
By("marking CR as succeeded and permanent node deletion expected")
markCR := func(succeededStatus metav1.ConditionStatus) *unstructured.Unstructured {
By("marking CR for permanent node deletion expected")
cr := findRemediationCRForNHC("unhealthy-worker-node-1", underTest)
Expect(cr).ToNot(BeNil())

conditions := []interface{}{
map[string]interface{}{
"type": commonconditions.SucceededType,
"status": "True",
"status": string(succeededStatus),
"lastTransitionTime": time.Now().Format(time.RFC3339),
},
map[string]interface{}{
Expand All @@ -1322,18 +1325,30 @@ var _ = Describe("Node Health Check CR", func() {
Expect(underTest.Status.UnhealthyNodes).To(BeEmpty())
}

It("it should delete orphaned CR when CR was updated", func() {
ensureCRNotDeleted := func(cr *unstructured.Unstructured) {
By("ensuring CR is not deleted")
Consistently(func() error {
return k8sClient.Get(context.Background(), client.ObjectKeyFromObject(cr), cr)
}, "10s", "1s").ShouldNot(HaveOccurred())

// get updated NHC
Expect(k8sClient.Get(context.Background(), client.ObjectKeyFromObject(underTest), underTest)).To(Succeed())
Expect(underTest.Status.UnhealthyNodes).ToNot(BeEmpty())
}

It("it should delete orphaned CR when CR with expected node deletion succeeded", func() {
Expect(underTest.Status.UnhealthyNodes).To(HaveLen(1))
cr := markCR(metav1.ConditionFalse)
deleteNode()
time.Sleep(1 * time.Second)
cr := markCR()
ensureCRNotDeleted(cr)
cr = markCR(metav1.ConditionTrue)
expectCRDeletion(cr)
})

It("it should delete orphaned CR when node is deleted", func() {
Expect(underTest.Status.UnhealthyNodes).To(HaveLen(1))
cr := markCR()
time.Sleep(1 * time.Second)
cr := findRemediationCRForNHC("unhealthy-worker-node-1", underTest)
Expect(cr).ToNot(BeNil())
deleteNode()
expectCRDeletion(cr)
})
Expand Down

0 comments on commit a4fff24

Please sign in to comment.