Skip to content

Commit

Permalink
Merge pull request #96 from clobrano/add_events/0
Browse files Browse the repository at this point in the history
Emit events during remediation process
  • Loading branch information
openshift-merge-bot[bot] authored Jan 9, 2024
2 parents 75b8d43 + 91197ab commit 0ba5428
Show file tree
Hide file tree
Showing 8 changed files with 214 additions and 23 deletions.
33 changes: 27 additions & 6 deletions controllers/machinedeletionremediation_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/go-logr/logr"
commonannotations "github.com/medik8s/common/pkg/annotations"
commonconditions "github.com/medik8s/common/pkg/conditions"
commonevents "github.com/medik8s/common/pkg/events"
"github.com/pkg/errors"

v1 "k8s.io/api/core/v1"
Expand All @@ -34,6 +35,7 @@ import (
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"

Expand All @@ -59,6 +61,7 @@ const (
failedToDeleteMachineError = "failed to delete machine of node name: %s"
nodeNotFoundErrorMsg = "failed to fetch node"
machineNotFoundErrorMsg = "failed to fetch machine of node"
noControllerOwnerErrorMsg = "ignoring remediation of the machine: the machine has no controller owner"
// Cluster Provider messages
machineDeletedOnCloudProviderMessage = "Machine will be deleted and the unhealthy node replaced. This is a Cloud cluster provider: the new node is expected to have a new name"
machineDeletedOnBareMetalProviderMessage = "Machine will be deleted and the unhealthy node replaced. This is a BareMetal cluster provider: the new node is NOT expected to have a new name"
Expand All @@ -74,7 +77,11 @@ const (
remediationSkippedNodeNotFound conditionChangeReason = "RemediationSkippedNodeNotFound"
remediationSkippedMachineNotFound conditionChangeReason = "RemediationSkippedMachineNotFound"
remediationSkippedNoControllerOwner conditionChangeReason = "RemediationSkippedNoControllerOwner"
remediationFailed conditionChangeReason = "remediationFailed"
remediationFailed conditionChangeReason = "RemediationFailed"

// Event reasons and messages
machineDeletionRequestedEventReason = "MachineDeletionRequested"
machineDeletionRequestedEventMessage = "requesting machine deletion"
)

var (
Expand All @@ -86,8 +93,9 @@ var (
// MachineDeletionRemediationReconciler reconciles a MachineDeletionRemediation object
type MachineDeletionRemediationReconciler struct {
client.Client
Log logr.Logger
Scheme *runtime.Scheme
Log logr.Logger
Scheme *runtime.Scheme
Recorder record.EventRecorder
}

//+kubebuilder:rbac:groups=machine-deletion-remediation.medik8s.io,resources=machinedeletionremediations,verbs=get;list;watch;create;update;patch;delete
Expand Down Expand Up @@ -127,9 +135,11 @@ func (r *MachineDeletionRemediationReconciler) Reconcile(ctx context.Context, re
finalResult.RequeueAfter = time.Second
}
}()
commonevents.RemediationStarted(r.Recorder, mdr)

if r.isTimedOutByNHC(mdr) {
log.Info("NHC time out annotation found, stopping remediation")
commonevents.RemediationStoppedByNHC(r.Recorder, mdr)
_, err = r.updateConditions(remediationTimedOutByNhc, mdr)
return ctrl.Result{}, err
}
Expand All @@ -148,10 +158,13 @@ func (r *MachineDeletionRemediationReconciler) Reconcile(ctx context.Context, re
// situation. An error is returned only if it does not match the following custom errors, or
// updateConditions fails.
if err == nodeNotFoundError {
commonevents.WarningEvent(r.Recorder, mdr, string(remediationSkippedNodeNotFound), nodeNotFoundErrorMsg)
_, err = r.updateConditions(remediationSkippedNodeNotFound, mdr)
} else if err == machineNotFoundError {
commonevents.WarningEvent(r.Recorder, mdr, string(remediationSkippedMachineNotFound), machineNotFoundErrorMsg)
_, err = r.updateConditions(remediationSkippedMachineNotFound, mdr)
} else if err == unrecoverableError {
commonevents.WarningEvent(r.Recorder, mdr, string(remediationFailed), unrecoverableError.Error())
_, err = r.updateConditions(remediationFailed, mdr)
}

Expand All @@ -167,13 +180,18 @@ func (r *MachineDeletionRemediationReconciler) Reconcile(ctx context.Context, re
// verify nodes count restoration even if machine == nil.
if machine == nil || machine.GetCreationTimestamp().After(mdr.GetCreationTimestamp().Time) {
if isRestored, err := r.isExpectedNodesNumberRestored(ctx, mdr); err != nil {
log.Error(err, "could not verify if node was restored")
msg := "could not verify if node was restored"
log.Error(err, msg)
commonevents.WarningEvent(r.Recorder, mdr, "unableToVerifyNodesCount", err.Error())
if err == unrecoverableError {
return ctrl.Result{}, nil
}
return ctrl.Result{}, err
} else if isRestored {
_, err = r.updateConditions(remediationFinishedMachineDeleted, mdr)
if err == nil {
commonevents.RemediationFinished(r.Recorder, mdr)
}
return ctrl.Result{}, err
}
log.Info("waiting for the nodes count to be re-provisioned")
Expand Down Expand Up @@ -204,6 +222,7 @@ func (r *MachineDeletionRemediationReconciler) Reconcile(ctx context.Context, re

if updateRequired := r.setPermanentNodeDeletionExpectedCondition(status, mdr); updateRequired {
log.Info(permanentNodeDeletionExpectedMsg)
commonevents.NormalEvent(r.Recorder, mdr, "PermanentNodeDeletionExpected", permanentNodeDeletionExpectedMsg)
return ctrl.Result{RequeueAfter: time.Second}, nil
}

Expand All @@ -214,7 +233,8 @@ func (r *MachineDeletionRemediationReconciler) Reconcile(ctx context.Context, re
}

if !hasControllerOwner(machine) {
log.Info("ignoring remediation of the machine: the machine has no controller owner", "machine", machine.GetName())
log.Info(noControllerOwnerErrorMsg, "machine", machine.GetName(), "remediation name", mdr.Name)
commonevents.WarningEvent(r.Recorder, mdr, string(remediationSkippedNoControllerOwner), noControllerOwnerErrorMsg)
_, err = r.updateConditions(remediationSkippedNoControllerOwner, mdr)
return ctrl.Result{}, err
}
Expand All @@ -225,12 +245,13 @@ func (r *MachineDeletionRemediationReconciler) Reconcile(ctx context.Context, re
return ctrl.Result{}, errors.Wrapf(err, "failed to save Machine's name and namespace")
}

log.Info("request machine deletion", "machine", machine.GetName())
log.Info("request machine deletion", "machine", machine.GetName(), "remediation name", mdr.Name)
err = r.Delete(ctx, machine)
if err != nil {
log.Error(err, "failed to delete machine", "machine", machine.GetName())
return ctrl.Result{}, err
}
commonevents.NormalEvent(r.Recorder, mdr, machineDeletionRequestedEventReason, machineDeletionRequestedEventMessage)

// requeue immediately to check machine deletion progression
return ctrl.Result{Requeue: true}, nil
Expand Down
116 changes: 114 additions & 2 deletions controllers/machinedeletionremediation_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ var _ = Describe("Machine Deletion Remediation CR", func() {
{commonconditions.ProcessingType, metav1.ConditionFalse, remediationSkippedNodeNotFound},
{commonconditions.SucceededType, metav1.ConditionFalse, remediationSkippedNodeNotFound}})
verifyConditionUnset(commonconditions.PermanentNodeDeletionExpectedType)
verifyEvents([]expectedEvent{
{v1.EventTypeWarning, "RemediationSkippedNodeNotFound", "failed to fetch node", true},
{v1.EventTypeNormal, machineDeletionRequestedEventReason, machineDeletionRequestedEventMessage, false},
})
})
})

Expand All @@ -162,6 +166,10 @@ var _ = Describe("Machine Deletion Remediation CR", func() {
{commonconditions.SucceededType, metav1.ConditionFalse, remediationSkippedNoControllerOwner},
// Cluster provider is not set in this test
{commonconditions.PermanentNodeDeletionExpectedType, metav1.ConditionUnknown, v1alpha1.MachineDeletionOnUndefinedProviderReason}})
verifyEvents([]expectedEvent{
{v1.EventTypeWarning, "RemediationSkippedNoControllerOwner", noControllerOwnerErrorMsg, true},
{v1.EventTypeNormal, machineDeletionRequestedEventReason, machineDeletionRequestedEventMessage, false},
})
})
})

Expand All @@ -180,6 +188,10 @@ var _ = Describe("Machine Deletion Remediation CR", func() {
{commonconditions.SucceededType, metav1.ConditionFalse, remediationSkippedNoControllerOwner},
// Cluster provider is not set in this test
{commonconditions.PermanentNodeDeletionExpectedType, metav1.ConditionUnknown, v1alpha1.MachineDeletionOnUndefinedProviderReason}})
verifyEvents([]expectedEvent{
{v1.EventTypeWarning, "RemediationSkippedNoControllerOwner", noControllerOwnerErrorMsg, true},
{v1.EventTypeNormal, machineDeletionRequestedEventReason, machineDeletionRequestedEventMessage, false},
})
})
})

Expand All @@ -199,6 +211,11 @@ var _ = Describe("Machine Deletion Remediation CR", func() {
{commonconditions.SucceededType, metav1.ConditionFalse, remediationSkippedNoControllerOwner},
// Cluster provider is not set in this test
{commonconditions.PermanentNodeDeletionExpectedType, metav1.ConditionUnknown, v1alpha1.MachineDeletionOnUndefinedProviderReason}})

verifyEvents([]expectedEvent{
{v1.EventTypeWarning, "RemediationSkippedNoControllerOwner", noControllerOwnerErrorMsg, true},
{v1.EventTypeNormal, machineDeletionRequestedEventReason, machineDeletionRequestedEventMessage, false},
})
})
})

Expand Down Expand Up @@ -239,6 +256,9 @@ var _ = Describe("Machine Deletion Remediation CR", func() {
{commonconditions.SucceededType, metav1.ConditionTrue, remediationFinishedMachineDeleted},
// Cluster provider is not set in this test
{commonconditions.PermanentNodeDeletionExpectedType, metav1.ConditionUnknown, v1alpha1.MachineDeletionOnUndefinedProviderReason}})
verifyEvents([]expectedEvent{
{v1.EventTypeNormal, machineDeletionRequestedEventReason, machineDeletionRequestedEventMessage, true},
})
})
})

Expand Down Expand Up @@ -274,6 +294,10 @@ var _ = Describe("Machine Deletion Remediation CR", func() {
{commonconditions.SucceededType, metav1.ConditionTrue, remediationFinishedMachineDeleted},
// Cluster provider is not set in this test
{commonconditions.PermanentNodeDeletionExpectedType, metav1.ConditionUnknown, v1alpha1.MachineDeletionOnUndefinedProviderReason}})
verifyEvents([]expectedEvent{
{v1.EventTypeNormal, machineDeletionRequestedEventReason, machineDeletionRequestedEventMessage, true},
{v1.EventTypeNormal, "RemediationFinished", "Remediation finished", true},
})
})
})

Expand All @@ -285,6 +309,12 @@ var _ = Describe("Machine Deletion Remediation CR", func() {
})
It("sets PermanentNodeDeletionExpected condition to false", func() {
verifyConditionMatches(commonconditions.PermanentNodeDeletionExpectedType, metav1.ConditionFalse, v1alpha1.MachineDeletionOnBareMetalProviderReason)
verifyEvents([]expectedEvent{
{v1.EventTypeNormal,
"PermanentNodeDeletionExpected",
"Machine will be deleted and the unhealthy node replaced. This is a BareMetal cluster provider: the new node is NOT expected to have a new name",
true},
})
})
})

Expand All @@ -296,6 +326,12 @@ var _ = Describe("Machine Deletion Remediation CR", func() {
})
It("sets PermanentNodeDeletionExpected condition to true", func() {
verifyConditionMatches(commonconditions.PermanentNodeDeletionExpectedType, metav1.ConditionTrue, v1alpha1.MachineDeletionOnCloudProviderReason)
verifyEvents([]expectedEvent{
{v1.EventTypeNormal,
"PermanentNodeDeletionExpected",
"Machine will be deleted and the unhealthy node replaced. This is a Cloud cluster provider: the new node is expected to have a new name",
true},
})
})
})

Expand All @@ -308,6 +344,12 @@ var _ = Describe("Machine Deletion Remediation CR", func() {
})
It("sets PermanentNodeDeletionExpected condition to false", func() {
verifyConditionMatches(commonconditions.PermanentNodeDeletionExpectedType, metav1.ConditionUnknown, v1alpha1.MachineDeletionOnUndefinedProviderReason)
verifyEvents([]expectedEvent{
{v1.EventTypeNormal,
"PermanentNodeDeletionExpected",
"Machine will be deleted and the unhealthy node replaced. Unknown cluster provider: no information about the new node's name",
true},
})
})
})
})
Expand All @@ -327,6 +369,10 @@ var _ = Describe("Machine Deletion Remediation CR", func() {
{commonconditions.ProcessingType, metav1.ConditionFalse, remediationSkippedNodeNotFound},
{commonconditions.SucceededType, metav1.ConditionFalse, remediationSkippedNodeNotFound}})
verifyConditionUnset(commonconditions.PermanentNodeDeletionExpectedType)
verifyEvents([]expectedEvent{
{v1.EventTypeWarning, "RemediationSkippedNodeNotFound", "failed to fetch node", true},
{v1.EventTypeNormal, machineDeletionRequestedEventReason, machineDeletionRequestedEventMessage, false},
})
})
})

Expand All @@ -346,6 +392,10 @@ var _ = Describe("Machine Deletion Remediation CR", func() {
{commonconditions.ProcessingType, metav1.ConditionFalse, remediationFailed},
{commonconditions.SucceededType, metav1.ConditionFalse, remediationFailed}})
verifyConditionUnset(commonconditions.PermanentNodeDeletionExpectedType)
verifyEvents([]expectedEvent{
{v1.EventTypeWarning, "RemediationFailed", unrecoverableError.Error(), true},
{v1.EventTypeNormal, machineDeletionRequestedEventReason, machineDeletionRequestedEventMessage, false},
})
})
})

Expand All @@ -365,6 +415,10 @@ var _ = Describe("Machine Deletion Remediation CR", func() {
{commonconditions.ProcessingType, metav1.ConditionFalse, remediationFailed},
{commonconditions.SucceededType, metav1.ConditionFalse, remediationFailed}})
verifyConditionUnset(commonconditions.PermanentNodeDeletionExpectedType)
verifyEvents([]expectedEvent{
{v1.EventTypeWarning, "RemediationFailed", unrecoverableError.Error(), true},
{v1.EventTypeNormal, machineDeletionRequestedEventReason, machineDeletionRequestedEventMessage, false},
})
})
})

Expand All @@ -384,6 +438,10 @@ var _ = Describe("Machine Deletion Remediation CR", func() {
{commonconditions.ProcessingType, metav1.ConditionFalse, remediationFailed},
{commonconditions.SucceededType, metav1.ConditionFalse, remediationFailed}})
verifyConditionUnset(commonconditions.PermanentNodeDeletionExpectedType)
verifyEvents([]expectedEvent{
{v1.EventTypeWarning, "RemediationFailed", unrecoverableError.Error(), true},
{v1.EventTypeNormal, machineDeletionRequestedEventReason, machineDeletionRequestedEventMessage, false},
})
})
})

Expand All @@ -403,6 +461,10 @@ var _ = Describe("Machine Deletion Remediation CR", func() {
{commonconditions.ProcessingType, metav1.ConditionFalse, remediationSkippedMachineNotFound},
{commonconditions.SucceededType, metav1.ConditionFalse, remediationSkippedMachineNotFound}})
verifyConditionUnset(commonconditions.PermanentNodeDeletionExpectedType)
verifyEvents([]expectedEvent{
{v1.EventTypeWarning, "RemediationSkippedMachineNotFound", "failed to fetch machine of node", true},
{v1.EventTypeNormal, machineDeletionRequestedEventReason, machineDeletionRequestedEventMessage, false},
})
})
})

Expand All @@ -423,6 +485,10 @@ var _ = Describe("Machine Deletion Remediation CR", func() {
// beginning. As a result, the first attempt to get the Machine is via CR's annotation, it fails,
// and the condition cannot be set. Normally, the first attempt is via Node's annotation instead.
verifyConditionUnset(commonconditions.PermanentNodeDeletionExpectedType)
verifyEvents([]expectedEvent{
{v1.EventTypeWarning, "RemediationFailed", unrecoverableError.Error(), true},
{v1.EventTypeNormal, machineDeletionRequestedEventReason, machineDeletionRequestedEventMessage, false},
})
})
})

Expand All @@ -445,6 +511,9 @@ var _ = Describe("Machine Deletion Remediation CR", func() {
{commonconditions.SucceededType, metav1.ConditionUnknown, remediationStarted},
// Cluster provider is not set in this test
{commonconditions.PermanentNodeDeletionExpectedType, metav1.ConditionUnknown, v1alpha1.MachineDeletionOnUndefinedProviderReason}})
verifyEvents([]expectedEvent{
{v1.EventTypeNormal, machineDeletionRequestedEventReason, machineDeletionRequestedEventMessage, false},
})
})
})

Expand All @@ -461,6 +530,10 @@ var _ = Describe("Machine Deletion Remediation CR", func() {
// beginning. As a result, the first attempt to get the Machine is via CR's annotation, it fails,
// and the condition cannot be set. Normally, the first attempt is via Node's annotation instead.
verifyConditionUnset(commonconditions.PermanentNodeDeletionExpectedType)
verifyEvents([]expectedEvent{
{v1.EventTypeNormal, "RemediationStopped", "NHC added the timed-out annotation, remediation will be stopped", true},
{v1.EventTypeNormal, machineDeletionRequestedEventReason, machineDeletionRequestedEventMessage, false},
})
})
})
})
Expand Down Expand Up @@ -502,6 +575,9 @@ var _ = Describe("Machine Deletion Remediation CR", func() {
{commonconditions.SucceededType, metav1.ConditionTrue, remediationFinishedMachineDeleted},
// Cluster provider is not set in this test
{commonconditions.PermanentNodeDeletionExpectedType, metav1.ConditionUnknown, v1alpha1.MachineDeletionOnUndefinedProviderReason}})
verifyEvents([]expectedEvent{
{v1.EventTypeNormal, machineDeletionRequestedEventReason, machineDeletionRequestedEventMessage, true},
})
})
})

Expand All @@ -525,6 +601,9 @@ var _ = Describe("Machine Deletion Remediation CR", func() {
{commonconditions.SucceededType, metav1.ConditionTrue, remediationFinishedMachineDeleted},
// Cluster provider is not set in this test
{commonconditions.PermanentNodeDeletionExpectedType, metav1.ConditionUnknown, v1alpha1.MachineDeletionOnUndefinedProviderReason}})
verifyEvents([]expectedEvent{
{v1.EventTypeNormal, machineDeletionRequestedEventReason, machineDeletionRequestedEventMessage, true},
})
})
})
})
Expand Down Expand Up @@ -669,13 +748,13 @@ func verifyMachineNotDeleted(machineName string) {
Consistently(
func() error {
return k8sClient.Get(context.Background(), client.ObjectKey{Namespace: machineNamespace, Name: machineName}, createDummyMachine())
}).ShouldNot(HaveOccurred())
}).ShouldNot(HaveOccurred(), "Machine %s should not have been deleted", machineName)
}

func verifyMachineIsDeleted(machineName string) {
Eventually(func() bool {
return errors.IsNotFound(k8sClient.Get(context.Background(), client.ObjectKey{Namespace: machineNamespace, Name: machineName}, createDummyMachine()))
}).Should(BeTrue())
}).Should(BeTrue(), "Machine %s should have been deleted", machineName)
}

func deleteIgnoreNotFound() func(ctx context.Context, obj client.Object) error {
Expand Down Expand Up @@ -756,3 +835,36 @@ func setMachineProviderID(machine *machinev1beta1.Machine, providerID string) {
machine.Spec.ProviderID = &providerID
Expect(k8sClient.Update(context.TODO(), machine)).To(Succeed())
}

type expectedEvent struct {
eventType, reason, message string
expected bool
}

func verifyEvents(expectedEvents []expectedEvent) {
By("verifying that the Events are emitted (or not) as expected")

// building events message map
toBeTestedEventsMap := make(map[string]bool)
for _, e := range expectedEvents {
formattedMessage := fmt.Sprintf("%s %s [remediation] %s", e.eventType, e.reason, e.message)
toBeTestedEventsMap[formattedMessage] = e.expected
}
actuallyReceivedEventMap := make(map[string]bool)
for {
select {
case got := <-fakeRecorder.Events:
if _, exist := toBeTestedEventsMap[got]; exist {
actuallyReceivedEventMap[got] = true
}
continue
case <-time.After(1 * time.Second):
}
break

}
for formattedEventMessage, expected := range toBeTestedEventsMap {
_, exists := actuallyReceivedEventMap[formattedEventMessage]
Expect(exists).To(Equal(expected), "event test failed.\nEvent '%s': expected %v, received %v", formattedEventMessage, expected, exists)
}
}
Loading

0 comments on commit 0ba5428

Please sign in to comment.