Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] MHC created CR isn't deleted by the remediator #347

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions controllers/machinehealthcheck_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -403,19 +403,21 @@ func (r *MachineHealthCheckReconciler) remediate(target resources.Target, rm res
if err != nil {
return errors.Wrapf(err, "failed to get remediation template")
}
remediationCR, err := rm.GenerateRemediationCRForMachine(target.Machine, target.MHC, template)
if err != nil {
return errors.Wrapf(err, "failed to generate remediation CR")

var nodeNamePtr *string
mshitrit marked this conversation as resolved.
Show resolved Hide resolved
if target.Node != nil && target.Node.ResourceVersion != "" {
nodeNamePtr = &target.Node.Name
}

// TODO add control plane label

// create remediation CR
var nodeName *string
if target.Node != nil && target.Node.ResourceVersion != "" {
nodeName = &target.Node.Name
remediationCR, err := rm.GenerateRemediationCRForMachine(target.Machine, target.MHC, template, pointer.StringDeref(nodeNamePtr, ""))
if err != nil {
return errors.Wrapf(err, "failed to generate remediation CR")
}
created, _, _, err := rm.CreateRemediationCR(remediationCR, target.MHC, nodeName, utils.DefaultRemediationDuration, 0)

created, _, _, err := rm.CreateRemediationCR(remediationCR, target.MHC, nodeNamePtr, utils.DefaultRemediationDuration, 0)
if err != nil {
if _, ok := err.(resources.RemediationCRNotOwned); ok {
// CR exists but not owned by us, nothing to do
Expand Down
50 changes: 42 additions & 8 deletions controllers/machinehealthcheck_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"testing"
"time"

commonannotations "github.com/medik8s/common/pkg/annotations"
. "github.com/onsi/gomega"

corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -122,16 +123,16 @@ func TestReconcile(t *testing.T) {
machineWithoutNodeRef := newMachine("machineWithoutNodeRef", nodeAnnotatedWithMachineWithoutNodeReference.Name)
machineWithoutNodeRef.Status.NodeRef = nil

machineHealthCheck := newMachineHealthCheck("machineHealthCheck")
machineHealthCheck := newMachineHealthCheck("machineHealthCheck", infraRemediationTemplateRef)
nodeStartupTimeout := 15 * time.Minute
machineHealthCheck.Spec.NodeStartupTimeout = &metav1.Duration{Duration: nodeStartupTimeout}

machineHealthCheckNegativeMaxUnhealthy := newMachineHealthCheck("machineHealthCheckNegativeMaxUnhealthy")
machineHealthCheckNegativeMaxUnhealthy := newMachineHealthCheck("machineHealthCheckNegativeMaxUnhealthy", infraRemediationTemplateRef)
negativeOne := intstr.FromInt(-1)
machineHealthCheckNegativeMaxUnhealthy.Spec.MaxUnhealthy = &negativeOne
machineHealthCheckNegativeMaxUnhealthy.Spec.NodeStartupTimeout = &metav1.Duration{Duration: nodeStartupTimeout}

machineHealthCheckPaused := newMachineHealthCheck("machineHealthCheck")
machineHealthCheckPaused := newMachineHealthCheck("machineHealthCheck", infraRemediationTemplateRef)
machineHealthCheckPaused.Annotations = make(map[string]string)
machineHealthCheckPaused.Annotations[annotations.MHCPausedAnnotation] = "test"

Expand Down Expand Up @@ -387,15 +388,18 @@ func TestReconcileExternalRemediationTemplate(t *testing.T) {
machineWithNodeUnHealthy := newMachine("Machine", nodeUnHealthy.Name)
machineWithNodeUnHealthy.APIVersion = machinev1.SchemeGroupVersion.String()

mhc := newMachineHealthCheck("machineHealthCheck")
mhc := newMachineHealthCheck("machineHealthCheck", infraRemediationTemplateRef)
mhcMultipleSupport := newMachineHealthCheck("machineHealthCheck", infraMultipleRemediationTemplateRef)

remediationTemplateCR := newTestRemediationTemplateCR(InfraRemediationKind, MachineNamespace, InfraRemediationTemplateName)
remediationMultipleSupportTemplateCR := newTestRemediationTemplateCR(InfraRemediationKind, MachineNamespace, InfraMultipleSupportRemediationTemplateName)
owner := metav1.OwnerReference{
APIVersion: mhc.APIVersion,
Kind: mhc.Kind,
Name: mhc.Name,
UID: mhc.UID,
}
remediationCR := newRemediationCR(machineWithNodeUnHealthy.Name, *mhc.Spec.RemediationTemplate, owner)
remediationCR := newRemediationCR(machineWithNodeUnHealthy.Name, nodeUnHealthy.Name, *mhc.Spec.RemediationTemplate, owner)

testCases := []testCase{

Expand Down Expand Up @@ -464,6 +468,28 @@ func TestReconcileExternalRemediationTemplate(t *testing.T) {
},
},
},

{
name: "create new multiple template supported remediation",
machine: machineWithNodeUnHealthy,
node: nodeUnHealthy,
mshitrit marked this conversation as resolved.
Show resolved Hide resolved
mhc: mhcMultipleSupport,
remediationCR: nil,
remediationTemplate: remediationMultipleSupportTemplateCR,
expected: expectedReconcile{
result: reconcile.Result{},
error: false,
},
expectedEvents: []string{utils.EventReasonRemediationCreated},
expectedStatus: &machinev1.MachineHealthCheckStatus{
ExpectedMachines: pointer.Int(1),
CurrentHealthy: pointer.Int(0),
RemediationsAllowed: 0,
Conditions: machinev1.Conditions{
remediationAllowedCondition,
},
},
},
}

for _, tc := range testCases {
Expand Down Expand Up @@ -849,7 +875,7 @@ func TestMHCRequestsFromNode(t *testing.T) {
func TestGetTargetsFromMHC(t *testing.T) {
machine1 := newMachine("match1", "node1")
machine2 := newMachine("match2", "node2")
mhc := newMachineHealthCheck("findTargets")
mhc := newMachineHealthCheck("findTargets", infraRemediationTemplateRef)
testCases := []struct {
testCase string
mhc *machinev1.MachineHealthCheck
Expand Down Expand Up @@ -2485,6 +2511,10 @@ func buildRunTimeObjects(tc testCase) []runtime.Object {
objects = append(objects, newTestRemediationCRD(testRemediationKind))
objects = append(objects, newTestRemediationTemplateCR(testRemediationKind, MachineNamespace, InfraRemediationTemplateName))

templateMultiSupportCR := newTestRemediationTemplateCR(testRemediationKind, MachineNamespace, InfraMultipleSupportRemediationTemplateName)
templateMultiSupportCR.SetAnnotations(map[string]string{commonannotations.MultipleTemplatesSupportedAnnotation: "true"})
objects = append(objects, templateMultiSupportCR)

return objects
}

Expand All @@ -2501,6 +2531,10 @@ func verifyRemediationCR(t *testing.T, tc testCase, ctx context.Context, client
}
if isExist {
g.Expect(client.Get(ctx, nameSpace, remediationCR)).To(Succeed())
crAnnotations := remediationCR.GetAnnotations()
g.Expect(crAnnotations).ToNot(BeNil())
g.Expect(crAnnotations[commonannotations.NodeNameAnnotation]).Should(Equal(tc.node.Name))
g.Expect(crAnnotations[annotations.TemplateNameAnnotation]).Should(Equal(tc.remediationTemplate.GetName()))
} else {
g.Expect(client.Get(ctx, nameSpace, remediationCR)).NotTo(Succeed())
}
Expand Down Expand Up @@ -2588,7 +2622,7 @@ func newMachine(name string, nodeName string) *machinev1.Machine {
}

// newMachineHealthCheck returns new MachineHealthCheck object that can be used for testing
func newMachineHealthCheck(name string) *machinev1.MachineHealthCheck {
func newMachineHealthCheck(name string, remediationTemplate *corev1.ObjectReference) *machinev1.MachineHealthCheck {
return &machinev1.MachineHealthCheck{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Expand All @@ -2615,7 +2649,7 @@ func newMachineHealthCheck(name string) *machinev1.MachineHealthCheck {
Timeout: metav1.Duration{Duration: 300 * time.Second},
},
},
RemediationTemplate: infraRemediationTemplateRef,
RemediationTemplate: remediationTemplate,
},
Status: machinev1.MachineHealthCheckStatus{},
}
Expand Down
2 changes: 1 addition & 1 deletion controllers/nodehealthcheck_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2283,7 +2283,7 @@ func newRemediationCRForNHC(nodeName string, nhc *v1alpha1.NodeHealthCheck) *uns
Name: nhc.Name,
UID: nhc.UID,
}
return newRemediationCR(nodeName, templateRef, owner)
return newRemediationCR(nodeName, nodeName, templateRef, owner)
}

func newNodeHealthCheck() *v1alpha1.NodeHealthCheck {
Expand Down
16 changes: 9 additions & 7 deletions controllers/resources/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ type Manager interface {
GenerateRemediationCRBase(gvk schema.GroupVersionKind) *unstructured.Unstructured
GenerateRemediationCRBaseNamed(gvk schema.GroupVersionKind, namespace string, name string) *unstructured.Unstructured
GenerateRemediationCRForNode(node *corev1.Node, owner client.Object, template *unstructured.Unstructured) (*unstructured.Unstructured, error)
GenerateRemediationCRForMachine(machine *machinev1beta1.Machine, owner client.Object, template *unstructured.Unstructured) (*unstructured.Unstructured, error)
GenerateRemediationCRForMachine(machine *machinev1beta1.Machine, owner client.Object, template *unstructured.Unstructured, nodeName string) (*unstructured.Unstructured, error)
CreateRemediationCR(remediationCR *unstructured.Unstructured, owner client.Object, nodeName *string, currentRemediationDuration, previousRemediationsDuration time.Duration) (bool, *time.Duration, *unstructured.Unstructured, error)
DeleteRemediationCR(remediationCR *unstructured.Unstructured, owner client.Object) (bool, error)
UpdateRemediationCR(remediationCR *unstructured.Unstructured) error
Expand Down Expand Up @@ -98,10 +98,10 @@ func (m *manager) GenerateRemediationCRForNode(node *corev1.Node, owner client.O
}
}

return m.generateRemediationCR(node.GetName(), nhcOwnerRef, machineOwnerRef, template)
return m.generateRemediationCR(node.GetName(), node.GetName(), nhcOwnerRef, machineOwnerRef, template)
}

func (m *manager) GenerateRemediationCRForMachine(machine *machinev1beta1.Machine, owner client.Object, template *unstructured.Unstructured) (*unstructured.Unstructured, error) {
func (m *manager) GenerateRemediationCRForMachine(machine *machinev1beta1.Machine, owner client.Object, template *unstructured.Unstructured, nodeName string) (*unstructured.Unstructured, error) {

mhcOwnerRef := createOwnerRef(owner)

Expand All @@ -116,23 +116,25 @@ func (m *manager) GenerateRemediationCRForMachine(machine *machinev1beta1.Machin
// So it can be ignored here.
}

return m.generateRemediationCR(machine.GetName(), mhcOwnerRef, machineOwnerRef, template)
return m.generateRemediationCR(machine.GetName(), nodeName, mhcOwnerRef, machineOwnerRef, template)
}

func (m *manager) generateRemediationCR(name string, healthCheckOwnerRef *metav1.OwnerReference, machineOwnerRef *metav1.OwnerReference, template *unstructured.Unstructured) (*unstructured.Unstructured, error) {
func (m *manager) generateRemediationCR(name string, nodeName string, healthCheckOwnerRef *metav1.OwnerReference, machineOwnerRef *metav1.OwnerReference, template *unstructured.Unstructured) (*unstructured.Unstructured, error) {

remediationCR := m.GenerateRemediationCRBase(template.GroupVersionKind())

// can't go wrong, we already checked for correct spec
templateSpec, _, _ := unstructured.NestedMap(template.Object, "spec", "template", "spec")
unstructured.SetNestedField(remediationCR.Object, templateSpec, "spec")

if annotations.HasMultipleTemplatesAnnotation(template) {
// Multiple same kind templates are never supported for MHC, and remediators are not expected to handle generated names in this case, even if they do for NHC.
isMHCRemediation := name != nodeName
mshitrit marked this conversation as resolved.
Show resolved Hide resolved
if annotations.HasMultipleTemplatesAnnotation(template) && !isMHCRemediation {
remediationCR.SetGenerateName(fmt.Sprintf("%s-", name))
} else {
remediationCR.SetName(name)
}
remediationCR.SetAnnotations(map[string]string{commonannotations.NodeNameAnnotation: name, annotations.TemplateNameAnnotation: template.GetName()})
remediationCR.SetAnnotations(map[string]string{commonannotations.NodeNameAnnotation: nodeName, annotations.TemplateNameAnnotation: template.GetName()})

remediationCR.SetNamespace(template.GetNamespace())
remediationCR.SetResourceVersion("")
Expand Down
6 changes: 3 additions & 3 deletions controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ func newTestRemediationTemplateCR(kind, namespace, name string) *unstructured.Un
return template
}

func newRemediationCR(nodeName string, templateRef v1.ObjectReference, owner metav1.OwnerReference) *unstructured.Unstructured {
func newRemediationCR(name string, nodeName string, templateRef v1.ObjectReference, owner metav1.OwnerReference) *unstructured.Unstructured {

// check template if it supports multiple same kind remediation
// not needed (and fails for missing k8sclient) for MHC tests
Expand All @@ -423,9 +423,9 @@ func newRemediationCR(nodeName string, templateRef v1.ObjectReference, owner met
cr := unstructured.Unstructured{}
cr.SetNamespace(templateRef.Namespace)
if mutipleSameKindSupported {
cr.SetGenerateName(fmt.Sprintf("%s-", nodeName))
cr.SetGenerateName(fmt.Sprintf("%s-", name))
} else {
cr.SetName(nodeName)
cr.SetName(name)
}

kind := templateRef.GroupVersionKind().Kind
Expand Down
4 changes: 2 additions & 2 deletions e2e/common_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
"k8s.io/apimachinery/pkg/runtime/schema"
)

func ensureRemediationResourceExists(name string, namespace string, remediationResource schema.GroupVersionResource) func() error {
func ensureRemediationResourceExists(nodeName string, namespace string, remediationResource schema.GroupVersionResource) func() error {
return func() error {
// The CR name doesn't always match the node name in case multiple CRs of same type for the same node are supported
// So list all, and look for the node name in the annotation
Expand All @@ -25,7 +25,7 @@ func ensureRemediationResourceExists(name string, namespace string, remediationR
return err
}
for _, cr := range list.Items {
if nodeName, exists := cr.GetAnnotations()[commonannotations.NodeNameAnnotation]; exists && nodeName == name {
if annotationNodeName, exists := cr.GetAnnotations()[commonannotations.NodeNameAnnotation]; exists && annotationNodeName == nodeName {
log.Info("found remediation resource")
return nil
}
Expand Down
5 changes: 2 additions & 3 deletions e2e/mhc_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ const (

var _ = Describe("e2e - MHC", Label("MHC", labelOcpOnlyValue), func() {
var nodeUnderTest *v1.Node
var machineNameUnderTest string
var mhc *v1beta1.MachineHealthCheck
var workers *v1.NodeList
var leaseName string
Expand Down Expand Up @@ -84,7 +83,7 @@ var _ = Describe("e2e - MHC", Label("MHC", labelOcpOnlyValue), func() {
nodeUnderTest = &workers.Items[0]

var err error
_, machineNameUnderTest, err = controllerutils.GetMachineNamespaceName(nodeUnderTest)
_, _, err = controllerutils.GetMachineNamespaceName(nodeUnderTest)
Expect(err).ToNot(HaveOccurred(), "failed to get machine name from node")

leaseName = fmt.Sprintf("%s-%s", "node", nodeUnderTest.Name)
Expand Down Expand Up @@ -126,7 +125,7 @@ var _ = Describe("e2e - MHC", Label("MHC", labelOcpOnlyValue), func() {
By("ensuring remediation CR exists")
waitTime := nodeUnhealthyTime.Add(unhealthyConditionDuration + 3*time.Second).Sub(time.Now())
Eventually(
ensureRemediationResourceExists(machineNameUnderTest, mhcNamespace, remediationGVR), waitTime, "500ms").
ensureRemediationResourceExists(nodeUnderTest.Name, mhcNamespace, remediationGVR), waitTime, "500ms").
mshitrit marked this conversation as resolved.
Show resolved Hide resolved
Should(Succeed())

By("ensuring lease exist")
Expand Down
Loading