diff --git a/controllers/machinehealthcheck_controller.go b/controllers/machinehealthcheck_controller.go index 1dd84b63..aa425e4e 100644 --- a/controllers/machinehealthcheck_controller.go +++ b/controllers/machinehealthcheck_controller.go @@ -403,19 +403,21 @@ func (r *MachineHealthCheckReconciler) remediate(target resources.Target, rm res if err != nil { return errors.Wrapf(err, "failed to get remediation template") } - remediationCR, err := rm.GenerateRemediationCRForMachine(target.Machine, target.MHC, template) - if err != nil { - return errors.Wrapf(err, "failed to generate remediation CR") + + var nodeNamePtr *string + if target.Node != nil && target.Node.ResourceVersion != "" { + nodeNamePtr = &target.Node.Name } // TODO add control plane label // create remediation CR - var nodeName *string - if target.Node != nil && target.Node.ResourceVersion != "" { - nodeName = &target.Node.Name + remediationCR, err := rm.GenerateRemediationCRForMachine(target.Machine, target.MHC, template, pointer.StringDeref(nodeNamePtr, "")) + if err != nil { + return errors.Wrapf(err, "failed to generate remediation CR") } - created, _, _, err := rm.CreateRemediationCR(remediationCR, target.MHC, nodeName, utils.DefaultRemediationDuration, 0) + + created, _, _, err := rm.CreateRemediationCR(remediationCR, target.MHC, nodeNamePtr, utils.DefaultRemediationDuration, 0) if err != nil { if _, ok := err.(resources.RemediationCRNotOwned); ok { // CR exists but not owned by us, nothing to do diff --git a/controllers/machinehealthcheck_controller_test.go b/controllers/machinehealthcheck_controller_test.go index 0a99d79e..9ec9ad73 100644 --- a/controllers/machinehealthcheck_controller_test.go +++ b/controllers/machinehealthcheck_controller_test.go @@ -9,6 +9,7 @@ import ( "testing" "time" + commonannotations "github.com/medik8s/common/pkg/annotations" . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" @@ -122,16 +123,16 @@ func TestReconcile(t *testing.T) { machineWithoutNodeRef := newMachine("machineWithoutNodeRef", nodeAnnotatedWithMachineWithoutNodeReference.Name) machineWithoutNodeRef.Status.NodeRef = nil - machineHealthCheck := newMachineHealthCheck("machineHealthCheck") + machineHealthCheck := newMachineHealthCheck("machineHealthCheck", infraRemediationTemplateRef) nodeStartupTimeout := 15 * time.Minute machineHealthCheck.Spec.NodeStartupTimeout = &metav1.Duration{Duration: nodeStartupTimeout} - machineHealthCheckNegativeMaxUnhealthy := newMachineHealthCheck("machineHealthCheckNegativeMaxUnhealthy") + machineHealthCheckNegativeMaxUnhealthy := newMachineHealthCheck("machineHealthCheckNegativeMaxUnhealthy", infraRemediationTemplateRef) negativeOne := intstr.FromInt(-1) machineHealthCheckNegativeMaxUnhealthy.Spec.MaxUnhealthy = &negativeOne machineHealthCheckNegativeMaxUnhealthy.Spec.NodeStartupTimeout = &metav1.Duration{Duration: nodeStartupTimeout} - machineHealthCheckPaused := newMachineHealthCheck("machineHealthCheck") + machineHealthCheckPaused := newMachineHealthCheck("machineHealthCheck", infraRemediationTemplateRef) machineHealthCheckPaused.Annotations = make(map[string]string) machineHealthCheckPaused.Annotations[annotations.MHCPausedAnnotation] = "test" @@ -387,15 +388,18 @@ func TestReconcileExternalRemediationTemplate(t *testing.T) { machineWithNodeUnHealthy := newMachine("Machine", nodeUnHealthy.Name) machineWithNodeUnHealthy.APIVersion = machinev1.SchemeGroupVersion.String() - mhc := newMachineHealthCheck("machineHealthCheck") + mhc := newMachineHealthCheck("machineHealthCheck", infraRemediationTemplateRef) + mhcMultipleSupport := newMachineHealthCheck("machineHealthCheck", infraMultipleRemediationTemplateRef) + remediationTemplateCR := newTestRemediationTemplateCR(InfraRemediationKind, MachineNamespace, InfraRemediationTemplateName) + remediationMultipleSupportTemplateCR := newTestRemediationTemplateCR(InfraRemediationKind, MachineNamespace, InfraMultipleSupportRemediationTemplateName) owner := metav1.OwnerReference{ APIVersion: mhc.APIVersion, Kind: mhc.Kind, Name: mhc.Name, UID: mhc.UID, } - remediationCR := newRemediationCR(machineWithNodeUnHealthy.Name, *mhc.Spec.RemediationTemplate, owner) + remediationCR := newRemediationCR(machineWithNodeUnHealthy.Name, nodeUnHealthy.Name, *mhc.Spec.RemediationTemplate, owner) testCases := []testCase{ @@ -464,6 +468,28 @@ func TestReconcileExternalRemediationTemplate(t *testing.T) { }, }, }, + + { + name: "create new multiple template supported remediation", + machine: machineWithNodeUnHealthy, + node: nodeUnHealthy, + mhc: mhcMultipleSupport, + remediationCR: nil, + remediationTemplate: remediationMultipleSupportTemplateCR, + expected: expectedReconcile{ + result: reconcile.Result{}, + error: false, + }, + expectedEvents: []string{utils.EventReasonRemediationCreated}, + expectedStatus: &machinev1.MachineHealthCheckStatus{ + ExpectedMachines: pointer.Int(1), + CurrentHealthy: pointer.Int(0), + RemediationsAllowed: 0, + Conditions: machinev1.Conditions{ + remediationAllowedCondition, + }, + }, + }, } for _, tc := range testCases { @@ -849,7 +875,7 @@ func TestMHCRequestsFromNode(t *testing.T) { func TestGetTargetsFromMHC(t *testing.T) { machine1 := newMachine("match1", "node1") machine2 := newMachine("match2", "node2") - mhc := newMachineHealthCheck("findTargets") + mhc := newMachineHealthCheck("findTargets", infraRemediationTemplateRef) testCases := []struct { testCase string mhc *machinev1.MachineHealthCheck @@ -2485,6 +2511,10 @@ func buildRunTimeObjects(tc testCase) []runtime.Object { objects = append(objects, newTestRemediationCRD(testRemediationKind)) objects = append(objects, newTestRemediationTemplateCR(testRemediationKind, MachineNamespace, InfraRemediationTemplateName)) + templateMultiSupportCR := newTestRemediationTemplateCR(testRemediationKind, MachineNamespace, InfraMultipleSupportRemediationTemplateName) + templateMultiSupportCR.SetAnnotations(map[string]string{commonannotations.MultipleTemplatesSupportedAnnotation: "true"}) + objects = append(objects, templateMultiSupportCR) + return objects } @@ -2501,6 +2531,10 @@ func verifyRemediationCR(t *testing.T, tc testCase, ctx context.Context, client } if isExist { g.Expect(client.Get(ctx, nameSpace, remediationCR)).To(Succeed()) + crAnnotations := remediationCR.GetAnnotations() + g.Expect(crAnnotations).ToNot(BeNil()) + g.Expect(crAnnotations[commonannotations.NodeNameAnnotation]).Should(Equal(tc.node.Name)) + g.Expect(crAnnotations[annotations.TemplateNameAnnotation]).Should(Equal(tc.remediationTemplate.GetName())) } else { g.Expect(client.Get(ctx, nameSpace, remediationCR)).NotTo(Succeed()) } @@ -2588,7 +2622,7 @@ func newMachine(name string, nodeName string) *machinev1.Machine { } // newMachineHealthCheck returns new MachineHealthCheck object that can be used for testing -func newMachineHealthCheck(name string) *machinev1.MachineHealthCheck { +func newMachineHealthCheck(name string, remediationTemplate *corev1.ObjectReference) *machinev1.MachineHealthCheck { return &machinev1.MachineHealthCheck{ ObjectMeta: metav1.ObjectMeta{ Name: name, @@ -2615,7 +2649,7 @@ func newMachineHealthCheck(name string) *machinev1.MachineHealthCheck { Timeout: metav1.Duration{Duration: 300 * time.Second}, }, }, - RemediationTemplate: infraRemediationTemplateRef, + RemediationTemplate: remediationTemplate, }, Status: machinev1.MachineHealthCheckStatus{}, } diff --git a/controllers/nodehealthcheck_controller_test.go b/controllers/nodehealthcheck_controller_test.go index 1d6993f3..308ae0a8 100644 --- a/controllers/nodehealthcheck_controller_test.go +++ b/controllers/nodehealthcheck_controller_test.go @@ -2283,7 +2283,7 @@ func newRemediationCRForNHC(nodeName string, nhc *v1alpha1.NodeHealthCheck) *uns Name: nhc.Name, UID: nhc.UID, } - return newRemediationCR(nodeName, templateRef, owner) + return newRemediationCR(nodeName, nodeName, templateRef, owner) } func newNodeHealthCheck() *v1alpha1.NodeHealthCheck { diff --git a/controllers/resources/manager.go b/controllers/resources/manager.go index 6d2b6ea8..1ef20025 100644 --- a/controllers/resources/manager.go +++ b/controllers/resources/manager.go @@ -35,7 +35,7 @@ type Manager interface { GenerateRemediationCRBase(gvk schema.GroupVersionKind) *unstructured.Unstructured GenerateRemediationCRBaseNamed(gvk schema.GroupVersionKind, namespace string, name string) *unstructured.Unstructured GenerateRemediationCRForNode(node *corev1.Node, owner client.Object, template *unstructured.Unstructured) (*unstructured.Unstructured, error) - GenerateRemediationCRForMachine(machine *machinev1beta1.Machine, owner client.Object, template *unstructured.Unstructured) (*unstructured.Unstructured, error) + GenerateRemediationCRForMachine(machine *machinev1beta1.Machine, owner client.Object, template *unstructured.Unstructured, nodeName string) (*unstructured.Unstructured, error) CreateRemediationCR(remediationCR *unstructured.Unstructured, owner client.Object, nodeName *string, currentRemediationDuration, previousRemediationsDuration time.Duration) (bool, *time.Duration, *unstructured.Unstructured, error) DeleteRemediationCR(remediationCR *unstructured.Unstructured, owner client.Object) (bool, error) UpdateRemediationCR(remediationCR *unstructured.Unstructured) error @@ -98,10 +98,10 @@ func (m *manager) GenerateRemediationCRForNode(node *corev1.Node, owner client.O } } - return m.generateRemediationCR(node.GetName(), nhcOwnerRef, machineOwnerRef, template) + return m.generateRemediationCR(node.GetName(), node.GetName(), nhcOwnerRef, machineOwnerRef, template) } -func (m *manager) GenerateRemediationCRForMachine(machine *machinev1beta1.Machine, owner client.Object, template *unstructured.Unstructured) (*unstructured.Unstructured, error) { +func (m *manager) GenerateRemediationCRForMachine(machine *machinev1beta1.Machine, owner client.Object, template *unstructured.Unstructured, nodeName string) (*unstructured.Unstructured, error) { mhcOwnerRef := createOwnerRef(owner) @@ -116,10 +116,10 @@ func (m *manager) GenerateRemediationCRForMachine(machine *machinev1beta1.Machin // So it can be ignored here. } - return m.generateRemediationCR(machine.GetName(), mhcOwnerRef, machineOwnerRef, template) + return m.generateRemediationCR(machine.GetName(), nodeName, mhcOwnerRef, machineOwnerRef, template) } -func (m *manager) generateRemediationCR(name string, healthCheckOwnerRef *metav1.OwnerReference, machineOwnerRef *metav1.OwnerReference, template *unstructured.Unstructured) (*unstructured.Unstructured, error) { +func (m *manager) generateRemediationCR(name string, nodeName string, healthCheckOwnerRef *metav1.OwnerReference, machineOwnerRef *metav1.OwnerReference, template *unstructured.Unstructured) (*unstructured.Unstructured, error) { remediationCR := m.GenerateRemediationCRBase(template.GroupVersionKind()) @@ -127,12 +127,14 @@ func (m *manager) generateRemediationCR(name string, healthCheckOwnerRef *metav1 templateSpec, _, _ := unstructured.NestedMap(template.Object, "spec", "template", "spec") unstructured.SetNestedField(remediationCR.Object, templateSpec, "spec") - if annotations.HasMultipleTemplatesAnnotation(template) { + // Multiple same kind templates are never supported for MHC, and remediators are not expected to handle generated names in this case, even if they do for NHC. + isMHCRemediation := name != nodeName + if annotations.HasMultipleTemplatesAnnotation(template) && !isMHCRemediation { remediationCR.SetGenerateName(fmt.Sprintf("%s-", name)) } else { remediationCR.SetName(name) } - remediationCR.SetAnnotations(map[string]string{commonannotations.NodeNameAnnotation: name, annotations.TemplateNameAnnotation: template.GetName()}) + remediationCR.SetAnnotations(map[string]string{commonannotations.NodeNameAnnotation: nodeName, annotations.TemplateNameAnnotation: template.GetName()}) remediationCR.SetNamespace(template.GetNamespace()) remediationCR.SetResourceVersion("") diff --git a/controllers/suite_test.go b/controllers/suite_test.go index 792ee69e..bf46c582 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -402,7 +402,7 @@ func newTestRemediationTemplateCR(kind, namespace, name string) *unstructured.Un return template } -func newRemediationCR(nodeName string, templateRef v1.ObjectReference, owner metav1.OwnerReference) *unstructured.Unstructured { +func newRemediationCR(name string, nodeName string, templateRef v1.ObjectReference, owner metav1.OwnerReference) *unstructured.Unstructured { // check template if it supports multiple same kind remediation // not needed (and fails for missing k8sclient) for MHC tests @@ -423,9 +423,9 @@ func newRemediationCR(nodeName string, templateRef v1.ObjectReference, owner met cr := unstructured.Unstructured{} cr.SetNamespace(templateRef.Namespace) if mutipleSameKindSupported { - cr.SetGenerateName(fmt.Sprintf("%s-", nodeName)) + cr.SetGenerateName(fmt.Sprintf("%s-", name)) } else { - cr.SetName(nodeName) + cr.SetName(name) } kind := templateRef.GroupVersionKind().Kind diff --git a/e2e/common_test.go b/e2e/common_test.go index d84f2026..3c38eac9 100644 --- a/e2e/common_test.go +++ b/e2e/common_test.go @@ -11,7 +11,7 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" ) -func ensureRemediationResourceExists(name string, namespace string, remediationResource schema.GroupVersionResource) func() error { +func ensureRemediationResourceExists(nodeName string, namespace string, remediationResource schema.GroupVersionResource) func() error { return func() error { // The CR name doesn't always match the node name in case multiple CRs of same type for the same node are supported // So list all, and look for the node name in the annotation @@ -25,7 +25,7 @@ func ensureRemediationResourceExists(name string, namespace string, remediationR return err } for _, cr := range list.Items { - if nodeName, exists := cr.GetAnnotations()[commonannotations.NodeNameAnnotation]; exists && nodeName == name { + if annotationNodeName, exists := cr.GetAnnotations()[commonannotations.NodeNameAnnotation]; exists && annotationNodeName == nodeName { log.Info("found remediation resource") return nil } diff --git a/e2e/mhc_e2e_test.go b/e2e/mhc_e2e_test.go index 85459345..7ecc4ef0 100644 --- a/e2e/mhc_e2e_test.go +++ b/e2e/mhc_e2e_test.go @@ -31,7 +31,6 @@ const ( var _ = Describe("e2e - MHC", Label("MHC", labelOcpOnlyValue), func() { var nodeUnderTest *v1.Node - var machineNameUnderTest string var mhc *v1beta1.MachineHealthCheck var workers *v1.NodeList var leaseName string @@ -84,7 +83,7 @@ var _ = Describe("e2e - MHC", Label("MHC", labelOcpOnlyValue), func() { nodeUnderTest = &workers.Items[0] var err error - _, machineNameUnderTest, err = controllerutils.GetMachineNamespaceName(nodeUnderTest) + _, _, err = controllerutils.GetMachineNamespaceName(nodeUnderTest) Expect(err).ToNot(HaveOccurred(), "failed to get machine name from node") leaseName = fmt.Sprintf("%s-%s", "node", nodeUnderTest.Name) @@ -126,7 +125,7 @@ var _ = Describe("e2e - MHC", Label("MHC", labelOcpOnlyValue), func() { By("ensuring remediation CR exists") waitTime := nodeUnhealthyTime.Add(unhealthyConditionDuration + 3*time.Second).Sub(time.Now()) Eventually( - ensureRemediationResourceExists(machineNameUnderTest, mhcNamespace, remediationGVR), waitTime, "500ms"). + ensureRemediationResourceExists(nodeUnderTest.Name, mhcNamespace, remediationGVR), waitTime, "500ms"). Should(Succeed()) By("ensuring lease exist") @@ -155,7 +154,7 @@ var _ = Describe("e2e - MHC", Label("MHC", labelOcpOnlyValue), func() { Eventually(func(g Gomega) { err := k8sClient.Get(context.Background(), ctrl.ObjectKey{Name: leaseName, Namespace: leaseNs}, lease) g.Expect(k8serrors.IsNotFound(err)).To(BeTrue()) - }, "1m", "5s").Should(Succeed(), "lease not deleted") + }, "5m", "5s").Should(Succeed(), "lease not deleted") }) })