From cd043a0a4e2ace14ee4175596ec642ecaf3cf3e3 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Tue, 21 May 2024 13:05:38 +0200 Subject: [PATCH 01/19] util: add helpers to deal with the deferred annotation Add helpers to be used in future commits. Enable early introduction of e2e tests Signed-off-by: Francesco Romani --- pkg/apis/tuned/v1/tuned_types.go | 4 + pkg/util/annotations.go | 38 ++++++ pkg/util/annotations_test.go | 204 +++++++++++++++++++++++++++++++ 3 files changed, 246 insertions(+) create mode 100644 pkg/util/annotations.go create mode 100644 pkg/util/annotations_test.go diff --git a/pkg/apis/tuned/v1/tuned_types.go b/pkg/apis/tuned/v1/tuned_types.go index b0dde38c69..0e8a74d9bf 100644 --- a/pkg/apis/tuned/v1/tuned_types.go +++ b/pkg/apis/tuned/v1/tuned_types.go @@ -25,6 +25,10 @@ const ( // TunedBootcmdlineAnnotationKey is a Node-specific annotation denoting kernel command-line parameters // calculated by TuneD for the current profile applied to that Node. TunedBootcmdlineAnnotationKey string = "tuned.openshift.io/bootcmdline" + + // TunedDeferredUpdate request the tuned daemons to defer the update of the rendered profile + // until the next restart. + TunedDeferredUpdate string = "tuned.openshift.io/deferred" ) ///////////////////////////////////////////////////////////////////////////////// diff --git a/pkg/util/annotations.go b/pkg/util/annotations.go new file mode 100644 index 0000000000..1577fe4592 --- /dev/null +++ b/pkg/util/annotations.go @@ -0,0 +1,38 @@ +package util + +import ( + tunedv1 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/tuned/v1" +) + +func HasDeferredUpdateAnnotation(anns map[string]string) bool { + if anns == nil { + return false + } + _, ok := anns[tunedv1.TunedDeferredUpdate] + return ok +} + +func SetDeferredUpdateAnnotation(anns map[string]string, tuned *tunedv1.Tuned) map[string]string { + if anns == nil { + anns = make(map[string]string) + } + return ToggleDeferredUpdateAnnotation(anns, HasDeferredUpdateAnnotation(tuned.Annotations)) +} + +func ToggleDeferredUpdateAnnotation(anns map[string]string, toggle bool) map[string]string { + ret := cloneMapStringString(anns) + if toggle { + ret[tunedv1.TunedDeferredUpdate] = "" + } else { + delete(ret, tunedv1.TunedDeferredUpdate) + } + return ret +} + +func cloneMapStringString(obj map[string]string) map[string]string { + ret := make(map[string]string, len(obj)) + for key, val := range obj { + ret[key] = val + } + return ret +} diff --git a/pkg/util/annotations_test.go b/pkg/util/annotations_test.go new file mode 100644 index 0000000000..a20d17bbeb --- /dev/null +++ b/pkg/util/annotations_test.go @@ -0,0 +1,204 @@ +package util + +import ( + "reflect" + "testing" + + tunedv1 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/tuned/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestHasDeferredUpdateAnnotation(t *testing.T) { + testCases := []struct { + name string + anns map[string]string + expected bool + }{ + { + name: "nil", + expected: false, + }, + { + name: "empty", + anns: map[string]string{}, + expected: false, + }, + { + name: "no-ann", + anns: map[string]string{ + "foo": "bar", + "baz": "2", + }, + expected: false, + }, + { + name: "wrong-case", + anns: map[string]string{ + "tuned.openshift.io/Deferred": "", + }, + expected: false, + }, + { + name: "found", + anns: map[string]string{ + "tuned.openshift.io/deferred": "", + }, + expected: true, + }, + } + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + got := 
HasDeferredUpdateAnnotation(tt.anns) + if got != tt.expected { + t.Errorf("got=%v expected=%v", got, tt.expected) + } + }) + } +} + +func TestSetDeferredUpdateAnnotation(t *testing.T) { + testCases := []struct { + name string + anns map[string]string + tuned *tunedv1.Tuned + expected map[string]string + }{ + { + name: "nil", + tuned: &tunedv1.Tuned{}, + expected: map[string]string{}, + }, + { + name: "nil-add", + tuned: &tunedv1.Tuned{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + "tuned.openshift.io/deferred": "", + }, + }, + }, + expected: map[string]string{ + "tuned.openshift.io/deferred": "", + }, + }, + { + name: "existing-add", + anns: map[string]string{ + "foobar": "42", + }, + tuned: &tunedv1.Tuned{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + "tuned.openshift.io/deferred": "", + }, + }, + }, + expected: map[string]string{ + "foobar": "42", + "tuned.openshift.io/deferred": "", + }, + }, + { + name: "nil-remove", + tuned: &tunedv1.Tuned{}, + expected: map[string]string{}, + }, + { + name: "existing-remove", + anns: map[string]string{ + "foobar": "42", + "tuned.openshift.io/deferred": "", + }, + tuned: &tunedv1.Tuned{}, + expected: map[string]string{ + "foobar": "42", + }, + }, + { + name: "missing-remove", + anns: map[string]string{ + "foobar": "42", + }, + tuned: &tunedv1.Tuned{}, + expected: map[string]string{ + "foobar": "42", + }, + }, + } + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + got := SetDeferredUpdateAnnotation(tt.anns, tt.tuned) + if !reflect.DeepEqual(got, tt.expected) { + t.Errorf("got=%v expected=%v", got, tt.expected) + } + }) + } +} + +func TestToggleDeferredUpdateAnnotation(t *testing.T) { + testCases := []struct { + name string + anns map[string]string + toggle bool + expected map[string]string + }{ + { + name: "nil", + expected: map[string]string{}, + }, + { + name: "nil-add", + toggle: true, + expected: map[string]string{ + "tuned.openshift.io/deferred": "", + }, + }, + { + name: "existing-add", + anns: map[string]string{ + "foobar": "42", + }, + toggle: true, + expected: map[string]string{ + "foobar": "42", + "tuned.openshift.io/deferred": "", + }, + }, + { + name: "nil-remove", + expected: map[string]string{}, + }, + { + name: "existing-remove", + anns: map[string]string{ + "foobar": "42", + "tuned.openshift.io/deferred": "", + }, + expected: map[string]string{ + "foobar": "42", + }, + }, + { + name: "missing-remove", + anns: map[string]string{ + "foobar": "42", + }, + expected: map[string]string{ + "foobar": "42", + }, + }, + } + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + anns := cloneMapStringString(tt.anns) + got := ToggleDeferredUpdateAnnotation(tt.anns, tt.toggle) + // must not mutate the argument + if tt.anns != nil && !reflect.DeepEqual(anns, tt.anns) { + t.Errorf("toggle must return a new copy") + } + if !reflect.DeepEqual(got, tt.expected) { + t.Errorf("got=%v expected=%v", got, tt.expected) + } + }) + } +} From 6808023e45f8dde814bf5b1d663c99799d5fd652 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Tue, 21 May 2024 13:07:08 +0200 Subject: [PATCH 02/19] status: report deferred status add support to report the deferred status in conditions. 
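For illustration, a sketch of the intended outcome (condition values taken from the
new computeStatusConditions branches; timestamps and unrelated fields omitted): a
Profile whose update is deferred and not yet applied is expected to report conditions
along these lines:

    status:
      conditions:
      - type: Applied
        status: "False"
        reason: Deferred
        message: The TuneD daemon profile is waiting for the next node restart
      - type: Degraded
        status: "True"
        reason: TunedDeferredUpdate
        message: Profile will be applied at the next node restart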
Signed-off-by: Francesco Romani --- pkg/tuned/controller.go | 1 + pkg/tuned/status.go | 26 ++++-- pkg/tuned/status_test.go | 169 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 190 insertions(+), 6 deletions(-) create mode 100644 pkg/tuned/status_test.go diff --git a/pkg/tuned/controller.go b/pkg/tuned/controller.go index e3f1ec7436..d77abd2ee5 100644 --- a/pkg/tuned/controller.go +++ b/pkg/tuned/controller.go @@ -43,6 +43,7 @@ const ( scError scSysctlOverride scReloading // reloading is true during the TuneD daemon reload. + scDeferred scUnknown ) diff --git a/pkg/tuned/status.go b/pkg/tuned/status.go index a2df2024ee..bb7e80f0d2 100644 --- a/pkg/tuned/status.go +++ b/pkg/tuned/status.go @@ -84,11 +84,12 @@ func InitializeStatusConditions() []tunedv1.ProfileStatusCondition { } // computeStatusConditions takes the set of Bits 'status', old conditions -// 'conditions' and returns an updated slice of tunedv1.ProfileStatusCondition. +// 'conditions', an optional 'message' to put in the relevant condition field, +// and returns an updated slice of tunedv1.ProfileStatusCondition. // 'status' contains all the information necessary for creating a new slice of // conditions apart from LastTransitionTime, which is set based on checking the // old conditions. -func computeStatusConditions(status Bits, stderr string, conditions []tunedv1.ProfileStatusCondition) []tunedv1.ProfileStatusCondition { +func computeStatusConditions(status Bits, message string, conditions []tunedv1.ProfileStatusCondition) []tunedv1.ProfileStatusCondition { if (status & scUnknown) != 0 { return InitializeStatusConditions() } @@ -100,7 +101,16 @@ func computeStatusConditions(status Bits, stderr string, conditions []tunedv1.Pr Type: tunedv1.TunedDegraded, } - if (status & scApplied) != 0 { + deferredMessage := "" + if len(message) > 0 { + deferredMessage = ": " + message + } + + if (status & scDeferred) != 0 { + tunedProfileAppliedCondition.Status = corev1.ConditionFalse + tunedProfileAppliedCondition.Reason = "Deferred" + tunedProfileAppliedCondition.Message = "The TuneD daemon profile is waiting for the next node restart" + deferredMessage + } else if (status & scApplied) != 0 { tunedProfileAppliedCondition.Status = corev1.ConditionTrue tunedProfileAppliedCondition.Reason = "AsExpected" tunedProfileAppliedCondition.Message = "TuneD profile applied." @@ -113,15 +123,19 @@ func computeStatusConditions(status Bits, stderr string, conditions []tunedv1.Pr if (status & scError) != 0 { tunedDegradedCondition.Status = corev1.ConditionTrue tunedDegradedCondition.Reason = "TunedError" - tunedDegradedCondition.Message = "TuneD daemon issued one or more error message(s) during profile application. TuneD stderr: " + stderr + tunedDegradedCondition.Message = "TuneD daemon issued one or more error message(s) during profile application. TuneD stderr: " + message } else if (status & scSysctlOverride) != 0 { tunedDegradedCondition.Status = corev1.ConditionTrue // treat overrides as regular errors; users should use "reapply_sysctl: true" or remove conflicting sysctls tunedDegradedCondition.Reason = "TunedSysctlOverride" - tunedDegradedCondition.Message = "TuneD daemon issued one or more sysctl override message(s) during profile application. Use reapply_sysctl=true or remove conflicting sysctl " + stderr + tunedDegradedCondition.Message = "TuneD daemon issued one or more sysctl override message(s) during profile application. 
Use reapply_sysctl=true or remove conflicting sysctl " + message } else if (status & scWarn) != 0 { tunedDegradedCondition.Status = corev1.ConditionFalse // consider warnings from TuneD as non-fatal tunedDegradedCondition.Reason = "TunedWarning" - tunedDegradedCondition.Message = "No error messages observed by applying the TuneD daemon profile, only warning(s). TuneD stderr: " + stderr + tunedDegradedCondition.Message = "No error messages observed by applying the TuneD daemon profile, only warning(s). TuneD stderr: " + message + } else if (status & scDeferred) != 0 { + tunedDegradedCondition.Status = corev1.ConditionTrue + tunedDegradedCondition.Reason = "TunedDeferredUpdate" + tunedDegradedCondition.Message = "Profile will be applied at the next node restart" + deferredMessage } else { tunedDegradedCondition.Status = corev1.ConditionFalse tunedDegradedCondition.Reason = "AsExpected" diff --git a/pkg/tuned/status_test.go b/pkg/tuned/status_test.go new file mode 100644 index 0000000000..c7a5c477df --- /dev/null +++ b/pkg/tuned/status_test.go @@ -0,0 +1,169 @@ +package tuned + +import ( + "reflect" + "testing" + "time" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + tunedv1 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/tuned/v1" +) + +func TestComputeStatusConditions(t *testing.T) { + testCases := []struct { + name string + status Bits + stderr string + conds []tunedv1.ProfileStatusCondition + expected []tunedv1.ProfileStatusCondition + }{ + { + name: "nil", + expected: []tunedv1.ProfileStatusCondition{ + { + Type: tunedv1.TunedProfileApplied, + Status: corev1.ConditionFalse, + LastTransitionTime: metav1.Time{ + Time: testTime(), + }, + Reason: "Failed", + Message: "The TuneD daemon profile not yet applied, or application failed.", + }, + { + Type: tunedv1.TunedDegraded, + Status: corev1.ConditionFalse, + LastTransitionTime: metav1.Time{ + Time: testTime(), + }, + Reason: "AsExpected", + Message: "No warning or error messages observed applying the TuneD daemon profile.", + }, + }, + }, + { + name: "only-deferred", + status: scDeferred, + expected: []tunedv1.ProfileStatusCondition{ + { + Type: tunedv1.TunedProfileApplied, + Status: corev1.ConditionFalse, + LastTransitionTime: metav1.Time{ + Time: testTime(), + }, + Reason: "Deferred", + Message: "The TuneD daemon profile is waiting for the next node restart", + }, + { + Type: tunedv1.TunedDegraded, + Status: corev1.ConditionTrue, + LastTransitionTime: metav1.Time{ + Time: testTime(), + }, + Reason: "TunedDeferredUpdate", + Message: "Profile will be applied at the next node restart", + }, + }, + }, + { + name: "error-deferred", + status: scError | scDeferred, + stderr: "test-error", + expected: []tunedv1.ProfileStatusCondition{ + { + Type: tunedv1.TunedProfileApplied, + Status: corev1.ConditionFalse, + LastTransitionTime: metav1.Time{ + Time: testTime(), + }, + Reason: "Deferred", + Message: "The TuneD daemon profile is waiting for the next node restart: test-error", + }, + { + Type: tunedv1.TunedDegraded, + Status: corev1.ConditionTrue, + LastTransitionTime: metav1.Time{ + Time: testTime(), + }, + Reason: "TunedError", + Message: "TuneD daemon issued one or more error message(s) during profile application. 
TuneD stderr: test-error", + }, + }, + }, + { + name: "sysctl-deferred", + status: scSysctlOverride | scDeferred, + stderr: "test-error", + expected: []tunedv1.ProfileStatusCondition{ + { + Type: tunedv1.TunedProfileApplied, + Status: corev1.ConditionFalse, + LastTransitionTime: metav1.Time{ + Time: testTime(), + }, + Reason: "Deferred", + Message: "The TuneD daemon profile is waiting for the next node restart: test-error", + }, + { + Type: tunedv1.TunedDegraded, + Status: corev1.ConditionTrue, + LastTransitionTime: metav1.Time{ + Time: testTime(), + }, + Reason: "TunedSysctlOverride", + Message: "TuneD daemon issued one or more sysctl override message(s) during profile application. Use reapply_sysctl=true or remove conflicting sysctl test-error", + }, + }, + }, + { + name: "warning-deferred", + status: scWarn | scDeferred, + stderr: "test-error", + expected: []tunedv1.ProfileStatusCondition{ + { + Type: tunedv1.TunedProfileApplied, + Status: corev1.ConditionFalse, + LastTransitionTime: metav1.Time{ + Time: testTime(), + }, + Reason: "Deferred", + Message: "The TuneD daemon profile is waiting for the next node restart: test-error", + }, + { + Type: tunedv1.TunedDegraded, + Status: corev1.ConditionFalse, + LastTransitionTime: metav1.Time{ + Time: testTime(), + }, + Reason: "TunedWarning", + Message: "No error messages observed by applying the TuneD daemon profile, only warning(s). TuneD stderr: test-error", + }, + }, + }, + } + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + got := clearTimestamps(computeStatusConditions(tt.status, tt.stderr, tt.conds)) + if !reflect.DeepEqual(got, tt.expected) { + t.Errorf("got=%#v expected=%#v", got, tt.expected) + } + }) + } +} + +func clearTimestamps(conds []tunedv1.ProfileStatusCondition) []tunedv1.ProfileStatusCondition { + ret := make([]tunedv1.ProfileStatusCondition, 0, len(conds)) + for idx := range conds { + cond := conds[idx] // local copy + cond.LastTransitionTime = metav1.Time{ + Time: testTime(), + } + ret = append(ret, cond) + } + return ret +} + +func testTime() time.Time { + return time.Date(1980, time.January, 1, 0, 0, 0, 0, time.UTC) +} From ba0275c4ed40618b299187d4341000084c5eaeba Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Tue, 21 May 2024 13:15:30 +0200 Subject: [PATCH 03/19] tuned: controller: profiles extract/repack rework profilesExtract and helper functions to make more testable, enable round-tripping, and add helpers which will be used in the deferred update functionality. Read extracted profiles on startup and reconstruct the node state, to be used to implement the deferred updates feature. Please note all the operations are non-destructive read-only so pretty safe. Signed-off-by: Francesco Romani --- pkg/tuned/cmd/render/render.go | 2 +- pkg/tuned/controller.go | 203 ++++++++++++++++++++------- pkg/tuned/controller_test.go | 246 +++++++++++++++++++++++++++++++++ pkg/tuned/tuned_parser.go | 6 +- 4 files changed, 403 insertions(+), 54 deletions(-) create mode 100644 pkg/tuned/controller_test.go diff --git a/pkg/tuned/cmd/render/render.go b/pkg/tuned/cmd/render/render.go index 2eec453076..7c54073e95 100644 --- a/pkg/tuned/cmd/render/render.go +++ b/pkg/tuned/cmd/render/render.go @@ -213,7 +213,7 @@ func render(inputDir []string, outputDir string, mcpName string) error { for _, t := range tuneD { tunedProfiles = append(tunedProfiles, t.Spec.Profile...) 
} - _, _, _, err = tunedpkg.ProfilesExtract(tunedProfiles, recommendedProfile) + _, _, _, _, err = tunedpkg.ProfilesExtract(tunedProfiles, recommendedProfile) if err != nil { klog.Errorf("error extracting tuned profiles : %v", err) return fmt.Errorf("error extracting tuned profiles: %w", err) diff --git a/pkg/tuned/controller.go b/pkg/tuned/controller.go index d77abd2ee5..6bd0dde708 100644 --- a/pkg/tuned/controller.go +++ b/pkg/tuned/controller.go @@ -4,11 +4,15 @@ import ( "bufio" // scanner "bytes" // bytes.Buffer "context" // context.TODO() + "crypto/sha256" + "encoding/hex" "errors" // errors.Is() "fmt" // Printf() "math" // math.Pow() "os" // os.Exit(), os.Stderr, ... "os/exec" // os.Exec() + "path/filepath" + "sort" "strings" // strings.Join() "syscall" // syscall.SIGHUP, ... "time" // time.Second, ... @@ -105,6 +109,12 @@ type Daemon struct { // recommendedProfile is the TuneD profile the operator calculated to be applied. // This variable is used to cache the value which was written to tunedRecommendFile. recommendedProfile string + // profileFingerprintUnpacked is the fingerprint of the profile unpacked on the node. + // Relevant in the startup flow with deferred updates. + profileFingerprintUnpacked string + // profileFingerprintEffective is the fingerprint of the profile effective on the node. + // Relevant in the startup flow with deferred updates. + profileFingerprintEffective string } type Change struct { @@ -326,23 +336,27 @@ func profilesEqual(profileFile string, profileData string) bool { // Returns: // - True if the data in the to-be-extracted recommended profile or the profiles being // included from the current recommended profile have changed. +// - If the data changed, the fingerprint of the new profile, or "" otherwise. // - A map with successfully extracted TuneD profile names. // - A map with names of TuneD profiles the current TuneD recommended profile depends on. // - Error if any or nil. -func ProfilesExtract(profiles []tunedv1.TunedProfile, recommendedProfile string) (bool, map[string]bool, map[string]bool, error) { +func ProfilesExtract(profiles []tunedv1.TunedProfile, recommendedProfile string) (bool, string, map[string]bool, map[string]bool, error) { + klog.Infof("profilesExtract(): extracting %d TuneD profiles (recommended=%s)", len(profiles), recommendedProfile) + // Get a list of TuneD profiles names the recommended profile depends on. + deps := profileDepends(recommendedProfile) + // Add the recommended profile itself. + deps[recommendedProfile] = true + klog.V(2).Infof("profilesExtract(): profile deps: %#v", deps) + return profilesExtractPathWithDeps(tunedProfilesDirCustom, profiles, recommendedProfile, deps) +} + +// profilesExtractPathWithDeps is like ProfilesExtract but takes explicit profiles root dir path and +// explicit dependencies, so it's easier to test. To be used only internally. +func profilesExtractPathWithDeps(profilesRootDir string, profiles []tunedv1.TunedProfile, recommendedProfile string, recommendedProfileDeps map[string]bool) (bool, string, map[string]bool, map[string]bool, error) { var ( - change bool + change bool = false + extracted map[string]bool = map[string]bool{} // TuneD profile names present in TuneD CR and successfully extracted to tunedProfilesDirCustom ) - klog.Infof("profilesExtract(): extracting %d TuneD profiles", len(profiles)) - - recommendedProfileDeps := map[string]bool{} - if len(recommendedProfile) > 0 { - // Get a list of TuneD profiles names the recommended profile depends on. 
- recommendedProfileDeps = profileDepends(recommendedProfile) - // Add the recommended profile itself. - recommendedProfileDeps[recommendedProfile] = true - } - extracted := map[string]bool{} // TuneD profile names present in TuneD CR and successfully extracted to tunedProfilesDirCustom for index, profile := range profiles { if profile.Name == nil { @@ -353,11 +367,11 @@ func ProfilesExtract(profiles []tunedv1.TunedProfile, recommendedProfile string) klog.Warningf("profilesExtract(): profile data missing for Profile %v", index) continue } - profileDir := fmt.Sprintf("%s/%s", tunedProfilesDirCustom, *profile.Name) - profileFile := fmt.Sprintf("%s/%s", profileDir, tunedConfFile) + profileDir := filepath.Join(profilesRootDir, *profile.Name) + profileFile := filepath.Join(profileDir, tunedConfFile) if err := os.MkdirAll(profileDir, os.ModePerm); err != nil { - return change, extracted, recommendedProfileDeps, fmt.Errorf("failed to create TuneD profile directory %q: %v", profileDir, err) + return change, "", extracted, recommendedProfileDeps, fmt.Errorf("failed to create TuneD profile directory %q: %v", profileDir, err) } if recommendedProfileDeps[*profile.Name] { @@ -368,21 +382,56 @@ func ProfilesExtract(profiles []tunedv1.TunedProfile, recommendedProfile string) if !change { un = "un" } - klog.Infof("recommended TuneD profile %s content %schanged [%s]", recommendedProfile, un, *profile.Name) + klog.Infof("profilesExtract(): recommended TuneD profile %s content %schanged [%s]", recommendedProfile, un, *profile.Name) } - f, err := os.Create(profileFile) + err := os.WriteFile(profileFile, []byte(*profile.Data), 0644) if err != nil { - return change, extracted, recommendedProfileDeps, fmt.Errorf("failed to create TuneD profile file %q: %v", profileFile, err) - } - defer f.Close() - if _, err = f.WriteString(*profile.Data); err != nil { - return change, extracted, recommendedProfileDeps, fmt.Errorf("failed to write TuneD profile file %q: %v", profileFile, err) + return change, "", extracted, recommendedProfileDeps, fmt.Errorf("failed to write TuneD profile file %q: %v", profileFile, err) } extracted[*profile.Name] = true + klog.V(2).Infof("profilesExtract(): extracted profile %q to %q (%d bytes)", *profile.Name, profileFile, len(*profile.Data)) + } + + profilesFP := profilesFingerprint(profiles, recommendedProfile) + klog.Infof("profilesExtract(): extracted profiles fingerprint: %s", profilesFP) + return change, profilesFP, extracted, recommendedProfileDeps, nil +} + +func profilesRepackPath(rfPath, profilesRootDir string) ([]tunedv1.TunedProfile, string, error) { + recommendedProfile, err := TunedRecommendFileReadPath(rfPath) + if err != nil { + return nil, "", err + } + klog.Infof("profilesRepack(): recovered recommended profile: %q", recommendedProfile) + + dents, err := os.ReadDir(profilesRootDir) + if err != nil { + return nil, "", err + } + var profiles []tunedv1.TunedProfile + for _, dent := range dents { + profileDir := filepath.Join(profilesRootDir, dent.Name()) + if !dent.IsDir() { + klog.V(2).Infof("profilesRepack(): skipped entry %q: not a directory", profileDir) + continue + } + profilePath := filepath.Join(profileDir, tunedConfFile) + profileBytes, err := os.ReadFile(profilePath) + if err != nil { + return profiles, recommendedProfile, err + } + profileName := dent.Name() + profileData := string(profileBytes) + profiles = append(profiles, tunedv1.TunedProfile{ + Name: &profileName, + Data: &profileData, + }) + klog.V(2).Infof("profilesRepack(): recovered profile: %q from %q (%d 
bytes)", profileName, profilePath, len(profileBytes)) } - return change, extracted, recommendedProfileDeps, nil + klog.V(2).Infof("profilesRepack(): recovered %d profiles", len(profiles)) + return profiles, recommendedProfile, nil } // profilesSync extracts TuneD daemon profiles to the daemon configuration directory @@ -391,16 +440,17 @@ func ProfilesExtract(profiles []tunedv1.TunedProfile, recommendedProfile string) // Returns: // - True if the data in the to-be-extracted recommended profile or the profiles being // included from the current recommended profile have changed. +// - The synced profile fingerprint. // - Error if any or nil. -func profilesSync(profiles []tunedv1.TunedProfile, recommendedProfile string) (bool, error) { - change, extractedNew, recommendedProfileDeps, err := ProfilesExtract(profiles, recommendedProfile) +func profilesSync(profiles []tunedv1.TunedProfile, recommendedProfile string) (bool, string, error) { + change, profilesFP, extractedNew, recommendedProfileDeps, err := ProfilesExtract(profiles, recommendedProfile) if err != nil { - return change, err + return change, "", err } dirEntries, err := os.ReadDir(tunedProfilesDirCustom) if err != nil { - return change, err + return change, "", err } // Deal with TuneD profiles absent from Tuned CRs, but still present in // @@ -424,9 +474,9 @@ func profilesSync(profiles []tunedv1.TunedProfile, recommendedProfile string) (b profileDir := fmt.Sprintf("%s/%s", tunedProfilesDirCustom, profile) err := os.RemoveAll(profileDir) if err != nil { - return change, fmt.Errorf("failed to remove %q: %v", profileDir, err) + return change, "", fmt.Errorf("failed to remove %q: %v", profileDir, err) } - klog.Infof("removed TuneD profile %q", profileDir) + klog.Infof("profilesSync(): removed TuneD profile %q", profileDir) if recommendedProfileDeps[profile] { // This TuneD profile does not exist in the Profile CR, but the recommended profile depends on it. @@ -435,22 +485,43 @@ func profilesSync(profiles []tunedv1.TunedProfile, recommendedProfile string) (b } } - return change, nil + return change, profilesFP, nil } -// providerExtract extracts Cloud Provider name into ocpTunedProvider file. -func providerExtract(provider string) error { - klog.Infof("extracting cloud provider name to %v", ocpTunedProvider) +// filterAndSortProfiles returns a slice of valid (non-nil name, non-nil data) profiles +// from the given slice, and the returned slice have all the valid profiles sorted by name. +func filterAndSortProfiles(profiles []tunedv1.TunedProfile) []tunedv1.TunedProfile { + profs := make([]tunedv1.TunedProfile, 0, len(profiles)) + for _, prof := range profiles { + if prof.Name == nil { + continue + } + if prof.Data == nil { + continue + } + profs = append(profs, prof) + } + sort.Slice(profs, func(i, j int) bool { return *profs[i].Name < *profs[j].Name }) + return profs +} - f, err := os.Create(ocpTunedProvider) - if err != nil { - return fmt.Errorf("failed to create cloud provider name file %q: %v", ocpTunedProvider, err) +// profilesFingerprint returns a hash of `recommendedProfile` name joined with the data sections of all TuneD profiles in the `profiles` slice. 
+func profilesFingerprint(profiles []tunedv1.TunedProfile, recommendedProfile string) string { + profiles = filterAndSortProfiles(profiles) + h := sha256.New() + h.Write([]byte(recommendedProfile)) + for _, prof := range profiles { + h.Write([]byte(*prof.Data)) } - defer f.Close() - if _, err = f.WriteString(provider); err != nil { + return hex.EncodeToString(h.Sum(nil)) +} + +// providerExtract extracts Cloud Provider name into ocpTunedProvider file. +func providerExtract(provider string) error { + klog.Infof("providerExtract(): extracting cloud provider name to %v", ocpTunedProvider) + if err := os.WriteFile(ocpTunedProvider, []byte(provider), 0o644); err != nil { return fmt.Errorf("failed to write cloud provider name file %q: %v", ocpTunedProvider, err) } - return nil } @@ -540,23 +611,38 @@ func writeOpenShiftTunedImageEnv() error { return nil } -func TunedRecommendFileWrite(profileName string) error { - klog.V(2).Infof("tunedRecommendFileWrite(): %s", profileName) - if err := os.MkdirAll(tunedRecommendDir, os.ModePerm); err != nil { - return fmt.Errorf("failed to create directory %q: %v", tunedRecommendDir, err) - } - f, err := os.Create(tunedRecommendFile) - if err != nil { - return fmt.Errorf("failed to create file %q: %v", tunedRecommendFile, err) +func TunedRecommendFileWritePath(rfPath, profileName string) error { + rfDir := filepath.Dir(rfPath) + klog.V(2).Infof("tunedRecommendFileWrite(): %s %s", profileName, rfDir) + if err := os.MkdirAll(rfDir, os.ModePerm); err != nil { + return fmt.Errorf("failed to create directory %q: %v", rfDir, err) } - defer f.Close() - if _, err = f.WriteString(fmt.Sprintf("[%s]\n", profileName)); err != nil { - return fmt.Errorf("failed to write file %q: %v", tunedRecommendFile, err) + data := []byte(fmt.Sprintf("[%s]\n", profileName)) + if err := os.WriteFile(rfPath, data, 0644); err != nil { + return fmt.Errorf("failed to write file %q: %v", rfPath, err) } - klog.Infof("written %q to set TuneD profile %s", tunedRecommendFile, profileName) + klog.Infof("tunedRecommendFileWrite(): written %q to set TuneD profile %s", rfPath, profileName) return nil } +func TunedRecommendFileReadPath(rfPath string) (string, error) { + data, err := os.ReadFile(rfPath) + if err != nil { + return "", err + } + recommended := strings.TrimSuffix(strings.TrimPrefix(strings.TrimSpace(string(data)), "["), "]") + klog.Infof("tunedRecommendFileRead(): read %q from %q", recommended, tunedRecommendFile) + return recommended, nil +} + +func TunedRecommendFileWrite(profileName string) error { + return TunedRecommendFileWritePath(tunedRecommendFile, profileName) +} + +func TunedRecommendFileRead() (string, error) { + return TunedRecommendFileReadPath(tunedRecommendFile) +} + // overridenSysctl returns name of a host-level sysctl that overrides TuneD-level sysctl, // or an empty string. 
func overridenSysctl(data string) string { @@ -824,7 +910,7 @@ func (c *Controller) changeSyncerTuneD(change Change) (synced bool, err error) { if err != nil { return false, fmt.Errorf("failed to get Profile %s: %v", c.nodeName, err) } - changeProfiles, err := profilesSync(profile.Spec.Profile, c.daemon.recommendedProfile) + changeProfiles, _, err := profilesSync(profile.Spec.Profile, c.daemon.recommendedProfile) if err != nil { return false, err } @@ -1301,6 +1387,19 @@ func RunInCluster(stopCh <-chan struct{}, version string) error { panic(err.Error()) } + profiles, recommended, err := profilesRepackPath(tunedRecommendFile, tunedProfilesDirCustom) + if err != nil { + // keep going, immediate updates are expected to work as usual + // and we expect the vast majority of updates to be immediate anyway + klog.Errorf("error repacking the profile: %v", err) + klog.Infof("deferred updates likely broken") + } + + profileFP := profilesFingerprint(profiles, recommended) + c.daemon.profileFingerprintUnpacked = profileFP + c.daemon.profileFingerprintEffective = profileFP + klog.Infof("starting: installed and effective profile fingerprint %q", profileFP) + return retryLoop(c) } diff --git a/pkg/tuned/controller_test.go b/pkg/tuned/controller_test.go new file mode 100644 index 0000000000..66ae5100ec --- /dev/null +++ b/pkg/tuned/controller_test.go @@ -0,0 +1,246 @@ +package tuned + +import ( + "path/filepath" + "reflect" + "testing" + + tunedv1 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/tuned/v1" +) + +func TestRecommendFileRoundTrip(t *testing.T) { + tmpDir := t.TempDir() + + rfPath := filepath.Join(tmpDir, "50-test.conf") + profName := "test-recommend" + + err := TunedRecommendFileWritePath(rfPath, profName) + if err != nil { + t.Fatalf("unexpected error writing profile %q path %q: %v", profName, rfPath, err) + } + + got, err := TunedRecommendFileReadPath(rfPath) + if err != nil { + t.Fatalf("unexpected error reading from path %q: %v", rfPath, err) + } + + if got != profName { + t.Errorf("profile name got %q expected %q", got, profName) + } +} + +func TestFilterAndSortProfiles(t *testing.T) { + testCases := []struct { + name string + profiles []tunedv1.TunedProfile + expected []tunedv1.TunedProfile + }{ + { + name: "nil", + expected: []tunedv1.TunedProfile{}, + }, + { + name: "empty", + profiles: []tunedv1.TunedProfile{}, + expected: []tunedv1.TunedProfile{}, + }, + { + name: "single", + profiles: []tunedv1.TunedProfile{ + { + Name: newString("aaa"), + Data: newString("data"), + }, + }, + expected: []tunedv1.TunedProfile{ + { + Name: newString("aaa"), + Data: newString("data"), + }, + }, + }, + { + name: "single, partial", + profiles: []tunedv1.TunedProfile{ + { + Name: newString("aaa"), + }, + }, + expected: []tunedv1.TunedProfile{}, + }, + { + name: "multi,sorted", + profiles: []tunedv1.TunedProfile{ + { + Name: newString("aaa"), + Data: newString("data"), + }, + { + Name: newString("bbb"), + Data: newString("data"), + }, + { + Name: newString("ccc"), + Data: newString("data"), + }, + }, + expected: []tunedv1.TunedProfile{ + { + Name: newString("aaa"), + Data: newString("data"), + }, + { + Name: newString("bbb"), + Data: newString("data"), + }, + { + Name: newString("ccc"), + Data: newString("data"), + }, + }, + }, + { + name: "multi,reverse", + profiles: []tunedv1.TunedProfile{ + { + Name: newString("ccc"), + Data: newString("data"), + }, + { + Name: newString("bbb"), + Data: newString("data"), + }, + { + Name: newString("aaa"), + Data: newString("data"), + }, + }, + expected: 
[]tunedv1.TunedProfile{ + { + Name: newString("aaa"), + Data: newString("data"), + }, + { + Name: newString("bbb"), + Data: newString("data"), + }, + { + Name: newString("ccc"), + Data: newString("data"), + }, + }, + }, + } + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + got := filterAndSortProfiles(tt.profiles) + if !reflect.DeepEqual(got, tt.expected) { + t.Errorf("got=%#v expected=%#v", got, tt.expected) + } + }) + } +} + +func TestProfileFingerprint(t *testing.T) { + testCases := []struct { + name string + profiles []tunedv1.TunedProfile + recommendedProfile string + expected string + }{ + // all hashes computed manually (well, using throwaway go code and shell tools) on developer box + { + name: "nil", + expected: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + }, + { + name: "no-name", + profiles: []tunedv1.TunedProfile{ + { + Data: newString("test-data"), + }, + }, + expected: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + }, + { + name: "no-data", + profiles: []tunedv1.TunedProfile{ + { + Name: newString("test-name"), + }, + }, + expected: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + }, + { + name: "minimal", + profiles: []tunedv1.TunedProfile{ + { + Name: newString("test-name"), + Data: newString("test-data"), + }, + }, + expected: "a186000422feab857329c684e9fe91412b1a5db084100b37a98cfc95b62aa867", + }, + { + name: "minimal-multi-entry", + profiles: []tunedv1.TunedProfile{ + { + Name: newString("test-name-1"), + Data: newString("test-data-1"), + }, + { + Name: newString("test-name-2"), + Data: newString("test-data-2"), + }, + }, + expected: "72e7e1930db49379e31aa370d4274f9caada231c775a704db7e78dc856e67662", + }, + { + name: "skip-no-data", + profiles: []tunedv1.TunedProfile{ + { + Name: newString("test-name-1"), + Data: newString("test-data-1"), + }, + { + // intentionally out of order in between the two valid profiles + Name: newString("test-name-3"), + }, + { + Name: newString("test-name-2"), + Data: newString("test-data-2"), + }, + }, + expected: "72e7e1930db49379e31aa370d4274f9caada231c775a704db7e78dc856e67662", + }, + { + name: "skip-no-name", + profiles: []tunedv1.TunedProfile{ + { + Name: newString("test-name-1"), + Data: newString("test-data-1"), + }, + { + Name: newString("test-name-2"), + Data: newString("test-data-2"), + }, + { + Data: newString("test-data-3"), + }, + }, + expected: "72e7e1930db49379e31aa370d4274f9caada231c775a704db7e78dc856e67662", + }, + } + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + got := profilesFingerprint(tt.profiles, tt.recommendedProfile) + if got != tt.expected { + t.Errorf("got=%v expected=%v", got, tt.expected) + } + }) + } +} + +func newString(s string) *string { + return &s +} diff --git a/pkg/tuned/tuned_parser.go b/pkg/tuned/tuned_parser.go index 67002c977d..87828d0839 100644 --- a/pkg/tuned/tuned_parser.go +++ b/pkg/tuned/tuned_parser.go @@ -190,7 +190,11 @@ func profileExists(profileName string, tunedProfilesDir string) bool { // Note: only basic expansion of TuneD built-in functions into profiles is // performed. See expandTuneDBuiltin for more detail. 
 func profileDepends(profileName string) map[string]bool {
-	return profileDependsLoop(profileName, map[string]bool{})
+	deps := map[string]bool{}
+	if len(profileName) == 0 {
+		return deps
+	}
+	return profileDependsLoop(profileName, deps)
 }
 
 func profileDependsLoop(profileName string, seenProfiles map[string]bool) map[string]bool {

From edf416896477a755774147bc35f8e022368cb6c3 Mon Sep 17 00:00:00 2001
From: Francesco Romani
Date: Tue, 21 May 2024 14:15:58 +0200
Subject: [PATCH 04/19] tuned: add changeSyncerPostNodeRestart

The deferred updates feature needs to perform specific actions when the
node on which the tuned daemon runs is restarted (and only then).
Add a hook point with specific code for that.

Signed-off-by: Francesco Romani
---
 pkg/tuned/controller.go | 37 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/pkg/tuned/controller.go b/pkg/tuned/controller.go
index 6bd0dde708..a4b2ae029c 100644
--- a/pkg/tuned/controller.go
+++ b/pkg/tuned/controller.go
@@ -123,6 +123,9 @@ type Change struct {
 	// Do we need to update Tuned Profile status?
 	profileStatus bool
 
+	// is this Change caused by a node restart?
+	nodeRestart bool
+
 	// The following keys are set when profile == true.
 	// Was debugging set in Profile k8s object?
 	debug bool
@@ -845,6 +848,34 @@ func GetBootcmdline() (string, error) {
 	return responseString, nil
 }
 
+// changeSyncerPostNodeRestart synchronizes the daemon status after a node restart
+func (c *Controller) changeSyncerPostNodeRestart(change Change) {
+	if !change.nodeRestart {
+		return
+	}
+
+	klog.V(2).Infof("changeSyncerPostNodeRestart(%#v)", change)
+	defer klog.V(2).Infof("changeSyncerPostNodeRestart(%#v) done", change)
+
+	profiles, recommended, err := profilesRepackPath(tunedRecommendFile, tunedProfilesDirCustom)
+	if err != nil {
+		// keep going, immediate updates are expected to work as usual
+		// and we expect the vast majority of updates to be immediate anyway
+		klog.Errorf("error repacking the profile: %v", err)
+		klog.Infof("deferred updates likely broken")
+	}
+
+	profileFP := profilesFingerprint(profiles, recommended)
+	if (c.daemon.status & scApplied) == 0 {
+		klog.Infof("changeSyncerPostNodeRestart(): profile not applied after tuned reload")
+		return
+	}
+
+	klog.V(2).Infof("changeSyncerPostNodeRestart(): current effective profile fingerprint %q -> %q", c.daemon.profileFingerprintEffective, profileFP)
+	c.daemon.profileFingerprintEffective = profileFP
+	c.daemon.status &= ^scDeferred // force clear even if it was never set.
+}
+
 func (c *Controller) changeSyncerProfileStatus(change Change) (synced bool) {
 	klog.V(2).Infof("changeSyncerProfileStatus(%#v)", change)
 	defer klog.V(2).Infof("changeSyncerProfileStatus(%#v) done", change)
@@ -972,7 +1003,11 @@ func (c *Controller) changeSyncerRestartOrReloadTuneD() error {
 // reloads as needed. Returns indication whether the change was successfully
 // synced and an error. Only critical errors are returned, as non-nil errors
 // will cause restart of the main control loop -- the changeWatcher() method.
-func (c *Controller) changeSyncer(change Change) (synced bool, err error) {
+func (c *Controller) changeSyncer(change Change) (bool, error) {
+	klog.V(2).Infof("changeSyncer(%#v)", change)
+	defer klog.V(2).Infof("changeSyncer(%#v) done", change)
+	// Sync internal status after a TuneD reload
+	c.changeSyncerPostNodeRestart(change)
 	// Sync k8s Profile status if/when needed.
if !c.changeSyncerProfileStatus(change) { return false, nil From 9cd62e68537f511a6b0455027f35fd04a933fb62 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Tue, 21 May 2024 14:23:36 +0200 Subject: [PATCH 05/19] tuned: refactor status update Rework how tuned controller updates status to make room for the deferred update feature. Signed-off-by: Francesco Romani --- pkg/tuned/controller.go | 144 +++++++++++++++++++++++++---------- pkg/tuned/controller_test.go | 58 ++++++++++++++ pkg/tuned/status.go | 8 +- pkg/tuned/status_test.go | 10 +-- 4 files changed, 171 insertions(+), 49 deletions(-) diff --git a/pkg/tuned/controller.go b/pkg/tuned/controller.go index a4b2ae029c..21ee47fb3a 100644 --- a/pkg/tuned/controller.go +++ b/pkg/tuned/controller.go @@ -135,6 +135,43 @@ type Change struct { reapplySysctl bool // The current recommended profile as calculated by the operator. recommendedProfile string + + // Is the current Change triggered by an object with the deferred annotation? + deferred bool + // Text to convey in status message, if present. + message string +} + +func (ch Change) String() string { + var items []string + if ch.profile { + items = append(items, "profile:true") + } + if ch.profileStatus { + items = append(items, "profileStatus:true") + } + if ch.daemonReload { + items = append(items, "daemonReload:true") + } + if ch.debug { + items = append(items, "debug:true") + } + if ch.provider != "" { + items = append(items, fmt.Sprintf("provider:%q", ch.provider)) + } + if ch.reapplySysctl { + items = append(items, "reapplySysctl:true") + } + if ch.recommendedProfile != "" { + items = append(items, fmt.Sprintf("recommendedProfile:%q", ch.recommendedProfile)) + } + if ch.deferred { + items = append(items, "deferred:true") + } + if ch.message != "" { + items = append(items, fmt.Sprintf("message:%q", ch.message)) + } + return "tuned.Change{" + strings.Join(items, ", ") + "}" } type Controller struct { @@ -854,8 +891,8 @@ func (c *Controller) changeSyncerPostNodeRestart(change Change) { return } - klog.V(2).Infof("changeSyncerPostNodeRestart(%#v)", change) - defer klog.V(2).Infof("changeSyncerPostNodeRestart(%#v) done", change) + klog.V(2).Infof("changeSyncerPostNodeRestart(%s)", change.String()) + defer klog.V(2).Infof("changeSyncerPostNodeRestart(%s) done", change.String()) profiles, recommended, err := profilesRepackPath(tunedRecommendFile, tunedProfilesDirCustom) if err != nil { @@ -877,8 +914,8 @@ func (c *Controller) changeSyncerPostNodeRestart(change Change) { } func (c *Controller) changeSyncerProfileStatus(change Change) (synced bool) { - klog.V(2).Infof("changeSyncerProfileStatus(%#v)", change) - defer klog.V(2).Infof("changeSyncerProfileStatus(%#v) done", change) + klog.V(2).Infof("changeSyncerProfileStatus(%s)", change.String()) + defer klog.V(2).Infof("changeSyncerProfileStatus(%s) done", change.String()) if !change.profileStatus { return true @@ -891,7 +928,7 @@ func (c *Controller) changeSyncerProfileStatus(change Change) (synced bool) { // 2) TuneD daemon was reloaded. Make sure the node Profile k8s object is in sync with // the active profile, e.g. the Profile indicates the presence of the stall daemon on // the host if requested by the current active profile. 
- if err := c.updateTunedProfile(); err != nil { + if err := c.updateTunedProfile(change); err != nil { klog.Error(err.Error()) return false // retry later } @@ -902,8 +939,10 @@ func (c *Controller) changeSyncerProfileStatus(change Change) (synced bool) { // current TuneD configuration and signals TuneD process reload or restart. func (c *Controller) changeSyncerTuneD(change Change) (synced bool, err error) { var reload bool + var cfgUpdated bool - klog.V(2).Infof("changeSyncerTuneD()") + klog.V(2).Infof("changeSyncerTuneD(%s)", change.String()) + defer klog.V(2).Infof("changeSyncerTuneD(%s) done updated=%v", change.String(), cfgUpdated) if (c.daemon.status & scReloading) != 0 { // This should not be necessary, but keep this here as a reminder. @@ -931,7 +970,7 @@ func (c *Controller) changeSyncerTuneD(change Change) (synced bool, err error) { klog.Infof("recommended profile (%s) matches current configuration", c.daemon.recommendedProfile) // We do not need to reload the TuneD daemon, however, someone may have tampered with the k8s Profile status for this node. // Make sure its status is up-to-date. - if err = c.updateTunedProfile(); err != nil { + if err = c.updateTunedProfile(change); err != nil { klog.Error(err.Error()) return false, nil // retry later } @@ -976,27 +1015,26 @@ func (c *Controller) changeSyncerTuneD(change Change) (synced bool, err error) { if reload { c.daemon.restart |= ctrlReload } - err = c.changeSyncerRestartOrReloadTuneD() + + cfgUpdated, err = c.changeSyncerRestartOrReloadTuneD() + klog.V(2).Infof("changeSyncerTuneD() configuration updated: %v", cfgUpdated) + if err != nil { + return false, err + } return err == nil, err } -func (c *Controller) changeSyncerRestartOrReloadTuneD() error { - var err error - +func (c *Controller) changeSyncerRestartOrReloadTuneD() (bool, error) { klog.V(2).Infof("changeSyncerRestartOrReloadTuneD()") - if (c.daemon.restart & ctrlRestart) != 0 { // Complete restart of the TuneD daemon needed. For example, debuging option is used or an option in tuned-main.conf file changed). - err = c.tunedRestart() - return err + return true, c.tunedRestart() } - if (c.daemon.restart & ctrlReload) != 0 { - err = c.tunedReload() + return true, c.tunedReload() } - - return err + return false, nil } // Method changeSyncer performs k8s Profile object updates and TuneD daemon @@ -1004,8 +1042,8 @@ func (c *Controller) changeSyncerRestartOrReloadTuneD() error { // synced and an error. Only critical errors are returned, as non-nil errors // will cause restart of the main control loop -- the changeWatcher() method. func (c *Controller) changeSyncer(change Change) (bool, error) { - klog.V(2).Infof("changeSyncer(%#v)", change) - defer klog.V(2).Infof("changeSyncer(%#v) done", change) + klog.V(2).Infof("changeSyncer(%s)", change.String()) + defer klog.V(2).Infof("changeSyncer(%s) done", change.String()) // Sync internal status after a TuneD reload c.changeSyncerPostNodeRestart(change) // Sync k8s Profile status if/when needed. @@ -1123,13 +1161,21 @@ func (c *Controller) updateNodeAnnotations(node *corev1.Node, annotations map[st return nil } +func (c *Controller) daemonMessage(change Change, message string) string { + if len(message) > 0 { + return message + } + if len(change.message) > 0 { + return change.message + } + return c.daemon.stderr +} + // Method updateTunedProfile updates a Tuned Profile with information to report back // to the operator. Note this method must be called only when the TuneD daemon is // not reloading. 
-func (c *Controller) updateTunedProfile() (err error) { - var ( - bootcmdline string - ) +func (c *Controller) updateTunedProfile(change Change) (err error) { + var bootcmdline string if bootcmdline, err = GetBootcmdline(); err != nil { // This should never happen unless something is seriously wrong (e.g. TuneD @@ -1137,17 +1183,6 @@ func (c *Controller) updateTunedProfile() (err error) { return fmt.Errorf("unable to get kernel command-line parameters: %v", err) } - profileName := getNodeName() - profile, err := c.listers.TunedProfiles.Get(profileName) - if err != nil { - return fmt.Errorf("failed to get Profile %s: %v", profileName, err) - } - - activeProfile, err := getActiveProfile() - if err != nil { - return err - } - node, err := c.getNodeForProfile(c.nodeName) if err != nil { return err @@ -1157,9 +1192,7 @@ func (c *Controller) updateTunedProfile() (err error) { node.ObjectMeta.Annotations = map[string]string{} } - statusConditions := computeStatusConditions(c.daemon.status, c.daemon.stderr, profile.Status.Conditions) bootcmdlineAnnotVal, bootcmdlineAnnotSet := node.ObjectMeta.Annotations[tunedv1.TunedBootcmdlineAnnotationKey] - if !bootcmdlineAnnotSet || bootcmdlineAnnotVal != bootcmdline { annotations := map[string]string{tunedv1.TunedBootcmdlineAnnotationKey: bootcmdline} err = c.updateNodeAnnotations(node, annotations) @@ -1168,9 +1201,41 @@ func (c *Controller) updateTunedProfile() (err error) { } } + return c.updateTunedProfileStatus(context.TODO(), change) +} + +func (c *Controller) updateTunedProfileStatus(ctx context.Context, change Change) error { + activeProfile, err := getActiveProfile() + if err != nil { + return err + } + + profile, err := c.listers.TunedProfiles.Get(c.nodeName) + if err != nil { + return fmt.Errorf("failed to get Profile %s: %v", c.nodeName, err) + } + + var message string + wantsDeferred := util.HasDeferredUpdateAnnotation(profile.Annotations) + isApplied := (c.daemon.profileFingerprintUnpacked == c.daemon.profileFingerprintEffective) + klog.V(2).Infof("daemonStatus(): change: deferred=%v reload=%v applied=%v", wantsDeferred, change.daemonReload, isApplied) + if (wantsDeferred && !isApplied) && !change.daemonReload { + c.daemon.status |= scDeferred + recommendProfile, err := TunedRecommendFileRead() + if err == nil { + klog.V(2).Infof("updateTunedProfileStatus(): recommended profile %q (deferred)", recommendProfile) + message = c.daemonMessage(change, recommendProfile) + } else { + // just log and carry on, we will use this info to clarify status conditions + klog.Errorf("%s", err.Error()) + } + } + + statusConditions := computeStatusConditions(c.daemon.status, message, profile.Status.Conditions) + if profile.Status.TunedProfile == activeProfile && conditionsEqual(profile.Status.Conditions, statusConditions) { - klog.V(2).Infof("updateTunedProfile(): no need to update status of Profile %s", profile.Name) + klog.V(2).Infof("updateTunedProfileStatus(): no need to update status of Profile %s", profile.Name) return nil } @@ -1178,7 +1243,7 @@ func (c *Controller) updateTunedProfile() (err error) { profile.Status.TunedProfile = activeProfile profile.Status.Conditions = statusConditions - _, err = c.clients.Tuned.TunedV1().Profiles(operandNamespace).UpdateStatus(context.TODO(), profile, metav1.UpdateOptions{}) + _, err = c.clients.Tuned.TunedV1().Profiles(operandNamespace).UpdateStatus(ctx, profile, metav1.UpdateOptions{}) if err != nil { return fmt.Errorf("failed to update Profile %s status: %v", profile.Name, err) } @@ -1324,7 +1389,6 @@ func (c 
*Controller) changeWatcher() (err error) { return fmt.Errorf("error watching filesystem: %v", err) case ch := <-c.changeCh: - var synced bool klog.V(2).Infof("changeCh") synced, err := c.changeSyncer(ch) diff --git a/pkg/tuned/controller_test.go b/pkg/tuned/controller_test.go index 66ae5100ec..d33cd1f0cb 100644 --- a/pkg/tuned/controller_test.go +++ b/pkg/tuned/controller_test.go @@ -1,6 +1,7 @@ package tuned import ( + "fmt" "path/filepath" "reflect" "testing" @@ -241,6 +242,63 @@ func TestProfileFingerprint(t *testing.T) { } } +func TestChangeString(t *testing.T) { + testCases := []struct { + name string + change Change + expected string + }{ + { + name: "empty", + change: Change{}, + expected: "tuned.Change{}", + }, + // common cases + { + name: "profile", + change: Change{ + profile: true, + }, + expected: "tuned.Change{profile:true}", + }, + { + name: "profileStatus", + change: Change{ + profileStatus: true, + }, + expected: "tuned.Change{profileStatus:true}", + }, + // check all the fields are represented. Keep me last + { + name: "full", + change: fullChange(), + expected: fmt.Sprintf("%#v", fullChange()), + }, + } + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + got := tt.change.String() + if got != tt.expected { + t.Errorf("got=%v expected=%v", got, tt.expected) + } + }) + } +} + +func fullChange() Change { + return Change{ + profile: true, + profileStatus: true, + daemonReload: true, + debug: true, + provider: "test-provider", + reapplySysctl: true, + recommendedProfile: "test-profile", + deferred: true, + message: "test-message", + } +} + func newString(s string) *string { return &s } diff --git a/pkg/tuned/status.go b/pkg/tuned/status.go index bb7e80f0d2..64013ec9e1 100644 --- a/pkg/tuned/status.go +++ b/pkg/tuned/status.go @@ -124,6 +124,10 @@ func computeStatusConditions(status Bits, message string, conditions []tunedv1.P tunedDegradedCondition.Status = corev1.ConditionTrue tunedDegradedCondition.Reason = "TunedError" tunedDegradedCondition.Message = "TuneD daemon issued one or more error message(s) during profile application. TuneD stderr: " + message + } else if (status & scDeferred) != 0 { + tunedDegradedCondition.Status = corev1.ConditionTrue + tunedDegradedCondition.Reason = "TunedDeferredUpdate" + tunedDegradedCondition.Message = "Profile will be applied at the next node restart" + deferredMessage } else if (status & scSysctlOverride) != 0 { tunedDegradedCondition.Status = corev1.ConditionTrue // treat overrides as regular errors; users should use "reapply_sysctl: true" or remove conflicting sysctls tunedDegradedCondition.Reason = "TunedSysctlOverride" @@ -132,10 +136,6 @@ func computeStatusConditions(status Bits, message string, conditions []tunedv1.P tunedDegradedCondition.Status = corev1.ConditionFalse // consider warnings from TuneD as non-fatal tunedDegradedCondition.Reason = "TunedWarning" tunedDegradedCondition.Message = "No error messages observed by applying the TuneD daemon profile, only warning(s). 
TuneD stderr: " + message - } else if (status & scDeferred) != 0 { - tunedDegradedCondition.Status = corev1.ConditionTrue - tunedDegradedCondition.Reason = "TunedDeferredUpdate" - tunedDegradedCondition.Message = "Profile will be applied at the next node restart" + deferredMessage } else { tunedDegradedCondition.Status = corev1.ConditionFalse tunedDegradedCondition.Reason = "AsExpected" diff --git a/pkg/tuned/status_test.go b/pkg/tuned/status_test.go index c7a5c477df..b592272a42 100644 --- a/pkg/tuned/status_test.go +++ b/pkg/tuned/status_test.go @@ -111,8 +111,8 @@ func TestComputeStatusConditions(t *testing.T) { LastTransitionTime: metav1.Time{ Time: testTime(), }, - Reason: "TunedSysctlOverride", - Message: "TuneD daemon issued one or more sysctl override message(s) during profile application. Use reapply_sysctl=true or remove conflicting sysctl test-error", + Reason: "TunedDeferredUpdate", + Message: "Profile will be applied at the next node restart: test-error", }, }, }, @@ -132,12 +132,12 @@ func TestComputeStatusConditions(t *testing.T) { }, { Type: tunedv1.TunedDegraded, - Status: corev1.ConditionFalse, + Status: corev1.ConditionTrue, LastTransitionTime: metav1.Time{ Time: testTime(), }, - Reason: "TunedWarning", - Message: "No error messages observed by applying the TuneD daemon profile, only warning(s). TuneD stderr: test-error", + Reason: "TunedDeferredUpdate", + Message: "Profile will be applied at the next node restart: test-error", }, }, }, From 8c5f349b538f2fc2acdd4b5ab69fe940de58b455 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Tue, 21 May 2024 13:18:46 +0200 Subject: [PATCH 06/19] tuned: set the deferred flag Now that all the pieces are in place, we can set and use the deferred update flag to implement the desired behavior. Note we now need to trigger an event when the recommend file is written, and always do that even if does not change. This is needed to react properly when a deferred update is un-deferred by editing the tuned object. Without a trigger in this state the k8s object status wouldn't be correctly updated. 
Signed-off-by: Francesco Romani --- manifests/20-profile.crd.yaml | 3 + pkg/operator/controller.go | 7 +- pkg/operator/profilecalculator.go | 14 ++ pkg/tuned/controller.go | 256 +++++++++++++++++++++++++----- pkg/tuned/controller_test.go | 3 +- 5 files changed, 239 insertions(+), 44 deletions(-) diff --git a/manifests/20-profile.crd.yaml b/manifests/20-profile.crd.yaml index a66f3f1f41..97d62331c8 100644 --- a/manifests/20-profile.crd.yaml +++ b/manifests/20-profile.crd.yaml @@ -28,6 +28,9 @@ spec: - jsonPath: .status.conditions[?(@.type=="Degraded")].status name: Degraded type: string + - jsonPath: .status.conditions[?(@.type=="Applied")].message + name: Message + type: string - jsonPath: .metadata.creationTimestamp name: Age type: date diff --git a/pkg/operator/controller.go b/pkg/operator/controller.go index 292e7275b7..8a42646d59 100644 --- a/pkg/operator/controller.go +++ b/pkg/operator/controller.go @@ -645,6 +645,7 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error { } klog.V(2).Infof("syncProfile(): Profile %s not found, creating one [%s]", profileMf.Name, computed.TunedProfileName) + profileMf.Annotations = util.ToggleDeferredUpdateAnnotation(profileMf.Annotations, computed.AnyDeferred) profileMf.Spec.Config.TunedProfile = computed.TunedProfileName profileMf.Spec.Config.Debug = computed.Operand.Debug profileMf.Spec.Config.TuneDConfig = computed.Operand.TuneDConfig @@ -705,16 +706,20 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error { } } + anns := util.ToggleDeferredUpdateAnnotation(profile.Annotations, computed.AnyDeferred) + // Minimize updates if profile.Spec.Config.TunedProfile == computed.TunedProfileName && profile.Spec.Config.Debug == computed.Operand.Debug && reflect.DeepEqual(profile.Spec.Config.TuneDConfig, computed.Operand.TuneDConfig) && reflect.DeepEqual(profile.Spec.Profile, computed.AllProfiles) && + util.HasDeferredUpdateAnnotation(profile.Annotations) == util.HasDeferredUpdateAnnotation(anns) && profile.Spec.Config.ProviderName == providerName { klog.V(2).Infof("syncProfile(): no need to update Profile %s", nodeName) return nil } profile = profile.DeepCopy() // never update the objects from cache + profile.Annotations = anns profile.Spec.Config.TunedProfile = computed.TunedProfileName profile.Spec.Config.Debug = computed.Operand.Debug profile.Spec.Config.TuneDConfig = computed.Operand.TuneDConfig @@ -727,7 +732,7 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error { if err != nil { return fmt.Errorf("failed to update Profile %s: %v", profile.Name, err) } - klog.Infof("updated profile %s [%s]", profile.Name, computed.TunedProfileName) + klog.Infof("updated profile %s [%s] (deferred=%v)", profile.Name, computed.TunedProfileName, util.HasDeferredUpdateAnnotation(profile.Annotations)) return nil } diff --git a/pkg/operator/profilecalculator.go b/pkg/operator/profilecalculator.go index fb00f8ff66..b4f9242bad 100644 --- a/pkg/operator/profilecalculator.go +++ b/pkg/operator/profilecalculator.go @@ -153,6 +153,7 @@ func (pc *ProfileCalculator) nodeChangeHandler(nodeName string) (bool, error) { type ComputedProfile struct { TunedProfileName string AllProfiles []tunedv1.TunedProfile + AnyDeferred bool MCLabels map[string]string NodePoolName string Operand tunedv1.OperandConfig @@ -174,6 +175,7 @@ func (pc *ProfileCalculator) calculateProfile(nodeName string) (ComputedProfile, return ComputedProfile{}, fmt.Errorf("failed to list Tuned: %v", err) } + deferred := 
tunedProfilesAnyDeferred(tunedList) profilesAll := tunedProfiles(tunedList) recommendAll := TunedRecommend(tunedList) recommendProfile := func(nodeName string, iStart int) (int, string, map[string]string, tunedv1.OperandConfig, error) { @@ -277,6 +279,7 @@ func (pc *ProfileCalculator) calculateProfile(nodeName string) (ComputedProfile, return ComputedProfile{ TunedProfileName: tunedProfileName, AllProfiles: profilesAll, + AnyDeferred: deferred, MCLabels: mcLabels, Operand: operand, }, err @@ -723,6 +726,17 @@ func TunedRecommend(tunedSlice []*tunedv1.Tuned) []tunedv1.TunedRecommend { return recommendAll } +// tunedProfilesAnyDeferred returns true if any of the Tuned CRs in tunedSlice carry the deferred update annotation. +func tunedProfilesAnyDeferred(tunedSlice []*tunedv1.Tuned) bool { + for _, tuned := range tunedSlice { + if util.HasDeferredUpdateAnnotation(tuned.Annotations) { + klog.Infof("tuned %s/%s is deferred", tuned.Namespace, tuned.Name) + return true + } + } + return false +} + // podLabelsUnique goes through Pod labels of all the Pods on a Node-wide // 'podLabelsNodeWide' map and returns a subset of 'podLabels' unique to 'podNsName' // Pod; i.e. the retuned labels (key & value) will not exist on any other Pod diff --git a/pkg/tuned/controller.go b/pkg/tuned/controller.go index 21ee47fb3a..578220aa93 100644 --- a/pkg/tuned/controller.go +++ b/pkg/tuned/controller.go @@ -92,6 +92,15 @@ const ( ocpTunedImageEnv = ocpTunedHome + "/image.env" tunedProfilesDirCustomHost = ocpTunedHome + "/profiles" tunedRecommendDirHost = ocpTunedHome + "/recommend.d" + + // How do we detect a reboot? The NTO operand owns and uses two separate files to track deferred updates. + // 1. /var/lib/... - persistent storage which will survive across reboots. Contains the actual data. + // 2. /run/.. - ephemeral file on tmpfs. Lost on reboot. Since this file is going to be wiped out + // automatically and implicitly on reboot, if it is missing we assume a reboot. + // this means the admin can tamper the node state and fake a node restart by deleting this file + // and restarting the tuned daemon. + tunedDeferredUpdateEphemeralFilePath = ocpTunedRunDir + "/pending_profile" + tunedDeferredUpdatePersistentFilePath = ocpTunedHome + "/pending_profile" ) // Types @@ -123,7 +132,10 @@ type Change struct { // Do we need to update Tuned Profile status? profileStatus bool - // is this Change caused by a node restart? + // Is this Change caused by a TuneD reload? + tunedReload bool + + // Is this Change caused by a node restart? nodeRestart bool // The following keys are set when profile == true. 
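The reboot detection described in the constants above reduces to a stat on the
ephemeral marker; a condensed sketch of the intended startup check (the actual
implementation is recoverAndClearDeferredUpdate further down in this patch, error
handling elided here):

    pending, err := os.ReadFile(tunedDeferredUpdatePersistentFilePath)
    if err == nil {
        if _, statErr := os.Stat(tunedDeferredUpdateEphemeralFilePath); os.IsNotExist(statErr) {
            // /run was wiped, so the node rebooted: apply the pending fingerprint now.
        } else {
            // The marker survived, so this is just a daemon restart: keep deferring.
        }
        _ = pending
    }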
@@ -150,8 +162,11 @@ func (ch Change) String() string { if ch.profileStatus { items = append(items, "profileStatus:true") } - if ch.daemonReload { - items = append(items, "daemonReload:true") + if ch.tunedReload { + items = append(items, "tunedReload:true") + } + if ch.nodeRestart { + items = append(items, "nodeRestart:true") } if ch.debug { items = append(items, "debug:true") @@ -195,6 +210,8 @@ type Controller struct { changeCh chan Change // bi-directional channel to wake-up the main thread to process accrued changes changeChRet chan bool // bi-directional channel to announce success/failure of change processing tunedMainCfg *ini.File // global TuneD configuration as defined in tuned-main.conf + + pendingChange *Change // pending deferred change to be applied on node restart (if any) } type wqKeyKube struct { @@ -329,6 +346,7 @@ func (c *Controller) sync(key wqKeyKube) error { return fmt.Errorf("failed to get Profile %s: %v", key.name, err) } + change.profile = true change.provider = profile.Spec.Config.ProviderName change.recommendedProfile = profile.Spec.Config.TunedProfile change.debug = profile.Spec.Config.Debug @@ -336,8 +354,7 @@ func (c *Controller) sync(key wqKeyKube) error { if profile.Spec.Config.TuneDConfig.ReapplySysctl != nil { change.reapplySysctl = *profile.Spec.Config.TuneDConfig.ReapplySysctl } - - change.profile = true + change.deferred = util.HasDeferredUpdateAnnotation(profile.Annotations) // Notify the event processor that the Profile k8s object containing information about which TuneD profile to apply changed. c.wqTuneD.Add(wqKeyTuned{kind: wqKindDaemon, change: change}) @@ -434,12 +451,18 @@ func profilesExtractPathWithDeps(profilesRootDir string, profiles []tunedv1.Tune } profilesFP := profilesFingerprint(profiles, recommendedProfile) - klog.Infof("profilesExtract(): extracted profiles fingerprint: %s", profilesFP) + klog.Infof("profilesExtract(): fingerprint of extracted profiles: %q", profilesFP) return change, profilesFP, extracted, recommendedProfileDeps, nil } -func profilesRepackPath(rfPath, profilesRootDir string) ([]tunedv1.TunedProfile, string, error) { - recommendedProfile, err := TunedRecommendFileReadPath(rfPath) +// profilesRepackPath reconstructs the TunedProfile object from the data unpacked on the node +// by earlier operations of the operand code. Takes the paths of the recommend file and of +// the profiles root directory. For testability, the production code always uses the same +// hardcoded path (see for example RunInCluster). Returns the reconstructed TunedProfiles, +// the name of the recommended profile, error if any. If the returned error is not nil, +// the other return values are not significant and should be ignored. 
+func profilesRepackPath(recommendFilePath, profilesRootDir string) ([]tunedv1.TunedProfile, string, error) { + recommendedProfile, err := TunedRecommendFileReadPath(recommendFilePath) if err != nil { return nil, "", err } @@ -651,22 +674,22 @@ func writeOpenShiftTunedImageEnv() error { return nil } -func TunedRecommendFileWritePath(rfPath, profileName string) error { - rfDir := filepath.Dir(rfPath) +func TunedRecommendFileWritePath(recommendFilePath, profileName string) error { + rfDir := filepath.Dir(recommendFilePath) klog.V(2).Infof("tunedRecommendFileWrite(): %s %s", profileName, rfDir) if err := os.MkdirAll(rfDir, os.ModePerm); err != nil { return fmt.Errorf("failed to create directory %q: %v", rfDir, err) } data := []byte(fmt.Sprintf("[%s]\n", profileName)) - if err := os.WriteFile(rfPath, data, 0644); err != nil { - return fmt.Errorf("failed to write file %q: %v", rfPath, err) + if err := os.WriteFile(recommendFilePath, data, 0644); err != nil { + return fmt.Errorf("failed to write file %q: %v", recommendFilePath, err) } - klog.Infof("tunedRecommendFileWrite(): written %q to set TuneD profile %s", rfPath, profileName) + klog.Infof("tunedRecommendFileWrite(): written %q to set TuneD profile %s", recommendFilePath, profileName) return nil } -func TunedRecommendFileReadPath(rfPath string) (string, error) { - data, err := os.ReadFile(rfPath) +func TunedRecommendFileReadPath(recommendFilePath string) (string, error) { + data, err := os.ReadFile(recommendFilePath) if err != nil { return "", err } @@ -724,7 +747,10 @@ func (c *Controller) tunedRun() { onDaemonReload := func() { // Notify the event processor that the TuneD daemon finished reloading and that we might need to update Profile status. - c.wqTuneD.Add(wqKeyTuned{kind: wqKindDaemon, change: Change{profileStatus: true}}) + c.wqTuneD.Add(wqKeyTuned{kind: wqKindDaemon, change: Change{ + profileStatus: true, + tunedReload: true, + }}) } err := TunedRun(c.tunedCmd, &c.daemon, onDaemonReload) @@ -885,14 +911,17 @@ func GetBootcmdline() (string, error) { return responseString, nil } -// changeSyncerPostNodeRestart syncronizes the daemon status after a node restart -func (c *Controller) changeSyncerPostNodeRestart(change Change) { - if !change.nodeRestart { - return - } +// changeSyncerPostReloadOrRestart synchronizes the daemon status after a node restart or a TuneD reload. +// The deferred updates are meant to be applied only after a full node restart. +// However, after a TuneD reload caused by a immediate update, we need to update the internal state +// pertaining to the effective profile applied on a node. 
+func (c *Controller) changeSyncerPostReloadOrRestart(change Change) { + klog.V(2).Infof("changeSyncerPostReloadOrRestart(%s)", change.String()) + defer klog.V(2).Infof("changeSyncerPostReloadOrRestart(%s) done", change.String()) - klog.V(2).Infof("changeSyncerPostNodeRestart(%s)", change.String()) - defer klog.V(2).Infof("changeSyncerPostNodeRestart(%s) done", change.String()) + if !change.nodeRestart && !change.tunedReload { + return // nothing to do + } profiles, recommended, err := profilesRepackPath(tunedRecommendFile, tunedProfilesDirCustom) if err != nil { @@ -903,12 +932,8 @@ func (c *Controller) changeSyncerPostNodeRestart(change Change) { } profileFP := profilesFingerprint(profiles, recommended) - if (c.daemon.status & scApplied) == 0 { - klog.Infof("changeSyncerPostNodeRestart(): profile not applied after tuned reload") - return - } + klog.V(2).Infof("changeSyncerPostReloadOrRestart(): current effective profile fingerprint %q -> %q", c.daemon.profileFingerprintEffective, profileFP) - klog.V(2).Infof("changeSyncerPostNodeRestart(): current effective profile fingerprint %q -> %q", c.daemon.profileFingerprintEffective, profileFP) c.daemon.profileFingerprintEffective = profileFP c.daemon.status &= ^scDeferred // force clear even if it was never set. } @@ -940,10 +965,13 @@ func (c *Controller) changeSyncerProfileStatus(change Change) (synced bool) { func (c *Controller) changeSyncerTuneD(change Change) (synced bool, err error) { var reload bool var cfgUpdated bool + var changeRecommend bool klog.V(2).Infof("changeSyncerTuneD(%s)", change.String()) defer klog.V(2).Infof("changeSyncerTuneD(%s) done updated=%v", change.String(), cfgUpdated) + reload = change.nodeRestart + if (c.daemon.status & scReloading) != 0 { // This should not be necessary, but keep this here as a reminder. // We should not manipulate TuneD configuration files in any way while the daemon is reloading/restarting. @@ -958,14 +986,17 @@ func (c *Controller) changeSyncerTuneD(change Change) (synced bool, err error) { } reload = reload || changeProvider - if c.daemon.recommendedProfile != change.recommendedProfile { + if (c.daemon.recommendedProfile != change.recommendedProfile) || change.nodeRestart { if err = TunedRecommendFileWrite(change.recommendedProfile); err != nil { return false, err } - klog.Infof("recommended TuneD profile changed from (%s) to (%s)", c.daemon.recommendedProfile, change.recommendedProfile) + klog.Infof("recommended TuneD profile changed from %q to %q [deferred=%v nodeRestart=%v]", c.daemon.recommendedProfile, change.recommendedProfile, change.deferred, change.nodeRestart) // Cache the value written to tunedRecommendFile. c.daemon.recommendedProfile = change.recommendedProfile reload = true + } else if !change.deferred && (c.daemon.status&scDeferred != 0) { + klog.Infof("detected deferred update changed to immediate after object update") + reload = true } else { klog.Infof("recommended profile (%s) matches current configuration", c.daemon.recommendedProfile) // We do not need to reload the TuneD daemon, however, someone may have tampered with the k8s Profile status for this node. 
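The new branch above handles an update flipping from deferred to immediate while
the recommended profile stays the same; a sketch of the client-side edit that
triggers it (the Update call and clientset handle are assumptions, the helper is
the one used throughout the series):

    // Un-defer a previously deferred update in place: same profile, annotation dropped.
    tuned = tuned.DeepCopy()
    tuned.Annotations = util.ToggleDeferredUpdateAnnotation(tuned.Annotations, false)
    // The operand then sees deferred=false while scDeferred is still set and reloads immediately.
    if _, err := cs.Tuneds(ntoconfig.WatchNamespace()).Update(ctx, tuned, metav1.UpdateOptions{}); err != nil {
        return err
    }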
@@ -980,11 +1011,18 @@ func (c *Controller) changeSyncerTuneD(change Change) (synced bool, err error) { if err != nil { return false, fmt.Errorf("failed to get Profile %s: %v", c.nodeName, err) } - changeProfiles, _, err := profilesSync(profile.Spec.Profile, c.daemon.recommendedProfile) + + changeProfiles, profilesFP, err := profilesSync(profile.Spec.Profile, c.daemon.recommendedProfile) if err != nil { return false, err } - reload = reload || changeProfiles + if changeProfiles || changeRecommend { + if c.daemon.profileFingerprintUnpacked != profilesFP { + klog.Infof("current unpacked profile fingerprint %q -> %q", c.daemon.profileFingerprintUnpacked, profilesFP) + c.daemon.profileFingerprintUnpacked = profilesFP + } + reload = true + } // Does the current TuneD process have debugging turned on? debug := (c.daemon.restart & ctrlDebug) != 0 @@ -1013,7 +1051,8 @@ func (c *Controller) changeSyncerTuneD(change Change) (synced bool, err error) { } if reload { - c.daemon.restart |= ctrlReload + // failures pertaining to deferred updates are not critical + _ = c.handleDaemonReloadRequest(change) } cfgUpdated, err = c.changeSyncerRestartOrReloadTuneD() @@ -1025,6 +1064,35 @@ func (c *Controller) changeSyncerTuneD(change Change) (synced bool, err error) { return err == nil, err } +func (c *Controller) handleDaemonReloadRequest(change Change) error { + if !change.deferred || change.nodeRestart { + klog.V(2).Infof("immediate update, setting reload flag") + c.daemon.restart |= ctrlReload + return nil + } + + klog.V(2).Infof("deferred update profile unpacked %q profile effective %q", c.daemon.profileFingerprintUnpacked, c.daemon.profileFingerprintEffective) + + if c.daemon.profileFingerprintUnpacked == c.daemon.profileFingerprintEffective { + klog.V(2).Infof("deferred update, but it seems already applied (profile unpacked == profile effective), nothing to do") + return nil + } + + err := c.storeDeferredUpdate(c.daemon.profileFingerprintUnpacked) + // on restart, we will have the deferred flag but the profileFingerprint will match. So the change must take effect immediately + klog.Infof("deferred update: TuneD daemon won't be reloaded until next restart or immediate update (err=%v)", err) + if err != nil { + return err + } + + // trigger status update + c.wqTuneD.Add(wqKeyTuned{kind: wqKindDaemon, change: Change{ + profileStatus: true, + message: fmt.Sprintf("status change for deferred update %q", change.recommendedProfile), + }}) + return nil +} + func (c *Controller) changeSyncerRestartOrReloadTuneD() (bool, error) { klog.V(2).Infof("changeSyncerRestartOrReloadTuneD()") if (c.daemon.restart & ctrlRestart) != 0 { @@ -1044,8 +1112,10 @@ func (c *Controller) changeSyncerRestartOrReloadTuneD() (bool, error) { func (c *Controller) changeSyncer(change Change) (bool, error) { klog.V(2).Infof("changeSyncer(%s)", change.String()) defer klog.V(2).Infof("changeSyncer(%s) done", change.String()) - // Sync internal status after a TuneD reload - c.changeSyncerPostNodeRestart(change) + + // Sync internal status after a node restart + c.changeSyncerPostReloadOrRestart(change) + // Sync k8s Profile status if/when needed. 
if !c.changeSyncerProfileStatus(change) { return false, nil @@ -1218,9 +1288,11 @@ func (c *Controller) updateTunedProfileStatus(ctx context.Context, change Change var message string wantsDeferred := util.HasDeferredUpdateAnnotation(profile.Annotations) isApplied := (c.daemon.profileFingerprintUnpacked == c.daemon.profileFingerprintEffective) - klog.V(2).Infof("daemonStatus(): change: deferred=%v reload=%v applied=%v", wantsDeferred, change.daemonReload, isApplied) - if (wantsDeferred && !isApplied) && !change.daemonReload { - c.daemon.status |= scDeferred + daemonStatus := c.daemon.status + + klog.Infof("daemonStatus(): change: deferred=%v applied=%v nodeRestart=%v", wantsDeferred, isApplied, change.nodeRestart) + if (wantsDeferred && !isApplied) && !change.nodeRestart { // avoid setting the flag on updates deferred -> immediate + daemonStatus |= scDeferred recommendProfile, err := TunedRecommendFileRead() if err == nil { klog.V(2).Infof("updateTunedProfileStatus(): recommended profile %q (deferred)", recommendProfile) @@ -1231,7 +1303,9 @@ func (c *Controller) updateTunedProfileStatus(ctx context.Context, change Change } } - statusConditions := computeStatusConditions(c.daemon.status, message, profile.Status.Conditions) + statusConditions := computeStatusConditions(daemonStatus, message, profile.Status.Conditions) + klog.Infof("computed status conditions: %#v", statusConditions) // TODO v=4 + c.daemon.status = daemonStatus if profile.Status.TunedProfile == activeProfile && conditionsEqual(profile.Status.Conditions, statusConditions) { @@ -1250,6 +1324,77 @@ func (c *Controller) updateTunedProfileStatus(ctx context.Context, change Change return nil } +// storeDeferredUpdate sets the node state (on storage, like disk) to signal +// there is a deferred update pending. +func (c *Controller) storeDeferredUpdate(deferredFP string) (derr error) { + // "overwriting" is fine, because we only want an empty data file. + // all the races are benign, so we go for the simplest approach + fp, err := os.Create(tunedDeferredUpdateEphemeralFilePath) + if err != nil { + return err + } + _ = fp.Close() // unlikely to fail, we don't write anything + + // overwriting files is racy, and output can be mixed in. + // the safest approach is to create a temporary file, write + // the full content to it and then rename it, because rename(2) + // is atomic, this is guaranteed safe and race-free. + dst, err := os.CreateTemp(ocpTunedHome, "ocpdeferred") + if err != nil { + return err + } + tmpName := dst.Name() + defer func() { + if dst == nil { + return + } + derr = dst.Close() + os.Remove(dst.Name()) // avoid littering with tmp files + }() + if _, err := dst.WriteString(deferredFP); err != nil { + return err + } + if err := dst.Close(); err != nil { + return err + } + dst = nil // avoid double close()s, the second will fail + return os.Rename(tmpName, tunedDeferredUpdatePersistentFilePath) +} + +// recoverAndClearDeferredUpdate detects the presence and removes the persistent +// deferred updates file. +// Returns: +// - The defered profile fingerprint. +// - A boolean indicating the absence of the ephemeral deferred update file. +// If the file is absent, a node restart is assumed and true is returned. +// - Error if any. 
+func (c *Controller) recoverAndClearDeferredUpdate() (string, bool, error) { + isReboot := false + + deferredFP, err := os.ReadFile(tunedDeferredUpdatePersistentFilePath) + if err != nil { + if os.IsNotExist(err) { + klog.Infof("recover: no pending deferred change") + return "", isReboot, nil + } + klog.Infof("recover: failed to restore pending deferred change: %v", err) + return "", isReboot, err + } + pendingFP := strings.TrimSpace(string(deferredFP)) + err = os.Remove(tunedDeferredUpdatePersistentFilePath) + klog.Infof("recover: found pending deferred update: %q", pendingFP) + + if _, errEph := os.Stat(tunedDeferredUpdateEphemeralFilePath); errEph != nil { + if os.IsNotExist(errEph) { + isReboot = true + } else { + klog.Infof("recover: failed to detect node restart, assuming not: %v", err) + return "", false, errEph + } + } + return pendingFP, isReboot, err +} + func (c *Controller) informerEventHandler(workqueueKey wqKeyKube) cache.ResourceEventHandlerFuncs { return cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { @@ -1354,6 +1499,12 @@ func (c *Controller) changeWatcher() (err error) { defer c.wqTuneD.ShutDown() klog.Info("started events processors") + if c.pendingChange != nil { + klog.Infof("processing pending change: %s", c.pendingChange.String()) + c.wqTuneD.Add(wqKeyTuned{kind: wqKindDaemon, change: *c.pendingChange}) + c.pendingChange = nil + } + // Watch for filesystem changes on the tunedBootcmdlineFile file. wFs, err := fsnotify.NewWatcher() if err != nil { @@ -1365,8 +1516,9 @@ func (c *Controller) changeWatcher() (err error) { for _, element := range []string{tunedBootcmdlineFile} { err = wFs.Add(element) if err != nil { - return fmt.Errorf("failed to start watching %q: %v", element, err) + return fmt.Errorf("failed to start monitoring filesystem events on %q: %v", element, err) } + klog.Infof("monitoring filesystem events on %q", element) } klog.Info("started controller") @@ -1486,6 +1638,10 @@ func RunInCluster(stopCh <-chan struct{}, version string) error { panic(err.Error()) } + if err := os.MkdirAll(ocpTunedRunDir, os.ModePerm); err != nil { + panic(err.Error()) + } + profiles, recommended, err := profilesRepackPath(tunedRecommendFile, tunedProfilesDirCustom) if err != nil { // keep going, immediate updates are expected to work as usual @@ -1496,8 +1652,24 @@ func RunInCluster(stopCh <-chan struct{}, version string) error { profileFP := profilesFingerprint(profiles, recommended) c.daemon.profileFingerprintUnpacked = profileFP - c.daemon.profileFingerprintEffective = profileFP - klog.Infof("starting: installed and effective profile fingerprint %q", profileFP) + klog.Infof("starting: profile fingerprint unpacked %q", profileFP) + + deferredFP, isNodeReboot, err := c.recoverAndClearDeferredUpdate() + if err != nil { + klog.ErrorS(err, "unable to recover the pending update") + } else if !isNodeReboot { + klog.Infof("starting: does not seem a node reboot, but a daemon restart. 
Ignoring pending deferred updates (if any)")
+	} else if deferredFP == "" {
+		klog.Infof("starting: node reboot, but no pending deferred update")
+	} else {
+		klog.Infof("starting: recovered and cleared pending deferred update %q (fingerprint=%q)", recommended, deferredFP)
+		c.pendingChange = &Change{
+			profile:            true,
+			nodeRestart:        true,
+			recommendedProfile: recommended,
+			message:            deferredFP,
+		}
+	}

 	return retryLoop(c)
 }
diff --git a/pkg/tuned/controller_test.go b/pkg/tuned/controller_test.go
index d33cd1f0cb..d36e314caa 100644
--- a/pkg/tuned/controller_test.go
+++ b/pkg/tuned/controller_test.go
@@ -289,7 +289,8 @@ func fullChange() Change {
 	return Change{
 		profile:       true,
 		profileStatus: true,
-		daemonReload:  true,
+		tunedReload:   true,
+		nodeRestart:   true,
 		debug:         true,
 		provider:      "test-provider",
 		reapplySysctl: true,

From f064dcae1ed61ffaed80af12f3d4e92f5c322de5 Mon Sep 17 00:00:00 2001
From: Francesco Romani
Date: Mon, 8 Jul 2024 17:54:26 +0200
Subject: [PATCH 07/19] tuned: fix propagation of the deferred flag

When we compute `TunedProfile`s, we should propagate the deferred
flag only if one of these objects is extracted from a `Tuned` object
which had the deferred annotation.

The best option would arguably be to set the deferred flag if any of
the `Tuned` objects relevant for a node has the deferred flag set.
However, propagating the flag only from the matching `Tuned` object
makes the flow more predictable, so we adopt this approximation.

Signed-off-by: Francesco Romani
---
 pkg/operator/controller.go        |   4 +-
 pkg/operator/profilecalculator.go | 122 +++++++++++++++++-----------
 2 files changed, 78 insertions(+), 48 deletions(-)

diff --git a/pkg/operator/controller.go b/pkg/operator/controller.go
index 8a42646d59..507b324bc8 100644
--- a/pkg/operator/controller.go
+++ b/pkg/operator/controller.go
@@ -645,7 +645,7 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
 	}

 	klog.V(2).Infof("syncProfile(): Profile %s not found, creating one [%s]", profileMf.Name, computed.TunedProfileName)
-	profileMf.Annotations = util.ToggleDeferredUpdateAnnotation(profileMf.Annotations, computed.AnyDeferred)
+	profileMf.Annotations = util.ToggleDeferredUpdateAnnotation(profileMf.Annotations, computed.Deferred)
 	profileMf.Spec.Config.TunedProfile = computed.TunedProfileName
 	profileMf.Spec.Config.Debug = computed.Operand.Debug
 	profileMf.Spec.Config.TuneDConfig = computed.Operand.TuneDConfig
@@ -706,7 +706,7 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
 		}
 	}

-	anns := util.ToggleDeferredUpdateAnnotation(profile.Annotations, computed.AnyDeferred)
+	anns := util.ToggleDeferredUpdateAnnotation(profile.Annotations, computed.Deferred)

 	// Minimize updates
 	if profile.Spec.Config.TunedProfile == computed.TunedProfileName &&
diff --git a/pkg/operator/profilecalculator.go b/pkg/operator/profilecalculator.go
index b4f9242bad..19667c7eec 100644
--- a/pkg/operator/profilecalculator.go
+++ b/pkg/operator/profilecalculator.go
@@ -153,12 +153,19 @@ func (pc *ProfileCalculator) nodeChangeHandler(nodeName string) (bool, error) {
 type ComputedProfile struct {
 	TunedProfileName string
 	AllProfiles      []tunedv1.TunedProfile
-	AnyDeferred      bool
+	Deferred         bool
 	MCLabels         map[string]string
 	NodePoolName     string
 	Operand          tunedv1.OperandConfig
 }

+type RecommendedProfile struct {
+	TunedProfileName string
+	Deferred         bool
+	Labels           map[string]string
+	Config           tunedv1.OperandConfig
+}
+
 // calculateProfile calculates a tuned profile for Node nodeName.
// // Returns @@ -175,10 +182,9 @@ func (pc *ProfileCalculator) calculateProfile(nodeName string) (ComputedProfile, return ComputedProfile{}, fmt.Errorf("failed to list Tuned: %v", err) } - deferred := tunedProfilesAnyDeferred(tunedList) profilesAll := tunedProfiles(tunedList) recommendAll := TunedRecommend(tunedList) - recommendProfile := func(nodeName string, iStart int) (int, string, map[string]string, tunedv1.OperandConfig, error) { + recommendProfile := func(nodeName string, iStart int) (int, RecommendedProfile, error) { var i int for i = iStart; i < len(recommendAll); i++ { var ( @@ -193,7 +199,11 @@ func (pc *ProfileCalculator) calculateProfile(nodeName string) (ComputedProfile, // we do not want to call profileMatches() in that case unless machineConfigLabels // is undefined. if (recommend.Match != nil || recommend.MachineConfigLabels == nil) && pc.profileMatches(recommend.Match, nodeName) { - return i, *recommend.Profile, nil, recommend.Operand, nil + return i, RecommendedProfile{ + TunedProfileName: *recommend.Profile, + Config: recommend.Operand, + Deferred: recommend.Deferred, + }, nil } if recommend.MachineConfigLabels == nil { @@ -207,24 +217,29 @@ func (pc *ProfileCalculator) calculateProfile(nodeName string) (ComputedProfile, // is often unneeded and would likely have a performance impact. node, err = pc.listers.Nodes.Get(nodeName) if err != nil { - return i, "", nil, tunedv1.OperandConfig{}, err + return i, RecommendedProfile{}, err } pools, err = pc.getPoolsForNode(node) if err != nil { - return i, "", nil, tunedv1.OperandConfig{}, err + return i, RecommendedProfile{}, err } } // MachineConfigLabels based matching if pc.machineConfigLabelsMatch(recommend.MachineConfigLabels, pools) { - return i, *recommend.Profile, recommend.MachineConfigLabels, recommend.Operand, nil + return i, RecommendedProfile{ + TunedProfileName: *recommend.Profile, + Labels: recommend.MachineConfigLabels, + Config: recommend.Operand, + Deferred: recommend.Deferred, + }, nil } } // No profile matches. This is not necessarily a problem, e.g. when we check for matching profiles with the same priority. - return i, defaultProfile, nil, tunedv1.OperandConfig{}, nil + return i, RecommendedProfile{TunedProfileName: defaultProfile}, nil } - iStop, tunedProfileName, mcLabels, operand, err := recommendProfile(nodeName, 0) + iStop, recommendedProfile, err := recommendProfile(nodeName, 0) if iStop == len(recommendAll) { // This should never happen; the default Tuned CR should always be accessible and with a catch-all rule @@ -233,19 +248,19 @@ func (pc *ProfileCalculator) calculateProfile(nodeName string) (ComputedProfile, if err != nil { return ComputedProfile{ TunedProfileName: defaultProfile, - Operand: operand, + Operand: recommendedProfile.Config, }, fmt.Errorf("failed to get Tuned %s: %v", tunedv1.TunedDefaultResourceName, err) } return ComputedProfile{ TunedProfileName: defaultProfile, - Operand: operand, + Operand: recommendedProfile.Config, }, fmt.Errorf("the default Tuned CR misses a catch-all profile selection") } // Make sure we do not have multiple matching profiles with the same priority. If so, report a warning. for i := iStop + 1; i < len(recommendAll); i++ { - j, tunedProfileNameDup, _, _, err := recommendProfile(nodeName, i) + j, recommendedProfileDup, err := recommendProfile(nodeName, i) if err != nil { // Duplicate matching profile priority detection failed, likely due to a failure to retrieve a k8s object. 
// This is not fatal, do not spam the logs, as we will retry later during a periodic resync. @@ -266,9 +281,9 @@ func (pc *ProfileCalculator) calculateProfile(nodeName string) (ComputedProfile, // If they have the same name and different contents a separate warning // will be issued by manifests.tunedRenderedProfiles() if *recommendAll[iStop].Priority == *recommendAll[i].Priority { - if tunedProfileName != tunedProfileNameDup { + if recommendedProfile.TunedProfileName != recommendedProfileDup.TunedProfileName { klog.Warningf("profiles %s/%s have the same priority %d and match %s; please use a different priority for your custom profiles!", - tunedProfileName, tunedProfileNameDup, *recommendAll[i].Priority, nodeName) + recommendedProfile.TunedProfileName, recommendedProfileDup.TunedProfileName, *recommendAll[i].Priority, nodeName) } } else { // We no longer have recommend rules with the same priority -- do not go through the entire (priority-ordered) list. @@ -277,14 +292,21 @@ func (pc *ProfileCalculator) calculateProfile(nodeName string) (ComputedProfile, } return ComputedProfile{ - TunedProfileName: tunedProfileName, + TunedProfileName: recommendedProfile.TunedProfileName, AllProfiles: profilesAll, - AnyDeferred: deferred, - MCLabels: mcLabels, - Operand: operand, + Deferred: recommendedProfile.Deferred, + MCLabels: recommendedProfile.Labels, + Operand: recommendedProfile.Config, }, err } +type HypershiftRecommendedProfile struct { + TunedProfileName string + Deferred bool + NodePoolName string + Config tunedv1.OperandConfig +} + // calculateProfileHyperShift calculates a tuned profile for Node nodeName. // // Returns @@ -325,7 +347,7 @@ func (pc *ProfileCalculator) calculateProfileHyperShift(nodeName string) (Comput profilesAll := tunedProfiles(tunedList) recommendAll := TunedRecommend(tunedList) - recommendProfile := func(nodeName string, iStart int) (int, string, string, tunedv1.OperandConfig, error) { + recommendProfile := func(nodeName string, iStart int) (int, HypershiftRecommendedProfile, error) { var i int for i = iStart; i < len(recommendAll); i++ { recommend := recommendAll[i] @@ -333,35 +355,45 @@ func (pc *ProfileCalculator) calculateProfileHyperShift(nodeName string) (Comput // Start with node/pod label based matching if recommend.Match != nil && pc.profileMatches(recommend.Match, nodeName) { klog.V(3).Infof("calculateProfileHyperShift: node / pod label matching used for node: %s, tunedProfileName: %s, nodePoolName: %s, operand: %v", nodeName, *recommend.Profile, "", recommend.Operand) - return i, *recommend.Profile, "", recommend.Operand, nil + return i, HypershiftRecommendedProfile{ + TunedProfileName: *recommend.Profile, + Config: recommend.Operand, + }, nil } // If recommend.Match is empty, NodePool based matching is assumed if recommend.Match == nil { if *recommend.Profile == defaultProfile { // Don't set nodepool for default profile, no MachineConfigs should be generated. - return i, *recommend.Profile, "", recommend.Operand, nil + return i, HypershiftRecommendedProfile{ + TunedProfileName: *recommend.Profile, + Config: recommend.Operand, + }, nil } klog.V(3).Infof("calculateProfileHyperShift: NodePool based matching used for node: %s, tunedProfileName: %s, nodePoolName: %s", nodeName, *recommend.Profile, nodePoolName) - return i, *recommend.Profile, nodePoolName, recommend.Operand, nil + return i, HypershiftRecommendedProfile{ + TunedProfileName: *recommend.Profile, + NodePoolName: nodePoolName, + Config: recommend.Operand, + }, nil } } // No profile matches. 
This is not necessarily a problem, e.g. when we check for matching profiles with the same priority. - return i, defaultProfile, "", tunedv1.OperandConfig{}, nil + return i, HypershiftRecommendedProfile{TunedProfileName: defaultProfile}, nil } - iStop, tunedProfileName, nodePoolName, operand, err := recommendProfile(nodeName, 0) + iStop, recommendedProfile, err := recommendProfile(nodeName, 0) if iStop == len(recommendAll) { return ComputedProfile{ TunedProfileName: defaultProfile, AllProfiles: profilesAll, - Operand: operand, + Operand: recommendedProfile.Config, }, fmt.Errorf("the default Tuned CR misses a catch-all profile selection") } // Make sure we do not have multiple matching profiles with the same priority. If so, report a warning. for i := iStop + 1; i < len(recommendAll); i++ { - j, tunedProfileNameDup, _, _, err := recommendProfile(nodeName, i) + j, recommendedProfileDup, err := recommendProfile(nodeName, i) if err != nil { // Duplicate matching profile priority detection failed, likely due to a failure to retrieve a k8s object. // This is not fatal, do not spam the logs, as we will retry later during a periodic resync. @@ -382,9 +414,9 @@ func (pc *ProfileCalculator) calculateProfileHyperShift(nodeName string) (Comput // If they have the same name and different contents a separate warning // will be issued by manifests.tunedRenderedProfiles() if *recommendAll[iStop].Priority == *recommendAll[i].Priority { - if tunedProfileName != tunedProfileNameDup { + if recommendedProfile.TunedProfileName != recommendedProfileDup.TunedProfileName { klog.Warningf("profiles %s/%s have the same priority %d and match %s; please use a different priority for your custom profiles!", - tunedProfileName, tunedProfileNameDup, *recommendAll[i].Priority, nodeName) + recommendedProfile.TunedProfileName, recommendedProfileDup.TunedProfileName, *recommendAll[i].Priority, nodeName) } } else { // We no longer have recommend rules with the same priority -- do not go through the entire (priority-ordered) list. @@ -393,10 +425,11 @@ func (pc *ProfileCalculator) calculateProfileHyperShift(nodeName string) (Comput } return ComputedProfile{ - TunedProfileName: tunedProfileName, + TunedProfileName: recommendedProfile.TunedProfileName, AllProfiles: profilesAll, - NodePoolName: nodePoolName, - Operand: operand, + Deferred: recommendedProfile.Deferred, + NodePoolName: recommendedProfile.NodePoolName, + Operand: recommendedProfile.Config, }, err } @@ -697,10 +730,15 @@ func tunedProfiles(tunedSlice []*tunedv1.Tuned) []tunedv1.TunedProfile { return tunedProfiles } +type TunedRecommendInfo struct { + tunedv1.TunedRecommend + Deferred bool +} + // TunedRecommend returns a priority-sorted TunedRecommend slice out of // a slice of Tuned objects for profile-calculation purposes. -func TunedRecommend(tunedSlice []*tunedv1.Tuned) []tunedv1.TunedRecommend { - var recommendAll []tunedv1.TunedRecommend +func TunedRecommend(tunedSlice []*tunedv1.Tuned) []TunedRecommendInfo { + var recommendAll []TunedRecommendInfo // Tuned profiles should have unique priority across all Tuned CRs and users // will be warned about this. However, go into some effort to make the profile @@ -711,8 +749,11 @@ func TunedRecommend(tunedSlice []*tunedv1.Tuned) []tunedv1.TunedRecommend { }) for _, tuned := range tunedSlice { - if tuned.Spec.Recommend != nil { - recommendAll = append(recommendAll, tuned.Spec.Recommend...) 
+ for _, recommend := range tuned.Spec.Recommend { + recommendAll = append(recommendAll, TunedRecommendInfo{ + TunedRecommend: recommend, + Deferred: util.HasDeferredUpdateAnnotation(tuned.Annotations), + }) } } @@ -726,17 +767,6 @@ func TunedRecommend(tunedSlice []*tunedv1.Tuned) []tunedv1.TunedRecommend { return recommendAll } -// tunedProfilesAnyDeferred returns true if any of the Tuned CRs in tunedSlice carry the deferred update annotation. -func tunedProfilesAnyDeferred(tunedSlice []*tunedv1.Tuned) bool { - for _, tuned := range tunedSlice { - if util.HasDeferredUpdateAnnotation(tuned.Annotations) { - klog.Infof("tuned %s/%s is deferred", tuned.Namespace, tuned.Name) - return true - } - } - return false -} - // podLabelsUnique goes through Pod labels of all the Pods on a Node-wide // 'podLabelsNodeWide' map and returns a subset of 'podLabels' unique to 'podNsName' // Pod; i.e. the retuned labels (key & value) will not exist on any other Pod From d15261da3e2a6cfc0c094ead55d00a88dd5cd681 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Wed, 3 Jul 2024 14:50:25 +0200 Subject: [PATCH 08/19] tuned: operand: review log verbosiness tune down the log chattiness to reduce the log pressure. Signed-off-by: Francesco Romani --- pkg/tuned/controller.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/tuned/controller.go b/pkg/tuned/controller.go index 578220aa93..196e8e88f7 100644 --- a/pkg/tuned/controller.go +++ b/pkg/tuned/controller.go @@ -466,7 +466,7 @@ func profilesRepackPath(recommendFilePath, profilesRootDir string) ([]tunedv1.Tu if err != nil { return nil, "", err } - klog.Infof("profilesRepack(): recovered recommended profile: %q", recommendedProfile) + klog.V(1).Infof("profilesRepack(): recovered recommended profile: %q", recommendedProfile) dents, err := os.ReadDir(profilesRootDir) if err != nil { @@ -990,15 +990,15 @@ func (c *Controller) changeSyncerTuneD(change Change) (synced bool, err error) { if err = TunedRecommendFileWrite(change.recommendedProfile); err != nil { return false, err } - klog.Infof("recommended TuneD profile changed from %q to %q [deferred=%v nodeRestart=%v]", c.daemon.recommendedProfile, change.recommendedProfile, change.deferred, change.nodeRestart) + klog.V(1).Infof("recommended TuneD profile changed from %q to %q [deferred=%v nodeRestart=%v]", c.daemon.recommendedProfile, change.recommendedProfile, change.deferred, change.nodeRestart) // Cache the value written to tunedRecommendFile. c.daemon.recommendedProfile = change.recommendedProfile reload = true } else if !change.deferred && (c.daemon.status&scDeferred != 0) { - klog.Infof("detected deferred update changed to immediate after object update") + klog.V(1).Infof("detected deferred update changed to immediate after object update") reload = true } else { - klog.Infof("recommended profile (%s) matches current configuration", c.daemon.recommendedProfile) + klog.V(1).Infof("recommended profile (%s) matches current configuration", c.daemon.recommendedProfile) // We do not need to reload the TuneD daemon, however, someone may have tampered with the k8s Profile status for this node. // Make sure its status is up-to-date. 
if err = c.updateTunedProfile(change); err != nil { @@ -1018,7 +1018,7 @@ func (c *Controller) changeSyncerTuneD(change Change) (synced bool, err error) { } if changeProfiles || changeRecommend { if c.daemon.profileFingerprintUnpacked != profilesFP { - klog.Infof("current unpacked profile fingerprint %q -> %q", c.daemon.profileFingerprintUnpacked, profilesFP) + klog.V(2).Infof("current unpacked profile fingerprint %q -> %q", c.daemon.profileFingerprintUnpacked, profilesFP) c.daemon.profileFingerprintUnpacked = profilesFP } reload = true @@ -1290,7 +1290,7 @@ func (c *Controller) updateTunedProfileStatus(ctx context.Context, change Change isApplied := (c.daemon.profileFingerprintUnpacked == c.daemon.profileFingerprintEffective) daemonStatus := c.daemon.status - klog.Infof("daemonStatus(): change: deferred=%v applied=%v nodeRestart=%v", wantsDeferred, isApplied, change.nodeRestart) + klog.V(4).Infof("daemonStatus(): change: deferred=%v applied=%v nodeRestart=%v", wantsDeferred, isApplied, change.nodeRestart) if (wantsDeferred && !isApplied) && !change.nodeRestart { // avoid setting the flag on updates deferred -> immediate daemonStatus |= scDeferred recommendProfile, err := TunedRecommendFileRead() @@ -1304,7 +1304,7 @@ func (c *Controller) updateTunedProfileStatus(ctx context.Context, change Change } statusConditions := computeStatusConditions(daemonStatus, message, profile.Status.Conditions) - klog.Infof("computed status conditions: %#v", statusConditions) // TODO v=4 + klog.V(4).Infof("computed status conditions: %#v", statusConditions) c.daemon.status = daemonStatus if profile.Status.TunedProfile == activeProfile && From 7c9c040567fca84ebc822ad31cb8b2810d4ee09f Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Tue, 16 Jul 2024 10:55:20 +0200 Subject: [PATCH 09/19] tuned: operand: log restarts Log reasons why a tuned reload or restart is requested. We do this at V>=4, so there's little risk of adding extra noise in the logs. Signed-off-by: Francesco Romani --- pkg/tuned/controller.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/tuned/controller.go b/pkg/tuned/controller.go index 196e8e88f7..7a19b46d6f 100644 --- a/pkg/tuned/controller.go +++ b/pkg/tuned/controller.go @@ -1028,6 +1028,7 @@ func (c *Controller) changeSyncerTuneD(change Change) (synced bool, err error) { debug := (c.daemon.restart & ctrlDebug) != 0 if debug != change.debug { // A complete restart of the TuneD daemon is needed due to a debugging request switched on or off. + klog.V(4).Infof("debug control triggering tuned restart") c.daemon.restart |= ctrlRestart if change.debug { c.daemon.restart |= ctrlDebug @@ -1039,6 +1040,8 @@ func (c *Controller) changeSyncerTuneD(change Change) (synced bool, err error) { // Does the current TuneD process have the reapply_sysctl option turned on? reapplySysctl := c.tunedMainCfg.Section("").Key("reapply_sysctl").MustBool() if reapplySysctl != change.reapplySysctl { + klog.V(4).Infof("reapplySysctl rewriting configuration file") + if err = iniCfgSetKey(c.tunedMainCfg, "reapply_sysctl", !reapplySysctl); err != nil { return false, err } @@ -1046,6 +1049,7 @@ func (c *Controller) changeSyncerTuneD(change Change) (synced bool, err error) { if err != nil { return false, fmt.Errorf("failed to write global TuneD configuration file: %v", err) } + klog.V(4).Infof("reapplySysctl triggering tuned restart") c.daemon.restart |= ctrlRestart // A complete restart of the TuneD daemon is needed due to configuration change in tuned-main.conf file. 
}
 }

From 09a5766e0507b94f8e9e309c504d0674216859d5 Mon Sep 17 00:00:00 2001
From: Francesco Romani
Date: Tue, 16 Jul 2024 11:32:09 +0200
Subject: [PATCH 10/19] tuned: operand: unified restart and reload handling

Previously, we added special logic to handle TuneD reload requests
when deferred updates are received. Extend the logic to TuneD restarts
for the sake of clarity and because the inconsistent treatment was
causing bugs: profiles were not correctly reverted after removal.

Signed-off-by: Francesco Romani
---
 pkg/tuned/controller.go | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/pkg/tuned/controller.go b/pkg/tuned/controller.go
index 7a19b46d6f..40cb2b420b 100644
--- a/pkg/tuned/controller.go
+++ b/pkg/tuned/controller.go
@@ -963,15 +963,17 @@ func (c *Controller) changeSyncerProfileStatus(change Change) (synced bool) {
 // changeSyncerTuneD synchronizes k8s objects to disk, compares them with
 // current TuneD configuration and signals TuneD process reload or restart.
 func (c *Controller) changeSyncerTuneD(change Change) (synced bool, err error) {
+	var restart bool
 	var reload bool
 	var cfgUpdated bool
 	var changeRecommend bool
-	klog.V(2).Infof("changeSyncerTuneD(%s)", change.String())
-	defer klog.V(2).Infof("changeSyncerTuneD(%s) done updated=%v", change.String(), cfgUpdated)
-
+	restart = change.nodeRestart
 	reload = change.nodeRestart
+	klog.V(2).Infof("changeSyncerTuneD(%s) restart=%v reload=%v", change.String(), restart, reload)
+	defer klog.V(2).Infof("changeSyncerTuneD(%s) done updated=%v restart=%v reload=%v", change.String(), cfgUpdated, restart, reload)
+
 	if (c.daemon.status & scReloading) != 0 {
 		// This should not be necessary, but keep this here as a reminder.
 		// We should not manipulate TuneD configuration files in any way while the daemon is reloading/restarting.
@@ -1029,7 +1031,7 @@ func (c *Controller) changeSyncerTuneD(change Change) (synced bool, err error) {
 	if debug != change.debug {
 		// A complete restart of the TuneD daemon is needed due to a debugging request switched on or off.
 		klog.V(4).Infof("debug control triggering tuned restart")
-		c.daemon.restart |= ctrlRestart
+		restart = true
 		if change.debug {
 			c.daemon.restart |= ctrlDebug
 		} else {
@@ -1050,14 +1052,12 @@ func (c *Controller) changeSyncerTuneD(change Change) (synced bool, err error) {
 			return false, fmt.Errorf("failed to write global TuneD configuration file: %v", err)
 		}
 		klog.V(4).Infof("reapplySysctl triggering tuned restart")
-		c.daemon.restart |= ctrlRestart // A complete restart of the TuneD daemon is needed due to configuration change in tuned-main.conf file.
+		restart = true // A complete restart of the TuneD daemon is needed due to configuration change in tuned-main.conf file.
} } - if reload { - // failures pertaining to deferred updates are not critical - _ = c.handleDaemonReloadRequest(change) - } + // failures pertaining to deferred updates are not critical + _ = c.handleDaemonReloadRestartRequest(change, reload, restart) cfgUpdated, err = c.changeSyncerRestartOrReloadTuneD() klog.V(2).Infof("changeSyncerTuneD() configuration updated: %v", cfgUpdated) @@ -1068,10 +1068,21 @@ func (c *Controller) changeSyncerTuneD(change Change) (synced bool, err error) { return err == nil, err } -func (c *Controller) handleDaemonReloadRequest(change Change) error { +func (c *Controller) handleDaemonReloadRestartRequest(change Change, reload, restart bool) error { + if !reload && !restart { + // nothing to do + return nil + } + if !change.deferred || change.nodeRestart { - klog.V(2).Infof("immediate update, setting reload flag") - c.daemon.restart |= ctrlReload + if reload { + klog.V(2).Infof("immediate update, setting reload flag") + c.daemon.restart |= ctrlReload + } + if restart { + klog.V(2).Infof("immediate update, setting restart flag") + c.daemon.restart |= ctrlRestart + } return nil } From 48628daa1a6295c352efa69770e98fd3248509af Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Thu, 13 Jun 2024 10:35:10 +0200 Subject: [PATCH 11/19] e2e: makefile: add targets to build suites building suites ahead of time enables trivial catching of compilation error and turns out handy to run subset of tests, in addition to the usual ginkgo filtering capabilities. Signed-off-by: Francesco Romani --- Makefile | 7 +++++-- hack/build-pao-test-bin.sh | 30 ++++++++++++++++++++++++++++++ hack/build-test-bin.sh | 8 ++++---- 3 files changed, 39 insertions(+), 6 deletions(-) create mode 100755 hack/build-pao-test-bin.sh diff --git a/Makefile b/Makefile index dcdf9396cd..6a867e518a 100644 --- a/Makefile +++ b/Makefile @@ -293,13 +293,16 @@ gather-sysinfo-tests: build-gather-sysinfo render-sync: build hack/render-sync.sh +build-e2e-%: + @hack/build-test-bin.sh $(shell echo $@ | sed -e 's/^build-e2e-//' ) + pao-build-e2e-%: - @hack/build-test-bin.sh $(shell echo $@ | sed -e 's/^pao-build-e2e-//' ) + @hack/build-pao-test-bin.sh $(shell echo $@ | sed -e 's/^pao-build-e2e-//' ) .PHONY: pao-build-e2e pao-build-e2e: @for suite in $(PAO_E2E_SUITES); do \ - hack/build-test-bin.sh $$suite; \ + hack/build-pao-test-bin.sh $$suite; \ done .PHONY: pao-clean-e2e diff --git a/hack/build-pao-test-bin.sh b/hack/build-pao-test-bin.sh new file mode 100755 index 0000000000..ad562f8696 --- /dev/null +++ b/hack/build-pao-test-bin.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +set -e + +PREFIX="pao-build-e2e-" +SUITEPATH="./test/e2e/performanceprofile/functests" +TARGET=$1 + +if [ -z "$TARGET" ]; then + echo "usage: $0 suite" + echo "example: $0 1_performance" + exit 1 +fi + +OUTDIR="${2:-_output}" + +if [ ! 
-d "$SUITEPATH/$TARGET" ]; then + echo "unknown suite: $TARGET" + echo -e "must be one of:\n$( ls $SUITEPATH | grep -E '[0-9]+_.*' )" + exit 2 +fi + +SUITE="${SUITEPATH}/${TARGET}" +SUFFIX=$( echo $TARGET | cut -d_ -f2- ) +BASENAME="e2e-pao" +EXTENSION="test" +OUTPUT="${BASENAME}-${SUFFIX}.${EXTENSION}" + +echo "${SUITE} -> ${OUTDIR}/${OUTPUT}" +go test -c -v -o ${OUTDIR}/${OUTPUT} ${SUITE} diff --git a/hack/build-test-bin.sh b/hack/build-test-bin.sh index ad562f8696..a10f971682 100755 --- a/hack/build-test-bin.sh +++ b/hack/build-test-bin.sh @@ -2,13 +2,13 @@ set -e -PREFIX="pao-build-e2e-" -SUITEPATH="./test/e2e/performanceprofile/functests" +PREFIX="build-e2e-" +SUITEPATH="./test/e2e" TARGET=$1 if [ -z "$TARGET" ]; then echo "usage: $0 suite" - echo "example: $0 1_performance" + echo "example: $0 deferred" exit 1 fi @@ -22,7 +22,7 @@ fi SUITE="${SUITEPATH}/${TARGET}" SUFFIX=$( echo $TARGET | cut -d_ -f2- ) -BASENAME="e2e-pao" +BASENAME="e2e" EXTENSION="test" OUTPUT="${BASENAME}-${SUFFIX}.${EXTENSION}" From ce625d25f6d279973081a69a1df9475cefcd1139 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Tue, 21 May 2024 13:03:25 +0200 Subject: [PATCH 12/19] e2e: deferred: add test cases add test coverage for the `deferred updates` feature Signed-off-by: Francesco Romani --- Makefile | 4 +- test/e2e/deferred/basic.go | 446 ++++++++++++++++++ test/e2e/deferred/non_regression.go | 109 +++++ test/e2e/deferred/operator_test.go | 181 +++++++ test/e2e/deferred/restart.go | 297 ++++++++++++ .../deferred/tuned-basic-00.yaml | 22 + .../deferred/tuned-basic-10.yaml | 24 + .../deferred/tuned-basic-20.yaml | 25 + test/e2e/util/ready/node.go | 56 +++ test/e2e/util/ready/pod.go | 18 + test/e2e/util/util.go | 30 +- 11 files changed, 1208 insertions(+), 4 deletions(-) create mode 100644 test/e2e/deferred/basic.go create mode 100644 test/e2e/deferred/non_regression.go create mode 100644 test/e2e/deferred/operator_test.go create mode 100644 test/e2e/deferred/restart.go create mode 100644 test/e2e/testing_manifests/deferred/tuned-basic-00.yaml create mode 100644 test/e2e/testing_manifests/deferred/tuned-basic-10.yaml create mode 100644 test/e2e/testing_manifests/deferred/tuned-basic-20.yaml create mode 100644 test/e2e/util/ready/node.go create mode 100644 test/e2e/util/ready/pod.go diff --git a/Makefile b/Makefile index 6a867e518a..bb9e36fec3 100644 --- a/Makefile +++ b/Makefile @@ -99,8 +99,8 @@ pkg/generated: $(API_TYPES) $(GOBINDATA_BIN): $(GO) build -o $(GOBINDATA_BIN) ./vendor/github.com/kevinburke/go-bindata/go-bindata -test-e2e: - for d in core basic reboots reboots/sno; do \ +test-e2e: $(BINDATA) + for d in core basic reboots reboots/sno deferred; do \ KUBERNETES_CONFIG="$(KUBECONFIG)" $(GO) test -v -timeout 40m ./test/e2e/$$d -ginkgo.v -ginkgo.no-color -ginkgo.fail-fast || exit; \ done diff --git a/test/e2e/deferred/basic.go b/test/e2e/deferred/basic.go new file mode 100644 index 0000000000..7c65993e5a --- /dev/null +++ b/test/e2e/deferred/basic.go @@ -0,0 +1,446 @@ +package e2e + +import ( + "context" + "encoding/json" + "fmt" + "path/filepath" + "strings" + "time" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + tunedv1 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/tuned/v1" + ntoconfig "github.com/openshift/cluster-node-tuning-operator/pkg/config" + ntoutil "github.com/openshift/cluster-node-tuning-operator/pkg/util" + "github.com/openshift/cluster-node-tuning-operator/test/e2e/util" +) + 
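The specs below lean on a few small helpers defined elsewhere in the suite and not
shown in this hunk; setDeferred in particular is assumed to be a thin wrapper around
the annotation toggle, roughly:

    // Sketch only: mark a Tuned object so the operand defers applying it.
    func setDeferred(obj *tunedv1.Tuned) *tunedv1.Tuned {
        if obj == nil {
            return obj
        }
        obj.Annotations = ntoutil.ToggleDeferredUpdateAnnotation(obj.Annotations, true)
        return obj
    }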
+var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { + ginkgo.Context("when applied", func() { + var ( + createdTuneds []string + targetNode *corev1.Node + targetTunedPod *corev1.Pod + + dirPath string + tunedPathVMLatency string + tunedObjVMLatency *tunedv1.Tuned + ) + + ginkgo.BeforeEach(func() { + ginkgo.By("getting a list of worker nodes") + nodes, err := util.GetNodesByRole(cs, "worker") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(nodes)).NotTo(gomega.BeZero(), "number of worker nodes is 0") + + targetNode = &nodes[0] + ginkgo.By(fmt.Sprintf("using node %q as reference", targetNode.Name)) + + createdTuneds = []string{} + + dirPath, err = getCurrentDirPath() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + tunedPathVMLatency = filepath.Join(dirPath, tunedVMLatency) + tunedObjVMLatency, err = loadTuned(tunedPathVMLatency) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.AfterEach(func() { + for _, createdTuned := range createdTuneds { + ginkgo.By(fmt.Sprintf("cluster changes rollback: %q", createdTuned)) + util.ExecAndLogCommand("oc", "delete", "-n", ntoconfig.WatchNamespace(), "-f", createdTuned) + } + }) + + ginkgo.It("should not trigger any actual change", func(ctx context.Context) { + tunedPath := filepath.Join(dirPath, tunedSHMMNI) + ginkgo.By(fmt.Sprintf("loading tuned data from %s (basepath=%s)", tunedPath, dirPath)) + + tuned, err := loadTuned(tunedPath) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + verificationCommand, ok := tuned.Annotations[verifyCommandAnnotation] + gomega.Expect(ok).To(gomega.BeTrue(), "missing verification command annotation %s", verifyCommandAnnotation) + + verificationCommandArgs := []string{} + err = json.Unmarshal([]byte(verificationCommand), &verificationCommandArgs) + gomega.Expect(verificationCommandArgs).ToNot(gomega.BeEmpty(), "missing verification command args") + ginkgo.By(fmt.Sprintf("verification command: %v", verificationCommandArgs)) + + targetTunedPod, err = util.GetTunedForNode(cs, targetNode) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(targetTunedPod.Status.Phase).To(gomega.Equal(corev1.PodRunning), targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID, targetTunedPod.Status.Phase) + + // gather the output now before the profile is applied so we can check nothing changed + verificationOutput, err := util.ExecCmdInPod(targetTunedPod, verificationCommandArgs...) + if err != nil { + // not available, which is actually a valid state. Let's record it. 
+ verificationOutput = err.Error() + } else { + verificationOutput = strings.TrimSpace(verificationOutput) + } + ginkgo.By(fmt.Sprintf("verification expected output: %q", verificationOutput)) + + tunedMutated := setDeferred(tuned.DeepCopy()) + ginkgo.By(fmt.Sprintf("creating tuned object %s deferred=%v", tunedMutated.Name, ntoutil.HasDeferredUpdateAnnotation(tunedMutated.Annotations))) + + _, err = cs.Tuneds(ntoconfig.WatchNamespace()).Create(ctx, tunedMutated, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + createdTuneds = prepend(createdTuneds, tunedPath) // we need the path, not the name + ginkgo.By(fmt.Sprintf("create tuneds: %v", createdTuneds)) + + gomega.Expect(tuned.Spec.Recommend).ToNot(gomega.BeEmpty(), "tuned %q has empty recommendations", tuned.Name) + gomega.Expect(tuned.Spec.Recommend[0].Profile).ToNot(gomega.BeNil(), "tuned %q has empty recommended tuned profile", tuned.Name) + expectedProfile := *tuned.Spec.Recommend[0].Profile + ginkgo.By(fmt.Sprintf("expecting Tuned Profile %q to be picked up", expectedProfile)) + + gomega.Eventually(func() error { + curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{}) + if err != nil { + return err + } + ginkgo.By(fmt.Sprintf("checking profile for target node %q matches expectations about %q", curProf.Name, expectedProfile)) + + if len(curProf.Status.Conditions) == 0 { + return fmt.Errorf("missing status conditions") + } + cond := findCondition(curProf.Status.Conditions, tunedv1.TunedProfileApplied) + if cond == nil { + return fmt.Errorf("missing status applied condition") + } + err = checkAppliedConditionDeferred(cond, expectedProfile) + if err != nil { + util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) + } + recommended, err := getRecommendedProfile(targetTunedPod) + if err != nil { + return err + } + if recommended != expectedProfile { + return fmt.Errorf("recommended profile is %q expected %q", recommended, expectedProfile) + } + return err + }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + + gomega.Consistently(func() error { + curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{}) + if err != nil { + return err + } + ginkgo.By(fmt.Sprintf("checking conditions for profile %q: %#v", curProf.Name, curProf.Status.Conditions)) + if len(curProf.Status.Conditions) == 0 { + return fmt.Errorf("missing status conditions") + } + for _, condition := range curProf.Status.Conditions { + if condition.Type == tunedv1.TunedProfileApplied && condition.Status != corev1.ConditionFalse && condition.Reason != "Deferred" { + return fmt.Errorf("Profile deferred=%v %s applied", ntoutil.HasDeferredUpdateAnnotation(curProf.Annotations), curProf.Name) + } + if condition.Type == tunedv1.TunedDegraded && condition.Status != corev1.ConditionTrue && condition.Reason != "TunedDeferredUpdate" { + return fmt.Errorf("Profile deferred=%v %s not degraded", ntoutil.HasDeferredUpdateAnnotation(curProf.Annotations), curProf.Name) + } + } + ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q", curProf.Name)) + out, err := util.ExecCmdInPod(targetTunedPod, verificationCommandArgs...) 
+ if err != nil { + return err + } + out = strings.TrimSpace(out) + if out != verificationOutput { + return fmt.Errorf("got: %s; expected: %s", out, verificationOutput) + } + return nil + }).WithPolling(10 * time.Second).WithTimeout(2 * time.Minute).Should(gomega.Succeed()) + }) + + ginkgo.It("should revert the profile status on removal", func(ctx context.Context) { + dirPath, err := getCurrentDirPath() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + tunedPath := filepath.Join(dirPath, tunedSHMMNI) + ginkgo.By(fmt.Sprintf("loading tuned data from %s (basepath=%s)", tunedPath, dirPath)) + + tuned, err := loadTuned(tunedPath) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + tunedMutated := setDeferred(tuned.DeepCopy()) + ginkgo.By(fmt.Sprintf("creating tuned object %s deferred=%v", tunedMutated.Name, ntoutil.HasDeferredUpdateAnnotation(tunedMutated.Annotations))) + + _, err = cs.Tuneds(ntoconfig.WatchNamespace()).Create(ctx, tunedMutated, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + createdTuneds = prepend(createdTuneds, tunedPath) // we need the path, not the name + ginkgo.By(fmt.Sprintf("create tuneds: %v", createdTuneds)) + + gomega.Expect(tuned.Spec.Recommend).ToNot(gomega.BeEmpty(), "tuned %q has empty recommendations", tuned.Name) + gomega.Expect(tuned.Spec.Recommend[0].Profile).ToNot(gomega.BeNil(), "tuned %q has empty recommended tuned profile", tuned.Name) + expectedProfile := *tuned.Spec.Recommend[0].Profile + ginkgo.By(fmt.Sprintf("expecting Tuned Profile %q to be picked up", expectedProfile)) + + targetTunedPod, err = util.GetTunedForNode(cs, targetNode) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(targetTunedPod.Status.Phase).To(gomega.Equal(corev1.PodRunning), targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID, targetTunedPod.Status.Phase) + + gomega.Eventually(func() error { + curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{}) + if err != nil { + return err + } + ginkgo.By(fmt.Sprintf("checking profile for target node %q matches expectations about %q", curProf.Name, expectedProfile)) + + if len(curProf.Status.Conditions) == 0 { + return fmt.Errorf("missing status conditions") + } + cond := findCondition(curProf.Status.Conditions, tunedv1.TunedProfileApplied) + if cond == nil { + return fmt.Errorf("missing status applied condition") + } + err = checkAppliedConditionDeferred(cond, expectedProfile) + if err != nil { + util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) + } + recommended, err := getRecommendedProfile(targetTunedPod) + if err != nil { + return err + } + if recommended != expectedProfile { + return fmt.Errorf("recommended profile is %q expected %q", recommended, expectedProfile) + } + return err + }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + + ginkgo.By(fmt.Sprintf("cluster changes rollback: %q", tunedPath)) + _, _, err = util.ExecAndLogCommand("oc", "delete", "-n", ntoconfig.WatchNamespace(), "-f", tunedPath) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + _, createdTuneds, _ = popleft(createdTuneds) + gomega.Eventually(func() error { + curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{}) + if err != nil { + return err + } + ginkgo.By(fmt.Sprintf("checking profile for target node %q matches expectations about %q", curProf.Name, expectedProfile)) + + if 
len(curProf.Status.Conditions) == 0 { + return fmt.Errorf("missing status conditions") + } + cond := findCondition(curProf.Status.Conditions, tunedv1.TunedProfileApplied) + if cond == nil { + return fmt.Errorf("missing status applied condition") + } + err = checkAppliedConditionOK(cond) + if err != nil { + util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) + } + return err + }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + }) + + ginkgo.It("should be overridden by another deferred update", func(ctx context.Context) { + dirPath, err := getCurrentDirPath() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + tunedPathSHMMNI := filepath.Join(dirPath, tunedSHMMNI) + tunedDeferred, err := loadTuned(tunedPathSHMMNI) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + tunedMutated := setDeferred(tunedDeferred.DeepCopy()) + ginkgo.By(fmt.Sprintf("creating tuned object %s deferred=%v", tunedMutated.Name, ntoutil.HasDeferredUpdateAnnotation(tunedMutated.Annotations))) + + _, err = cs.Tuneds(ntoconfig.WatchNamespace()).Create(ctx, tunedMutated, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + createdTuneds = prepend(createdTuneds, tunedPathSHMMNI) // we need the path, not the name + ginkgo.By(fmt.Sprintf("create tuneds: %v", createdTuneds)) + + gomega.Expect(tunedMutated.Spec.Recommend).ToNot(gomega.BeEmpty(), "tuned %q has empty recommendations", tunedMutated.Name) + gomega.Expect(tunedMutated.Spec.Recommend[0].Profile).ToNot(gomega.BeNil(), "tuned %q has empty recommended tuned profile", tunedMutated.Name) + expectedProfile := *tunedMutated.Spec.Recommend[0].Profile + ginkgo.By(fmt.Sprintf("expecting Tuned Profile %q to be picked up", expectedProfile)) + + targetTunedPod, err = util.GetTunedForNode(cs, targetNode) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(targetTunedPod.Status.Phase).To(gomega.Equal(corev1.PodRunning), targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID, targetTunedPod.Status.Phase) + + gomega.Eventually(func() error { + curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{}) + if err != nil { + return err + } + ginkgo.By(fmt.Sprintf("checking profile for target node %q matches expectations about %q", curProf.Name, expectedProfile)) + + if len(curProf.Status.Conditions) == 0 { + return fmt.Errorf("missing status conditions") + } + cond := findCondition(curProf.Status.Conditions, tunedv1.TunedProfileApplied) + if cond == nil { + return fmt.Errorf("missing status applied condition") + } + err = checkAppliedConditionDeferred(cond, expectedProfile) + if err != nil { + util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) + } + recommended, err := getRecommendedProfile(targetTunedPod) + if err != nil { + return err + } + if recommended != expectedProfile { + return fmt.Errorf("recommended profile is %q expected %q", recommended, expectedProfile) + } + return err + }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + + tunedDeferred2 := tunedObjVMLatency + tunedMutated2 := setDeferred(tunedDeferred2.DeepCopy()) + ginkgo.By(fmt.Sprintf("creating tuned object %s deferred=%v", tunedMutated2.Name, ntoutil.HasDeferredUpdateAnnotation(tunedMutated2.Annotations))) + + _, err = cs.Tuneds(ntoconfig.WatchNamespace()).Create(ctx, tunedMutated2, metav1.CreateOptions{}) + 
gomega.Expect(err).ToNot(gomega.HaveOccurred())
+
+			createdTuneds = prepend(createdTuneds, tunedPathVMLatency) // we need the path, not the name
+			ginkgo.By(fmt.Sprintf("create tuneds: %v", createdTuneds))
+
+			gomega.Expect(tunedMutated2.Spec.Recommend).ToNot(gomega.BeEmpty(), "tuned %q has empty recommendations", tunedMutated2.Name)
+			gomega.Expect(tunedMutated2.Spec.Recommend[0].Profile).ToNot(gomega.BeNil(), "tuned %q has empty recommended tuned profile", tunedMutated2.Name)
+			expectedProfile = *tunedMutated2.Spec.Recommend[0].Profile
+			ginkgo.By(fmt.Sprintf("expecting Tuned Profile %q to be picked up", expectedProfile))
+
+			gomega.Eventually(func() error {
+				curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{})
+				if err != nil {
+					return err
+				}
+				ginkgo.By(fmt.Sprintf("checking profile for target node %q matches expectations about %q", curProf.Name, expectedProfile))
+
+				if len(curProf.Status.Conditions) == 0 {
+					return fmt.Errorf("missing status conditions")
+				}
+				cond := findCondition(curProf.Status.Conditions, tunedv1.TunedProfileApplied)
+				if cond == nil {
+					return fmt.Errorf("missing status applied condition")
+				}
+				err = checkAppliedConditionDeferred(cond, expectedProfile)
+				if err != nil {
+					util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err)
+				}
+
+				recommended, err := getRecommendedProfile(targetTunedPod)
+				if err != nil {
+					return err
+				}
+				if recommended != expectedProfile {
+					return fmt.Errorf("recommended profile is %q expected %q", recommended, expectedProfile)
+				}
+				return err
+			}).WithPolling(10 * time.Second).WithTimeout(5 * time.Minute).Should(gomega.Succeed())
+		})
+
+		ginkgo.It("should be overridden by an immediate update by edit", func(ctx context.Context) {
+			tunedImmediate := tunedObjVMLatency
+			tunedMutated := setDeferred(tunedImmediate.DeepCopy())
+			ginkgo.By(fmt.Sprintf("creating tuned object %s deferred=%v", tunedMutated.Name, ntoutil.HasDeferredUpdateAnnotation(tunedMutated.Annotations)))
+
+			_, err := cs.Tuneds(ntoconfig.WatchNamespace()).Create(ctx, tunedMutated, metav1.CreateOptions{})
+			gomega.Expect(err).ToNot(gomega.HaveOccurred())
+
+			createdTuneds = prepend(createdTuneds, tunedPathVMLatency) // we need the path, not the name
+			ginkgo.By(fmt.Sprintf("create tuneds: %v", createdTuneds))
+
+			gomega.Expect(tunedMutated.Spec.Recommend).ToNot(gomega.BeEmpty(), "tuned %q has empty recommendations", tunedMutated.Name)
+			gomega.Expect(tunedMutated.Spec.Recommend[0].Profile).ToNot(gomega.BeNil(), "tuned %q has empty recommended tuned profile", tunedMutated.Name)
+			expectedProfile := *tunedMutated.Spec.Recommend[0].Profile
+			ginkgo.By(fmt.Sprintf("expecting Tuned Profile %q to be picked up", expectedProfile))
+
+			targetTunedPod, err = util.GetTunedForNode(cs, targetNode)
+			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+			gomega.Expect(targetTunedPod.Status.Phase).To(gomega.Equal(corev1.PodRunning), targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID, targetTunedPod.Status.Phase)
+
+			gomega.Eventually(func() error {
+				curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{})
+				if err != nil {
+					return err
+				}
+				ginkgo.By(fmt.Sprintf("checking profile for target node %q matches expectations about %q", curProf.Name, expectedProfile))
+
+				if len(curProf.Status.Conditions) == 0 {
+					return fmt.Errorf("missing status conditions")
+				}
+				cond := findCondition(curProf.Status.Conditions,
tunedv1.TunedProfileApplied) + if cond == nil { + return fmt.Errorf("missing status applied condition") + } + err = checkAppliedConditionDeferred(cond, expectedProfile) + if err != nil { + util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) + } + recommended, err := getRecommendedProfile(targetTunedPod) + if err != nil { + return err + } + if recommended != expectedProfile { + return fmt.Errorf("recommended profile is %q expected %q", recommended, expectedProfile) + } + return err + }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + + gomega.Eventually(func() error { + curTuned, err := cs.Tuneds(ntoconfig.WatchNamespace()).Get(ctx, tunedImmediate.Name, metav1.GetOptions{}) + if err != nil { + return err + } + curTuned = curTuned.DeepCopy() + + ginkgo.By(fmt.Sprintf("removing the deferred annotation from Tuned %q", tunedImmediate.Name)) + curTuned.Annotations = ntoutil.ToggleDeferredUpdateAnnotation(curTuned.Annotations, false) + + _, err = cs.Tuneds(ntoconfig.WatchNamespace()).Update(ctx, curTuned, metav1.UpdateOptions{}) + return err + }).WithPolling(1*time.Second).WithTimeout(1*time.Minute).Should(gomega.Succeed(), "cannot remove the deferred annotation") + + verifications := extractVerifications(tunedImmediate) + gomega.Expect(len(verifications)).To(gomega.Equal(1), "unexpected verification count, check annotations") + + gomega.Eventually(func() error { + curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{}) + if err != nil { + return err + } + ginkgo.By(fmt.Sprintf("checking profile for target node %q matches expectations about %q", curProf.Name, expectedProfile)) + + if len(curProf.Status.Conditions) == 0 { + return fmt.Errorf("missing status conditions") + } + cond := findCondition(curProf.Status.Conditions, tunedv1.TunedProfileApplied) + if cond == nil { + return fmt.Errorf("missing status applied condition") + } + err = checkAppliedConditionOK(cond) + if err != nil { + util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) + } + return err + }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + + gomega.Consistently(func() error { + ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q", targetNode.Name)) + for _, verif := range verifications { + out, err := util.ExecCmdInPod(targetTunedPod, verif.command...) 
+ if err != nil { + return err + } + out = strings.TrimSpace(out) + if out != verif.output { + return fmt.Errorf("got: %s; expected: %s", out, verif.output) + } + } + return nil + }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + }) + }) +}) diff --git a/test/e2e/deferred/non_regression.go b/test/e2e/deferred/non_regression.go new file mode 100644 index 0000000000..bef5c68686 --- /dev/null +++ b/test/e2e/deferred/non_regression.go @@ -0,0 +1,109 @@ +package e2e + +import ( + "context" + "fmt" + "path/filepath" + "time" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + tunedv1 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/tuned/v1" + ntoconfig "github.com/openshift/cluster-node-tuning-operator/pkg/config" + ntoutil "github.com/openshift/cluster-node-tuning-operator/pkg/util" + "github.com/openshift/cluster-node-tuning-operator/test/e2e/util" +) + +var _ = ginkgo.Describe("[deferred][non_regression] Profile non-deferred", func() { + ginkgo.Context("when applied", func() { + var ( + createdTuneds []string + targetNode *corev1.Node + + dirPath string + tunedPathVMLatency string + ) + + ginkgo.BeforeEach(func() { + ginkgo.By("getting a list of worker nodes") + nodes, err := util.GetNodesByRole(cs, "worker") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(nodes)).NotTo(gomega.BeZero(), "number of worker nodes is 0") + + targetNode = &nodes[0] + ginkgo.By(fmt.Sprintf("using node %q as reference", targetNode.Name)) + + createdTuneds = []string{} + + dirPath, err = getCurrentDirPath() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + tunedPathVMLatency = filepath.Join(dirPath, tunedVMLatency) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.AfterEach(func() { + for _, createdTuned := range createdTuneds { + ginkgo.By(fmt.Sprintf("cluster changes rollback: %q", createdTuned)) + util.ExecAndLogCommand("oc", "delete", "-n", ntoconfig.WatchNamespace(), "-f", createdTuned) + } + }) + + ginkgo.It("should trigger changes", func(ctx context.Context) { + tuned, err := loadTuned(tunedPathVMLatency) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + ginkgo.By(fmt.Sprintf("creating tuned object %s deferred=%v", tuned.Name, ntoutil.HasDeferredUpdateAnnotation(tuned.Annotations))) + + verifications := extractVerifications(tuned) + gomega.Expect(len(verifications)).To(gomega.Equal(1), "unexpected verification count, check annotations") + + _, err = cs.Tuneds(ntoconfig.WatchNamespace()).Create(ctx, tuned, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + createdTuneds = prepend(createdTuneds, tunedPathVMLatency) // we need the path, not the name + ginkgo.By(fmt.Sprintf("create tuneds: %v", createdTuneds)) + + gomega.Expect(tuned.Spec.Recommend).ToNot(gomega.BeEmpty(), "tuned %q has empty recommendations", tuned.Name) + gomega.Expect(tuned.Spec.Recommend[0].Profile).ToNot(gomega.BeNil(), "tuned %q has empty recommended tuned profile", tuned.Name) + expectedProfile := *tuned.Spec.Recommend[0].Profile + ginkgo.By(fmt.Sprintf("expecting Tuned Profile %q to be picked up", expectedProfile)) + + var targetTunedPod *corev1.Pod + gomega.Eventually(func() error { + targetTunedPod, err = util.GetTunedForNode(cs, targetNode) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + if targetTunedPod.Status.Phase != corev1.PodRunning { + return fmt.Errorf("pod %s/%s %q not running (phase=%v)", targetTunedPod.Namespace, 
targetTunedPod.Name, targetTunedPod.UID, targetTunedPod.Status.Phase) + } + + curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{}) + if err != nil { + return err + } + ginkgo.By(fmt.Sprintf("checking profile for target node %q matches expectations about %q", curProf.Name, expectedProfile)) + + if len(curProf.Status.Conditions) == 0 { + return fmt.Errorf("missing status conditions") + } + cond := findCondition(curProf.Status.Conditions, tunedv1.TunedProfileApplied) + if cond == nil { + return fmt.Errorf("missing status applied condition") + } + err = checkAppliedConditionOK(cond) + if err != nil { + util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) + } + return verify(targetTunedPod, verifications) + }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + + gomega.Consistently(func() error { + ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q", targetNode.Name)) + return verify(targetTunedPod, verifications) + }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + }) + }) +}) diff --git a/test/e2e/deferred/operator_test.go b/test/e2e/deferred/operator_test.go new file mode 100644 index 0000000000..3d13f65b87 --- /dev/null +++ b/test/e2e/deferred/operator_test.go @@ -0,0 +1,181 @@ +package e2e + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + goruntime "runtime" + "strings" + "testing" + "time" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + "k8s.io/klog" + + tunedv1 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/tuned/v1" + "github.com/openshift/cluster-node-tuning-operator/pkg/manifests" + ntoutil "github.com/openshift/cluster-node-tuning-operator/pkg/util" + "github.com/openshift/cluster-node-tuning-operator/test/e2e/util" + "github.com/openshift/cluster-node-tuning-operator/test/framework" +) + +const ( + verifyCommandAnnotation = "verificationCommand" + verifyOutputAnnotation = "verificationOutput" + + pollInterval = 5 * time.Second + waitDuration = 5 * time.Minute + // The number of Profile status conditions. Adjust when adding new conditions in the API. 
+ ProfileStatusConditions = 2 + + tunedSHMMNI = "../testing_manifests/deferred/tuned-basic-00.yaml" + tunedCPUEnergy = "../testing_manifests/deferred/tuned-basic-10.yaml" + tunedVMLatency = "../testing_manifests/deferred/tuned-basic-20.yaml" +) + +var ( + cs = framework.NewClientSet() +) + +func TestNodeTuningOperatorDeferred(t *testing.T) { + gomega.RegisterFailHandler(ginkgo.Fail) + ginkgo.RunSpecs(t, "Node Tuning Operator e2e tests: deferred") +} + +type verification struct { + command []string + output string +} + +func extractVerifications(tuneds ...*tunedv1.Tuned) map[string]verification { + ret := make(map[string]verification) + for _, tuned := range tuneds { + verificationOutput, ok := tuned.Annotations[verifyOutputAnnotation] + if !ok { + util.Logf("tuned %q has no verification output annotation", tuned.Name) + continue + } + + verificationCommand, ok := tuned.Annotations[verifyCommandAnnotation] + if !ok { + util.Logf("tuned %q has no verification command annotation", tuned.Name) + continue + } + + verificationCommandArgs := []string{} + err := json.Unmarshal([]byte(verificationCommand), &verificationCommandArgs) + if err != nil { + util.Logf("cannot unmarshal verification command for tuned %q", tuned.Name) + continue + } + util.Logf("tuned %q verification command: %v", tuned.Name, verificationCommandArgs) + + ret[tuned.Name] = verification{ + command: verificationCommandArgs, + output: verificationOutput, + } + } + return ret +} + +func getRecommendedProfile(pod *corev1.Pod) (string, error) { + out, err := util.ExecCmdInPod(pod, "/bin/cat", "/etc/tuned/recommend.d/50-openshift.conf") + if err != nil { + return "", err + } + recommended := strings.TrimSuffix(strings.TrimPrefix(strings.TrimSpace(out), "["), "]") + util.Logf("getRecommendedProfile(): read %q from pod %s/%s on %q", recommended, pod.Namespace, pod.Name, pod.Spec.NodeName) + return recommended, nil +} + +func verify(pod *corev1.Pod, verifications map[string]verification) error { + for _, verif := range verifications { + out, err := util.ExecCmdInPod(pod, verif.command...) + if err != nil { + // not available, which is actually a valid state. Let's record it. + out = err.Error() + } else { + out = strings.TrimSpace(out) + } + if out != verif.output { + return fmt.Errorf("got: %s; expected: %s", out, verif.output) + } + } + return nil +} + +func popleft(strs []string) (string, []string, bool) { + if len(strs) < 1 { + return "", strs, false + } + return strs[0], strs[1:], true +} + +func prepend(strs []string, s string) []string { + return append([]string{s}, strs...) 
+} + +func setDeferred(obj *tunedv1.Tuned) *tunedv1.Tuned { + if obj == nil { + return obj + } + if obj.Annotations == nil { + obj.Annotations = make(map[string]string) + } + obj.Annotations = ntoutil.ToggleDeferredUpdateAnnotation(obj.Annotations, true) + return obj +} + +func loadTuned(path string) (*tunedv1.Tuned, error) { + src, err := os.Open(path) + if err != nil { + return nil, err + } + defer src.Close() + return manifests.NewTuned(src) +} + +func getCurrentDirPath() (string, error) { + _, file, _, ok := goruntime.Caller(0) + if !ok { + return "", fmt.Errorf("cannot retrieve tests directory") + } + return filepath.Dir(file), nil +} + +func findCondition(conditions []tunedv1.ProfileStatusCondition, conditionType tunedv1.ProfileConditionType) *tunedv1.ProfileStatusCondition { + for _, condition := range conditions { + if condition.Type == conditionType { + return &condition + } + } + return nil +} + +func checkAppliedConditionDeferred(cond *tunedv1.ProfileStatusCondition, expectedProfile string) error { + klog.Infof("expected profile: %q", expectedProfile) + if cond.Status != corev1.ConditionFalse { + return fmt.Errorf("applied is true") + } + if !strings.Contains(cond.Message, "waiting for the next node restart") { + return fmt.Errorf("unexpected message %q", cond.Message) + } + return nil +} + +func checkAppliedConditionOK(cond *tunedv1.ProfileStatusCondition) error { + if cond.Status != corev1.ConditionTrue { + return fmt.Errorf("applied is false") + } + if !strings.Contains(cond.Reason, "AsExpected") { + return fmt.Errorf("unexpected reason %q", cond.Reason) + } + if !strings.Contains(cond.Message, "TuneD profile applied.") { + return fmt.Errorf("unexpected message %q", cond.Message) + } + return nil +} diff --git a/test/e2e/deferred/restart.go b/test/e2e/deferred/restart.go new file mode 100644 index 0000000000..7505a99b59 --- /dev/null +++ b/test/e2e/deferred/restart.go @@ -0,0 +1,297 @@ +package e2e + +import ( + "context" + "encoding/json" + "fmt" + "path/filepath" + "strings" + "time" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + tunedv1 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/tuned/v1" + ntoconfig "github.com/openshift/cluster-node-tuning-operator/pkg/config" + ntoutil "github.com/openshift/cluster-node-tuning-operator/pkg/util" + "github.com/openshift/cluster-node-tuning-operator/test/e2e/util" + "github.com/openshift/cluster-node-tuning-operator/test/e2e/util/ready" +) + +var _ = ginkgo.Describe("[deferred][restart] Profile deferred", func() { + ginkgo.Context("when restarting", func() { + var ( + createdTuneds []string + targetNode *corev1.Node + + dirPath string + tunedPathVMLatency string + tunedObjVMLatency *tunedv1.Tuned + ) + + ginkgo.BeforeEach(func() { + ginkgo.By("getting a list of worker nodes") + nodes, err := util.GetNodesByRole(cs, "worker") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(nodes)).NotTo(gomega.BeZero(), "number of worker nodes is 0") + + targetNode = &nodes[0] + ginkgo.By(fmt.Sprintf("using node %q as reference", targetNode.Name)) + + createdTuneds = []string{} + + dirPath, err = getCurrentDirPath() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + tunedPathVMLatency = filepath.Join(dirPath, tunedVMLatency) + tunedObjVMLatency, err = loadTuned(tunedPathVMLatency) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.AfterEach(func() { + for _, createdTuned := range createdTuneds { + 
ginkgo.By(fmt.Sprintf("cluster changes rollback: %q", createdTuned)) + util.ExecAndLogCommand("oc", "delete", "-n", ntoconfig.WatchNamespace(), "-f", createdTuned) + } + }) + + ginkgo.Context("[slow][pod]the tuned daemon", func() { + ginkgo.It("should not be applied", func(ctx context.Context) { + ginkgo.By(fmt.Sprintf("getting the tuned pod running on %q", targetNode.Name)) + targetTunedPod, err := util.GetTunedForNode(cs, targetNode) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.By(fmt.Sprintf("got the tuned pod running on %q: %s/%s %s", targetNode.Name, targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID)) + + tunedImmediate := tunedObjVMLatency + + verificationCommand, ok := tunedImmediate.Annotations[verifyCommandAnnotation] + gomega.Expect(ok).To(gomega.BeTrue(), "missing verification command annotation %s", verifyCommandAnnotation) + + verificationCommandArgs := []string{} + err = json.Unmarshal([]byte(verificationCommand), &verificationCommandArgs) + gomega.Expect(verificationCommandArgs).ToNot(gomega.BeEmpty(), "missing verification command args") + ginkgo.By(fmt.Sprintf("verification command: %v", verificationCommandArgs)) + + // gather the output now before the profile is applied so we can check nothing changed + verificationOutput, err := util.ExecCmdInPod(targetTunedPod, verificationCommandArgs...) + if err != nil { + // not available, which is actually a valid state. Let's record it. + verificationOutput = err.Error() + } else { + verificationOutput = strings.TrimSpace(verificationOutput) + } + ginkgo.By(fmt.Sprintf("verification expected output: %q", verificationOutput)) + + tunedMutated := setDeferred(tunedImmediate.DeepCopy()) + ginkgo.By(fmt.Sprintf("creating tuned object %s deferred=%v", tunedMutated.Name, ntoutil.HasDeferredUpdateAnnotation(tunedMutated.Annotations))) + + _, err = cs.Tuneds(ntoconfig.WatchNamespace()).Create(ctx, tunedMutated, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + createdTuneds = prepend(createdTuneds, tunedPathVMLatency) // we need the path, not the name + ginkgo.By(fmt.Sprintf("create tuneds: %v", createdTuneds)) + + gomega.Expect(tunedMutated.Spec.Recommend).ToNot(gomega.BeEmpty(), "tuned %q has empty recommendations", tunedMutated.Name) + gomega.Expect(tunedMutated.Spec.Recommend[0].Profile).ToNot(gomega.BeNil(), "tuned %q has empty recommended tuned profile", tunedMutated.Name) + expectedProfile := *tunedMutated.Spec.Recommend[0].Profile + ginkgo.By(fmt.Sprintf("expecting Tuned Profile %q to be picked up", expectedProfile)) + + gomega.Eventually(func() error { + curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{}) + if err != nil { + return err + } + ginkgo.By(fmt.Sprintf("checking profile for target node %q matches expectations about %q", curProf.Name, expectedProfile)) + + if len(curProf.Status.Conditions) == 0 { + return fmt.Errorf("missing status conditions") + } + cond := findCondition(curProf.Status.Conditions, tunedv1.TunedProfileApplied) + if cond == nil { + return fmt.Errorf("missing status applied condition") + } + err = checkAppliedConditionDeferred(cond, expectedProfile) + if err != nil { + util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) + } + return err + }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + + oldTunedPodUID := targetTunedPod.UID + ginkgo.By(fmt.Sprintf("killing the tuned pod running on %q", 
targetNode.Name)) + gomega.Expect(cs.Pods(targetTunedPod.GetNamespace()).Delete(ctx, targetTunedPod.Name, metav1.DeleteOptions{})).To(gomega.Succeed()) + // wait for the tuned pod to be found again + + gomega.Eventually(func() error { + curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{}) + if err != nil { + return err + } + ginkgo.By(fmt.Sprintf("checking profile for target node %q matches expectations about %q", curProf.Name, expectedProfile)) + + if len(curProf.Status.Conditions) == 0 { + return fmt.Errorf("missing status conditions") + } + cond := findCondition(curProf.Status.Conditions, tunedv1.TunedProfileApplied) + if cond == nil { + return fmt.Errorf("missing status applied condition") + } + err = checkAppliedConditionDeferred(cond, expectedProfile) + if err != nil { + util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) + } + return err + }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + + ginkgo.By(fmt.Sprintf("getting again the tuned pod running on %q", targetNode.Name)) + gomega.Eventually(func() error { + targetTunedPod, err = util.GetTunedForNode(cs, targetNode) + if err != nil { + return err + } + if targetTunedPod.UID == oldTunedPodUID { + return fmt.Errorf("pod %s/%s not refreshed old UID %q current UID %q", targetTunedPod.Namespace, targetTunedPod.Name, oldTunedPodUID, targetTunedPod.UID) + } + if !ready.Pod(*targetTunedPod) { + return fmt.Errorf("pod %s/%s (%s) not ready", targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID) + } + return nil + }).WithPolling(2 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + ginkgo.By(fmt.Sprintf("got again the tuned pod running on %q: %s/%s %s", targetNode.Name, targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID)) + + gomega.Consistently(func() error { + curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{}) + if err != nil { + return err + } + ginkgo.By(fmt.Sprintf("checking conditions for profile %q: %#v", curProf.Name, curProf.Status.Conditions)) + if len(curProf.Status.Conditions) == 0 { + return fmt.Errorf("missing status conditions") + } + for _, condition := range curProf.Status.Conditions { + if condition.Type == tunedv1.TunedProfileApplied && condition.Status != corev1.ConditionFalse && condition.Reason != "Deferred" { + return fmt.Errorf("Profile deferred=%v %s applied", ntoutil.HasDeferredUpdateAnnotation(curProf.Annotations), curProf.Name) + } + if condition.Type == tunedv1.TunedDegraded && condition.Status != corev1.ConditionTrue && condition.Reason != "TunedDeferredUpdate" { + return fmt.Errorf("Profile deferred=%v %s not degraded", ntoutil.HasDeferredUpdateAnnotation(curProf.Annotations), curProf.Name) + } + } + + ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q", curProf.Name)) + out, err := util.ExecCmdInPod(targetTunedPod, verificationCommandArgs...) 
+ if err != nil { + return err + } + out = strings.TrimSpace(out) + if out != verificationOutput { + return fmt.Errorf("got: %s; expected: %s", out, verificationOutput) + } + return nil + }).WithPolling(10 * time.Second).WithTimeout(2 * time.Minute).Should(gomega.Succeed()) + }) + }) + + ginkgo.Context("[slow][disruptive][node] the worker node", func() { + ginkgo.It("should be applied", func(ctx context.Context) { + tunedImmediate := tunedObjVMLatency + + verifications := extractVerifications(tunedImmediate) + gomega.Expect(len(verifications)).To(gomega.Equal(1), "unexpected verification count, check annotations") + + ginkgo.By(fmt.Sprintf("getting the tuned pod running on %q", targetNode.Name)) + targetTunedPod, err := util.GetTunedForNode(cs, targetNode) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.By(fmt.Sprintf("got the tuned pod running on %q: %s/%s %s", targetNode.Name, targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID)) + + tunedMutated := setDeferred(tunedImmediate.DeepCopy()) + ginkgo.By(fmt.Sprintf("creating tuned object %s deferred=%v", tunedMutated.Name, ntoutil.HasDeferredUpdateAnnotation(tunedMutated.Annotations))) + + _, err = cs.Tuneds(ntoconfig.WatchNamespace()).Create(ctx, tunedMutated, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + createdTuneds = prepend(createdTuneds, tunedPathVMLatency) // we need the path, not the name + + gomega.Expect(tunedMutated.Spec.Recommend).ToNot(gomega.BeEmpty(), "tuned %q has empty recommendations", tunedMutated.Name) + gomega.Expect(tunedMutated.Spec.Recommend[0].Profile).ToNot(gomega.BeNil(), "tuned %q has empty recommended tuned profile", tunedMutated.Name) + expectedProfile := *tunedMutated.Spec.Recommend[0].Profile + ginkgo.By(fmt.Sprintf("expecting Tuned Profile %q to be picked up", expectedProfile)) + + gomega.Eventually(func() error { + curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{}) + if err != nil { + return err + } + ginkgo.By(fmt.Sprintf("checking profile for target node %q matches expectations about %q", curProf.Name, expectedProfile)) + + if len(curProf.Status.Conditions) == 0 { + return fmt.Errorf("missing status conditions") + } + cond := findCondition(curProf.Status.Conditions, tunedv1.TunedProfileApplied) + if cond == nil { + return fmt.Errorf("missing status applied condition") + } + err = checkAppliedConditionDeferred(cond, expectedProfile) + if err != nil { + util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) + } + return err + }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + + ginkgo.By(fmt.Sprintf("getting the machine config daemon pod running on %q", targetNode.Name)) + targetMCDPod, err := util.GetMachineConfigDaemonForNode(cs, targetNode) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.By(fmt.Sprintf("got the machine config daemon pod running on %q: %s/%s %s", targetNode.Name, targetMCDPod.Namespace, targetMCDPod.Name, targetMCDPod.UID)) + + ginkgo.By(fmt.Sprintf("restarting the worker node on which the tuned was running on %q", targetNode.Name)) + _, err = util.ExecCmdInPodNamespace(targetMCDPod.Namespace, targetMCDPod.Name, "chroot", "/rootfs", "systemctl", "reboot") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // Very generous timeout. 
On baremetal a reboot can take a long time + ready.WaitNodeOrFail(cs, "post-reboot", targetNode.Name, 20*time.Minute, 5*time.Second) + + gomega.Eventually(func() error { + curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{}) + if err != nil { + return err + } + ginkgo.By(fmt.Sprintf("checking profile for target node %q matches expectations about %q", curProf.Name, expectedProfile)) + + if len(curProf.Status.Conditions) == 0 { + return fmt.Errorf("missing status conditions") + } + cond := findCondition(curProf.Status.Conditions, tunedv1.TunedProfileApplied) + if cond == nil { + return fmt.Errorf("missing status applied condition") + } + err = checkAppliedConditionOK(cond) + if err != nil { + util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) + } + return err + }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + + ginkgo.By(fmt.Sprintf("getting again the tuned pod running on %q", targetNode.Name)) + targetTunedPod, err = util.GetTunedForNode(cs, targetNode) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.By(fmt.Sprintf("got again the tuned pod running on %q: %s/%s %s", targetNode.Name, targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID)) + + gomega.Consistently(func() error { + ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q", targetNode.Name)) + for _, verif := range verifications { + out, err := util.ExecCmdInPod(targetTunedPod, verif.command...) + if err != nil { + return err + } + out = strings.TrimSpace(out) + if out != verif.output { + return fmt.Errorf("got: %s; expected: %s", out, verif.output) + } + } + return nil + }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + }) + }) + }) +}) diff --git a/test/e2e/testing_manifests/deferred/tuned-basic-00.yaml b/test/e2e/testing_manifests/deferred/tuned-basic-00.yaml new file mode 100644 index 0000000000..005f5f76fc --- /dev/null +++ b/test/e2e/testing_manifests/deferred/tuned-basic-00.yaml @@ -0,0 +1,22 @@ +apiVersion: tuned.openshift.io/v1 +kind: Tuned +metadata: + name: ocp-prof-deferred-basic-00 + namespace: openshift-cluster-node-tuning-operator + annotations: + verificationCommand: "[\"/usr/sbin/sysctl\", \"-n\", \"kernel.shmmni\"]" + verificationOutput: "8192" +spec: + profile: + - data: | + [main] + summary=Custom OpenShift profile + include=openshift-node + [sysctl] + kernel.shmmni=8192 + name: test-shmmni + recommend: + - match: + - label: node-role.kubernetes.io/worker + priority: 20 + profile: test-shmmni diff --git a/test/e2e/testing_manifests/deferred/tuned-basic-10.yaml b/test/e2e/testing_manifests/deferred/tuned-basic-10.yaml new file mode 100644 index 0000000000..f30c808fe2 --- /dev/null +++ b/test/e2e/testing_manifests/deferred/tuned-basic-10.yaml @@ -0,0 +1,24 @@ +apiVersion: tuned.openshift.io/v1 +kind: Tuned +metadata: + name: ocp-prof-deferred-basic-10 + namespace: openshift-cluster-node-tuning-operator + annotations: + verificationCommand: "[\"/bin/cat\", \"/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor\"]" + verificationOutput: "performance" +spec: + profile: + - data: | + [main] + summary=Custom OpenShift profile + include=openshift-node + [sysctl] + kernel.shmmni=8192 + [cpu] + governor=performance + name: test-cpu-energy + recommend: + - match: + - label: node-role.kubernetes.io/worker + priority: 15 + profile: test-cpu-energy diff --git 
a/test/e2e/testing_manifests/deferred/tuned-basic-20.yaml b/test/e2e/testing_manifests/deferred/tuned-basic-20.yaml new file mode 100644 index 0000000000..1e9a55b166 --- /dev/null +++ b/test/e2e/testing_manifests/deferred/tuned-basic-20.yaml @@ -0,0 +1,25 @@ +apiVersion: tuned.openshift.io/v1 +kind: Tuned +metadata: + name: ocp-prof-deferred-basic-20 + namespace: openshift-cluster-node-tuning-operator + annotations: + verificationCommand: "[\"/usr/sbin/sysctl\", \"-n\", \"vm.swappiness\"]" + verificationOutput: "13" +spec: + profile: + - data: | + [main] + summary=Custom OpenShift profile + include=openshift-node + [sysctl] + kernel.shmmni=8192 + vm.dirty_ratio=10 + vm.dirty_background_ratio=3 + vm.swappiness=13 + name: test-vm-latency + recommend: + - match: + - label: node-role.kubernetes.io/worker + priority: 15 + profile: test-vm-latency diff --git a/test/e2e/util/ready/node.go b/test/e2e/util/ready/node.go new file mode 100644 index 0000000000..5e520d3d0d --- /dev/null +++ b/test/e2e/util/ready/node.go @@ -0,0 +1,56 @@ +package ready + +import ( + "context" + "time" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/openshift/cluster-node-tuning-operator/test/e2e/util" + "github.com/openshift/cluster-node-tuning-operator/test/framework" +) + +func Node(node corev1.Node) bool { + for _, c := range node.Status.Conditions { + if c.Type == corev1.NodeReady { + return c.Status == corev1.ConditionTrue + } + } + return false +} + +func WaitNodeOrFail(cs *framework.ClientSet, tag, nodeName string, timeout, polling time.Duration) { + ginkgo.GinkgoHelper() + + util.Logf("%s: waiting for node %q: to be NOT-ready", tag, nodeName) + gomega.Eventually(func() (bool, error) { + node, err := cs.CoreV1Interface.Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) + if err != nil { + // intentionally tolerate error + util.Logf("wait for node %q ready: %v", nodeName, err) + return false, nil + } + ready := Node(*node) + util.Logf("node %q ready=%v", nodeName, ready) + return !ready, nil // note "not" + }).WithTimeout(2*time.Minute).WithPolling(polling).Should(gomega.BeTrue(), "post reboot/1: cannot get readiness status after reboot for node %q", nodeName) + + util.Logf("%s: waiting for node %q: to be ready", tag, nodeName) + gomega.Eventually(func() (bool, error) { + node, err := cs.CoreV1Interface.Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) + if err != nil { + // intentionally tolerate error + util.Logf("wait for node %q ready: %v", nodeName, err) + return false, nil + } + ready := Node(*node) + util.Logf("node %q ready=%v", nodeName, ready) + return ready, nil + }).WithTimeout(timeout).WithPolling(polling).Should(gomega.BeTrue(), "post reboot/2: cannot get readiness status after reboot for node %q", nodeName) + + util.Logf("%s: node %q: reported ready", tag, nodeName) +} diff --git a/test/e2e/util/ready/pod.go b/test/e2e/util/ready/pod.go new file mode 100644 index 0000000000..2717dec6c5 --- /dev/null +++ b/test/e2e/util/ready/pod.go @@ -0,0 +1,18 @@ +package ready + +import ( + corev1 "k8s.io/api/core/v1" +) + +func Pod(pod corev1.Pod) bool { + if pod.Status.Phase != corev1.PodRunning { + return false + } + for _, cond := range pod.Status.Conditions { + if cond.Type != corev1.PodReady { + continue + } + return cond.Status == corev1.ConditionTrue + } + return false +} diff --git a/test/e2e/util/util.go b/test/e2e/util/util.go index cf366d09b0..843bede41e 100644 --- 
a/test/e2e/util/util.go
+++ b/test/e2e/util/util.go
@@ -59,6 +59,27 @@ func GetNodesByRole(cs *framework.ClientSet, role string) ([]corev1.Node, error)
 	return nodeList, nil
 }
+// GetMachineConfigDaemonForNode returns the machine-config-daemon pod that runs on the specified node
+func GetMachineConfigDaemonForNode(cs *framework.ClientSet, node *corev1.Node) (*corev1.Pod, error) {
+	listOptions := metav1.ListOptions{
+		FieldSelector: fields.SelectorFromSet(fields.Set{"spec.nodeName": node.Name}).String(),
+		LabelSelector: labels.SelectorFromSet(labels.Set{"k8s-app": "machine-config-daemon"}).String(),
+	}
+
+	podList, err := cs.Pods("openshift-machine-config-operator").List(context.TODO(), listOptions)
+	if err != nil {
+		return nil, fmt.Errorf("couldn't get a list of machine-config-daemon Pods: %v", err)
+	}
+
+	if len(podList.Items) != 1 {
+		if len(podList.Items) == 0 {
+			return nil, fmt.Errorf("failed to find a machine-config-daemon Pod for node %s", node.Name)
+		}
+		return nil, fmt.Errorf("too many (%d) machine-config-daemon Pods for node %s", len(podList.Items), node.Name)
+	}
+	return &podList.Items[0], nil
+}
+
 // GetTunedForNode returns a Pod that runs on a given node.
 func GetTunedForNode(cs *framework.ClientSet, node *corev1.Node) (*corev1.Pod, error) {
 	listOptions := metav1.ListOptions{
@@ -130,12 +151,17 @@ func ExecAndLogCommand(name string, args ...string) (bytes.Buffer, bytes.Buffer,
 // ExecCmdInPod executes command with arguments 'cmd' in Pod 'pod'.
 func ExecCmdInPod(pod *corev1.Pod, cmd ...string) (string, error) {
-	ocArgs := []string{"rsh", "-n", ntoconfig.WatchNamespace(), pod.Name}
+	return ExecCmdInPodNamespace(ntoconfig.WatchNamespace(), pod.Name, cmd...)
+}
+
+// ExecCmdInPodNamespace executes command with arguments 'cmd' in Pod 'podNamespace/podName'.
+func ExecCmdInPodNamespace(podNamespace, podName string, cmd ...string) (string, error) {
+	ocArgs := []string{"rsh", "-n", podNamespace, podName}
 	ocArgs = append(ocArgs, cmd...)
 	stdout, stderr, err := execCommand(false, "oc", ocArgs...)
 	if err != nil {
-		return "", fmt.Errorf("failed to run %s in Pod %s:\n out=%s\n err=%s\n ret=%v", cmd, pod.Name, stdout.String(), stderr.String(), err.Error())
+		return "", fmt.Errorf("failed to run %s in pod %s/%s:\n out=%s\n err=%s\n ret=%v", cmd, podNamespace, podName, stdout.String(), stderr.String(), err.Error())
 	}
 	return stdout.String(), nil

From c8ce073f213f24e2f063711d61e4e8cfe3b399f6 Mon Sep 17 00:00:00 2001
From: Francesco Romani
Date: Mon, 8 Jul 2024 16:09:48 +0200
Subject: [PATCH 13/19] e2e: deferred: assertions to avoid unwanted changes

Add assertions to ensure deferred updates don't affect unrelated nodes.
In the e2e tests we target one of the worker nodes, so we check that the
control-plane nodes are not affected by the test changes. Checking
different MCPs would be equivalent or better, but the worker/control-plane
split is the simplest check because it is always available.
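
In sketch form, the reference-node assertion added below (the
checkAppliedConditionStaysOKForNode helper, simplified here and with
the logging omitted) boils down to a Consistently check that the
Applied condition of the control-plane Profile stays OK while the
deferred update targets a worker node:

    gomega.Consistently(func() error {
            prof, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, referenceNode.Name, metav1.GetOptions{})
            if err != nil {
                    return err
            }
            cond := findCondition(prof.Status.Conditions, tunedv1.TunedProfileApplied)
            if cond == nil {
                    return fmt.Errorf("missing status applied condition")
            }
            // the untargeted control-plane node must keep reporting the profile as applied
            return checkAppliedConditionOK(cond)
    }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed())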
Signed-off-by: Francesco Romani --- test/e2e/deferred/basic.go | 44 ++++++++++++++++++++++++------ test/e2e/deferred/operator_test.go | 29 ++++++++++++++++++++ 2 files changed, 65 insertions(+), 8 deletions(-) diff --git a/test/e2e/deferred/basic.go b/test/e2e/deferred/basic.go index 7c65993e5a..ed2b876be1 100644 --- a/test/e2e/deferred/basic.go +++ b/test/e2e/deferred/basic.go @@ -23,9 +23,12 @@ import ( var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { ginkgo.Context("when applied", func() { var ( - createdTuneds []string - targetNode *corev1.Node - targetTunedPod *corev1.Pod + createdTuneds []string + referenceNode *corev1.Node // control plane + targetNode *corev1.Node + referenceTunedPod *corev1.Pod // control plane + targetTunedPod *corev1.Pod + referenceProfile string dirPath string tunedPathVMLatency string @@ -34,12 +37,28 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { ginkgo.BeforeEach(func() { ginkgo.By("getting a list of worker nodes") - nodes, err := util.GetNodesByRole(cs, "worker") + workerNodes, err := util.GetNodesByRole(cs, "worker") gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(len(nodes)).NotTo(gomega.BeZero(), "number of worker nodes is 0") + gomega.Expect(len(workerNodes)).NotTo(gomega.BeZero(), "number of worker nodes is 0") - targetNode = &nodes[0] - ginkgo.By(fmt.Sprintf("using node %q as reference", targetNode.Name)) + targetNode = &workerNodes[0] + ginkgo.By(fmt.Sprintf("using node %q as target for workers", targetNode.Name)) + + ginkgo.By("getting a list of control-plane nodes") + controlPlaneNodes, err := util.GetNodesByRole(cs, "control-plane") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(controlPlaneNodes)).NotTo(gomega.BeZero(), "number of control plane nodes is 0") + + referenceNode = &controlPlaneNodes[0] + ginkgo.By(fmt.Sprintf("using node %q as reference control plane", referenceNode.Name)) + + referenceTunedPod, err = util.GetTunedForNode(cs, referenceNode) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(referenceTunedPod.Status.Phase).To(gomega.Equal(corev1.PodRunning)) + + referenceProfile, err = getRecommendedProfile(referenceTunedPod) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.By(fmt.Sprintf("using profile %q as reference control plane", referenceProfile)) createdTuneds = []string{} @@ -75,7 +94,7 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { targetTunedPod, err = util.GetTunedForNode(cs, targetNode) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(targetTunedPod.Status.Phase).To(gomega.Equal(corev1.PodRunning), targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID, targetTunedPod.Status.Phase) + gomega.Expect(targetTunedPod.Status.Phase).To(gomega.Equal(corev1.PodRunning)) // gather the output now before the profile is applied so we can check nothing changed verificationOutput, err := util.ExecCmdInPod(targetTunedPod, verificationCommandArgs...) 
@@ -157,6 +176,9 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { } return nil }).WithPolling(10 * time.Second).WithTimeout(2 * time.Minute).Should(gomega.Succeed()) + + // on non-targeted nodes, like control plane, nothing should have changed + checkAppliedConditionStaysOKForNode(ctx, referenceNode.Name, referenceProfile) }) ginkgo.It("should revert the profile status on removal", func(ctx context.Context) { @@ -215,6 +237,9 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { return err }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + // on non-targeted nodes, like control plane, nothing should have changed + checkAppliedConditionStaysOKForNode(ctx, referenceNode.Name, referenceProfile) + ginkgo.By(fmt.Sprintf("cluster changes rollback: %q", tunedPath)) _, _, err = util.ExecAndLogCommand("oc", "delete", "-n", ntoconfig.WatchNamespace(), "-f", tunedPath) gomega.Expect(err).ToNot(gomega.HaveOccurred()) @@ -268,6 +293,9 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) gomega.Expect(targetTunedPod.Status.Phase).To(gomega.Equal(corev1.PodRunning), targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID, targetTunedPod.Status.Phase) + // on non-targeted nodes, like control plane, nothing should have changed + checkAppliedConditionStaysOKForNode(ctx, referenceNode.Name, referenceProfile) + gomega.Eventually(func() error { curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{}) if err != nil { diff --git a/test/e2e/deferred/operator_test.go b/test/e2e/deferred/operator_test.go index 3d13f65b87..6963775b6d 100644 --- a/test/e2e/deferred/operator_test.go +++ b/test/e2e/deferred/operator_test.go @@ -1,6 +1,7 @@ package e2e import ( + "context" "encoding/json" "fmt" "os" @@ -14,9 +15,11 @@ import ( "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/klog" tunedv1 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/tuned/v1" + ntoconfig "github.com/openshift/cluster-node-tuning-operator/pkg/config" "github.com/openshift/cluster-node-tuning-operator/pkg/manifests" ntoutil "github.com/openshift/cluster-node-tuning-operator/pkg/util" "github.com/openshift/cluster-node-tuning-operator/test/e2e/util" @@ -179,3 +182,29 @@ func checkAppliedConditionOK(cond *tunedv1.ProfileStatusCondition) error { } return nil } + +func checkAppliedConditionStaysOKForNode(ctx context.Context, nodeName, expectedProfile string) { + ginkgo.GinkgoHelper() + + gomega.Consistently(func() error { + curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, nodeName, metav1.GetOptions{}) + if err != nil { + return err + } + + ginkgo.By(fmt.Sprintf("checking conditions for reference profile %q: %#v", curProf.Name, curProf.Status.Conditions)) + if len(curProf.Status.Conditions) == 0 { + return fmt.Errorf("missing status conditions") + } + + cond := findCondition(curProf.Status.Conditions, tunedv1.TunedProfileApplied) + if cond == nil { + return fmt.Errorf("missing status applied condition") + } + err = checkAppliedConditionOK(cond) + if err != nil { + util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) + } + return err + }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) +} From b7b492aeeb1b83a6dd7fe093530716f3ca0dedd0 Mon Sep 
17 00:00:00 2001 From: Francesco Romani Date: Mon, 15 Jul 2024 10:48:55 +0200 Subject: [PATCH 14/19] e2e: deferred: check revert restores node condition Extract helper code to enable verification of node state, because we are starting to see the same snippet (which can't and shouldn't fail) in few places. Add check to make sure we revert correctly to node state on profile removal. Signed-off-by: Francesco Romani --- test/e2e/deferred/basic.go | 54 ++++++++++++------------------ test/e2e/deferred/operator_test.go | 39 +++++++++++++++++++++ test/e2e/deferred/restart.go | 27 +++------------ 3 files changed, 66 insertions(+), 54 deletions(-) diff --git a/test/e2e/deferred/basic.go b/test/e2e/deferred/basic.go index ed2b876be1..76ec25fb2c 100644 --- a/test/e2e/deferred/basic.go +++ b/test/e2e/deferred/basic.go @@ -2,7 +2,6 @@ package e2e import ( "context" - "encoding/json" "fmt" "path/filepath" "strings" @@ -84,27 +83,7 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { tuned, err := loadTuned(tunedPath) gomega.Expect(err).ToNot(gomega.HaveOccurred()) - verificationCommand, ok := tuned.Annotations[verifyCommandAnnotation] - gomega.Expect(ok).To(gomega.BeTrue(), "missing verification command annotation %s", verifyCommandAnnotation) - - verificationCommandArgs := []string{} - err = json.Unmarshal([]byte(verificationCommand), &verificationCommandArgs) - gomega.Expect(verificationCommandArgs).ToNot(gomega.BeEmpty(), "missing verification command args") - ginkgo.By(fmt.Sprintf("verification command: %v", verificationCommandArgs)) - - targetTunedPod, err = util.GetTunedForNode(cs, targetNode) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(targetTunedPod.Status.Phase).To(gomega.Equal(corev1.PodRunning)) - - // gather the output now before the profile is applied so we can check nothing changed - verificationOutput, err := util.ExecCmdInPod(targetTunedPod, verificationCommandArgs...) - if err != nil { - // not available, which is actually a valid state. Let's record it. - verificationOutput = err.Error() - } else { - verificationOutput = strings.TrimSpace(verificationOutput) - } - ginkgo.By(fmt.Sprintf("verification expected output: %q", verificationOutput)) + verifCtx := mustExtractVerificationOutputAndCommand(targetNode, tuned) tunedMutated := setDeferred(tuned.DeepCopy()) ginkgo.By(fmt.Sprintf("creating tuned object %s deferred=%v", tunedMutated.Name, ntoutil.HasDeferredUpdateAnnotation(tunedMutated.Annotations))) @@ -138,7 +117,7 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { if err != nil { util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) } - recommended, err := getRecommendedProfile(targetTunedPod) + recommended, err := getRecommendedProfile(verifCtx.targetTunedPod) if err != nil { return err } @@ -165,14 +144,14 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { return fmt.Errorf("Profile deferred=%v %s not degraded", ntoutil.HasDeferredUpdateAnnotation(curProf.Annotations), curProf.Name) } } - ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q", curProf.Name)) - out, err := util.ExecCmdInPod(targetTunedPod, verificationCommandArgs...) + ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q are not changed from pristine state", curProf.Name)) + out, err := util.ExecCmdInPod(verifCtx.targetTunedPod, verifCtx.args...) 
if err != nil { return err } out = strings.TrimSpace(out) - if out != verificationOutput { - return fmt.Errorf("got: %s; expected: %s", out, verificationOutput) + if out != verifCtx.output { + return fmt.Errorf("got: %s; expected: %s", out, verifCtx.output) } return nil }).WithPolling(10 * time.Second).WithTimeout(2 * time.Minute).Should(gomega.Succeed()) @@ -191,6 +170,9 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { tuned, err := loadTuned(tunedPath) gomega.Expect(err).ToNot(gomega.HaveOccurred()) + // gather the output now before the profile is applied so we can check nothing changed + verifCtx := mustExtractVerificationOutputAndCommand(targetNode, tuned) + tunedMutated := setDeferred(tuned.DeepCopy()) ginkgo.By(fmt.Sprintf("creating tuned object %s deferred=%v", tunedMutated.Name, ntoutil.HasDeferredUpdateAnnotation(tunedMutated.Annotations))) @@ -205,9 +187,7 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { expectedProfile := *tuned.Spec.Recommend[0].Profile ginkgo.By(fmt.Sprintf("expecting Tuned Profile %q to be picked up", expectedProfile)) - targetTunedPod, err = util.GetTunedForNode(cs, targetNode) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(targetTunedPod.Status.Phase).To(gomega.Equal(corev1.PodRunning), targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID, targetTunedPod.Status.Phase) + gomega.Expect(verifCtx.targetTunedPod.Status.Phase).To(gomega.Equal(corev1.PodRunning), "targetTunedPod %s/%s uid %s phase %s", verifCtx.targetTunedPod.Namespace, verifCtx.targetTunedPod.Name, verifCtx.targetTunedPod.UID, verifCtx.targetTunedPod.Status.Phase) gomega.Eventually(func() error { curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{}) @@ -227,7 +207,7 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { if err != nil { util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) } - recommended, err := getRecommendedProfile(targetTunedPod) + recommended, err := getRecommendedProfile(verifCtx.targetTunedPod) if err != nil { return err } @@ -263,7 +243,17 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { if err != nil { util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) } - return err + + ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q", curProf.Name)) + out, err := util.ExecCmdInPod(verifCtx.targetTunedPod, verifCtx.args...) 
+ if err != nil { + return err + } + out = strings.TrimSpace(out) + if out != verifCtx.output { + return fmt.Errorf("got: %s; expected: %s", out, verifCtx.output) + } + return nil }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) }) diff --git a/test/e2e/deferred/operator_test.go b/test/e2e/deferred/operator_test.go index 6963775b6d..d2cb16b464 100644 --- a/test/e2e/deferred/operator_test.go +++ b/test/e2e/deferred/operator_test.go @@ -208,3 +208,42 @@ func checkAppliedConditionStaysOKForNode(ctx context.Context, nodeName, expected return err }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) } + +type verificationContext struct { + output string + args []string + targetTunedPod *corev1.Pod +} + +func mustExtractVerificationOutputAndCommand(targetNode *corev1.Node, tuned *tunedv1.Tuned) verificationContext { + ginkgo.GinkgoHelper() + + verificationCommand, ok := tuned.Annotations[verifyCommandAnnotation] + gomega.Expect(ok).To(gomega.BeTrue(), "missing verification command annotation %s", verifyCommandAnnotation) + + verificationCommandArgs := []string{} + err := json.Unmarshal([]byte(verificationCommand), &verificationCommandArgs) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(verificationCommandArgs).ToNot(gomega.BeEmpty(), "missing verification command args") + ginkgo.By(fmt.Sprintf("verification command: %v", verificationCommandArgs)) + + targetTunedPod, err := util.GetTunedForNode(cs, targetNode) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(targetTunedPod.Status.Phase).To(gomega.Equal(corev1.PodRunning)) + + // gather the output now before the profile is applied so we can check nothing changed + verificationOutput, err := util.ExecCmdInPod(targetTunedPod, verificationCommandArgs...) + if err != nil { + // not available, which is actually a valid state. Let's record it. + verificationOutput = err.Error() + } else { + verificationOutput = strings.TrimSpace(verificationOutput) + } + ginkgo.By(fmt.Sprintf("verification expected output: %q", verificationOutput)) + + return verificationContext{ + output: verificationOutput, + args: verificationCommandArgs, + targetTunedPod: targetTunedPod, + } +} diff --git a/test/e2e/deferred/restart.go b/test/e2e/deferred/restart.go index 7505a99b59..eefb565ff0 100644 --- a/test/e2e/deferred/restart.go +++ b/test/e2e/deferred/restart.go @@ -2,7 +2,6 @@ package e2e import ( "context" - "encoding/json" "fmt" "path/filepath" "strings" @@ -67,23 +66,7 @@ var _ = ginkgo.Describe("[deferred][restart] Profile deferred", func() { tunedImmediate := tunedObjVMLatency - verificationCommand, ok := tunedImmediate.Annotations[verifyCommandAnnotation] - gomega.Expect(ok).To(gomega.BeTrue(), "missing verification command annotation %s", verifyCommandAnnotation) - - verificationCommandArgs := []string{} - err = json.Unmarshal([]byte(verificationCommand), &verificationCommandArgs) - gomega.Expect(verificationCommandArgs).ToNot(gomega.BeEmpty(), "missing verification command args") - ginkgo.By(fmt.Sprintf("verification command: %v", verificationCommandArgs)) - - // gather the output now before the profile is applied so we can check nothing changed - verificationOutput, err := util.ExecCmdInPod(targetTunedPod, verificationCommandArgs...) - if err != nil { - // not available, which is actually a valid state. Let's record it. 
- verificationOutput = err.Error() - } else { - verificationOutput = strings.TrimSpace(verificationOutput) - } - ginkgo.By(fmt.Sprintf("verification expected output: %q", verificationOutput)) + verifCtx := mustExtractVerificationOutputAndCommand(targetNode, tunedImmediate) tunedMutated := setDeferred(tunedImmediate.DeepCopy()) ginkgo.By(fmt.Sprintf("creating tuned object %s deferred=%v", tunedMutated.Name, ntoutil.HasDeferredUpdateAnnotation(tunedMutated.Annotations))) @@ -180,14 +163,14 @@ var _ = ginkgo.Describe("[deferred][restart] Profile deferred", func() { } } - ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q", curProf.Name)) - out, err := util.ExecCmdInPod(targetTunedPod, verificationCommandArgs...) + ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q did not change over pristine status", curProf.Name)) + out, err := util.ExecCmdInPod(targetTunedPod, verifCtx.args...) if err != nil { return err } out = strings.TrimSpace(out) - if out != verificationOutput { - return fmt.Errorf("got: %s; expected: %s", out, verificationOutput) + if out != verifCtx.output { + return fmt.Errorf("got: %s; expected: %s", out, verifCtx.output) } return nil }).WithPolling(10 * time.Second).WithTimeout(2 * time.Minute).Should(gomega.Succeed()) From 73855b9bafd7b95a217db7b415ae20e84b1b691f Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Mon, 15 Jul 2024 11:00:27 +0200 Subject: [PATCH 15/19] e2e: deferred: check restore after restart Add test to verify that node state is correctly restored after a deferred update is applied (requires a restart) and then removed Signed-off-by: Francesco Romani --- test/e2e/deferred/restart.go | 142 +++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) diff --git a/test/e2e/deferred/restart.go b/test/e2e/deferred/restart.go index eefb565ff0..39b9c02c64 100644 --- a/test/e2e/deferred/restart.go +++ b/test/e2e/deferred/restart.go @@ -27,7 +27,9 @@ var _ = ginkgo.Describe("[deferred][restart] Profile deferred", func() { targetNode *corev1.Node dirPath string + tunedPathSHMMNI string tunedPathVMLatency string + tunedObjSHMMNI *tunedv1.Tuned tunedObjVMLatency *tunedv1.Tuned ) @@ -45,6 +47,10 @@ var _ = ginkgo.Describe("[deferred][restart] Profile deferred", func() { dirPath, err = getCurrentDirPath() gomega.Expect(err).ToNot(gomega.HaveOccurred()) + tunedPathSHMMNI = filepath.Join(dirPath, tunedSHMMNI) + tunedObjSHMMNI, err = loadTuned(tunedPathSHMMNI) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + tunedPathVMLatency = filepath.Join(dirPath, tunedVMLatency) tunedObjVMLatency, err = loadTuned(tunedPathVMLatency) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -275,6 +281,142 @@ var _ = ginkgo.Describe("[deferred][restart] Profile deferred", func() { return nil }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) }) + + ginkgo.It("should be reverted once applied and the node state should be restored", func(ctx context.Context) { + tunedImmediate := tunedObjSHMMNI + + verifications := extractVerifications(tunedImmediate) + gomega.Expect(len(verifications)).To(gomega.Equal(1), "unexpected verification count, check annotations") + + ginkgo.By(fmt.Sprintf("getting the tuned pod running on %q", targetNode.Name)) + targetTunedPod, err := util.GetTunedForNode(cs, targetNode) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.By(fmt.Sprintf("got the tuned pod running on %q: %s/%s %s", targetNode.Name, targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID)) + + verifCtx := 
mustExtractVerificationOutputAndCommand(targetNode, tunedImmediate) + + tunedMutated := setDeferred(tunedImmediate.DeepCopy()) + ginkgo.By(fmt.Sprintf("creating tuned object %s deferred=%v", tunedMutated.Name, ntoutil.HasDeferredUpdateAnnotation(tunedMutated.Annotations))) + + _, err = cs.Tuneds(ntoconfig.WatchNamespace()).Create(ctx, tunedMutated, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + createdTuneds = prepend(createdTuneds, tunedPathSHMMNI) // we need the path, not the name + + gomega.Expect(tunedMutated.Spec.Recommend).ToNot(gomega.BeEmpty(), "tuned %q has empty recommendations", tunedMutated.Name) + gomega.Expect(tunedMutated.Spec.Recommend[0].Profile).ToNot(gomega.BeNil(), "tuned %q has empty recommended tuned profile", tunedMutated.Name) + expectedProfile := *tunedMutated.Spec.Recommend[0].Profile + ginkgo.By(fmt.Sprintf("expecting Tuned Profile %q to be picked up", expectedProfile)) + + gomega.Eventually(func() error { + curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{}) + if err != nil { + return err + } + ginkgo.By(fmt.Sprintf("checking profile for target node %q matches expectations about %q", curProf.Name, expectedProfile)) + + if len(curProf.Status.Conditions) == 0 { + return fmt.Errorf("missing status conditions") + } + cond := findCondition(curProf.Status.Conditions, tunedv1.TunedProfileApplied) + if cond == nil { + return fmt.Errorf("missing status applied condition") + } + err = checkAppliedConditionDeferred(cond, expectedProfile) + if err != nil { + util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) + } + return err + }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + + ginkgo.By(fmt.Sprintf("getting the machine config daemon pod running on %q", targetNode.Name)) + targetMCDPod, err := util.GetMachineConfigDaemonForNode(cs, targetNode) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.By(fmt.Sprintf("got the machine config daemon pod running on %q: %s/%s %s", targetNode.Name, targetMCDPod.Namespace, targetMCDPod.Name, targetMCDPod.UID)) + + ginkgo.By(fmt.Sprintf("restarting the worker node on which the tuned was running on %q", targetNode.Name)) + _, err = util.ExecCmdInPodNamespace(targetMCDPod.Namespace, targetMCDPod.Name, "chroot", "/rootfs", "systemctl", "reboot") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // Very generous timeout. 
On baremetal a reboot can take a long time + ready.WaitNodeOrFail(cs, "post-reboot", targetNode.Name, 20*time.Minute, 5*time.Second) + + gomega.Eventually(func() error { + curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{}) + if err != nil { + return err + } + ginkgo.By(fmt.Sprintf("checking profile for target node %q matches expectations about %q", curProf.Name, expectedProfile)) + + if len(curProf.Status.Conditions) == 0 { + return fmt.Errorf("missing status conditions") + } + cond := findCondition(curProf.Status.Conditions, tunedv1.TunedProfileApplied) + if cond == nil { + return fmt.Errorf("missing status applied condition") + } + err = checkAppliedConditionOK(cond) + if err != nil { + util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) + } + return err + }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + + ginkgo.By(fmt.Sprintf("getting again the tuned pod running on %q", targetNode.Name)) + targetTunedPod, err = util.GetTunedForNode(cs, targetNode) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.By(fmt.Sprintf("got again the tuned pod running on %q: %s/%s %s", targetNode.Name, targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID)) + + gomega.Consistently(func() error { + ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q", targetNode.Name)) + for _, verif := range verifications { + out, err := util.ExecCmdInPod(targetTunedPod, verif.command...) + if err != nil { + return err + } + out = strings.TrimSpace(out) + if out != verif.output { + return fmt.Errorf("got: %s; expected: %s", out, verif.output) + } + } + return nil + }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + + ginkgo.By(fmt.Sprintf("cluster changes rollback: %q", tunedPathSHMMNI)) + _, _, err = util.ExecAndLogCommand("oc", "delete", "-n", ntoconfig.WatchNamespace(), "-f", tunedPathSHMMNI) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + _, createdTuneds, _ = popleft(createdTuneds) + gomega.Eventually(func() error { + curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{}) + if err != nil { + return err + } + ginkgo.By(fmt.Sprintf("checking profile for target node %q matches expectations about %q", curProf.Name, expectedProfile)) + + if len(curProf.Status.Conditions) == 0 { + return fmt.Errorf("missing status conditions") + } + cond := findCondition(curProf.Status.Conditions, tunedv1.TunedProfileApplied) + if cond == nil { + return fmt.Errorf("missing status applied condition") + } + err = checkAppliedConditionOK(cond) + if err != nil { + util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) + } + + ginkgo.By("checking real node conditions are restored pristine") + out, err := util.ExecCmdInPod(targetTunedPod, verifCtx.args...) 
+ if err != nil { + return err + } + out = strings.TrimSpace(out) + if out != verifCtx.output { + return fmt.Errorf("got: %s; expected: %s", out, verifCtx.output) + } + return nil + }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) + }) }) }) }) From d0a690a69dd8774d4d6ad4887e97f6ad25310060 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Mon, 15 Jul 2024 17:41:29 +0200 Subject: [PATCH 16/19] e2e: push down utilities enable to add another basic rollback test Signed-off-by: Francesco Romani --- test/e2e/deferred/basic.go | 43 +++++++++---------- test/e2e/deferred/non_regression.go | 4 +- test/e2e/deferred/operator_test.go | 60 -------------------------- test/e2e/deferred/restart.go | 30 ++++++------- test/e2e/util/util.go | 21 ++++++++++ test/e2e/util/verification.go | 65 +++++++++++++++++++++++++++++ 6 files changed, 126 insertions(+), 97 deletions(-) create mode 100644 test/e2e/util/verification.go diff --git a/test/e2e/deferred/basic.go b/test/e2e/deferred/basic.go index 76ec25fb2c..516bbb6a7c 100644 --- a/test/e2e/deferred/basic.go +++ b/test/e2e/deferred/basic.go @@ -26,7 +26,6 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { referenceNode *corev1.Node // control plane targetNode *corev1.Node referenceTunedPod *corev1.Pod // control plane - targetTunedPod *corev1.Pod referenceProfile string dirPath string @@ -61,11 +60,11 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { createdTuneds = []string{} - dirPath, err = getCurrentDirPath() + dirPath, err = util.GetCurrentDirPath() gomega.Expect(err).ToNot(gomega.HaveOccurred()) tunedPathVMLatency = filepath.Join(dirPath, tunedVMLatency) - tunedObjVMLatency, err = loadTuned(tunedPathVMLatency) + tunedObjVMLatency, err = util.LoadTuned(tunedPathVMLatency) gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) @@ -80,10 +79,11 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { tunedPath := filepath.Join(dirPath, tunedSHMMNI) ginkgo.By(fmt.Sprintf("loading tuned data from %s (basepath=%s)", tunedPath, dirPath)) - tuned, err := loadTuned(tunedPath) + tuned, err := util.LoadTuned(tunedPath) gomega.Expect(err).ToNot(gomega.HaveOccurred()) - verifCtx := mustExtractVerificationOutputAndCommand(targetNode, tuned) + verifData := util.MustExtractVerificationOutputAndCommand(cs, targetNode, tuned) + gomega.Expect(verifData.OutputCurrent).ToNot(gomega.Equal(verifData.OutputExpected), "current output %q already matches expected %q", verifData.OutputCurrent, verifData.OutputExpected) tunedMutated := setDeferred(tuned.DeepCopy()) ginkgo.By(fmt.Sprintf("creating tuned object %s deferred=%v", tunedMutated.Name, ntoutil.HasDeferredUpdateAnnotation(tunedMutated.Annotations))) @@ -117,7 +117,7 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { if err != nil { util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) } - recommended, err := getRecommendedProfile(verifCtx.targetTunedPod) + recommended, err := getRecommendedProfile(verifData.TargetTunedPod) if err != nil { return err } @@ -145,13 +145,13 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { } } ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q are not changed from pristine state", curProf.Name)) - out, err := util.ExecCmdInPod(verifCtx.targetTunedPod, verifCtx.args...) 
+ out, err := util.ExecCmdInPod(verifData.TargetTunedPod, verifData.CommandArgs...) if err != nil { return err } out = strings.TrimSpace(out) - if out != verifCtx.output { - return fmt.Errorf("got: %s; expected: %s", out, verifCtx.output) + if out != verifData.OutputCurrent { + return fmt.Errorf("got: %s; expected: %s", out, verifData.OutputCurrent) } return nil }).WithPolling(10 * time.Second).WithTimeout(2 * time.Minute).Should(gomega.Succeed()) @@ -161,17 +161,18 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { }) ginkgo.It("should revert the profile status on removal", func(ctx context.Context) { - dirPath, err := getCurrentDirPath() + dirPath, err := util.GetCurrentDirPath() gomega.Expect(err).ToNot(gomega.HaveOccurred()) tunedPath := filepath.Join(dirPath, tunedSHMMNI) ginkgo.By(fmt.Sprintf("loading tuned data from %s (basepath=%s)", tunedPath, dirPath)) - tuned, err := loadTuned(tunedPath) + tuned, err := util.LoadTuned(tunedPath) gomega.Expect(err).ToNot(gomega.HaveOccurred()) // gather the output now before the profile is applied so we can check nothing changed - verifCtx := mustExtractVerificationOutputAndCommand(targetNode, tuned) + verifData := util.MustExtractVerificationOutputAndCommand(cs, targetNode, tuned) + gomega.Expect(verifData.OutputCurrent).ToNot(gomega.Equal(verifData.OutputExpected), "current output %q already matches expected %q", verifData.OutputCurrent, verifData.OutputExpected) tunedMutated := setDeferred(tuned.DeepCopy()) ginkgo.By(fmt.Sprintf("creating tuned object %s deferred=%v", tunedMutated.Name, ntoutil.HasDeferredUpdateAnnotation(tunedMutated.Annotations))) @@ -187,7 +188,7 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { expectedProfile := *tuned.Spec.Recommend[0].Profile ginkgo.By(fmt.Sprintf("expecting Tuned Profile %q to be picked up", expectedProfile)) - gomega.Expect(verifCtx.targetTunedPod.Status.Phase).To(gomega.Equal(corev1.PodRunning), "targetTunedPod %s/%s uid %s phase %s", verifCtx.targetTunedPod.Namespace, verifCtx.targetTunedPod.Name, verifCtx.targetTunedPod.UID, verifCtx.targetTunedPod.Status.Phase) + gomega.Expect(verifData.TargetTunedPod.Status.Phase).To(gomega.Equal(corev1.PodRunning), "TargetTunedPod %s/%s uid %s phase %s", verifData.TargetTunedPod.Namespace, verifData.TargetTunedPod.Name, verifData.TargetTunedPod.UID, verifData.TargetTunedPod.Status.Phase) gomega.Eventually(func() error { curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{}) @@ -207,7 +208,7 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { if err != nil { util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) } - recommended, err := getRecommendedProfile(verifCtx.targetTunedPod) + recommended, err := getRecommendedProfile(verifData.TargetTunedPod) if err != nil { return err } @@ -245,24 +246,24 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { } ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q", curProf.Name)) - out, err := util.ExecCmdInPod(verifCtx.targetTunedPod, verifCtx.args...) + out, err := util.ExecCmdInPod(verifData.TargetTunedPod, verifData.CommandArgs...) 
if err != nil { return err } out = strings.TrimSpace(out) - if out != verifCtx.output { - return fmt.Errorf("got: %s; expected: %s", out, verifCtx.output) + if out != verifData.OutputCurrent { + return fmt.Errorf("got: %s; expected: %s", out, verifData.OutputCurrent) } return nil }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) }) ginkgo.It("should be overridden by another deferred update", func(ctx context.Context) { - dirPath, err := getCurrentDirPath() + dirPath, err := util.GetCurrentDirPath() gomega.Expect(err).ToNot(gomega.HaveOccurred()) tunedPathSHMMNI := filepath.Join(dirPath, tunedSHMMNI) - tunedDeferred, err := loadTuned(tunedPathSHMMNI) + tunedDeferred, err := util.LoadTuned(tunedPathSHMMNI) gomega.Expect(err).ToNot(gomega.HaveOccurred()) tunedMutated := setDeferred(tunedDeferred.DeepCopy()) @@ -279,7 +280,7 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { expectedProfile := *tunedMutated.Spec.Recommend[0].Profile ginkgo.By(fmt.Sprintf("expecting Tuned Profile %q to be picked up", expectedProfile)) - targetTunedPod, err = util.GetTunedForNode(cs, targetNode) + targetTunedPod, err := util.GetTunedForNode(cs, targetNode) gomega.Expect(err).NotTo(gomega.HaveOccurred()) gomega.Expect(targetTunedPod.Status.Phase).To(gomega.Equal(corev1.PodRunning), targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID, targetTunedPod.Status.Phase) @@ -375,7 +376,7 @@ var _ = ginkgo.Describe("[deferred][profile_status] Profile deferred", func() { expectedProfile := *tunedMutated.Spec.Recommend[0].Profile ginkgo.By(fmt.Sprintf("expecting Tuned Profile %q to be picked up", expectedProfile)) - targetTunedPod, err = util.GetTunedForNode(cs, targetNode) + targetTunedPod, err := util.GetTunedForNode(cs, targetNode) gomega.Expect(err).NotTo(gomega.HaveOccurred()) gomega.Expect(targetTunedPod.Status.Phase).To(gomega.Equal(corev1.PodRunning), targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID, targetTunedPod.Status.Phase) diff --git a/test/e2e/deferred/non_regression.go b/test/e2e/deferred/non_regression.go index bef5c68686..4e676f9755 100644 --- a/test/e2e/deferred/non_regression.go +++ b/test/e2e/deferred/non_regression.go @@ -39,7 +39,7 @@ var _ = ginkgo.Describe("[deferred][non_regression] Profile non-deferred", func( createdTuneds = []string{} - dirPath, err = getCurrentDirPath() + dirPath, err = util.GetCurrentDirPath() gomega.Expect(err).ToNot(gomega.HaveOccurred()) tunedPathVMLatency = filepath.Join(dirPath, tunedVMLatency) @@ -54,7 +54,7 @@ var _ = ginkgo.Describe("[deferred][non_regression] Profile non-deferred", func( }) ginkgo.It("should trigger changes", func(ctx context.Context) { - tuned, err := loadTuned(tunedPathVMLatency) + tuned, err := util.LoadTuned(tunedPathVMLatency) gomega.Expect(err).ToNot(gomega.HaveOccurred()) ginkgo.By(fmt.Sprintf("creating tuned object %s deferred=%v", tuned.Name, ntoutil.HasDeferredUpdateAnnotation(tuned.Annotations))) diff --git a/test/e2e/deferred/operator_test.go b/test/e2e/deferred/operator_test.go index d2cb16b464..55af918630 100644 --- a/test/e2e/deferred/operator_test.go +++ b/test/e2e/deferred/operator_test.go @@ -4,9 +4,6 @@ import ( "context" "encoding/json" "fmt" - "os" - "path/filepath" - goruntime "runtime" "strings" "testing" "time" @@ -20,7 +17,6 @@ import ( tunedv1 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/tuned/v1" ntoconfig "github.com/openshift/cluster-node-tuning-operator/pkg/config" - 
"github.com/openshift/cluster-node-tuning-operator/pkg/manifests" ntoutil "github.com/openshift/cluster-node-tuning-operator/pkg/util" "github.com/openshift/cluster-node-tuning-operator/test/e2e/util" "github.com/openshift/cluster-node-tuning-operator/test/framework" @@ -133,23 +129,6 @@ func setDeferred(obj *tunedv1.Tuned) *tunedv1.Tuned { return obj } -func loadTuned(path string) (*tunedv1.Tuned, error) { - src, err := os.Open(path) - if err != nil { - return nil, err - } - defer src.Close() - return manifests.NewTuned(src) -} - -func getCurrentDirPath() (string, error) { - _, file, _, ok := goruntime.Caller(0) - if !ok { - return "", fmt.Errorf("cannot retrieve tests directory") - } - return filepath.Dir(file), nil -} - func findCondition(conditions []tunedv1.ProfileStatusCondition, conditionType tunedv1.ProfileConditionType) *tunedv1.ProfileStatusCondition { for _, condition := range conditions { if condition.Type == conditionType { @@ -208,42 +187,3 @@ func checkAppliedConditionStaysOKForNode(ctx context.Context, nodeName, expected return err }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) } - -type verificationContext struct { - output string - args []string - targetTunedPod *corev1.Pod -} - -func mustExtractVerificationOutputAndCommand(targetNode *corev1.Node, tuned *tunedv1.Tuned) verificationContext { - ginkgo.GinkgoHelper() - - verificationCommand, ok := tuned.Annotations[verifyCommandAnnotation] - gomega.Expect(ok).To(gomega.BeTrue(), "missing verification command annotation %s", verifyCommandAnnotation) - - verificationCommandArgs := []string{} - err := json.Unmarshal([]byte(verificationCommand), &verificationCommandArgs) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(verificationCommandArgs).ToNot(gomega.BeEmpty(), "missing verification command args") - ginkgo.By(fmt.Sprintf("verification command: %v", verificationCommandArgs)) - - targetTunedPod, err := util.GetTunedForNode(cs, targetNode) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(targetTunedPod.Status.Phase).To(gomega.Equal(corev1.PodRunning)) - - // gather the output now before the profile is applied so we can check nothing changed - verificationOutput, err := util.ExecCmdInPod(targetTunedPod, verificationCommandArgs...) - if err != nil { - // not available, which is actually a valid state. Let's record it. 
- verificationOutput = err.Error() - } else { - verificationOutput = strings.TrimSpace(verificationOutput) - } - ginkgo.By(fmt.Sprintf("verification expected output: %q", verificationOutput)) - - return verificationContext{ - output: verificationOutput, - args: verificationCommandArgs, - targetTunedPod: targetTunedPod, - } -} diff --git a/test/e2e/deferred/restart.go b/test/e2e/deferred/restart.go index 39b9c02c64..47c75b36a3 100644 --- a/test/e2e/deferred/restart.go +++ b/test/e2e/deferred/restart.go @@ -44,15 +44,15 @@ var _ = ginkgo.Describe("[deferred][restart] Profile deferred", func() { createdTuneds = []string{} - dirPath, err = getCurrentDirPath() + dirPath, err = util.GetCurrentDirPath() gomega.Expect(err).ToNot(gomega.HaveOccurred()) tunedPathSHMMNI = filepath.Join(dirPath, tunedSHMMNI) - tunedObjSHMMNI, err = loadTuned(tunedPathSHMMNI) + tunedObjSHMMNI, err = util.LoadTuned(tunedPathSHMMNI) gomega.Expect(err).NotTo(gomega.HaveOccurred()) tunedPathVMLatency = filepath.Join(dirPath, tunedVMLatency) - tunedObjVMLatency, err = loadTuned(tunedPathVMLatency) + tunedObjVMLatency, err = util.LoadTuned(tunedPathVMLatency) gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) @@ -72,7 +72,8 @@ var _ = ginkgo.Describe("[deferred][restart] Profile deferred", func() { tunedImmediate := tunedObjVMLatency - verifCtx := mustExtractVerificationOutputAndCommand(targetNode, tunedImmediate) + verifData := util.MustExtractVerificationOutputAndCommand(cs, targetNode, tunedImmediate) + gomega.Expect(verifData.OutputCurrent).ToNot(gomega.Equal(verifData.OutputExpected), "current output %q already matches expected %q", verifData.OutputCurrent, verifData.OutputExpected) tunedMutated := setDeferred(tunedImmediate.DeepCopy()) ginkgo.By(fmt.Sprintf("creating tuned object %s deferred=%v", tunedMutated.Name, ntoutil.HasDeferredUpdateAnnotation(tunedMutated.Annotations))) @@ -170,13 +171,13 @@ var _ = ginkgo.Describe("[deferred][restart] Profile deferred", func() { } ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q did not change over pristine status", curProf.Name)) - out, err := util.ExecCmdInPod(targetTunedPod, verifCtx.args...) + out, err := util.ExecCmdInPod(targetTunedPod, verifData.CommandArgs...) if err != nil { return err } out = strings.TrimSpace(out) - if out != verifCtx.output { - return fmt.Errorf("got: %s; expected: %s", out, verifCtx.output) + if out != verifData.OutputCurrent { + return fmt.Errorf("got: %s; expected: %s", out, verifData.OutputCurrent) } return nil }).WithPolling(10 * time.Second).WithTimeout(2 * time.Minute).Should(gomega.Succeed()) @@ -267,8 +268,8 @@ var _ = ginkgo.Describe("[deferred][restart] Profile deferred", func() { ginkgo.By(fmt.Sprintf("got again the tuned pod running on %q: %s/%s %s", targetNode.Name, targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID)) gomega.Consistently(func() error { - ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q", targetNode.Name)) for _, verif := range verifications { + ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q: %v -> %s", targetNode.Name, verif.command, verif.output)) out, err := util.ExecCmdInPod(targetTunedPod, verif.command...) 
if err != nil { return err @@ -293,7 +294,8 @@ var _ = ginkgo.Describe("[deferred][restart] Profile deferred", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) ginkgo.By(fmt.Sprintf("got the tuned pod running on %q: %s/%s %s", targetNode.Name, targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID)) - verifCtx := mustExtractVerificationOutputAndCommand(targetNode, tunedImmediate) + verifData := util.MustExtractVerificationOutputAndCommand(cs, targetNode, tunedImmediate) + gomega.Expect(verifData.OutputCurrent).ToNot(gomega.Equal(verifData.OutputExpected), "verification output current %q matches expected %q", verifData.OutputCurrent, verifData.OutputExpected) tunedMutated := setDeferred(tunedImmediate.DeepCopy()) ginkgo.By(fmt.Sprintf("creating tuned object %s deferred=%v", tunedMutated.Name, ntoutil.HasDeferredUpdateAnnotation(tunedMutated.Annotations))) @@ -367,8 +369,8 @@ var _ = ginkgo.Describe("[deferred][restart] Profile deferred", func() { ginkgo.By(fmt.Sprintf("got again the tuned pod running on %q: %s/%s %s", targetNode.Name, targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID)) gomega.Consistently(func() error { - ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q", targetNode.Name)) for _, verif := range verifications { + ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q: %v -> %s", targetNode.Name, verif.command, verif.output)) out, err := util.ExecCmdInPod(targetTunedPod, verif.command...) if err != nil { return err @@ -405,14 +407,14 @@ var _ = ginkgo.Describe("[deferred][restart] Profile deferred", func() { util.Logf("profile for target node %q does not match expectations about %q: %v", curProf.Name, expectedProfile, err) } - ginkgo.By("checking real node conditions are restored pristine") - out, err := util.ExecCmdInPod(targetTunedPod, verifCtx.args...) + ginkgo.By(fmt.Sprintf("checking real node conditions are restored pristine: %v -> %s", verifData.CommandArgs, verifData.OutputCurrent)) + out, err := util.ExecCmdInPod(targetTunedPod, verifData.CommandArgs...) 
if err != nil { return err } out = strings.TrimSpace(out) - if out != verifCtx.output { - return fmt.Errorf("got: %s; expected: %s", out, verifCtx.output) + if out != verifData.OutputCurrent { + return fmt.Errorf("got: %s; expected: %s", out, verifData.OutputCurrent) } return nil }).WithPolling(10 * time.Second).WithTimeout(1 * time.Minute).Should(gomega.Succeed()) diff --git a/test/e2e/util/util.go b/test/e2e/util/util.go index 843bede41e..36e85d18d8 100644 --- a/test/e2e/util/util.go +++ b/test/e2e/util/util.go @@ -4,7 +4,10 @@ import ( "bytes" "context" "fmt" + "os" "os/exec" + "path/filepath" + goruntime "runtime" "strings" "time" @@ -21,6 +24,7 @@ import ( tunedv1 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/tuned/v1" ntoconfig "github.com/openshift/cluster-node-tuning-operator/pkg/config" + "github.com/openshift/cluster-node-tuning-operator/pkg/manifests" "github.com/openshift/cluster-node-tuning-operator/test/framework" ) @@ -31,6 +35,23 @@ const ( DefaultWorkerProfile = "openshift-node" ) +func LoadTuned(path string) (*tunedv1.Tuned, error) { + src, err := os.Open(path) + if err != nil { + return nil, err + } + defer src.Close() + return manifests.NewTuned(src) +} + +func GetCurrentDirPath() (string, error) { + _, file, _, ok := goruntime.Caller(0) + if !ok { + return "", fmt.Errorf("cannot retrieve tests directory") + } + return filepath.Dir(file), nil +} + // Logf formats using the default formats for its operands and writes to // ginkgo.GinkgoWriter and a newline is appended. It returns the number of // bytes written and any write error encountered. diff --git a/test/e2e/util/verification.go b/test/e2e/util/verification.go new file mode 100644 index 0000000000..b7dc55f9ea --- /dev/null +++ b/test/e2e/util/verification.go @@ -0,0 +1,65 @@ +package util + +import ( + "encoding/json" + "fmt" + "strings" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + + tunedv1 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/tuned/v1" + "github.com/openshift/cluster-node-tuning-operator/test/framework" +) + +const ( + VerificationCommandAnnotation = "verificationCommand" + VerificationOutputAnnotation = "verificationOutput" +) + +type VerificationData struct { + OutputCurrent string + OutputExpected string + CommandArgs []string + TargetTunedPod *corev1.Pod +} + +func MustExtractVerificationOutputAndCommand(cs *framework.ClientSet, targetNode *corev1.Node, tuned *tunedv1.Tuned) VerificationData { + ginkgo.GinkgoHelper() + + verificationCommand, ok := tuned.Annotations[VerificationCommandAnnotation] + gomega.Expect(ok).To(gomega.BeTrue(), "missing verification command annotation %s", VerificationCommandAnnotation) + + verificationCommandArgs := []string{} + err := json.Unmarshal([]byte(verificationCommand), &verificationCommandArgs) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(verificationCommandArgs).ToNot(gomega.BeEmpty(), "missing verification command args") + ginkgo.By(fmt.Sprintf("verification command: %v", verificationCommandArgs)) + + verificationOutputExpected, ok := tuned.Annotations[VerificationOutputAnnotation] + gomega.Expect(ok).To(gomega.BeTrue(), "missing verification output annotation %s", VerificationOutputAnnotation) + ginkgo.By(fmt.Sprintf("verification expected output: %q", verificationOutputExpected)) + + TargetTunedPod, err := GetTunedForNode(cs, targetNode) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(TargetTunedPod.Status.Phase).To(gomega.Equal(corev1.PodRunning)) 
+ + // gather the output now before the profile is applied so we can check nothing changed + verificationOutputCurrent, err := ExecCmdInPod(TargetTunedPod, verificationCommandArgs...) + if err != nil { + // not available, which is actually a valid state. Let's record it. + verificationOutputCurrent = err.Error() + } else { + verificationOutputCurrent = strings.TrimSpace(verificationOutputCurrent) + } + ginkgo.By(fmt.Sprintf("verification current output: %q", verificationOutputCurrent)) + + return VerificationData{ + OutputCurrent: verificationOutputCurrent, + OutputExpected: verificationOutputExpected, + CommandArgs: verificationCommandArgs, + TargetTunedPod: TargetTunedPod, + } +} From c64fcfef2d97e08e7a07ad14e7f4b3b5ab81f39e Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Mon, 15 Jul 2024 17:41:43 +0200 Subject: [PATCH 17/19] e2e: basic: rollback test add basic rollback test using the same manifests we do use in deferred tests. This extra test partially overlap with existing one, and will help pinpoint deferred update test failures Signed-off-by: Francesco Romani --- test/e2e/basic/rollback.go | 124 +++++++++++++++++++++++++++++------ test/e2e/deferred/restart.go | 2 +- 2 files changed, 106 insertions(+), 20 deletions(-) diff --git a/test/e2e/basic/rollback.go b/test/e2e/basic/rollback.go index ef4e111329..5f47a9e368 100644 --- a/test/e2e/basic/rollback.go +++ b/test/e2e/basic/rollback.go @@ -3,6 +3,8 @@ package e2e import ( "context" "fmt" + "path/filepath" + "strings" "time" "github.com/onsi/ginkgo/v2" @@ -19,27 +21,37 @@ import ( // Test the functionality of the preStop container lifecycle hook -- TuneD settings rollback. var _ = ginkgo.Describe("[basic][rollback] Node Tuning Operator settings rollback", func() { const ( - profileIngress = "../../../examples/ingress.yaml" - podLabelIngress = "tuned.openshift.io/ingress" - sysctlVar = "net.ipv4.tcp_tw_reuse" - sysctlValDef = "2" // default value of 'sysctlVar' + profileSHMMNI = "../testing_manifests/deferred/tuned-basic-00.yaml" + profileIngress = "../../../examples/ingress.yaml" + podLabelIngress = "tuned.openshift.io/ingress" + sysctlTCPTWReuseVar = "net.ipv4.tcp_tw_reuse" + sysctlValDef = "2" // default value of 'sysctlTCPTWReuseVar' ) ginkgo.Context("TuneD settings rollback", func() { var ( - pod *coreapi.Pod + profilePath string + currentDirPath string + pod *coreapi.Pod ) + ginkgo.BeforeEach(func() { + var err error + currentDirPath, err = util.GetCurrentDirPath() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + profilePath = filepath.Join(currentDirPath, profileIngress) + }) + // Cleanup code to roll back cluster changes done by this test even if it fails in the middle of ginkgo.It() ginkgo.AfterEach(func() { ginkgo.By("cluster changes rollback") if pod != nil { util.ExecAndLogCommand("oc", "label", "pod", "--overwrite", "-n", ntoconfig.WatchNamespace(), pod.Name, podLabelIngress+"-") } - util.ExecAndLogCommand("oc", "delete", "-n", ntoconfig.WatchNamespace(), "-f", profileIngress) + util.ExecAndLogCommand("oc", "delete", "-n", ntoconfig.WatchNamespace(), "-f", profilePath) }) - ginkgo.It(fmt.Sprintf("%s set", sysctlVar), func() { + ginkgo.It(fmt.Sprintf("%s set", sysctlTCPTWReuseVar), func() { const ( pollInterval = 5 * time.Second waitDuration = 5 * time.Minute @@ -61,20 +73,20 @@ var _ = ginkgo.Describe("[basic][rollback] Node Tuning Operator settings rollbac err = util.WaitForProfileConditionStatus(cs, pollInterval, waitDuration, node.Name, util.GetDefaultWorkerProfile(node), tunedv1.TunedProfileApplied, 
coreapi.ConditionTrue) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By(fmt.Sprintf("ensuring the default %s value (%s) is set in Pod %s", sysctlVar, sysctlValDef, pod.Name)) - _, err = util.WaitForSysctlValueInPod(pollInterval, waitDuration, pod, sysctlVar, sysctlValDef) + ginkgo.By(fmt.Sprintf("ensuring the default %s value (%s) is set in Pod %s", sysctlTCPTWReuseVar, sysctlValDef, pod.Name)) + _, err = util.WaitForSysctlValueInPod(pollInterval, waitDuration, pod, sysctlTCPTWReuseVar, sysctlValDef) gomega.Expect(err).NotTo(gomega.HaveOccurred()) ginkgo.By(fmt.Sprintf("labelling Pod %s with label %s", pod.Name, podLabelIngress)) _, _, err = util.ExecAndLogCommand("oc", "label", "pod", "--overwrite", "-n", ntoconfig.WatchNamespace(), pod.Name, podLabelIngress+"=") gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By(fmt.Sprintf("creating custom profile %s", profileIngress)) - _, _, err = util.ExecAndLogCommand("oc", "create", "-n", ntoconfig.WatchNamespace(), "-f", profileIngress) + ginkgo.By(fmt.Sprintf("creating custom profile %s", profilePath)) + _, _, err = util.ExecAndLogCommand("oc", "create", "-n", ntoconfig.WatchNamespace(), "-f", profilePath) gomega.Expect(err).NotTo(gomega.HaveOccurred()) ginkgo.By("ensuring the custom worker node profile was set") - _, err = util.WaitForSysctlValueInPod(pollInterval, waitDuration, pod, sysctlVar, "1") + _, err = util.WaitForSysctlValueInPod(pollInterval, waitDuration, pod, sysctlTCPTWReuseVar, "1") gomega.Expect(err).NotTo(gomega.HaveOccurred()) ginkgo.By(fmt.Sprintf("deleting Pod %s", pod.Name)) @@ -94,25 +106,99 @@ var _ = ginkgo.Describe("[basic][rollback] Node Tuning Operator settings rollbac gomega.Expect(err).NotTo(gomega.HaveOccurred(), explain) // rollback = not_on_exit in tuned-main.conf file prevents settings rollback at TuneD exit - ginkgo.By(fmt.Sprintf("ensuring the custom %s value (%s) is still set in Pod %s", sysctlVar, "1", pod.Name)) - _, err = util.WaitForSysctlValueInPod(pollInterval, waitDuration, pod, sysctlVar, "1") + ginkgo.By(fmt.Sprintf("ensuring the custom %s value (%s) is still set in Pod %s", sysctlTCPTWReuseVar, "1", pod.Name)) + _, err = util.WaitForSysctlValueInPod(pollInterval, waitDuration, pod, sysctlTCPTWReuseVar, "1") gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By(fmt.Sprintf("deleting custom profile %s", profileIngress)) - _, _, err = util.ExecAndLogCommand("oc", "delete", "-n", ntoconfig.WatchNamespace(), "-f", profileIngress) + ginkgo.By(fmt.Sprintf("deleting custom profile %s", profilePath)) + _, _, err = util.ExecAndLogCommand("oc", "delete", "-n", ntoconfig.WatchNamespace(), "-f", profilePath) gomega.Expect(err).NotTo(gomega.HaveOccurred()) ginkgo.By(fmt.Sprintf("waiting for TuneD profile %s on node %s", util.GetDefaultWorkerProfile(node), node.Name)) err = util.WaitForProfileConditionStatus(cs, pollInterval, waitDuration, node.Name, util.GetDefaultWorkerProfile(node), tunedv1.TunedProfileApplied, coreapi.ConditionTrue) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = util.ExecCmdInPod(pod, "sysctl", fmt.Sprintf("%s=%s", sysctlVar, sysctlValDef)) + _, err = util.ExecCmdInPod(pod, "sysctl", fmt.Sprintf("%s=%s", sysctlTCPTWReuseVar, sysctlValDef)) gomega.Expect(err).NotTo(gomega.HaveOccurred()) // sysctl exits 1 when it fails to configure a kernel parameter at runtime - ginkgo.By(fmt.Sprintf("ensuring the default %s value (%s) is set in Pod %s", sysctlVar, sysctlValDef, pod.Name)) - _, err = util.WaitForSysctlValueInPod(pollInterval, waitDuration, pod, sysctlVar, 
sysctlValDef) + ginkgo.By(fmt.Sprintf("ensuring the default %s value (%s) is set in Pod %s", sysctlTCPTWReuseVar, sysctlValDef, pod.Name)) + _, err = util.WaitForSysctlValueInPod(pollInterval, waitDuration, pod, sysctlTCPTWReuseVar, sysctlValDef) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + + ginkgo.Context("TuneD settings rollback without pod restart", func() { + var ( + profilePath string + currentDirPath string + ) + + ginkgo.BeforeEach(func() { + var err error + currentDirPath, err = util.GetCurrentDirPath() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + profilePath = filepath.Join(currentDirPath, profileSHMMNI) + }) + + // Cleanup code to roll back cluster changes done by this test even if it fails in the middle of ginkgo.It() + ginkgo.AfterEach(func() { + ginkgo.By("cluster changes rollback") + util.ExecAndLogCommand("oc", "delete", "-n", ntoconfig.WatchNamespace(), "-f", profilePath) + }) + + ginkgo.It("kernel.shmmni set", func() { + const ( + pollInterval = 5 * time.Second + waitDuration = 5 * time.Minute + ) + + tuned, err := util.LoadTuned(profilePath) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.By("getting a list of worker nodes") + nodes, err := util.GetNodesByRole(cs, "worker") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(nodes)).NotTo(gomega.BeZero(), "number of worker nodes is 0") + + node := &nodes[0] + defaultProfileName := util.GetDefaultWorkerProfile(node) + + // Expect the default worker node profile applied prior to getting any current values. + ginkgo.By(fmt.Sprintf("waiting for TuneD profile %s on node %s", defaultProfileName, node.Name)) + err = util.WaitForProfileConditionStatus(cs, pollInterval, waitDuration, node.Name, defaultProfileName, tunedv1.TunedProfileApplied, coreapi.ConditionTrue) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By(fmt.Sprintf("checking the pristine state on node %s", node.Name)) + // before the test profile is applied, current node state matches the pristine node state + verifData := util.MustExtractVerificationOutputAndCommand(cs, node, tuned) + gomega.Expect(verifData.OutputCurrent).ToNot(gomega.Equal(verifData.OutputExpected), "current pristine output %q already matches expected %q", verifData.OutputCurrent, verifData.OutputExpected) + + ginkgo.By(fmt.Sprintf("creating custom profile %s", profilePath)) + _, _, err = util.ExecAndLogCommand("oc", "create", "-n", ntoconfig.WatchNamespace(), "-f", profilePath) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.By(fmt.Sprintf("waiting for TuneD profile %s on node %s", "test-shmmni", node.Name)) + err = util.WaitForProfileConditionStatus(cs, pollInterval, waitDuration, node.Name, "test-shmmni", tunedv1.TunedProfileApplied, coreapi.ConditionTrue) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("ensuring the custom worker node profile was set") + out, err := util.ExecCmdInPod(verifData.TargetTunedPod, verifData.CommandArgs...) 
+ gomega.Expect(err).NotTo(gomega.HaveOccurred()) + out = strings.TrimSpace(out) + gomega.Expect(out).To(gomega.Equal(verifData.OutputExpected), "command %q output %q does not match desired %q", verifData.CommandArgs, out, verifData.OutputExpected) + + ginkgo.By(fmt.Sprintf("deleting custom profile %s", profilePath)) + _, _, err = util.ExecAndLogCommand("oc", "delete", "-n", ntoconfig.WatchNamespace(), "-f", profilePath) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By(fmt.Sprintf("waiting for TuneD profile %s on node %s", defaultProfileName, node.Name)) + err = util.WaitForProfileConditionStatus(cs, pollInterval, waitDuration, node.Name, defaultProfileName, tunedv1.TunedProfileApplied, coreapi.ConditionTrue) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By(fmt.Sprintf("ensuring the pristine state is restored on node %s", node.Name)) + out, err = util.ExecCmdInPod(verifData.TargetTunedPod, verifData.CommandArgs...) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + out = strings.TrimSpace(out) + gomega.Expect(out).To(gomega.Equal(verifData.OutputCurrent), "command %q output %q does not match pristine %q", verifData.CommandArgs, out, verifData.OutputCurrent) }) }) }) diff --git a/test/e2e/deferred/restart.go b/test/e2e/deferred/restart.go index 47c75b36a3..b1062d83b5 100644 --- a/test/e2e/deferred/restart.go +++ b/test/e2e/deferred/restart.go @@ -170,7 +170,7 @@ var _ = ginkgo.Describe("[deferred][restart] Profile deferred", func() { } } - ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q did not change over pristine status", curProf.Name)) + ginkgo.By(fmt.Sprintf("checking real node conditions for profile %q did not change over pristine status using %v", curProf.Name, verifData.CommandArgs)) out, err := util.ExecCmdInPod(targetTunedPod, verifData.CommandArgs...) 
if err != nil { return err From 426451af8e24552b16c1c339ddbf04b6469f766d Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Tue, 16 Jul 2024 17:29:00 +0200 Subject: [PATCH 18/19] makefile: ensure bindata 0_config requires now `pkg/manifests`, so we need to make sure bindata is generated Signed-off-by: Francesco Romani --- Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index bb9e36fec3..1cfe47508b 100644 --- a/Makefile +++ b/Makefile @@ -218,7 +218,7 @@ cluster-deploy-pao: pao-functests: cluster-label-worker-cnf pao-functests-only .PHONY: pao-functests-only -pao-functests-only: +pao-functests-only: $(BINDATA) @echo "Cluster Version" hack/show-cluster-version.sh hack/run-test.sh -t "test/e2e/performanceprofile/functests/0_config test/e2e/performanceprofile/functests/1_performance test/e2e/performanceprofile/functests/6_mustgather_testing test/e2e/performanceprofile/functests/10_performance_ppc" -p "-v -r --fail-fast --flake-attempts=2 --junit-report=report.xml" -m "Running Functional Tests" @@ -227,7 +227,7 @@ pao-functests-only: pao-functests-updating-profile: cluster-label-worker-cnf pao-functests-update-only .PHONY: pao-functests-update-only -pao-functests-update-only: +pao-functests-update-only: $(BINDATA) @echo "Cluster Version" hack/show-cluster-version.sh hack/run-test.sh -t "test/e2e/performanceprofile/functests/0_config test/e2e/performanceprofile/functests/2_performance_update test/e2e/performanceprofile/functests/3_performance_status test/e2e/performanceprofile/functests/7_performance_kubelet_node test/e2e/performanceprofile/functests/9_reboot" -p "-v -r --fail-fast --flake-attempts=2 --timeout=5h --junit-report=report.xml" -m "Running Functional Tests" @@ -236,25 +236,25 @@ pao-functests-update-only: pao-functests-performance-workloadhints: cluster-label-worker-cnf pao-functests-performance-workloadhints-only .PHONY: pao-functests-performance-workloadhints-only -pao-functests-performance-workloadhints-only: +pao-functests-performance-workloadhints-only: $(BINDATA) @echo "Cluster Version" hack/show-cluster-version.sh hack/run-test.sh -t "test/e2e/performanceprofile/functests/0_config test/e2e/performanceprofile/functests/8_performance_workloadhints" -p "-v -r --fail-fast --flake-attempts=2 --timeout=5h --junit-report=report.xml" -m "Running Functional WorkloadHints Tests" .PHONY: pao-functests-latency-testing -pao-functests-latency-testing: dist-latency-tests +pao-functests-latency-testing: dist-latency-tests $(BINDATA) @echo "Cluster Version" hack/show-cluster-version.sh hack/run-test.sh -t "./test/e2e/performanceprofile/functests/0_config ./test/e2e/performanceprofile/functests/5_latency_testing" -p "-v -r --fail-fast --flake-attempts=2 --timeout=5h --junit-report=report.xml" -m "Running Functionalconfiguration latency Tests" .PHONY: pao-functests-mixedcpus -pao-functests-mixedcpus: +pao-functests-mixedcpus: $(BINDATA) @echo "Cluster Version" hack/show-cluster-version.sh hack/run-test.sh -t "./test/e2e/performanceprofile/functests/0_config ./test/e2e/performanceprofile/functests/11_mixedcpus" -p "-v -r --fail-fast --flake-attempts=2 --junit-report=report.xml" -m "Running MixedCPUs Tests" .PHONY: pao-functests-hypershift -pao-functests-hypershift: +pao-functests-hypershift: $(BINDATA) @echo "Cluster Version" hack/show-cluster-version.sh hack/run-test.sh -t "./test/e2e/performanceprofile/functests/0_config" -p "-vv -r --fail-fast --flake-attempts=2 --junit-report=report.xml" -m "Running Functional Tests over 
Hypershift" From c9d9c789bee3c388dc3616506c6753fc06b0f920 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Wed, 17 Jul 2024 11:44:40 +0200 Subject: [PATCH 19/19] e2e: util: rename ready->wait rename the "ready" util package to "ready" and change the function signatures for better readability. Signed-off-by: Francesco Romani --- test/e2e/deferred/restart.go | 8 ++++---- test/e2e/util/{ready => wait}/node.go | 16 +++++++++------- test/e2e/util/{ready => wait}/pod.go | 4 ++-- 3 files changed, 15 insertions(+), 13 deletions(-) rename test/e2e/util/{ready => wait}/node.go (73%) rename test/e2e/util/{ready => wait}/pod.go (84%) diff --git a/test/e2e/deferred/restart.go b/test/e2e/deferred/restart.go index b1062d83b5..552f1b4ff4 100644 --- a/test/e2e/deferred/restart.go +++ b/test/e2e/deferred/restart.go @@ -17,7 +17,7 @@ import ( ntoconfig "github.com/openshift/cluster-node-tuning-operator/pkg/config" ntoutil "github.com/openshift/cluster-node-tuning-operator/pkg/util" "github.com/openshift/cluster-node-tuning-operator/test/e2e/util" - "github.com/openshift/cluster-node-tuning-operator/test/e2e/util/ready" + "github.com/openshift/cluster-node-tuning-operator/test/e2e/util/wait" ) var _ = ginkgo.Describe("[deferred][restart] Profile deferred", func() { @@ -145,7 +145,7 @@ var _ = ginkgo.Describe("[deferred][restart] Profile deferred", func() { if targetTunedPod.UID == oldTunedPodUID { return fmt.Errorf("pod %s/%s not refreshed old UID %q current UID %q", targetTunedPod.Namespace, targetTunedPod.Name, oldTunedPodUID, targetTunedPod.UID) } - if !ready.Pod(*targetTunedPod) { + if !wait.PodReady(*targetTunedPod) { return fmt.Errorf("pod %s/%s (%s) not ready", targetTunedPod.Namespace, targetTunedPod.Name, targetTunedPod.UID) } return nil @@ -239,7 +239,7 @@ var _ = ginkgo.Describe("[deferred][restart] Profile deferred", func() { _, err = util.ExecCmdInPodNamespace(targetMCDPod.Namespace, targetMCDPod.Name, "chroot", "/rootfs", "systemctl", "reboot") gomega.Expect(err).NotTo(gomega.HaveOccurred()) // Very generous timeout. On baremetal a reboot can take a long time - ready.WaitNodeOrFail(cs, "post-reboot", targetNode.Name, 20*time.Minute, 5*time.Second) + wait.NodeBecomeReadyOrFail(cs, "post-reboot", targetNode.Name, 20*time.Minute, 5*time.Second) gomega.Eventually(func() error { curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{}) @@ -340,7 +340,7 @@ var _ = ginkgo.Describe("[deferred][restart] Profile deferred", func() { _, err = util.ExecCmdInPodNamespace(targetMCDPod.Namespace, targetMCDPod.Name, "chroot", "/rootfs", "systemctl", "reboot") gomega.Expect(err).NotTo(gomega.HaveOccurred()) // Very generous timeout. 
On baremetal a reboot can take a long time
- ready.WaitNodeOrFail(cs, "post-reboot", targetNode.Name, 20*time.Minute, 5*time.Second)
+ wait.NodeBecomeReadyOrFail(cs, "post-reboot", targetNode.Name, 20*time.Minute, 5*time.Second)
 gomega.Eventually(func() error {
 curProf, err := cs.Profiles(ntoconfig.WatchNamespace()).Get(ctx, targetNode.Name, metav1.GetOptions{})
diff --git a/test/e2e/util/ready/node.go b/test/e2e/util/wait/node.go
similarity index 73%
rename from test/e2e/util/ready/node.go
rename to test/e2e/util/wait/node.go
index 5e520d3d0d..470845b169 100644
--- a/test/e2e/util/ready/node.go
+++ b/test/e2e/util/wait/node.go
@@ -1,4 +1,4 @@
-package ready
+package wait
 import (
 "context"
@@ -14,7 +14,7 @@ import (
 "github.com/openshift/cluster-node-tuning-operator/test/framework"
 )
-func Node(node corev1.Node) bool {
+func NodeReady(node corev1.Node) bool {
 for _, c := range node.Status.Conditions {
 if c.Type == corev1.NodeReady {
 return c.Status == corev1.ConditionTrue
@@ -23,7 +23,9 @@ func Node(node corev1.Node) bool {
 return false
 }
-func WaitNodeOrFail(cs *framework.ClientSet, tag, nodeName string, timeout, polling time.Duration) {
+// NodeBecomeReadyOrFail waits for node nodeName to change its status condition from NodeReady == false
+// to NodeReady == true with timeout timeout and polling interval polling.
+func NodeBecomeReadyOrFail(cs *framework.ClientSet, tag, nodeName string, timeout, polling time.Duration) {
 ginkgo.GinkgoHelper()
 util.Logf("%s: waiting for node %q: to be NOT-ready", tag, nodeName)
@@ -34,10 +36,10 @@ func WaitNodeOrFail(cs *framework.ClientSet, tag, nodeName string, timeout, poll
 util.Logf("wait for node %q ready: %v", nodeName, err)
 return false, nil
 }
- ready := Node(*node)
+ ready := NodeReady(*node)
 util.Logf("node %q ready=%v", nodeName, ready)
 return !ready, nil // note "not"
- }).WithTimeout(2*time.Minute).WithPolling(polling).Should(gomega.BeTrue(), "post reboot/1: cannot get readiness status after reboot for node %q", nodeName)
+ }).WithTimeout(2*time.Minute).WithPolling(polling).Should(gomega.BeTrue(), "node unready: cannot get readiness status for node %q", nodeName)
 util.Logf("%s: waiting for node %q: to be ready", tag, nodeName)
 gomega.Eventually(func() (bool, error) {
@@ -47,10 +49,10 @@ func WaitNodeOrFail(cs *framework.ClientSet, tag, nodeName string, timeout, poll
 util.Logf("wait for node %q ready: %v", nodeName, err)
 return false, nil
 }
- ready := Node(*node)
+ ready := NodeReady(*node)
 util.Logf("node %q ready=%v", nodeName, ready)
 return ready, nil
- }).WithTimeout(timeout).WithPolling(polling).Should(gomega.BeTrue(), "post reboot/2: cannot get readiness status after reboot for node %q", nodeName)
+ }).WithTimeout(timeout).WithPolling(polling).Should(gomega.BeTrue(), "node ready: cannot get readiness status for node %q", nodeName)
 util.Logf("%s: node %q: reported ready", tag, nodeName)
 }
diff --git a/test/e2e/util/ready/pod.go b/test/e2e/util/wait/pod.go
similarity index 84%
rename from test/e2e/util/ready/pod.go
rename to test/e2e/util/wait/pod.go
index 2717dec6c5..002fb5eed2 100644
--- a/test/e2e/util/ready/pod.go
+++ b/test/e2e/util/wait/pod.go
@@ -1,10 +1,10 @@
-package ready
+package wait
 import (
 corev1 "k8s.io/api/core/v1"
 )
-func Pod(pod corev1.Pod) bool {
+func PodReady(pod corev1.Pod) bool {
 if pod.Status.Phase != corev1.PodRunning {
 return false
 }
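
Note on the verification annotations used throughout this series: util.MustExtractVerificationOutputAndCommand reads two annotations from the Tuned object, verificationCommand (a JSON-encoded argv array executed inside the tuned pod) and verificationOutput (the expected, trimmed output once the profile is applied). A minimal Go sketch of a manifest declaring them follows; the sysctl command and the "8192" value are illustrative assumptions, not values taken from the real testing_manifests.

package main

import (
	"encoding/json"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	tunedv1 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/tuned/v1"
)

func main() {
	// Hypothetical verification command; the real manifests may use a different check.
	args, err := json.Marshal([]string{"sysctl", "-n", "kernel.shmmni"})
	if err != nil {
		panic(err)
	}

	tuned := &tunedv1.Tuned{
		ObjectMeta: metav1.ObjectMeta{
			Name: "test-shmmni",
			Annotations: map[string]string{
				// Consumed by util.MustExtractVerificationOutputAndCommand:
				// the command run inside the tuned pod, as a JSON argv array.
				"verificationCommand": string(args),
				// The trimmed output expected after the profile is applied
				// (illustrative value only).
				"verificationOutput": "8192",
			},
		},
	}
	fmt.Println(tuned.Annotations)
}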