Skip to content

Commit

Permalink
Merge branch 'release/v3.0.5' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
jelemux authored and cesmarvin committed Nov 19, 2024
2 parents 82c48d9 + aa4faf0 commit 6130c7a
Show file tree
Hide file tree
Showing 27 changed files with 1,552 additions and 1,077 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

## [v3.0.5] - 2024-11-19
### Fixed
- [#113] Use retry watchers for wait steps and thus fix a bug where wait steps for component installations got canceled.

## [v3.0.4] - 2024-11-18
### Fixed
- [#115] Update remote dogu descriptor lib to avoid a nil pointer when recoverable errors occur.
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ RUN make compile-generic
FROM gcr.io/distroless/static:nonroot
LABEL maintainer="[email protected]" \
NAME="k8s-ces-setup" \
VERSION="3.0.4"
VERSION="3.0.5"

WORKDIR /

Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Set these to the desired values
ARTIFACT_ID=k8s-ces-setup
VERSION=3.0.4
VERSION=3.0.5

GOTAG?=1.23.2
MAKEFILES_VERSION=9.3.2
Expand Down
5 changes: 0 additions & 5 deletions app/setup/component/installComponentStep.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,12 @@ package component
import (
"context"
"fmt"
"github.com/cloudogu/k8s-component-operator/pkg/api/ecosystem"
v1 "github.com/cloudogu/k8s-component-operator/pkg/api/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

appcontext "github.com/cloudogu/k8s-ces-setup/app/context"
)

type componentsClient interface {
ecosystem.ComponentInterface
}

type installComponentStep struct {
client componentsClient
componentName string
Expand Down
7 changes: 7 additions & 0 deletions app/setup/component/interfaces.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package component

import "github.com/cloudogu/k8s-component-operator/pkg/api/ecosystem"

// componentsClient is the client abstraction used by the setup steps in this package.
// It embeds ecosystem.ComponentInterface from the k8s-component-operator API without adding methods of its own.
type componentsClient interface {
ecosystem.ComponentInterface
}
137 changes: 122 additions & 15 deletions app/setup/component/waitForComponentStep.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,46 @@ package component
import (
"context"
"fmt"
"github.com/cloudogu/k8s-ces-setup/app/setup/dogus"
v1 "github.com/cloudogu/k8s-component-operator/pkg/api/v1"
"github.com/cloudogu/retry-lib/retry"
"github.com/sirupsen/logrus"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/watch"
retrywatch "k8s.io/client-go/tools/watch"
"os"
"time"
)

const (
v1LabelK8sComponent = "app.kubernetes.io/name"
)

// DefaultComponentWaitTimeOut30Minutes is the default timeout.
// Since the components are actually applied unordered, we use a 30-minute timeout as default.
var DefaultComponentWaitTimeOut30Minutes = time.Second * 1800

// componentTimeOutInSecondsEnvVar contains the name of the environment variable that may replace the default component wait timeout.
// An environment variable with this name must contain the seconds as a reasonably sized integer (<= int64).
const componentTimeOutInSecondsEnvVar = "COMPONENT_TIMEOUT_SECS"

// waitForComponentStep is a setup step that waits until a component is installed and available.
type waitForComponentStep struct {
// client is used to get and watch the component resource.
client componentsClient
// labelSelector restricts the watch to the component; the constructor derives it from componentName.
labelSelector string
// namespace is stored by the constructor.
// NOTE(review): its use is not visible in this part of the file — confirm against the callers.
namespace string
// componentName is the name of the component resource to fetch and wait for.
componentName string
// timeout bounds the whole wait, including the initial retried get of the component.
timeout time.Duration
}

// NewWaitForComponentStep creates a new setup step which waits for a component with a specific label
func NewWaitForComponentStep(client componentsClient, labelSelector string, namespace string) *waitForComponentStep {
func NewWaitForComponentStep(client componentsClient, componentName string, namespace string, timeout time.Duration) *waitForComponentStep {
return &waitForComponentStep{
client: client,
labelSelector: labelSelector,
namespace: namespace,
componentName: componentName,
labelSelector: CreateComponentLabelSelector(componentName),
timeout: timeout,
}
}

Expand All @@ -29,29 +53,112 @@ func (wfcs *waitForComponentStep) GetStepDescription() string {

// PerformSetupStep implements all actions in this step
func (wfcs *waitForComponentStep) PerformSetupStep(ctx context.Context) error {
return wfcs.isComponentReady(ctx)
timeoutCtx, cancel := context.WithTimeout(ctx, wfcs.timeout)
defer cancel()
return wfcs.isComponentReady(timeoutCtx)
}

// isComponentReady does a watch on a component and returns nil if the component is installed
// isComponentReady waits for the component to exist, then watches it and returns nil once the component is installed and available
func (wfcs *waitForComponentStep) isComponentReady(ctx context.Context) error {
watch, err := wfcs.client.Watch(ctx, metav1.ListOptions{LabelSelector: wfcs.labelSelector})
var get *v1.Component
err := retry.OnErrorWithLimit(wfcs.timeout, errors.IsNotFound, func() error {
var getErr error
get, getErr = wfcs.client.Get(ctx, wfcs.componentName, metav1.GetOptions{})
if getErr != nil && !errors.IsNotFound(getErr) {
return fmt.Errorf("failed to get initial component cr %q: %w", wfcs.componentName, getErr)
}

return getErr
})

if err != nil {
return err
}

if isComponentStatusReady(get) {
return nil
}

watcher := componentReadyWatcher{client: wfcs.client, componentName: wfcs.componentName, labelSelector: wfcs.labelSelector}
_, err = retrywatch.Until(ctx, get.ResourceVersion, watcher, watcher.checkComponentStatus)
if err != nil {
return fmt.Errorf("failed to wait for component with label %q with retry watch: %w", wfcs.labelSelector, err)
}

return nil
}

// componentReadyWatcher creates label-filtered watches for a component (see Watch) and evaluates
// the received watch events for readiness (see checkComponentStatus).
// It is handed to the retry watcher so that a canceled watch can be re-created.
type componentReadyWatcher struct {
// client is used to create the watch.
client componentsClient
// componentName is only used for log output.
componentName string
// labelSelector is applied to the list options of every watch that is created.
labelSelector string
}

// Watch creates a watch for the component defined in this step.
// This function will be called initially and on every retry if the watch gets canceled from a recoverable error.
func (crw componentReadyWatcher) Watch(options metav1.ListOptions) (watch.Interface, error) {
logrus.Debugf("creating initial or retry watch for component %q", crw.componentName)
options.LabelSelector = crw.labelSelector
w, err := crw.client.Watch(context.Background(), options)
if err != nil {
return fmt.Errorf("failed to create watch on component: %w", err)
return nil, fmt.Errorf("failed to create watch for label %q: %w", crw.labelSelector, err)
}

for event := range watch.ResultChan() {
return w, nil
}

// checkComponentStatus is a condition function that will be called on every watch event received from the retry watcher.
// If it returns true, nil the watch will end.
// If it returns false, nil the watch will continue and check further events.
// If it returns an error the watch will end and will not be retried.
func (crw componentReadyWatcher) checkComponentStatus(event watch.Event) (bool, error) {
logrus.Debugf("received %q watch event for checking component ready status", event.Type)
switch event.Type {
case watch.Error:
status, ok := event.Object.(*metav1.Status)
if !ok {
return false, fmt.Errorf("failed to cast event object to status")
} else {
return false, fmt.Errorf("watch error message: %q, reason: %q", status.Message, status.Reason)
}
case watch.Added, watch.Modified:
component, ok := event.Object.(*v1.Component)
if !ok {
watch.Stop()
return fmt.Errorf("error wait for component: failed to cast event to component: selector=[%s] type=[%s]; object=[%+v]",
wfcs.labelSelector, event.Type, event.Object)
logrus.Errorf("failed to cast event object to component: selector=[%s] type=[%s]; object=[%+v]", crw.labelSelector, event.Type, event.Object)
return false, nil
}

if component.Status.Status == v1.ComponentStatusInstalled && component.Status.Health == v1.AvailableHealthStatus {
watch.Stop()
return nil
if isComponentStatusReady(component) {
return true, nil
}
return false, nil
case watch.Deleted:
return false, fmt.Errorf("abort watch because of component deletion")
default:
return false, nil
}
}

// isComponentStatusReady reports whether the given component is both installed and healthy.
// The result is logged: at info level when the component is ready, at debug level otherwise.
func isComponentStatusReady(component *v1.Component) bool {
	installed := component.Status.Status == v1.ComponentStatusInstalled
	available := component.Status.Health == v1.AvailableHealthStatus

	// Guard clause: anything short of "installed and available" counts as not ready.
	if !installed || !available {
		logrus.Debugf("component %q is not installed and not available", component.Spec.Name)
		return false
	}

	logrus.Infof("component %q is installed and available", component.Spec.Name)
	return true
}

func CreateComponentLabelSelector(name string) string {
return fmt.Sprintf("%s=%s", v1LabelK8sComponent, name)
}

// TimeoutInSeconds returns either DefaultComponentWaitTimeOut30Minutes or a positive integer if set as EnvVar
// COMPONENT_TIMEOUT_SECS. See also componentTimeOutInSecondsEnvVar
func TimeoutInSeconds() time.Duration {
defaultTimeout := DefaultComponentWaitTimeOut30Minutes
if podTimeoutRaw, ok := os.LookupEnv(componentTimeOutInSecondsEnvVar); ok {
logrus.Infof("Custom component timeout found")
return dogus.ParseTimeoutString(podTimeoutRaw, defaultTimeout)
}

return fmt.Errorf("component is not ready: watch for component aborted")
return defaultTimeout
}
Loading

0 comments on commit 6130c7a

Please sign in to comment.