Skip to content

Commit 06bb3a7

Browse files
knative-prow-robotSaschaSchwarze0HeavyWombat
authored
Ensure ContainerHealthy condition is set back to True (#15712)
Signed-off-by: Sascha Schwarze <[email protected]> Co-authored-by: Sascha Schwarze <[email protected]> Co-authored-by: Matthias Diester <[email protected]>
1 parent b17688f commit 06bb3a7

File tree

5 files changed

+124
-7
lines changed

5 files changed

+124
-7
lines changed

pkg/reconciler/autoscaling/hpa/hpa_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ func TestReconcile(t *testing.T) {
331331
Name: "nop deletion reconcile",
332332
// Test that with a DeletionTimestamp we do nothing.
333333
Objects: []runtime.Object{
334-
pa(testNamespace, testRevision, WithHPAClass, WithPADeletionTimestamp),
334+
WithDeletionTimestamp(pa(testNamespace, testRevision, WithHPAClass)),
335335
deploy(testNamespace, testRevision),
336336
},
337337
Key: key(testNamespace, testRevision),

pkg/reconciler/revision/reconcile_resources.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,10 @@ func (c *Reconciler) reconcileDeployment(ctx context.Context, rev *v1.Revision)
7676

7777
// If a container keeps crashing (no active pods in the deployment although we want some)
7878
if *deployment.Spec.Replicas > 0 && deployment.Status.AvailableReplicas == 0 {
79-
pods, err := c.kubeclient.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{LabelSelector: metav1.FormatLabelSelector(deployment.Spec.Selector)})
79+
pods, err := c.kubeclient.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{
80+
LabelSelector: metav1.FormatLabelSelector(deployment.Spec.Selector),
81+
Limit: 1,
82+
})
8083
if err != nil {
8184
logger.Errorw("Error getting pods", zap.Error(err))
8285
return nil
@@ -117,6 +120,10 @@ func (c *Reconciler) reconcileDeployment(ctx context.Context, rev *v1.Revision)
117120
}
118121
}
119122

123+
if deployment.Status.ReadyReplicas > 0 {
124+
rev.Status.MarkContainerHealthyTrue()
125+
}
126+
120127
return nil
121128
}
122129

pkg/reconciler/revision/table_test.go

Lines changed: 77 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ import (
4141
tracingconfig "knative.dev/pkg/tracing/config"
4242
autoscalingv1alpha1 "knative.dev/serving/pkg/apis/autoscaling/v1alpha1"
4343
defaultconfig "knative.dev/serving/pkg/apis/config"
44+
"knative.dev/serving/pkg/apis/serving"
4445
v1 "knative.dev/serving/pkg/apis/serving/v1"
4546
"knative.dev/serving/pkg/autoscaler/config/autoscalerconfig"
4647
servingclient "knative.dev/serving/pkg/client/injection/client"
@@ -544,7 +545,7 @@ func TestReconcile(t *testing.T) {
544545
WithRoutingState(v1.RoutingStateActive, fc),
545546
),
546547
pa("foo", "pull-backoff", WithReachabilityReachable), // pa can't be ready since deployment times out.
547-
pod(t, "foo", "pull-backoff", WithWaitingContainer("pull-backoff", "ImagePullBackoff", "can't pull it")),
548+
pod(t, "foo", "pull-backoff", WithPodCondition(corev1.PodReady, corev1.ConditionFalse, "ContainersNotReady"), WithWaitingContainer("pull-backoff", "ImagePullBackoff", "can't pull it")),
548549
timeoutDeploy(deploy(t, "foo", "pull-backoff"), "Timed out!"),
549550
image("foo", "pull-backoff"),
550551
},
@@ -570,7 +571,7 @@ func TestReconcile(t *testing.T) {
570571
WithRoutingState(v1.RoutingStateActive, fc),
571572
WithLogURL, allUnknownConditions, MarkActive),
572573
pa("foo", "pod-error", WithReachabilityReachable), // PA can't be ready, since no traffic.
573-
pod(t, "foo", "pod-error", WithFailingContainer("pod-error", 5, "I failed man!")),
574+
pod(t, "foo", "pod-error", WithPodCondition(corev1.PodReady, corev1.ConditionFalse, "ContainersNotReady"), WithFailingContainer("pod-error", 5, "I failed man!")),
574575
deploy(t, "foo", "pod-error"),
575576
image("foo", "pod-error"),
576577
},
@@ -742,6 +743,60 @@ func TestReconcile(t *testing.T) {
742743
PodSpecPersistentVolumeClaim: defaultconfig.Enabled,
743744
PodSpecPersistentVolumeWrite: defaultconfig.Enabled,
744745
}}),
746+
}, {
747+
Name: "revision's ContainerHealthy turns back to True if the deployment is healthy",
748+
Objects: []runtime.Object{
749+
Revision("foo", "container-unhealthy",
750+
WithLogURL,
751+
MarkRevisionReady,
752+
withDefaultContainerStatuses(),
753+
WithRevisionLabel(serving.RoutingStateLabelKey, "active"),
754+
MarkContainerHealthyFalse("ExitCode137"),
755+
),
756+
pa("foo", "container-unhealthy",
757+
WithPASKSReady,
758+
WithScaleTargetInitialized,
759+
WithTraffic,
760+
WithReachabilityReachable,
761+
WithPAStatusService("something"),
762+
),
763+
readyDeploy(deploy(t, "foo", "container-unhealthy", withReplicas(1))),
764+
image("foo", "container-unhealthy"),
765+
},
766+
Key: "foo/container-unhealthy",
767+
WantStatusUpdates: []clientgotesting.UpdateActionImpl{{
768+
Object: Revision("foo", "container-unhealthy",
769+
WithLogURL,
770+
MarkRevisionReady,
771+
withDefaultContainerStatuses(),
772+
WithRevisionLabel(serving.RoutingStateLabelKey, "active"),
773+
),
774+
}},
775+
WantEvents: []string{
776+
Eventf(corev1.EventTypeNormal, "RevisionReady", "Revision becomes ready upon all resources being ready"),
777+
},
778+
}, {
779+
Name: "revision's ContainerHealthy stays True even if the last Pod with restarts terminates",
780+
Objects: []runtime.Object{
781+
Revision("foo", "container-healthy",
782+
WithLogURL,
783+
MarkRevisionReady,
784+
withDefaultContainerStatuses(),
785+
WithRevisionLabel(serving.RoutingStateLabelKey, "active"),
786+
MarkContainerHealthyTrue(),
787+
),
788+
pa("foo", "container-healthy",
789+
WithPASKSReady,
790+
WithScaleTargetInitialized,
791+
WithTraffic,
792+
WithReachabilityReachable,
793+
WithPAStatusService("something"),
794+
),
795+
WithDeletionTimestamp(pod(t, "foo", "container-healthy", WithPodCondition(corev1.PodReady, corev1.ConditionFalse, "ContainersNotReady"), WithFailingContainer("crashed-at-some-point", 128, "OOMKilled"))),
796+
readyDeploy(deploy(t, "foo", "container-healthy", withReplicas(0))),
797+
image("foo", "container-healthy"),
798+
},
799+
Key: "foo/container-healthy",
745800
}}
746801

747802
table.Test(t, MakeFactory(func(ctx context.Context, listers *Listers, _ configmap.Watcher) controller.Reconciler {
@@ -851,6 +906,19 @@ func allUnknownConditions(r *v1.Revision) {
851906

852907
type configOption func(*config.Config)
853908

909+
type deploymentOption func(*appsv1.Deployment)
910+
911+
// withReplicas configures the number of replicas on the Deployment
912+
func withReplicas(replicas int32) deploymentOption {
913+
return func(d *appsv1.Deployment) {
914+
d.Spec.Replicas = &replicas
915+
d.Status.AvailableReplicas = replicas
916+
d.Status.ReadyReplicas = replicas
917+
d.Status.Replicas = replicas
918+
d.Status.UpdatedReplicas = replicas
919+
}
920+
}
921+
854922
func deploy(t *testing.T, namespace, name string, opts ...interface{}) *appsv1.Deployment {
855923
t.Helper()
856924
cfg := reconcilerTestConfig()
@@ -876,6 +944,13 @@ func deploy(t *testing.T, namespace, name string, opts ...interface{}) *appsv1.D
876944
if err != nil {
877945
t.Fatal("failed to create deployment")
878946
}
947+
948+
for _, opt := range opts {
949+
if deploymentOpt, ok := opt.(deploymentOption); ok {
950+
deploymentOpt(deployment)
951+
}
952+
}
953+
879954
return deployment
880955
}
881956

pkg/testing/functional.go

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,10 +132,11 @@ func WithNoTraffic(reason, message string) PodAutoscalerOption {
132132
}
133133
}
134134

135-
// WithPADeletionTimestamp will set the DeletionTimestamp on the PodAutoscaler.
136-
func WithPADeletionTimestamp(r *autoscalingv1alpha1.PodAutoscaler) {
135+
// WithDeletionTimestamp will set the DeletionTimestamp on the object.
136+
func WithDeletionTimestamp[T metav1.Object](obj T) T {
137137
t := metav1.NewTime(time.Unix(1e9, 0))
138-
r.ObjectMeta.SetDeletionTimestamp(&t)
138+
obj.SetDeletionTimestamp(&t)
139+
return obj
139140
}
140141

141142
// WithHPAClass updates the PA to add the hpa class annotation.
@@ -288,6 +289,25 @@ func WithEndpointsOwnersRemoved(eps *corev1.Endpoints) {
288289
// PodOption enables further configuration of a Pod.
289290
type PodOption func(*corev1.Pod)
290291

292+
// WithPodCondition sets a condition in the status
293+
func WithPodCondition(conditionType corev1.PodConditionType, status corev1.ConditionStatus, reason string) PodOption {
294+
return func(pod *corev1.Pod) {
295+
for i, condition := range pod.Status.Conditions {
296+
if condition.Type == conditionType {
297+
pod.Status.Conditions[i].Status = status
298+
pod.Status.Conditions[i].Reason = reason
299+
return
300+
}
301+
}
302+
303+
pod.Status.Conditions = append(pod.Status.Conditions, corev1.PodCondition{
304+
Type: conditionType,
305+
Status: status,
306+
Reason: reason,
307+
})
308+
}
309+
}
310+
291311
// WithFailingContainer sets the .Status.ContainerStatuses on the pod to
292312
// include a container named accordingly to fail with the given state.
293313
func WithFailingContainer(name string, exitCode int, message string) PodOption {

pkg/testing/v1/revision.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,12 +152,27 @@ func MarkDeploying(reason string) RevisionOption {
152152
}
153153
}
154154

155+
// MarkContainerHealthyUnknown changes the ContainerHealthy condition to Unknown with the given reason
155156
func MarkContainerHealthyUnknown(reason string) RevisionOption {
156157
return func(r *v1.Revision) {
157158
r.Status.MarkContainerHealthyUnknown(reason, "")
158159
}
159160
}
160161

162+
// MarkContainerHealthyFalse changes the ContainerHealthy condition to False with the given reason
163+
func MarkContainerHealthyFalse(reason string) RevisionOption {
164+
return func(r *v1.Revision) {
165+
r.Status.MarkContainerHealthyFalse(reason, "")
166+
}
167+
}
168+
169+
// MarkContainerHealthyTrue changes the ContainerHealthy condition to True
170+
func MarkContainerHealthyTrue() RevisionOption {
171+
return func(r *v1.Revision) {
172+
r.Status.MarkContainerHealthyTrue()
173+
}
174+
}
175+
161176
// MarkProgressDeadlineExceeded calls the method of the same name on the Revision
162177
// with the message we expect the Revision Reconciler to pass.
163178
func MarkProgressDeadlineExceeded(message string) RevisionOption {

0 commit comments

Comments
 (0)