diff --git a/workflow/controller/operator_agent_test.go b/workflow/controller/operator_agent_test.go index 399ebcf594b3..c35f83c2dc9c 100644 --- a/workflow/controller/operator_agent_test.go +++ b/workflow/controller/operator_agent_test.go @@ -87,3 +87,53 @@ func TestHTTPTemplateWithoutServiceAccount(t *testing.T) { assert.Equal(t, `create agent pod failed with reason:"failed to get token volumes: serviceaccounts "default" not found"`, woc.wf.Status.Nodes["hello-world"].Message) }) } + +func TestHTTPTemplateWhenAgentPodFailed(t *testing.T) { + wf := wfv1.MustUnmarshalWorkflow(httpwf) + cancel, controller := newController(wf, defaultServiceAccount) + defer cancel() + + t.Run("ExecuteHTTPTemplateWhenAgentPodFailed", func(t *testing.T) { + ctx := context.Background() + woc := newWorkflowOperationCtx(wf, controller) + woc.operate(ctx) + pod, err := controller.kubeclientset.CoreV1().Pods(woc.wf.Namespace).Get(ctx, woc.getAgentPodName(), metav1.GetOptions{}) + require.NoError(t, err) + assert.NotNil(t, pod) + ts, err := controller.wfclientset.ArgoprojV1alpha1().WorkflowTaskSets(wf.Namespace).Get(ctx, "hello-world", metav1.GetOptions{}) + require.NoError(t, err) + assert.NotNil(t, ts) + assert.Len(t, ts.Spec.Tasks, 1) + ts.Status.Nodes = make(map[string]wfv1.NodeResult) + ts.Status.Nodes["hello-world"] = wfv1.NodeResult{ + Phase: wfv1.NodePending, + Message: "Queuing", + } + ts, err = controller.wfclientset.ArgoprojV1alpha1().WorkflowTaskSets(wf.Namespace).UpdateStatus(ctx, ts, metav1.UpdateOptions{}) + require.NoError(t, err) + assert.Equal(t, wfv1.NodePending, ts.Status.Nodes["hello-world"].Phase) + wf, err = controller.wfclientset.ArgoprojV1alpha1().Workflows(wf.Namespace).Get(ctx, "hello-world", metav1.GetOptions{}) + require.NoError(t, err) + + // simulate agent pod failure scenario + pod.Status.Phase = v1.PodFailed + pod.Status.Message = "manual termination" + pod, err = controller.kubeclientset.CoreV1().Pods(woc.wf.Namespace).UpdateStatus(ctx, pod, metav1.UpdateOptions{}) + require.NoError(t, err) + assert.Equal(t, v1.PodFailed, pod.Status.Phase) + // sleep 1 second to wait for informer getting pod info + time.Sleep(time.Second) + woc = newWorkflowOperationCtx(wf, controller) + woc.operate(ctx) + assert.Equal(t, wfv1.WorkflowError, woc.wf.Status.Phase) + assert.Equal(t, `agent pod failed with reason:"manual termination"`, woc.wf.Status.Message) + assert.Len(t, woc.wf.Status.Nodes, 1) + assert.Equal(t, wfv1.NodeError, woc.wf.Status.Nodes["hello-world"].Phase) + assert.Equal(t, `agent pod failed with reason:"manual termination"`, woc.wf.Status.Nodes["hello-world"].Message) + ts, err = controller.wfclientset.ArgoprojV1alpha1().WorkflowTaskSets(wf.Namespace).Get(ctx, "hello-world", metav1.GetOptions{}) + require.NoError(t, err) + assert.NotNil(t, ts) + assert.Empty(t, ts.Spec.Tasks) + assert.Empty(t, ts.Status.Nodes) + }) +} diff --git a/workflow/controller/taskset.go b/workflow/controller/taskset.go index de244c139f47..1f8748a2877a 100644 --- a/workflow/controller/taskset.go +++ b/workflow/controller/taskset.go @@ -138,6 +138,10 @@ func (woc *wfOperationCtx) reconcileTaskSet(ctx context.Context) error { return err } + if len(woc.taskSet) == 0 { + return nil + } + woc.log.Info("TaskSet Reconciliation") if workflowTaskSet != nil && len(workflowTaskSet.Status.Nodes) > 0 { for nodeID, taskResult := range workflowTaskSet.Status.Nodes { diff --git a/workflow/controller/taskset_test.go b/workflow/controller/taskset_test.go index dd54ced0a8de..72c5522da0a4 100644 --- a/workflow/controller/taskset_test.go +++ b/workflow/controller/taskset_test.go @@ -400,8 +400,7 @@ status: require.NoError(t, err) woc := newWorkflowOperationCtx(wf, controller) time.Sleep(1 * time.Second) - err = woc.reconcileTaskSet(ctx) - require.NoError(t, err) + woc.operate(ctx) memo, err := controller.kubeclientset.CoreV1().ConfigMaps("default").Get(ctx, "cache-demo-1", v1.GetOptions{}) require.NoError(t, err) assert.NotEmpty(t, memo.Data["cache-demo-1"])