Skip to content

Commit 6c9df33

Browse files
committed
Just delete daemonset pods
Signed-off-by: naoki-take <[email protected]>
1 parent 131033c commit 6c9df33

File tree

4 files changed

+53
-73
lines changed

4 files changed

+53
-73
lines changed

op/reboot.go

Lines changed: 24 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ func (c rebootDrainStartCommand) Run(ctx context.Context, inf cke.Infrastructure
203203

204204
//
205205

206-
type rebootEvictDaemonSetPodOp struct {
206+
type rebootDeleteDaemonSetPodOp struct {
207207
finished bool
208208

209209
entries []*cke.RebootQueueEntry
@@ -214,118 +214,90 @@ type rebootEvictDaemonSetPodOp struct {
214214
failedNodes []string
215215
}
216216

217-
func RebootEvictDaemonSetPodOp(apiserver *cke.Node, entries []*cke.RebootQueueEntry, config *cke.Reboot) cke.InfoOperator {
218-
return &rebootEvictDaemonSetPodOp{
217+
func RebootDeleteDaemonSetPodOp(apiserver *cke.Node, entries []*cke.RebootQueueEntry, config *cke.Reboot) cke.InfoOperator {
218+
return &rebootDeleteDaemonSetPodOp{
219219
entries: entries,
220220
config: config,
221221
apiserver: apiserver,
222222
}
223223
}
224224

225-
type rebootEvictDaemonSetPodCommand struct {
226-
entries []*cke.RebootQueueEntry
227-
protectedNamespaces *metav1.LabelSelector
228-
apiserver *cke.Node
229-
evictAttempts int
230-
evictInterval time.Duration
225+
type rebootDeleteDaemonSetPodCommand struct {
226+
entries []*cke.RebootQueueEntry
227+
apiserver *cke.Node
231228

232229
notifyFailedNode func(string)
233230
}
234231

235-
func (o *rebootEvictDaemonSetPodOp) Name() string {
236-
return "reboot-evict-daemonset-pod"
232+
func (o *rebootDeleteDaemonSetPodOp) Name() string {
233+
return "reboot-delete-daemonset-pod"
237234
}
238235

239-
func (o *rebootEvictDaemonSetPodOp) notifyFailedNode(node string) {
236+
func (o *rebootDeleteDaemonSetPodOp) notifyFailedNode(node string) {
240237
o.mu.Lock()
241238
o.failedNodes = append(o.failedNodes, node)
242239
o.mu.Unlock()
243240
}
244241

245-
func (o *rebootEvictDaemonSetPodOp) Targets() []string {
242+
func (o *rebootDeleteDaemonSetPodOp) Targets() []string {
246243
ipAddresses := make([]string, len(o.entries))
247244
for i, entry := range o.entries {
248245
ipAddresses[i] = entry.Node
249246
}
250247
return ipAddresses
251248
}
252249

253-
func (o *rebootEvictDaemonSetPodOp) Info() string {
250+
func (o *rebootDeleteDaemonSetPodOp) Info() string {
254251
if len(o.failedNodes) == 0 {
255252
return ""
256253
}
257-
return fmt.Sprintf("failed to evict DaemonSet pods on some nodes: %v", o.failedNodes)
254+
return fmt.Sprintf("failed to delete DaemonSet pods on some nodes: %v", o.failedNodes)
258255
}
259256

260-
func (o *rebootEvictDaemonSetPodOp) NextCommand() cke.Commander {
257+
func (o *rebootDeleteDaemonSetPodOp) NextCommand() cke.Commander {
261258
if o.finished {
262259
return nil
263260
}
264261
o.finished = true
265262

266-
attempts := 1
267-
if o.config.EvictRetries != nil {
268-
attempts = *o.config.EvictRetries + 1
269-
}
270-
interval := 0 * time.Second
271-
if o.config.EvictInterval != nil {
272-
interval = time.Second * time.Duration(*o.config.EvictInterval)
273-
}
274-
275-
return rebootEvictDaemonSetPodCommand{
276-
entries: o.entries,
277-
protectedNamespaces: o.config.ProtectedNamespaces,
278-
apiserver: o.apiserver,
279-
notifyFailedNode: o.notifyFailedNode,
280-
evictAttempts: attempts,
281-
evictInterval: interval,
263+
return rebootDeleteDaemonSetPodCommand{
264+
entries: o.entries,
265+
apiserver: o.apiserver,
266+
notifyFailedNode: o.notifyFailedNode,
282267
}
283268
}
284269

285-
func (c rebootEvictDaemonSetPodCommand) Command() cke.Command {
270+
func (c rebootDeleteDaemonSetPodCommand) Command() cke.Command {
286271
ipAddresses := make([]string, len(c.entries))
287272
for i, entry := range c.entries {
288273
ipAddresses[i] = entry.Node
289274
}
290275
return cke.Command{
291-
Name: "rebootEvictDaemonSetPodCommand",
276+
Name: "rebootDeleteDaemonSetPodCommand",
292277
Target: strings.Join(ipAddresses, ","),
293278
}
294279
}
295280

296-
func (c rebootEvictDaemonSetPodCommand) Run(ctx context.Context, inf cke.Infrastructure, _ string) error {
281+
func (c rebootDeleteDaemonSetPodCommand) Run(ctx context.Context, inf cke.Infrastructure, _ string) error {
297282
cs, err := inf.K8sClient(ctx, c.apiserver)
298283
if err != nil {
299284
return err
300285
}
301286

302-
protected, err := listProtectedNamespaces(ctx, cs, c.protectedNamespaces)
303-
if err != nil {
304-
return err
305-
}
306-
307-
// evict DaemonSet pod on each node
308-
// cordon is unnecessary for DaemonSet pods, so dry-run eviction is also skipped.
287+
// delete DaemonSet pod on each node
309288
for _, entry := range c.entries {
310289
// keep entry.Status as RebootStatusDraining and don't update it here.
311290

312-
log.Info("start eviction of DaemonSet pod", map[string]interface{}{
291+
log.Info("start deletion of DaemonSet pod", map[string]interface{}{
313292
"name": entry.Node,
314293
})
315-
err := evictOrDeleteOnDeleteDaemonSetPod(ctx, cs, entry.Node, protected, c.evictAttempts, c.evictInterval)
294+
err := deleteOnDeleteDaemonSetPod(ctx, cs, entry.Node)
316295
if err != nil {
317-
log.Warn("eviction of DaemonSet pod failed", map[string]interface{}{
296+
log.Warn("deletion of DaemonSet pod failed", map[string]interface{}{
318297
"name": entry.Node,
319298
log.FnError: err,
320299
})
321300
c.notifyFailedNode(entry.Node)
322-
err = drainBackOff(ctx, inf, entry, err)
323-
if err != nil {
324-
return err
325-
}
326-
log.Info("eviction of DaemonSet pod succeeded", map[string]interface{}{
327-
"name": entry.Node,
328-
})
329301
}
330302
}
331303

op/reboot_decide.go

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -101,44 +101,35 @@ func enumerateOnDeleteDaemonSetPods(ctx context.Context, cs *kubernetes.Clientse
101101
// dryRunEvictOrDeleteNodePod checks whether eviction or deletion of Pods on the specified Node can proceed.
102102
// It returns an error if a running Pod exists or an eviction of the Pod in protected namespace failed.
103103
func dryRunEvictOrDeleteNodePod(ctx context.Context, cs *kubernetes.Clientset, node string, protected map[string]bool) error {
104-
return enumeratePods(ctx, cs, node,
105-
doEvictOrDeleteNodePod(ctx, cs, node, protected, 0, time.Duration(0), true),
106-
func(pod *corev1.Pod) error {
107-
return fmt.Errorf("job-managed pod exists: %s/%s, phase=%s", pod.Namespace, pod.Name, pod.Status.Phase)
108-
},
109-
)
104+
return doEvictOrDeleteNodePod(ctx, cs, node, protected, 0, 0, true)
110105
}
111106

112107
// evictOrDeleteNodePod evicts or deletes Pods on the specified Node.
113108
// If a running Job Pod exists, this function returns an error.
114109
func evictOrDeleteNodePod(ctx context.Context, cs *kubernetes.Clientset, node string, protected map[string]bool, attempts int, interval time.Duration) error {
115-
return enumeratePods(ctx, cs, node,
116-
doEvictOrDeleteNodePod(ctx, cs, node, protected, attempts, interval, false),
117-
func(pod *corev1.Pod) error {
118-
return fmt.Errorf("job-managed pod exists: %s/%s, phase=%s", pod.Namespace, pod.Name, pod.Status.Phase)
119-
},
120-
)
110+
return doEvictOrDeleteNodePod(ctx, cs, node, protected, attempts, interval, false)
121111
}
122112

123-
// evictOrDeleteOnDeleteDaemonSetPod evicts or delete Pods on the specified Node that are owned by "updateStrategy:OnDelete" DaemonSets.
124-
func evictOrDeleteOnDeleteDaemonSetPod(ctx context.Context, cs *kubernetes.Clientset, node string, protected map[string]bool, attempts int, interval time.Duration) error {
125-
return enumerateOnDeleteDaemonSetPods(ctx, cs, node, doEvictOrDeleteNodePod(ctx, cs, node, protected, attempts, interval, false))
113+
// deleteOnDeleteDaemonSetPod deletes Pods on the specified Node that are owned by "updateStrategy:OnDelete" DaemonSets.
114+
func deleteOnDeleteDaemonSetPod(ctx context.Context, cs *kubernetes.Clientset, node string) error {
115+
return doDeleteOnDeleteDaemonSetPod(ctx, cs, node)
126116
}
127117

128-
// doEvictOrDeleteNodePod returns a pod handler that evicts or delete Pods on the specified Node.
118+
// doEvictOrDeleteNodePod evicts or deletes Pods on the specified Node.
129119
// It first tries eviction.
130120
// If the eviction failed and the Pod's namespace is not protected, it deletes the Pod.
131121
// If the eviction failed and the Pod's namespace is protected, it retries after `interval` interval at most `attempts` times.
122+
// If a running Job Pod exists, this function returns an error.
132123
// If `dry` is true, it performs dry run and `attempts` and `interval` are ignored.
133-
func doEvictOrDeleteNodePod(ctx context.Context, cs *kubernetes.Clientset, node string, protected map[string]bool, attempts int, interval time.Duration, dry bool) func(pod *corev1.Pod) error {
124+
func doEvictOrDeleteNodePod(ctx context.Context, cs *kubernetes.Clientset, node string, protected map[string]bool, attempts int, interval time.Duration, dry bool) error {
134125
var deleteOptions *metav1.DeleteOptions
135126
if dry {
136127
deleteOptions = &metav1.DeleteOptions{
137128
DryRun: []string{"All"},
138129
}
139130
}
140131

141-
return func(pod *corev1.Pod) error {
132+
return enumeratePods(ctx, cs, node, func(pod *corev1.Pod) error {
142133
if dry && !protected[pod.Namespace] {
143134
// in case of dry-run for Pods in non-protected namespace,
144135
// return immediately because its "eviction or deletion" never fails
@@ -209,7 +200,24 @@ func doEvictOrDeleteNodePod(ctx context.Context, cs *kubernetes.Clientset, node
209200
return fmt.Errorf("failed to evict pod %s/%s due to PDB: %w", pod.Namespace, pod.Name, err)
210201
}
211202
return nil
212-
}
203+
}, func(pod *corev1.Pod) error {
204+
return fmt.Errorf("job-managed pod exists: %s/%s, phase=%s", pod.Namespace, pod.Name, pod.Status.Phase)
205+
})
206+
}
207+
208+
// doDeleteOnDeleteDaemonSetPod deletes 'OnDelete' DaemonSet pods on the specified Node.
209+
func doDeleteOnDeleteDaemonSetPod(ctx context.Context, cs *kubernetes.Clientset, node string) error {
210+
return enumerateOnDeleteDaemonSetPods(ctx, cs, node, func(pod *corev1.Pod) error {
211+
err := cs.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{})
212+
if err != nil && !apierrors.IsNotFound(err) {
213+
return err
214+
}
215+
log.Info("deleted daemonset pod", map[string]interface{}{
216+
"namespace": pod.Namespace,
217+
"name": pod.Name,
218+
})
219+
return nil
220+
})
213221
}
214222

215223
// checkPodDeletion checks whether the evicted or deleted Pods are eventually deleted.

server/strategy.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -899,7 +899,7 @@ func rebootOps(c *cke.Cluster, constraints *cke.Constraints, rebootArgs DecideOp
899899

900900
if len(rebootArgs.DrainCompleted) > 0 {
901901
// After eviction of normal pods, delete "OnDelete" daemonset pods.
902-
ops = append(ops, op.RebootEvictDaemonSetPodOp(nf.HealthyAPIServer(), rebootArgs.DrainCompleted, &c.Reboot))
902+
ops = append(ops, op.RebootDeleteDaemonSetPodOp(nf.HealthyAPIServer(), rebootArgs.DrainCompleted, &c.Reboot))
903903
ops = append(ops, op.RebootRebootOp(nf.HealthyAPIServer(), rebootArgs.DrainCompleted, &c.Reboot))
904904
}
905905
if len(rebootArgs.NewlyDrained) > 0 {

server/strategy_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2637,7 +2637,7 @@ func TestDecideOps(t *testing.T) {
26372637
},
26382638
}),
26392639
ExpectedOps: []opData{
2640-
{"reboot-evict-daemonset-pod", 1},
2640+
{"reboot-delete-daemonset-pod", 1},
26412641
{"reboot-reboot", 1},
26422642
},
26432643
},

0 commit comments

Comments
 (0)