Skip to content

Commit ca19925

Browse files
committed
Optimize expectations for Advanced DaemonSet (#940)
Signed-off-by: FillZpp <[email protected]>
1 parent ccbdf0d commit ca19925

File tree

2 files changed

+29
-20
lines changed

2 files changed

+29
-20
lines changed

pkg/controller/daemonset/daemonset_controller.go

+13-16
Original file line number | Diff line number | Diff line change
@@ -689,15 +689,12 @@ func (dsc *ReconcileDaemonSet) syncNodes(ds *appsv1alpha1.DaemonSet, podsToDelet
689689

690690
err = dsc.podControl.CreatePods(ds.Namespace, podTemplate, ds, metav1.NewControllerRef(ds, controllerKind))
691691

692-
if err != nil && errors.IsTimeout(err) {
693-
// Pod is created but its initialization has timed out.
694-
// If the initialization is successful eventually, the
695-
// controller will observe the creation via the informer.
696-
// If the initialization fails, or if the pod keeps
697-
// uninitialized for a long time, the informer will not
698-
// receive any update, and the controller will create a new
699-
// pod when the expectation expires.
700-
return
692+
if err != nil {
693+
if errors.HasStatusCause(err, corev1.NamespaceTerminatingCause) {
694+
// If the namespace is being torn down, we can safely ignore
695+
// this error since all subsequent creations will fail.
696+
return
697+
}
701698
}
702699
if err != nil {
703700
klog.V(2).Infof("Failed creation, decrementing expectations for set %q/%q", ds.Namespace, ds.Name)
@@ -709,12 +706,10 @@ func (dsc *ReconcileDaemonSet) syncNodes(ds *appsv1alpha1.DaemonSet, podsToDelet
709706
}
710707
createWait.Wait()
711708
// any skipped pods that we never attempted to start shouldn't be expected.
712-
skippedPods := createDiff - batchSize
709+
skippedPods := createDiff - (batchSize + pos)
713710
if errorCount < len(errCh) && skippedPods > 0 {
714711
klog.V(2).Infof("Slow-start failure. Skipping creation of %d pods, decrementing expectations for set %q/%q", skippedPods, ds.Namespace, ds.Name)
715-
for i := 0; i < skippedPods; i++ {
716-
dsc.expectations.CreationObserved(dsKey)
717-
}
712+
dsc.expectations.LowerExpectations(dsKey, skippedPods, 0)
718713
// The skipped pods will be retried later. The next controller resync will
719714
// retry the slow start process.
720715
break
@@ -728,10 +723,12 @@ func (dsc *ReconcileDaemonSet) syncNodes(ds *appsv1alpha1.DaemonSet, podsToDelet
728723
go func(ix int) {
729724
defer deleteWait.Done()
730725
if err := dsc.podControl.DeletePod(ds.Namespace, podsToDelete[ix], ds); err != nil {
731-
klog.V(2).Infof("Failed deletion, decrementing expectations for set %q/%q", ds.Namespace, ds.Name)
732726
dsc.expectations.DeletionObserved(dsKey)
733-
errCh <- err
734-
utilruntime.HandleError(err)
727+
if !errors.IsNotFound(err) {
728+
klog.V(2).Infof("Failed deletion, decremented expectations for set %q/%q", ds.Namespace, ds.Name)
729+
errCh <- err
730+
utilruntime.HandleError(err)
731+
}
735732
}
736733
}(i)
737734
}

pkg/controller/daemonset/daemonset_event_handler.go

+16-4
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"reflect"
2222
"strings"
23+
"sync"
2324

2425
v1 "k8s.io/api/core/v1"
2526
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -40,7 +41,8 @@ var _ handler.EventHandler = &podEventHandler{}
4041

4142
type podEventHandler struct {
4243
client.Reader
43-
expectations kubecontroller.ControllerExpectationsInterface
44+
expectations kubecontroller.ControllerExpectationsInterface
45+
deletionUIDCache sync.Map
4446
}
4547

4648
func enqueueDaemonSet(q workqueue.RateLimitingInterface, ds *appsv1alpha1.DaemonSet) {
@@ -117,7 +119,7 @@ func (e *podEventHandler) Update(evt event.UpdateEvent, q workqueue.RateLimiting
117119
// and after such time has passed, the kubelet actually deletes it from the store. We receive an update
118120
// for modification of the deletion timestamp and expect a ds to create more replicas asap, not wait
119121
// until the kubelet actually deletes the pod.
120-
e.Delete(event.DeleteEvent{Object: evt.ObjectNew}, q)
122+
e.deletePod(curPod, q, false)
121123
return
122124
}
123125

@@ -153,7 +155,10 @@ func (e *podEventHandler) Delete(evt event.DeleteEvent, q workqueue.RateLimiting
153155
klog.Errorf("DeleteEvent parse pod failed, DeleteStateUnknown: %#v, obj: %#v", evt.DeleteStateUnknown, evt.Object)
154156
return
155157
}
158+
e.deletePod(pod, q, true)
159+
}
156160

161+
func (e *podEventHandler) deletePod(pod *v1.Pod, q workqueue.RateLimitingInterface, isDeleted bool) {
157162
controllerRef := metav1.GetControllerOf(pod)
158163
if controllerRef == nil {
159164
// No controller should care about orphans being deleted.
@@ -164,8 +169,15 @@ func (e *podEventHandler) Delete(evt event.DeleteEvent, q workqueue.RateLimiting
164169
return
165170
}
166171

167-
klog.V(4).Infof("Pod %s/%s deleted, owner: %s", pod.Namespace, pod.Name, ds.Name)
168-
e.expectations.DeletionObserved(keyFunc(ds))
172+
if _, loaded := e.deletionUIDCache.LoadOrStore(pod.UID, struct{}{}); !loaded {
173+
e.expectations.DeletionObserved(keyFunc(ds))
174+
}
175+
if isDeleted {
176+
e.deletionUIDCache.Delete(pod.UID)
177+
klog.V(4).Infof("Pod %s/%s deleted, owner: %s", pod.Namespace, pod.Name, ds.Name)
178+
} else {
179+
klog.V(4).Infof("Pod %s/%s terminating, owner: %s", pod.Namespace, pod.Name, ds.Name)
180+
}
169181
enqueueDaemonSet(q, ds)
170182
}
171183

0 commit comments

Comments
 (0)