Skip to content

Commit 933b7b3

Browse files
Abhish2702Abhishek Bansalzachaller
authored
fix: guardrail to not overload stable replicaset (argoproj#3878)
* fix: unexpected downtime in rollouts Signed-off-by: Abhishek Bansal <[email protected]> * lint Signed-off-by: Zach Aller <[email protected]> --------- Signed-off-by: Abhishek Bansal <[email protected]> Signed-off-by: Zach Aller <[email protected]> Co-authored-by: Abhishek Bansal <[email protected]> Co-authored-by: Zach Aller <[email protected]>
1 parent f4289b7 commit 933b7b3

File tree

2 files changed

+90
-0
lines changed

2 files changed

+90
-0
lines changed

rollout/trafficrouting.go

+23
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ import (
1010

1111
"github.com/argoproj/argo-rollouts/rollout/trafficrouting/plugin"
1212

13+
appsv1 "k8s.io/api/apps/v1"
14+
1315
"github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1"
1416
"github.com/argoproj/argo-rollouts/rollout/trafficrouting"
1517
"github.com/argoproj/argo-rollouts/rollout/trafficrouting/alb"
@@ -133,6 +135,23 @@ func (c *Controller) NewTrafficRoutingReconciler(roCtx *rolloutContext) ([]traff
133135
return nil, nil
134136
}
135137

138+
func (c *rolloutContext) checkReplicasAvailable(rs *appsv1.ReplicaSet, desiredWeight int32) bool {
139+
if rs == nil {
140+
return false
141+
}
142+
availableReplicas := rs.Status.AvailableReplicas
143+
totalReplicas := *c.rollout.Spec.Replicas
144+
145+
desiredReplicas := (desiredWeight * totalReplicas) / 100
146+
if availableReplicas < desiredReplicas {
147+
c.log.Infof("ReplicaSet '%s' has %d available replicas, waiting for %d", rs.Name, availableReplicas, desiredReplicas)
148+
return false
149+
}
150+
151+
return true
152+
153+
}
154+
136155
// this currently only be used in the canary strategy
137156
func (c *rolloutContext) reconcileTrafficRouting() error {
138157
reconcilers, err := c.newTrafficRoutingReconciler(c)
@@ -243,6 +262,10 @@ func (c *rolloutContext) reconcileTrafficRouting() error {
243262
desiredWeight = weightutil.MaxTrafficWeight(c.rollout)
244263
}
245264
}
265+
266+
if !c.checkReplicasAvailable(c.stableRS, weightutil.MaxTrafficWeight(c.rollout)-desiredWeight) {
267+
return nil
268+
}
246269
// We need to check for revision > 1 because when we first install the rollout we run step 0 this prevents that.
247270
// There is a bigger fix needed for the reasons on why we run step 0 on rollout install, that needs to be explored.
248271
revision, revisionFound := annotations.GetRevisionAnnotation(c.rollout)

rollout/trafficrouting_test.go

+67
Original file line numberDiff line numberDiff line change
@@ -1435,3 +1435,70 @@ func TestDontWeightOrHaveManagedRoutesDuringInterruptedUpdate(t *testing.T) {
14351435
f.fakeTrafficRouting.AssertCalled(t, "RemoveManagedRoutes", mock.Anything, mock.Anything)
14361436

14371437
}
1438+
1439+
// This test verifies that if we are shifting traffic to stable replicaset without the stable replicaset being available proportional to the weight, the traffic shouldn't be switched immediately to the stable replicaset.
1440+
func TestCheckReplicaSetAvailable(t *testing.T) {
1441+
fix := newFixture(t)
1442+
defer fix.Close()
1443+
1444+
steps := []v1alpha1.CanaryStep{
1445+
{
1446+
SetWeight: pointer.Int32(60),
1447+
},
1448+
{
1449+
Pause: &v1alpha1.RolloutPause{},
1450+
},
1451+
}
1452+
1453+
rollout1 := newCanaryRollout("test-rollout", 10, nil, steps, pointer.Int32(1), intstr.FromInt(1), intstr.FromInt(1))
1454+
rollout1.Spec.Strategy.Canary.DynamicStableScale = true
1455+
rollout1.Spec.Strategy.Canary.TrafficRouting = &v1alpha1.RolloutTrafficRouting{
1456+
SMI: &v1alpha1.SMITrafficRouting{},
1457+
}
1458+
rollout1.Spec.Strategy.Canary.CanaryService = "canary-service"
1459+
rollout1.Spec.Strategy.Canary.StableService = "stable-service"
1460+
rollout1.Status.ReadyReplicas = 10
1461+
rollout1.Status.AvailableReplicas = 10
1462+
1463+
rollout2 := bumpVersion(rollout1)
1464+
1465+
replicaSet1 := newReplicaSetWithStatus(rollout1, 1, 1)
1466+
replicaSet2 := newReplicaSetWithStatus(rollout2, 9, 9)
1467+
1468+
replicaSet1Hash := replicaSet1.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]
1469+
replicaSet2Hash := replicaSet2.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]
1470+
canarySelector := map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: replicaSet2Hash}
1471+
stableSelector := map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: replicaSet1Hash}
1472+
canarySvc := newService("canary-service", 80, canarySelector, rollout1)
1473+
stableSvc := newService("stable-service", 80, stableSelector, rollout1)
1474+
1475+
rollout2.Spec = rollout1.Spec
1476+
rollout2.Status.StableRS = replicaSet1Hash
1477+
rollout2.Status.CurrentPodHash = replicaSet1Hash
1478+
rollout2.Status.Canary.Weights = &v1alpha1.TrafficWeights{
1479+
Canary: v1alpha1.WeightDestination{
1480+
Weight: 10,
1481+
ServiceName: "canary-service",
1482+
PodTemplateHash: replicaSet2Hash,
1483+
},
1484+
Stable: v1alpha1.WeightDestination{
1485+
Weight: 90,
1486+
ServiceName: "stable-service",
1487+
PodTemplateHash: replicaSet1Hash,
1488+
},
1489+
}
1490+
1491+
fix.kubeobjects = append(fix.kubeobjects, replicaSet1, replicaSet2, canarySvc, stableSvc)
1492+
fix.replicaSetLister = append(fix.replicaSetLister, replicaSet1, replicaSet2)
1493+
1494+
fix.rolloutLister = append(fix.rolloutLister, rollout2)
1495+
fix.objects = append(fix.objects, rollout2)
1496+
1497+
fix.expectUpdateReplicaSetAction(replicaSet1)
1498+
fix.expectUpdateRolloutAction(rollout2)
1499+
fix.expectUpdateReplicaSetAction(replicaSet1)
1500+
fix.expectPatchRolloutAction(rollout2)
1501+
fix.fakeTrafficRouting = newUnmockedFakeTrafficRoutingReconciler()
1502+
fix.fakeTrafficRouting.On("RemoveManagedRoutes", mock.Anything, mock.Anything, mock.Anything).Return(nil)
1503+
fix.run(getKey(rollout1, t))
1504+
}

0 commit comments

Comments
 (0)