Skip to content

Commit 3887d68

Browse files
committed
OCPBUGS-44238: Add Readiness Probe to Router Status Tests
Previously, the router was configured without a readiness probe, resulting in racy startup conditions during router status stress tests. Routers would be marked as ready immediately upon starting, causing the waitForReadyReplicaSet function to proceed prematurely. This allowed the next step, route creation, to occur before the routers had fully initialized. This often led the first two routers to fight over the route status while the third router was still starting. As a result, the third router missed these early status contentions, leading to more writes to the route status than we were expecting. Adding the readiness probe also revealed that HAProxy was failing to start due to insufficient permissions. To resolve the issue, the router's service account was bound to the restricted SCC, which permits privilege escalation, and the router pods were pinned to that SCC.
1 parent 929fc7c commit 3887d68

File tree

1 file changed

+58
-2
lines changed

1 file changed

+58
-2
lines changed

test/extended/router/stress.go

+58-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"bytes"
55
"context"
66
"fmt"
7+
"k8s.io/utils/ptr"
78
"strings"
89
"text/tabwriter"
910
"time"
@@ -29,6 +30,7 @@ import (
2930
admissionapi "k8s.io/pod-security-admission/api"
3031

3132
routev1 "github.com/openshift/api/route/v1"
33+
v2 "github.com/openshift/api/security/v1"
3234
routeclientset "github.com/openshift/client-go/route/clientset/versioned"
3335
v1 "github.com/openshift/client-go/route/clientset/versioned/typed/route/v1"
3436
exutil "github.com/openshift/origin/test/extended/util"
@@ -78,6 +80,23 @@ var _ = g.Describe("[sig-network][Feature:Router][apigroup:route.openshift.io]",
7880
Name: "system:router",
7981
},
8082
}, metav1.CreateOptions{})
83+
// The router typically runs with allowPrivilegeEscalation enabled; however, system:router defaults to the
84+
// restricted-v2 scc, which disallows privilege escalation. The restricted scc permits privilege escalation, so grant it here.
85+
_, err = oc.AdminKubeClient().RbacV1().RoleBindings(ns).Create(context.Background(), &rbacv1.RoleBinding{
86+
ObjectMeta: metav1.ObjectMeta{
87+
Name: "router-restricted",
88+
},
89+
Subjects: []rbacv1.Subject{
90+
{
91+
Kind: "ServiceAccount",
92+
Name: "default",
93+
},
94+
},
95+
RoleRef: rbacv1.RoleRef{
96+
Kind: "ClusterRole",
97+
Name: "system:openshift:scc:restricted",
98+
},
99+
}, metav1.CreateOptions{})
81100
o.Expect(err).NotTo(o.HaveOccurred())
82101
})
83102

@@ -546,17 +565,54 @@ func scaledRouter(name, image string, args []string) *appsv1.ReplicaSet {
546565
Template: corev1.PodTemplateSpec{
547566
ObjectMeta: metav1.ObjectMeta{
548567
Labels: map[string]string{"app": name},
568+
Annotations: map[string]string{
569+
// The restricted-v2 scc preempts restricted, so we must pin to restricted.
570+
v2.RequiredSCCAnnotation: "restricted",
571+
},
549572
},
550573
Spec: corev1.PodSpec{
551574
TerminationGracePeriodSeconds: &one,
552575
Containers: []corev1.Container{
553576
{
554577
Env: []corev1.EnvVar{
555-
{Name: "NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.name"}}},
578+
{
579+
Name: "NAME", ValueFrom: &corev1.EnvVarSource{
580+
FieldRef: &corev1.ObjectFieldSelector{
581+
FieldPath: "metadata.name",
582+
},
583+
},
584+
},
585+
{
586+
Name: "POD_NAMESPACE",
587+
ValueFrom: &corev1.EnvVarSource{
588+
FieldRef: &corev1.ObjectFieldSelector{
589+
FieldPath: "metadata.namespace",
590+
},
591+
},
592+
},
556593
},
557594
Name: "router",
558595
Image: image,
559-
Args: args,
596+
Args: append(args, "--stats-port=1936", "--metrics-type=haproxy"),
597+
Ports: []corev1.ContainerPort{
598+
{
599+
ContainerPort: 1936,
600+
Name: "stats",
601+
Protocol: corev1.ProtocolTCP,
602+
},
603+
},
604+
ReadinessProbe: &corev1.Probe{
605+
ProbeHandler: corev1.ProbeHandler{
606+
HTTPGet: &corev1.HTTPGetAction{
607+
Path: "/healthz/ready",
608+
Port: intstr.FromInt32(1936),
609+
},
610+
},
611+
},
612+
SecurityContext: &corev1.SecurityContext{
613+
// Default is true, but explicitly specified here for clarity.
614+
AllowPrivilegeEscalation: ptr.To[bool](true),
615+
},
560616
},
561617
},
562618
},

0 commit comments

Comments
 (0)