Skip to content

Commit

Permalink
OCPBUGS-44238: Add Readiness Probe to Router Status Tests
Browse files Browse the repository at this point in the history
Previously, the router was configured without a readiness probe, resulting in
racy startup conditions during router status stress tests. Routers would be
marked as ready immediately upon starting, causing the waitForReadyReplicaSet
function to proceed prematurely. This allowed the next step of route creation
to occur before the routers had fully initialized.

This often led to the first two routers to fight over the route status while
the third router was still starting. As a result, the third router missed
observing these early status contentions, leading to more writes to the
route status than we were expecting.

Adding the readiness probe also revealed that HAProxy was failing to start
due to insufficient permissions. The anyuid SCC was added to the router's
service account to resolve the issue.
  • Loading branch information
gcs278 committed Jan 25, 2025
1 parent 929fc7c commit c9b5c94
Showing 1 changed file with 58 additions and 2 deletions.
60 changes: 58 additions & 2 deletions test/extended/router/stress.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ import (
"k8s.io/client-go/tools/cache"
e2e "k8s.io/kubernetes/test/e2e/framework"
admissionapi "k8s.io/pod-security-admission/api"
"k8s.io/utils/ptr"

routev1 "github.com/openshift/api/route/v1"
v2 "github.com/openshift/api/security/v1"
routeclientset "github.com/openshift/client-go/route/clientset/versioned"
v1 "github.com/openshift/client-go/route/clientset/versioned/typed/route/v1"
exutil "github.com/openshift/origin/test/extended/util"
Expand Down Expand Up @@ -78,6 +80,23 @@ var _ = g.Describe("[sig-network][Feature:Router][apigroup:route.openshift.io]",
Name: "system:router",
},
}, metav1.CreateOptions{})
// The router typically runs with allowPrivilegeEscalation enabled; however, system:router defaults to the
// restricted-v2 scc, which disallows privilege escalation. The restricted policy permits privilege escalation.
_, err = oc.AdminKubeClient().RbacV1().RoleBindings(ns).Create(context.Background(), &rbacv1.RoleBinding{
ObjectMeta: metav1.ObjectMeta{
Name: "router-restricted",
},
Subjects: []rbacv1.Subject{
{
Kind: "ServiceAccount",
Name: "default",
},
},
RoleRef: rbacv1.RoleRef{
Kind: "ClusterRole",
Name: "system:openshift:scc:restricted",
},
}, metav1.CreateOptions{})
o.Expect(err).NotTo(o.HaveOccurred())
})

Expand Down Expand Up @@ -546,17 +565,54 @@ func scaledRouter(name, image string, args []string) *appsv1.ReplicaSet {
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{"app": name},
Annotations: map[string]string{
// The restricted-v2 scc preempts restricted, so we must pin to restricted.
v2.RequiredSCCAnnotation: "restricted",
},
},
Spec: corev1.PodSpec{
TerminationGracePeriodSeconds: &one,
Containers: []corev1.Container{
{
Env: []corev1.EnvVar{
{Name: "NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.name"}}},
{
Name: "NAME", ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
FieldPath: "metadata.name",
},
},
},
{
Name: "POD_NAMESPACE",
ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
FieldPath: "metadata.namespace",
},
},
},
},
Name: "router",
Image: image,
Args: args,
Args: append(args, "--stats-port=1936", "--metrics-type=haproxy"),
Ports: []corev1.ContainerPort{
{
ContainerPort: 1936,
Name: "stats",
Protocol: corev1.ProtocolTCP,
},
},
ReadinessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/healthz/ready",
Port: intstr.FromInt32(1936),
},
},
},
SecurityContext: &corev1.SecurityContext{
// Default is true, but explicitly specified here for clarity.
AllowPrivilegeEscalation: ptr.To[bool](true),
},
},
},
},
Expand Down

0 comments on commit c9b5c94

Please sign in to comment.