Skip to content

Commit babf593

Browse files
committed
fix(test): configuration changes and fixes needed to scale-test
Signed-off-by: Alex Castilio dos Santos <[email protected]>
1 parent dc3ab0f commit babf593

14 files changed

+242
-126
lines changed

.github/workflows/scale-test.yaml

+3-2
Original file line number | Diff line number | Diff line change
@@ -96,11 +96,12 @@ jobs:
9696
NUM_REPLICAS: ${{ inputs.num_replicas }}
9797
NUM_NETPOLS: ${{ inputs.num_netpol }}
9898
CLEANUP: ${{ inputs.cleanup }}
99-
IMAGE_REGISTRY: ${{ inputs.image_namespace == '' && vars.ACR_NAME || inputs.image_namespace }}
99+
IMAGE_REGISTRY: ${{ vars.ACR_NAME }}
100100
IMAGE_NAMESPACE: ${{ github.repository }}
101101
TAG: ${{ inputs.image_tag }}
102102
AZURE_APP_INSIGHTS_KEY: ${{ secrets.AZURE_APP_INSIGHTS_KEY }}
103103
shell: bash
104104
run: |
105105
set -euo pipefail
106-
go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -image-tag=$( [[ $TAG == "" ]] && make version || echo $TAG ) -create-infra=false -delete-infra=false
106+
[[ $TAG == "" ]] && TAG=$(make version)
107+
go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -create-infra=false -delete-infra=false

test/e2e/common/common.go

+1
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ const (
2222
KubeSystemNamespace = "kube-system"
2323
TestPodNamespace = "kube-system-test"
2424
AzureAppInsightsKeyEnv = "AZURE_APP_INSIGHTS_KEY"
25+
OutputFilePathEnv = "OUTPUT_FILEPATH"
2526
)
2627

2728
var (

test/e2e/framework/kubernetes/check-pod-status.go

+10-19
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,9 @@ import (
1414
)
1515

1616
const (
17-
RetryTimeoutPodsReady = 5 * time.Minute
18-
RetryIntervalPodsReady = 5 * time.Second
17+
RetryTimeoutPodsReady = 5 * time.Minute
18+
RetryIntervalPodsReady = 5 * time.Second
19+
timeoutWaitForPodsSeconds = 1200
1920

2021
printInterval = 5 // print to stdout every 5 iterations
2122
)
@@ -48,7 +49,7 @@ func (w *WaitPodsReady) Run() error {
4849
return fmt.Errorf("error creating Kubernetes client: %w", err)
4950
}
5051

51-
ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
52+
ctx, cancel := context.WithTimeout(context.Background(), timeoutWaitForPodsSeconds*time.Second)
5253
defer cancel()
5354

5455
return WaitForPodReady(ctx, clientset, w.Namespace, w.LabelSelector)
@@ -60,7 +61,6 @@ func (w *WaitPodsReady) Stop() error {
6061
}
6162

6263
func WaitForPodReady(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelSelector string) error {
63-
podReadyMap := make(map[string]bool)
6464

6565
printIterator := 0
6666
conditionFunc := wait.ConditionWithContextFunc(func(context.Context) (bool, error) {
@@ -78,34 +78,25 @@ func WaitForPodReady(ctx context.Context, clientset *kubernetes.Clientset, names
7878
return false, nil
7979
}
8080

81-
// check each indviidual pod to see if it's in Running state
81+
// check each individual pod to see if it's in Running state
8282
for i := range podList.Items {
83-
var pod *corev1.Pod
84-
pod, err = clientset.CoreV1().Pods(namespace).Get(ctx, podList.Items[i].Name, metav1.GetOptions{})
85-
if err != nil {
86-
return false, fmt.Errorf("error getting Pod: %w", err)
87-
}
8883

8984
// Check the Pod phase
90-
if pod.Status.Phase != corev1.PodRunning {
85+
if podList.Items[i].Status.Phase != corev1.PodRunning {
9186
if printIterator%printInterval == 0 {
92-
log.Printf("pod \"%s\" is not in Running state yet. Waiting...\n", pod.Name)
87+
log.Printf("pod \"%s\" is not in Running state yet. Waiting...\n", podList.Items[i].Name)
9388
}
9489
return false, nil
9590
}
9691

9792
// Check all container status.
98-
for _, containerStatus := range pod.Status.ContainerStatuses {
99-
if !containerStatus.Ready {
100-
log.Printf("container \"%s\" in pod \"%s\" is not ready yet. Waiting...\n", containerStatus.Name, pod.Name)
93+
for i := range podList.Items[i].Status.ContainerStatuses {
94+
if !podList.Items[i].Status.ContainerStatuses[i].Ready {
95+
log.Printf("container \"%s\" in pod \"%s\" is not ready yet. Waiting...\n", podList.Items[i].Status.ContainerStatuses[i].Name, podList.Items[i].Name)
10196
return false, nil
10297
}
10398
}
10499

105-
if !podReadyMap[pod.Name] {
106-
log.Printf("pod \"%s\" is in Running state\n", pod.Name)
107-
podReadyMap[pod.Name] = true
108-
}
109100
}
110101
log.Printf("all pods in namespace \"%s\" with label \"%s\" are in Running state\n", namespace, labelSelector)
111102
return true, nil

test/e2e/framework/kubernetes/create-kapinger-deployment.go

+1-1
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,7 @@ func (c *CreateKapingerDeployment) GetKapingerDeployment() *appsv1.Deployment {
138138
"memory": resource.MustParse("20Mi"),
139139
},
140140
Limits: v1.ResourceList{
141-
"memory": resource.MustParse("20Mi"),
141+
"memory": resource.MustParse("100Mi"),
142142
},
143143
},
144144
Ports: []v1.ContainerPort{

test/e2e/framework/kubernetes/delete-namespace.go

+2-2
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ func (d *DeleteNamespace) Run() error {
3030
return fmt.Errorf("error creating Kubernetes client: %w", err)
3131
}
3232

33-
ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
33+
ctx, cancel := context.WithTimeout(context.Background(), 1200*time.Second)
3434
defer cancel()
3535

3636
err = clientset.CoreV1().Namespaces().Delete(ctx, d.Namespace, metaV1.DeleteOptions{})
@@ -41,7 +41,7 @@ func (d *DeleteNamespace) Run() error {
4141
}
4242

4343
backoff := wait.Backoff{
44-
Steps: 6,
44+
Steps: 9,
4545
Duration: 10 * time.Second,
4646
Factor: 2.0,
4747
// Jitter: 0.1,

test/e2e/framework/kubernetes/install-retina-helm.go

+1
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,7 @@ func (i *InstallHelmChart) Run() error {
9191
chart.Values["image"].(map[string]interface{})["repository"] = imageRegistry + "/" + imageNamespace + "/retina-agent"
9292
chart.Values["image"].(map[string]interface{})["initRepository"] = imageRegistry + "/" + imageNamespace + "/retina-init"
9393
chart.Values["operator"].(map[string]interface{})["repository"] = imageRegistry + "/" + imageNamespace + "/retina-operator"
94+
chart.Values["operator"].(map[string]interface{})["enabled"] = true
9495

9596
getclient := action.NewGet(actionConfig)
9697
release, err := getclient.Run(i.ReleaseName)

test/e2e/framework/scaletest/add-shared-labels.go

+43-18
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"encoding/json"
66
"fmt"
7+
"log"
78
"time"
89

910
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -50,32 +51,21 @@ func (a *AddSharedLabelsToAllPods) Run() error {
5051
return fmt.Errorf("error creating Kubernetes client: %w", err)
5152
}
5253

53-
ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
54+
ctx, cancel := contextToLabelAllPods()
5455
defer cancel()
5556

5657
resources, err := clientset.CoreV1().Pods(a.Namespace).List(ctx, metav1.ListOptions{})
5758

58-
patch := []patchStringValue{}
59-
60-
for i := 0; i < a.NumSharedLabelsPerPod; i++ {
61-
patch = append(patch, patchStringValue{
62-
Op: "add",
63-
Path: "/metadata/labels/shared-lab-" + fmt.Sprintf("%05d", i),
64-
Value: "val",
65-
})
66-
}
67-
68-
patchBytes, err := json.Marshal(patch)
59+
patchBytes, err := getSharedLabelsPatch(a.NumSharedLabelsPerPod)
6960
if err != nil {
70-
return fmt.Errorf("error marshalling patch: %w", err)
61+
return fmt.Errorf("error getting label patch: %w", err)
7162
}
7263

7364
for _, resource := range resources.Items {
74-
clientset.CoreV1().Pods(a.Namespace).Patch(ctx, resource.Name,
75-
types.JSONPatchType,
76-
patchBytes,
77-
metav1.PatchOptions{},
78-
)
65+
err = patchLabel(ctx, clientset, a.Namespace, resource.Name, patchBytes)
66+
if err != nil {
67+
log.Printf("Error adding shared labels to pod %s: %s\n", resource.Name, err)
68+
}
7969
}
8070

8171
return nil
@@ -85,3 +75,38 @@ func (a *AddSharedLabelsToAllPods) Run() error {
8575
func (a *AddSharedLabelsToAllPods) Stop() error {
8676
return nil
8777
}
78+
79+
func patchLabel(ctx context.Context, clientset *kubernetes.Clientset, namespace, podName string, patchBytes []byte) error {
80+
log.Println("Labeling Pod", podName)
81+
_, err := clientset.CoreV1().Pods(namespace).Patch(ctx, podName,
82+
types.JSONPatchType,
83+
patchBytes,
84+
metav1.PatchOptions{},
85+
)
86+
if err != nil {
87+
return fmt.Errorf("failed to patch pod: %w", err)
88+
}
89+
90+
return nil
91+
}
92+
93+
func getSharedLabelsPatch(numLabels int) ([]byte, error) {
94+
patch := []patchStringValue{}
95+
for i := 0; i < numLabels; i++ {
96+
patch = append(patch, patchStringValue{
97+
Op: "add",
98+
Path: "/metadata/labels/shared-lab-" + fmt.Sprintf("%05d", i),
99+
Value: "val",
100+
})
101+
}
102+
b, err := json.Marshal(patch)
103+
if err != nil {
104+
return nil, fmt.Errorf("error marshalling patch: %w", err)
105+
}
106+
107+
return b, nil
108+
}
109+
110+
func contextToLabelAllPods() (context.Context, context.CancelFunc) {
111+
return context.WithTimeout(context.Background(), 120*time.Minute)
112+
}

test/e2e/framework/scaletest/add-unique-labels.go

+6-11
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,10 @@
11
package scaletest
22

33
import (
4-
"context"
54
"encoding/json"
65
"fmt"
7-
"time"
86

97
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10-
"k8s.io/apimachinery/pkg/types"
118
"k8s.io/client-go/kubernetes"
129
"k8s.io/client-go/tools/clientcmd"
1310
)
@@ -44,7 +41,7 @@ func (a *AddUniqueLabelsToAllPods) Run() error {
4441
return fmt.Errorf("error creating Kubernetes client: %w", err)
4542
}
4643

47-
ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
44+
ctx, cancel := contextToLabelAllPods()
4845
defer cancel()
4946

5047
resources, err := clientset.CoreV1().Pods(a.Namespace).List(ctx, metav1.ListOptions{})
@@ -53,7 +50,6 @@ func (a *AddUniqueLabelsToAllPods) Run() error {
5350

5451
for _, resource := range resources.Items {
5552
patch := []patchStringValue{}
56-
5753
for i := 0; i < a.NumUniqueLabelsPerPod; i++ {
5854
patch = append(patch, patchStringValue{
5955
Op: "add",
@@ -65,14 +61,13 @@ func (a *AddUniqueLabelsToAllPods) Run() error {
6561

6662
patchBytes, err := json.Marshal(patch)
6763
if err != nil {
68-
return fmt.Errorf("error marshalling patch: %w", err)
64+
return fmt.Errorf("failed to marshal patch: %w", err)
6965
}
7066

71-
clientset.CoreV1().Pods(a.Namespace).Patch(ctx, resource.Name,
72-
types.JSONPatchType,
73-
patchBytes,
74-
metav1.PatchOptions{},
75-
)
67+
err = patchLabel(ctx, clientset, a.Namespace, resource.Name, patchBytes)
68+
if err != nil {
69+
return fmt.Errorf("error adding unique label to pod: %w", err)
70+
}
7671
}
7772

7873
return nil

test/e2e/framework/scaletest/create-resources.go

+22-11
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ import (
77
"time"
88

99
e2ekubernetes "github.com/microsoft/retina/test/e2e/framework/kubernetes"
10+
"github.com/microsoft/retina/test/retry"
1011
"k8s.io/apimachinery/pkg/runtime"
1112
"k8s.io/client-go/kubernetes"
1213
"k8s.io/client-go/tools/clientcmd"
@@ -48,11 +49,18 @@ func (c *CreateResources) Run() error {
4849
return fmt.Errorf("error creating Kubernetes client: %w", err)
4950
}
5051

51-
ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
52+
ctx, cancel := context.WithTimeout(context.Background(), 1200*time.Second)
5253
defer cancel()
5354

55+
retrier := retry.Retrier{Attempts: defaultRetryAttempts, Delay: defaultRetryDelay}
56+
5457
for _, resource := range resources {
55-
e2ekubernetes.CreateResource(ctx, resource, clientset)
58+
err := retrier.Do(ctx, func() error {
59+
return e2ekubernetes.CreateResource(ctx, resource, clientset)
60+
})
61+
if err != nil {
62+
return fmt.Errorf("error creating resource: %w", err)
63+
}
5664
}
5765

5866
return nil
@@ -71,12 +79,6 @@ func (c *CreateResources) getResources() []runtime.Object {
7179
// kwokDeployments := c.generateDeployments(c.NumKwokDeployments, c.NumKwokReplicas, "kwok")
7280
// objs = append(objs, kwokDeployments...)
7381

74-
realDeployments := c.generateDeployments()
75-
objs = append(objs, realDeployments...)
76-
77-
services := c.generateServices("real")
78-
objs = append(objs, services...)
79-
8082
kapinger := e2ekubernetes.CreateKapingerDeployment{
8183
KapingerNamespace: c.Namespace,
8284
KubeConfigFilePath: c.KubeConfigFilePath,
@@ -88,6 +90,13 @@ func (c *CreateResources) getResources() []runtime.Object {
8890
kapingerSA := kapinger.GetKapingerServiceAccount()
8991

9092
objs = append(objs, kapingerClusterRole, kapingerClusterRoleBinding, kapingerSA)
93+
94+
realDeployments := c.generateDeployments()
95+
objs = append(objs, realDeployments...)
96+
97+
services := c.generateServices()
98+
objs = append(objs, services...)
99+
91100
// c.generateKwokNodes()
92101
log.Println("Finished generating YAMLs")
93102
return objs
@@ -118,6 +127,8 @@ func (c *CreateResources) generateDeployments() []runtime.Object {
118127
labelPrefix := fmt.Sprintf("%s-dep-lab", name)
119128

120129
deployment.Name = name
130+
deployment.Labels["name"] = name
131+
deployment.Spec.Template.Labels["name"] = name
121132

122133
r := int32(c.NumRealReplicas)
123134
deployment.Spec.Replicas = &r
@@ -135,7 +146,7 @@ func (c *CreateResources) generateDeployments() []runtime.Object {
135146
return objs
136147
}
137148

138-
func (c *CreateResources) generateServices(svcKind string) []runtime.Object {
149+
func (c *CreateResources) generateServices() []runtime.Object {
139150
objs := []runtime.Object{}
140151

141152
kapingerSvc := e2ekubernetes.CreateKapingerDeployment{
@@ -146,10 +157,10 @@ func (c *CreateResources) generateServices(svcKind string) []runtime.Object {
146157
for i := 0; i < c.NumRealServices; i++ {
147158
template := kapingerSvc.GetKapingerService()
148159

149-
name := fmt.Sprintf("%s-svc-%05d", svcKind, i)
160+
name := fmt.Sprintf("%s-svc-%05d", c.RealPodType, i)
150161
template.Name = name
151162

152-
template.Spec.Selector["name"] = fmt.Sprintf("%s-%s-dep-%05d", svcKind, c.RealPodType, i)
163+
template.Spec.Selector["name"] = fmt.Sprintf("%s-dep-%05d", c.RealPodType, i)
153164

154165
objs = append(objs, template)
155166
}

test/e2e/framework/scaletest/delete-and-re-add-labels.go

+3-1
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ func (d *DeleteAndReAddLabels) Run() error {
4848
return fmt.Errorf("error creating Kubernetes client: %w", err)
4949
}
5050

51-
ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
51+
ctx, cancel := contextToLabelAllPods()
5252
defer cancel()
5353

5454
labelsToDelete := `"shared-lab-00000": null, "shared-lab-00001": null, "shared-lab-00002": null`
@@ -91,6 +91,7 @@ func (d *DeleteAndReAddLabels) Run() error {
9191
func (d *DeleteAndReAddLabels) addLabels(ctx context.Context, clientset *kubernetes.Clientset, pods *corev1.PodList, patch string) error {
9292

9393
for _, pod := range pods.Items {
94+
log.Println("Labeling Pod", pod.Name)
9495
_, err := clientset.CoreV1().Pods(d.Namespace).Patch(ctx, pod.Name, types.StrategicMergePatchType, []byte(patch), metav1.PatchOptions{})
9596
if err != nil {
9697
return fmt.Errorf("error patching pod: %w", err)
@@ -103,6 +104,7 @@ func (d *DeleteAndReAddLabels) addLabels(ctx context.Context, clientset *kuberne
103104
func (d *DeleteAndReAddLabels) deleteLabels(ctx context.Context, clientset *kubernetes.Clientset, pods *corev1.PodList, patch string) error {
104105

105106
for _, pod := range pods.Items {
107+
log.Println("Deleting label from Pod", pod.Name)
106108
_, err := clientset.CoreV1().Pods(d.Namespace).Patch(ctx, pod.Name, types.StrategicMergePatchType, []byte(patch), metav1.PatchOptions{})
107109
if err != nil {
108110
return fmt.Errorf("error patching pod: %w", err)

0 commit comments

Comments (0)