Skip to content

Commit 3b7787b

Browse files
authored
test: Windows HNS e2e and no crashes step (#789)
# Description Please provide a brief description of the changes made in this pull request. ## Related Issue If this pull request is related to any issue, please mention it here. Additionally, make sure that the issue is assigned to you before submitting this pull request. ## Checklist - [ ] I have read the [contributing documentation](https://retina.sh/docs/contributing). - [ ] I signed and signed-off the commits (`git commit -S -s ...`). See [this documentation](https://docs.github.com/en/authentication/managing-commit-signature-verification/about-commit-signature-verification) on signing commits. - [ ] I have correctly attributed the author(s) of the code. - [ ] I have tested the changes locally. - [ ] I have followed the project's style guidelines. - [ ] I have updated the documentation, if necessary. - [ ] I have added tests, if applicable. ## Screenshots (if applicable) or Testing Completed Please add any relevant screenshots or GIFs to showcase the changes made. ## Additional Notes Add any additional notes or context about the pull request here. --- Please refer to the [CONTRIBUTING.md](../CONTRIBUTING.md) file for more information on how to contribute to this project.
1 parent 1f19ed0 commit 3b7787b

File tree

15 files changed

+400
-63
lines changed

15 files changed

+400
-63
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
*.so
99
*.dylib
1010

11+
# Avoid checking in keys
12+
*.pem
13+
1114
# Test binary, built with `go test -c`
1215
*.test
1316

deploy/legacy/manifests/controller/helm/retina/templates/daemonset.yaml

+9
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,15 @@ spec:
214214
fieldRef:
215215
apiVersion: v1
216216
fieldPath: status.hostIP
217+
livenessProbe:
218+
httpGet:
219+
path: /metrics
220+
port: {{ .Values.retinaPort }}
221+
initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds | default "30" }}
222+
periodSeconds: {{ .Values.livenessProbe.periodSeconds | default "30" }}
223+
timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds | default "1" }}
224+
failureThreshold: {{ .Values.livenessProbe.failureThreshold | default "3" }}
225+
successThreshold: {{ .Values.livenessProbe.successThreshold | default "1" }}
217226
securityContext:
218227
capabilities:
219228
add:

test/e2e/README.md

+16
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,19 @@ For reference, see the `test-all` recipe in the root [Makefile](../../Makefile).
1717

1818
For a sample test, please check out:
1919
[the Retina E2E.](./scenarios/retina/drop/scenario.go)
20+
21+
## Sample VSCode `settings.json` for running with an existing cluster
22+
23+
```json
24+
"go.testFlags": [
25+
"-v",
26+
"-timeout=40m",
27+
"-tags=e2e",
28+
"-args",
29+
"-create-infra=false",
30+
"-delete-infra=false",
31+
"-image-namespace=registrynamespace",
32+
"-image-registry=yourregistry",
33+
"-image-tag=yourtesttag",
34+
],
35+
```

test/e2e/framework/kubernetes/create-kapinger-deployment.go

+3
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,9 @@ func (c *CreateKapingerDeployment) GetKapingerDeployment() *appsv1.Deployment {
107107
},
108108

109109
Spec: v1.PodSpec{
110+
NodeSelector: map[string]string{
111+
"kubernetes.io/os": "linux",
112+
},
110113
Affinity: &v1.Affinity{
111114
PodAntiAffinity: &v1.PodAntiAffinity{
112115
// prefer an even spread across the cluster to avoid scheduling on the same node

test/e2e/framework/kubernetes/exec-pod.go

+22-18
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package kubernetes
22

33
import (
4+
"bytes"
45
"context"
56
"fmt"
67
"log"
@@ -9,6 +10,7 @@ import (
910

1011
v1 "k8s.io/api/core/v1"
1112
"k8s.io/client-go/kubernetes"
13+
"k8s.io/client-go/rest"
1214
"k8s.io/client-go/tools/clientcmd"
1315
"k8s.io/client-go/tools/remotecommand"
1416
"k8s.io/kubectl/pkg/scheme"
@@ -27,7 +29,17 @@ func (e *ExecInPod) Run() error {
2729
ctx, cancel := context.WithCancel(context.Background())
2830
defer cancel()
2931

30-
err := ExecPod(ctx, e.KubeConfigFilePath, e.PodNamespace, e.PodName, e.Command)
32+
config, err := clientcmd.BuildConfigFromFlags("", e.KubeConfigFilePath)
33+
if err != nil {
34+
return fmt.Errorf("error building kubeconfig: %w", err)
35+
}
36+
37+
clientset, err := kubernetes.NewForConfig(config)
38+
if err != nil {
39+
return fmt.Errorf("error creating Kubernetes client: %w", err)
40+
}
41+
42+
_, err = ExecPod(ctx, clientset, config, e.PodNamespace, e.PodName, e.Command)
3143
if err != nil {
3244
return fmt.Errorf("error executing command [%s]: %w", e.Command, err)
3345
}
@@ -43,17 +55,8 @@ func (e *ExecInPod) Stop() error {
4355
return nil
4456
}
4557

46-
func ExecPod(ctx context.Context, kubeConfigFilePath, namespace, podName, command string) error {
47-
config, err := clientcmd.BuildConfigFromFlags("", kubeConfigFilePath)
48-
if err != nil {
49-
return fmt.Errorf("error building kubeconfig: %w", err)
50-
}
51-
52-
clientset, err := kubernetes.NewForConfig(config)
53-
if err != nil {
54-
return fmt.Errorf("error creating Kubernetes client: %w", err)
55-
}
56-
58+
func ExecPod(ctx context.Context, clientset *kubernetes.Clientset, config *rest.Config, namespace, podName, command string) ([]byte, error) {
59+
log.Printf("executing command \"%s\" on pod \"%s\" in namespace \"%s\"...", command, podName, namespace)
5760
req := clientset.CoreV1().RESTClient().Post().Resource("pods").Name(podName).
5861
Namespace(namespace).SubResource(ExecSubResources)
5962
option := &v1.PodExecOptions{
@@ -69,20 +72,21 @@ func ExecPod(ctx context.Context, kubeConfigFilePath, namespace, podName, comman
6972
scheme.ParameterCodec,
7073
)
7174

75+
var buf bytes.Buffer
7276
exec, err := remotecommand.NewSPDYExecutor(config, "POST", req.URL())
7377
if err != nil {
74-
return fmt.Errorf("error creating executor: %w", err)
78+
return buf.Bytes(), fmt.Errorf("error creating executor: %w", err)
7579
}
7680

77-
log.Printf("executing command \"%s\" on pod \"%s\" in namespace \"%s\"...", command, podName, namespace)
7881
err = exec.StreamWithContext(ctx, remotecommand.StreamOptions{
7982
Stdin: os.Stdin,
80-
Stdout: os.Stdout,
81-
Stderr: os.Stderr,
83+
Stdout: &buf,
84+
Stderr: &buf,
8285
})
8386
if err != nil {
84-
return fmt.Errorf("error executing command: %w", err)
87+
return buf.Bytes(), fmt.Errorf("error executing command: %w", err)
8588
}
8689

87-
return nil
90+
res := buf.Bytes()
91+
return res, nil
8892
}

test/e2e/framework/kubernetes/get-logs.go

+17-3
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,16 @@ import (
1212
"k8s.io/client-go/tools/clientcmd"
1313
)
1414

15-
func PrintPodLogs(kubeconfigpath, namespace, labelSelector string) {
15+
type GetPodLogs struct {
16+
KubeConfigFilePath string
17+
Namespace string
18+
LabelSelector string
19+
}
20+
21+
func (p *GetPodLogs) Run() error {
22+
fmt.Printf("printing pod logs for namespace: %s, labelselector: %s\n", p.Namespace, p.LabelSelector)
1623
// Load the kubeconfig file to get the configuration to access the cluster
17-
config, err := clientcmd.BuildConfigFromFlags("", kubeconfigpath)
24+
config, err := clientcmd.BuildConfigFromFlags("", p.KubeConfigFilePath)
1825
if err != nil {
1926
log.Printf("error building kubeconfig: %s\n", err)
2027
}
@@ -25,8 +32,14 @@ func PrintPodLogs(kubeconfigpath, namespace, labelSelector string) {
2532
log.Printf("error creating clientset: %s\n", err)
2633
}
2734

35+
PrintPodLogs(context.Background(), clientset, p.Namespace, p.LabelSelector)
36+
37+
return nil
38+
}
39+
40+
func PrintPodLogs(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelSelector string) {
2841
// List all the pods in the namespace
29-
pods, err := clientset.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{
42+
pods, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
3043
LabelSelector: labelSelector,
3144
})
3245
if err != nil {
@@ -55,5 +68,6 @@ func PrintPodLogs(kubeconfigpath, namespace, labelSelector string) {
5568

5669
// Print the logs
5770
log.Println(string(buf))
71+
fmt.Printf("#######################################################\n")
5872
}
5973
}

test/e2e/framework/kubernetes/install-retina-helm.go

+24-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package kubernetes
22

33
import (
4+
"context"
45
"fmt"
56
"log"
67
"os"
@@ -11,10 +12,12 @@ import (
1112
"helm.sh/helm/v3/pkg/action"
1213
"helm.sh/helm/v3/pkg/chart/loader"
1314
"helm.sh/helm/v3/pkg/cli"
15+
"k8s.io/client-go/kubernetes"
16+
"k8s.io/client-go/tools/clientcmd"
1417
)
1518

1619
const (
17-
createTimeout = 240 * time.Second // windpws is slow
20+
createTimeout = 20 * time.Minute // windows is slow
1821
deleteTimeout = 60 * time.Second
1922
)
2023

@@ -32,6 +35,8 @@ type InstallHelmChart struct {
3235
}
3336

3437
func (i *InstallHelmChart) Run() error {
38+
ctx, cancel := context.WithTimeout(context.Background(), createTimeout)
39+
defer cancel()
3540
settings := cli.New()
3641
settings.KubeConfig = i.KubeConfigFilePath
3742
actionConfig := new(action.Configuration)
@@ -97,7 +102,7 @@ func (i *InstallHelmChart) Run() error {
97102
client.WaitForJobs = true
98103

99104
// install the chart here
100-
rel, err := client.Run(chart, chart.Values)
105+
rel, err := client.RunWithContext(ctx, chart, chart.Values)
101106
if err != nil {
102107
return fmt.Errorf("failed to install chart: %w", err)
103108
}
@@ -106,6 +111,23 @@ func (i *InstallHelmChart) Run() error {
106111
// this will confirm the values set during installation
107112
log.Printf("chart values: %v\n", rel.Config)
108113

114+
// ensure all pods are running, since helm doesn't care about windows
115+
config, err := clientcmd.BuildConfigFromFlags("", i.KubeConfigFilePath)
116+
if err != nil {
117+
return fmt.Errorf("error building kubeconfig: %w", err)
118+
}
119+
120+
clientset, err := kubernetes.NewForConfig(config)
121+
if err != nil {
122+
return fmt.Errorf("error creating Kubernetes client: %w", err)
123+
}
124+
125+
labelSelector := "k8s-app=retina"
126+
err = WaitForPodReady(ctx, clientset, "kube-system", labelSelector)
127+
if err != nil {
128+
return fmt.Errorf("error waiting for retina pods to be ready: %w", err)
129+
}
130+
109131
return nil
110132
}
111133

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package kubernetes
2+
3+
import (
4+
"context"
5+
"fmt"
6+
7+
"k8s.io/client-go/kubernetes"
8+
"k8s.io/client-go/tools/clientcmd"
9+
)
10+
11+
var ErrPodCrashed = fmt.Errorf("pod has crashes")
12+
13+
type EnsureStableCluster struct {
14+
LabelSelector string
15+
PodNamespace string
16+
KubeConfigFilePath string
17+
}
18+
19+
func (n *EnsureStableCluster) Run() error {
20+
config, err := clientcmd.BuildConfigFromFlags("", n.KubeConfigFilePath)
21+
if err != nil {
22+
return fmt.Errorf("error building kubeconfig: %w", err)
23+
}
24+
25+
clientset, err := kubernetes.NewForConfig(config)
26+
if err != nil {
27+
return fmt.Errorf("error creating Kubernetes client: %w", err)
28+
}
29+
30+
err = WaitForPodReady(context.TODO(), clientset, n.PodNamespace, n.LabelSelector)
31+
if err != nil {
32+
return fmt.Errorf("error waiting for retina pods to be ready: %w", err)
33+
}
34+
return nil
35+
}
36+
37+
func (n *EnsureStableCluster) Prevalidate() error {
38+
return nil
39+
}
40+
41+
func (n *EnsureStableCluster) Stop() error {
42+
return nil
43+
}

test/e2e/framework/kubernetes/port-forward.go

+14-4
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"time"
1212

1313
retry "github.com/microsoft/retina/test/retry"
14+
v1 "k8s.io/api/core/v1"
1415
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1516
"k8s.io/client-go/kubernetes"
1617
"k8s.io/client-go/tools/clientcmd"
@@ -120,14 +121,23 @@ func (p *PortForward) Run() error {
120121
}
121122

122123
func (p *PortForward) findPodsWithAffinity(ctx context.Context, clientset *kubernetes.Clientset) (string, error) {
123-
targetPods, errAffinity := clientset.CoreV1().Pods(p.Namespace).List(ctx, metav1.ListOptions{
124+
targetPodsAll, errAffinity := clientset.CoreV1().Pods(p.Namespace).List(ctx, metav1.ListOptions{
124125
LabelSelector: p.LabelSelector,
125126
FieldSelector: "status.phase=Running",
126127
})
127128
if errAffinity != nil {
128129
return "", fmt.Errorf("could not list pods in %q with label %q: %w", p.Namespace, p.LabelSelector, errAffinity)
129130
}
130131

132+
// omit windows pods because we can't port-forward to them
133+
targetPodsLinux := make([]v1.Pod, 0)
134+
for i := range targetPodsAll.Items {
135+
if targetPodsAll.Items[i].Spec.NodeSelector["kubernetes.io/os"] != "windows" {
136+
targetPodsLinux = append(targetPodsLinux, targetPodsAll.Items[i])
137+
}
138+
}
139+
140+
// get all pods with optional label affinity
131141
affinityPods, errAffinity := clientset.CoreV1().Pods(p.Namespace).List(ctx, metav1.ListOptions{
132142
LabelSelector: p.OptionalLabelAffinity,
133143
FieldSelector: "status.phase=Running",
@@ -143,10 +153,10 @@ func (p *PortForward) findPodsWithAffinity(ctx context.Context, clientset *kuber
143153
}
144154

145155
// if a pod is found on the same node as an affinity pod, use it
146-
for i := range targetPods.Items {
147-
if affinityNodes[targetPods.Items[i].Spec.NodeName] {
156+
for i := range targetPodsLinux {
157+
if affinityNodes[targetPodsLinux[i].Spec.NodeName] {
148158
// found a pod with the specified label, on a node with the optional label affinity
149-
return targetPods.Items[i].Name, nil
159+
return targetPodsLinux[i].Name, nil
150160
}
151161
}
152162

test/e2e/framework/kubernetes/wait-pod-ready.go

+18-1
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,18 @@ import (
1515
const (
1616
RetryTimeoutPodsReady = 5 * time.Minute
1717
RetryIntervalPodsReady = 5 * time.Second
18+
19+
printInterval = 5 // print to stdout every 5 iterations
1820
)
1921

2022
func WaitForPodReady(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelSelector string) error {
2123
podReadyMap := make(map[string]bool)
2224

25+
printIterator := 0
2326
conditionFunc := wait.ConditionWithContextFunc(func(context.Context) (bool, error) {
27+
defer func() {
28+
printIterator++
29+
}()
2430
var podList *corev1.PodList
2531
podList, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector})
2632
if err != nil {
@@ -40,11 +46,21 @@ func WaitForPodReady(ctx context.Context, clientset *kubernetes.Clientset, names
4046
return false, fmt.Errorf("error getting Pod: %w", err)
4147
}
4248

49+
for istatus := range pod.Status.ContainerStatuses {
50+
status := &pod.Status.ContainerStatuses[istatus]
51+
if status.RestartCount > 0 {
52+
return false, fmt.Errorf("pod %s has %d restarts: status: %+v: %w", pod.Name, status.RestartCount, status, ErrPodCrashed)
53+
}
54+
}
55+
4356
// Check the Pod phase
4457
if pod.Status.Phase != corev1.PodRunning {
45-
log.Printf("pod \"%s\" is not in Running state yet. Waiting...\n", pod.Name)
58+
if printIterator%printInterval == 0 {
59+
log.Printf("pod \"%s\" is not in Running state yet. Waiting...\n", pod.Name)
60+
}
4661
return false, nil
4762
}
63+
4864
if !podReadyMap[pod.Name] {
4965
log.Printf("pod \"%s\" is in Running state\n", pod.Name)
5066
podReadyMap[pod.Name] = true
@@ -56,6 +72,7 @@ func WaitForPodReady(ctx context.Context, clientset *kubernetes.Clientset, names
5672

5773
err := wait.PollUntilContextCancel(ctx, RetryIntervalPodsReady, true, conditionFunc)
5874
if err != nil {
75+
PrintPodLogs(ctx, clientset, namespace, labelSelector)
5976
return fmt.Errorf("error waiting for pods in namespace \"%s\" with label \"%s\" to be in Running state: %w", namespace, labelSelector, err)
6077
}
6178
return nil

0 commit comments

Comments
 (0)