Skip to content

Commit 1573846

Browse files
authored
Add waits, check, ctx w/ timeout to stop race cond (#1795)
There’s potential for flakiness due to timing issues. For example, the test assumes that: • The ClusterRoleBinding and token are immediately effective. • The curl pod becomes ready promptly. • The metrics endpoint is available as soon as the pod is ready. All of these assumptions can be covered by retry logic in the validate method. Signed-off-by: Brett Tofel <[email protected]>
1 parent 6cf1853 commit 1573846

File tree

1 file changed

+38
-3
lines changed

1 file changed

+38
-3
lines changed

test/e2e/metrics_test.go

+38-3
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,13 @@ package e2e
1515

1616
import (
1717
"bytes"
18+
"context"
19+
"fmt"
1820
"io"
1921
"os/exec"
2022
"strings"
2123
"testing"
24+
"time"
2225

2326
"github.com/stretchr/testify/require"
2427

@@ -156,11 +159,43 @@ func (c *MetricsTestConfig) validate(token string) {
156159
require.Contains(c.t, string(output), "200 OK", "Metrics endpoint did not return 200 OK")
157160
}
158161

159-
// cleanup created resources
162+
// cleanup removes the created resources. Uses a context with timeout to prevent hangs.
160163
func (c *MetricsTestConfig) cleanup() {
161164
c.t.Log("Cleaning up resources")
162-
_ = exec.Command(c.client, "delete", "clusterrolebinding", c.clusterBinding, "--ignore-not-found=true").Run()
163-
_ = exec.Command(c.client, "delete", "pod", c.curlPodName, "-n", c.namespace, "--ignore-not-found=true").Run()
165+
_ = exec.Command(c.client, "delete", "clusterrolebinding", c.clusterBinding, "--ignore-not-found=true", "--force").Run()
166+
_ = exec.Command(c.client, "delete", "pod", c.curlPodName, "-n", c.namespace, "--ignore-not-found=true", "--force").Run()
167+
168+
// Create a context with a 60-second timeout.
169+
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
170+
defer cancel()
171+
172+
// Wait for the ClusterRoleBinding to be deleted.
173+
if err := waitForDeletion(ctx, c.client, "clusterrolebinding", c.clusterBinding); err != nil {
174+
c.t.Logf("Error waiting for clusterrolebinding deletion: %v", err)
175+
} else {
176+
c.t.Log("ClusterRoleBinding deleted")
177+
}
178+
179+
// Wait for the Pod to be deleted.
180+
if err := waitForDeletion(ctx, c.client, "pod", c.curlPodName, "-n", c.namespace); err != nil {
181+
c.t.Logf("Error waiting for pod deletion: %v", err)
182+
} else {
183+
c.t.Log("Pod deleted")
184+
}
185+
}
186+
187+
// waitForDeletion runs "<client> wait --for=delete <resourceType> <resourceName>"
// and blocks until the command returns.
//
// Parameters:
//   - ctx bounds the whole invocation; the child process is killed when it ends.
//   - client is the CLI binary to execute (for example kubectl).
//   - resourceType and resourceName identify the object to wait on.
//   - extraArgs are appended verbatim after the resource (for example "-n", ns).
//
// The --timeout passed to the client is 60 seconds, reduced to the time remaining
// on ctx when that is shorter, so the client normally gives up on its own with a
// readable message instead of being killed at the context deadline.
//
// It returns nil when the command succeeds. Otherwise it returns an error that
// wraps the underlying failure (inspectable with errors.Is / errors.As) and
// includes the command's combined output.
func waitForDeletion(ctx context.Context, client, resourceType, resourceName string, extraArgs ...string) error {
	// Do not spawn a process for a context that is already finished.
	if err := ctx.Err(); err != nil {
		return fmt.Errorf("error waiting for deletion of %s %s: %w", resourceType, resourceName, err)
	}

	// Cap the client-side timeout at the remaining context budget, never above 60s.
	timeout := 60 * time.Second
	if deadline, ok := ctx.Deadline(); ok {
		if remaining := time.Until(deadline).Truncate(time.Second); remaining < timeout {
			timeout = remaining
		}
	}
	// Keep the value strictly positive: a zero or negative timeout would ask the
	// client for a different waiting mode rather than a short wait.
	if timeout < time.Second {
		timeout = time.Second
	}

	args := make([]string, 0, len(extraArgs)+5)
	args = append(args, "wait", "--for=delete", resourceType, resourceName)
	args = append(args, extraArgs...)
	args = append(args, fmt.Sprintf("--timeout=%ds", int64(timeout/time.Second)))

	output, err := exec.CommandContext(ctx, client, args...).CombinedOutput()
	if err != nil {
		return fmt.Errorf("error waiting for deletion of %s %s: %w, output: %s", resourceType, resourceName, err, output)
	}
	return nil
}
165200

166201
// getComponentNamespace returns the namespace where operator-controller or catalogd is running

0 commit comments

Comments
 (0)