diff --git a/.github/workflows/scale-test.yaml b/.github/workflows/scale-test.yaml
index cec9fd7dae..36a70fe84d 100644
--- a/.github/workflows/scale-test.yaml
+++ b/.github/workflows/scale-test.yaml
@@ -8,13 +8,15 @@ on:
         required: true
         type: string
       cluster_name:
-        description: "AKS Cluster Name"
+        description: "AKS Cluster Name (nodes to receive traffic pods should be labeled with scale-test=true)"
         required: true
         type: string
-      location:
-        description: "Azure Location"
+      image_namespace:
+        description: "Image Namespace (if not set, default namespace will be used)"
+        type: string
+      image_tag:
+        description: "Image Tag (if not set, default for this commit will be used)"
         type: string
-        default: ${{ vars.AZURE_LOCATION }}
       num_deployments:
         description: "Number of Traffic Deployments"
         default: 1000
@@ -27,20 +29,8 @@ on:
         description: "Number of Network Policies"
         default: 1000
         type: number
-      num_unique_labels_per_pod:
-        description: "Number of Unique Labels per Pod"
-        default: 2
-        type: number
-      num_unique_labels_per_deployment:
-        description: "Number of Unique Labels per Deployment"
-        default: 2
-        type: number
-      num_shared_labels_per_pod:
-        description: "Number of Shared Labels per Pod"
-        default: 3
-        type: number
-      delete_labels:
-        description: "Delete Labels"
+      cleanup:
+        description: "Clean up environment after test"
         default: true
         type: boolean
 
@@ -54,10 +44,6 @@ on:
         description: "AKS Cluster Name"
         required: true
         type: string
-      location:
-        description: "Azure Location"
-        type: string
-        default: ${{ vars.AZURE_LOCATION }}
       num_deployments:
         description: "Number of Traffic Deployments"
         default: 1000
@@ -70,20 +56,8 @@ on:
         description: "Number of Network Policies"
         default: 1000
         type: number
-      num_unique_labels_per_pod:
-        description: "Number of Unique Labels per Pod"
-        default: 2
-        type: number
-      num_unique_labels_per_deployment:
-        description: "Number of Unique Labels per Deployment"
-        default: 2
-        type: number
-      num_shared_labels_per_pod:
-        description: "Number of Shared Labels per Pod"
-        default: 3
-        type: number
-      delete_labels:
-        description: "Delete Labels"
+      cleanup:
+        description: "Clean up environment after test"
         default: true
         type: boolean
 
@@ -116,17 +90,18 @@ jobs:
       - name: Run Scale Test
         env:
           AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION }}
-          AZURE_LOCATION: ${{ input.location }}
           AZURE_RESOURCE_GROUP: ${{ inputs.resource_group }}
           CLUSTER_NAME: ${{ inputs.cluster_name }}
           NUM_DEPLOYMENTS: ${{ inputs.num_deployments }}
           NUM_REPLICAS: ${{ inputs.num_replicas }}
           NUM_NETPOLS: ${{ inputs.num_netpol }}
-          NUM_UNIQUE_LABELS_PER_POD: ${{ inputs.num_unique_labels_per_pod }}
-          NUM_SHARED_LABELS_PER_POD: ${{ inputs.num_shared_labels_per_pod }}
-          NUM_UNIQUE_LABELS_PER_DEPLOYMENT: ${{ inputs.num_unique_labels_per_deployment }}
-          DELETE_LABELS: ${{ inputs.delete_labels }}
+          CLEANUP: ${{ inputs.cleanup }}
+          # The registry is fixed; the image_namespace input only overrides the namespace.
+          IMAGE_REGISTRY: ${{ vars.ACR_NAME }}
+          IMAGE_NAMESPACE: ${{ inputs.image_namespace == '' && github.repository || inputs.image_namespace }}
+          TAG: ${{ inputs.image_tag }}
+          AZURE_APP_INSIGHTS_KEY: ${{ secrets.AZURE_APP_INSIGHTS_KEY }}
         shell: bash
         run: |
           set -euo pipefail
-          go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -image-tag=$(make version) -image-registry=${{vars.ACR_NAME}} -image-namespace=${{github.repository}}
+          go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -image-tag=$( [[ $TAG == "" ]] && make version || echo $TAG ) -create-infra=false -delete-infra=false
diff --git a/go.mod b/go.mod
index 9cbd05643e..8bff51c014 100644
--- a/go.mod
+++ b/go.mod
@@ -333,6 +333,7 @@ require (
 	k8s.io/apiextensions-apiserver v0.30.3
 	k8s.io/cli-runtime v0.30.3
 	k8s.io/kubectl v0.30.3
+	k8s.io/metrics v0.30.3
 	k8s.io/perf-tests/network/benchmarks/netperf v0.0.0-00010101000000-000000000000
 	sigs.k8s.io/controller-runtime v0.18.5
 )
diff --git a/go.sum b/go.sum
index 31a16c4184..5f553a5dd7 100644
--- a/go.sum
+++ b/go.sum
@@ -1201,6 +1201,8 @@ k8s.io/kube-openapi v0.0.0-20240903163716-9e1beecbcb38 h1:1dWzkmJrrprYvjGwh9kEUx
 k8s.io/kube-openapi v0.0.0-20240903163716-9e1beecbcb38/go.mod h1:coRQXBK9NxO98XUv3ZD6AK3xzHCxV6+b7lrquKwaKzA=
 k8s.io/kubectl v0.30.3 h1:YIBBvMdTW0xcDpmrOBzcpUVsn+zOgjMYIu7kAq+yqiI=
 k8s.io/kubectl v0.30.3/go.mod h1:IcR0I9RN2+zzTRUa1BzZCm4oM0NLOawE6RzlDvd1Fpo=
+k8s.io/metrics v0.30.3 h1:gKCpte5zykrOmQhZ8qmsxyJslMdiLN+sqbBfIWNpbGM=
+k8s.io/metrics v0.30.3/go.mod h1:W06L2nXRhOwPkFYDJYWdEIS3u6JcJy3ebIPYbndRs6A=
 k8s.io/utils v0.0.0-20240921022957-49e7df575cb6 h1:MDF6h2H/h4tbzmtIKTuctcwZmY0tY9mD9fNT47QO6HI=
 k8s.io/utils v0.0.0-20240921022957-49e7df575cb6/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
 oras.land/oras-go v1.2.5 h1:XpYuAwAb0DfQsunIyMfeET92emK8km3W4yEzZvUbsTo=
diff --git a/test/e2e/common/common.go b/test/e2e/common/common.go
index f3e5e7333b..4bb1336b55 100644
--- a/test/e2e/common/common.go
+++ b/test/e2e/common/common.go
@@ -5,6 +5,7 @@
 package common
 
 import (
+	"flag"
 	"os"
 	"os/user"
 	"strconv"
@@ -17,12 +18,17 @@ import (
 const (
 	RetinaPort int = 10093
 	// netObsRGtag is used to tag resources created by this test suite
-	NetObsRGtag         = "-e2e-netobs-"
-	KubeSystemNamespace = "kube-system"
-	TestPodNamespace    = "kube-system-test"
+	NetObsRGtag            = "-e2e-netobs-"
+	KubeSystemNamespace    = "kube-system"
+	TestPodNamespace       = "kube-system-test"
+	AzureAppInsightsKeyEnv = "AZURE_APP_INSIGHTS_KEY"
 )
 
-var AzureLocations = []string{"eastus2", "northeurope", "uksouth", "centralindia", "westus2"}
+var (
+	AzureLocations = []string{"eastus2", "northeurope", "uksouth", "centralindia", "westus2"}
+	CreateInfra    = flag.Bool("create-infra", true, "create a Resource group, vNET and AKS cluster for testing")
+	DeleteInfra    = flag.Bool("delete-infra", true, "delete a Resource group, vNET and AKS cluster for testing")
+)
 
 func ClusterNameForE2ETest(t *testing.T) string {
 	clusterName := os.Getenv("CLUSTER_NAME")
diff --git a/test/e2e/framework/azure/get-fqdn.go b/test/e2e/framework/azure/get-fqdn.go
new file mode 100644
index 0000000000..f69b06ad38
--- /dev/null
+++ b/test/e2e/framework/azure/get-fqdn.go
@@ -0,0 +1,37 @@
+package azure
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
+	armcontainerservice "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4"
+)
+
+// GetFqdnFn returns the FQDN of the AKS managed cluster clusterName in
+// resourceGroupName, authenticating with the local Azure CLI credential.
+// It returns an error if the credential, the client or the Get request fails,
+// or if the response carries no FQDN.
+func GetFqdnFn(subscriptionId, resourceGroupName, clusterName string) (string, error) {
+	cred, err := azidentity.NewAzureCLICredential(nil)
+	if err != nil {
+		return "", fmt.Errorf("failed to obtain a credential: %w", err)
+	}
+	ctx := context.Background()
+	clientFactory, err := armcontainerservice.NewClientFactory(subscriptionId, cred, nil)
+	if err != nil {
+		return "", fmt.Errorf("failed to create client: %w", err)
+	}
+	res, err := clientFactory.NewManagedClustersClient().Get(ctx, resourceGroupName, clusterName, nil)
+	if err != nil {
+		return "", fmt.Errorf("failed to finish the get managed cluster client request: %w", err)
+	}
+
+	// Properties and Fqdn are both pointers; guard against a nil dereference
+	// when the response omits either of them.
+	if res.Properties == nil || res.Properties.Fqdn == nil {
+		return "", fmt.Errorf("managed cluster %s in resource group %s has no FQDN", clusterName, resourceGroupName)
+	}
+
+	return *res.Properties.Fqdn, nil
+}
diff --git a/test/e2e/framework/kubernetes/install-retina-helm.go b/test/e2e/framework/kubernetes/install-retina-helm.go
index c00270a8c9..7f1828f17c 100644
--- a/test/e2e/framework/kubernetes/install-retina-helm.go
+++ b/test/e2e/framework/kubernetes/install-retina-helm.go
@@ -19,7 +19,7 @@ import (
 const (
 	createTimeout = 20 * time.Minute
 	// windows is slow
-	deleteTimeout = 60 * time.Second
+	deleteTimeout = 5 * time.Minute
 )
 
 var (
diff --git a/test/e2e/framework/kubernetes/uninstall-helm.go b/test/e2e/framework/kubernetes/uninstall-helm.go
new file mode 100644
index 0000000000..539915b4ab
--- /dev/null
+++ b/test/e2e/framework/kubernetes/uninstall-helm.go
@@ -0,0 +1,55 @@
+package kubernetes
+
+import (
+	"fmt"
+	"log"
+	"os"
+
+	"helm.sh/helm/v3/pkg/action"
+	"helm.sh/helm/v3/pkg/cli"
+)
+
+// UninstallHelmChart is a step that uninstalls the Helm release ReleaseName
+// from Namespace, using the cluster described by KubeConfigFilePath.
+type UninstallHelmChart struct {
+	Namespace          string
+	ReleaseName        string
+	KubeConfigFilePath string
+}
+
+// Run uninstalls the release with Wait enabled and deleteTimeout as the
+// time limit.
+func (u *UninstallHelmChart) Run() error {
+	settings := cli.New()
+	settings.KubeConfig = u.KubeConfigFilePath
+	actionConfig := new(action.Configuration)
+
+	err := actionConfig.Init(settings.RESTClientGetter(), u.Namespace, os.Getenv("HELM_DRIVER"), log.Printf)
+	if err != nil {
+		return fmt.Errorf("failed to initialize helm action config: %w", err)
+	}
+
+	delclient := action.NewUninstall(actionConfig)
+	delclient.Wait = true
+	delclient.Timeout = deleteTimeout
+	_, err = delclient.Run(u.ReleaseName)
+	if err != nil {
+		return fmt.Errorf("failed to delete existing release %s: %w", u.ReleaseName, err)
+	}
+
+	return nil
+}
+
+// Prevalidate rejects a step without a release name, so the problem is
+// reported by this step's validation rather than by the Helm client.
+func (u *UninstallHelmChart) Prevalidate() error {
+	if u.ReleaseName == "" {
+		return fmt.Errorf("release name is required to uninstall a helm chart")
+	}
+	return nil
+}
+
+// Stop is a no-op: the step starts nothing in the background.
+func (u *UninstallHelmChart) Stop() error {
+	return nil
+}
diff --git a/test/e2e/framework/scaletest/create-resources.go b/test/e2e/framework/scaletest/create-resources.go
index f709ba3991..688ab57747 100644
--- a/test/e2e/framework/scaletest/create-resources.go
+++ b/test/e2e/framework/scaletest/create-resources.go
@@ -85,7 +85,9 @@ func (c *CreateResources) getResources() []runtime.Object {
 
 	kapingerClusterRoleBinding := kapinger.GetKapingerClusterRoleBinding()
 
-	objs = append(objs, kapingerClusterRole, kapingerClusterRoleBinding)
+	kapingerSA := kapinger.GetKapingerServiceAccount()
+
+	objs = append(objs, kapingerClusterRole, kapingerClusterRoleBinding, kapingerSA)
 	// c.generateKwokNodes()
 	log.Println("Finished generating YAMLs")
 	return objs
@@ -101,6 +103,9 @@ func (c *CreateResources) generateDeployments() []runtime.Object {
 	}
 
 	template := kapinger.GetKapingerDeployment()
+	if template.Labels == nil {
+		template.Labels = make(map[string]string)
+	}
 	template.Labels["is-real"] = "true"
 	template.Spec.Template.Labels["is-real"] = "true"
 	template.Spec.Template.Spec.NodeSelector["scale-test"] = "true"
diff --git a/test/e2e/framework/scaletest/get-publish-metrics.go b/test/e2e/framework/scaletest/get-publish-metrics.go
new file mode 100644
index 0000000000..3495addf33
--- /dev/null
+++ b/test/e2e/framework/scaletest/get-publish-metrics.go
@@ -0,0 +1,235 @@
+package scaletest
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log"
+	"os"
+	"sync"
+	"time"
+
+	"github.com/microsoft/retina/pkg/telemetry"
+	"github.com/microsoft/retina/test/e2e/common"
+	"github.com/pkg/errors"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/clientcmd"
+	metrics "k8s.io/metrics/pkg/client/clientset/versioned"
+)
+
+// GetAndPublishMetrics is a background step that periodically samples CPU,
+// memory and restart counts of the kube-system pods matching Labels, along
+// with the usage of the nodes they run on. Samples are sent to Application
+// Insights when a key is configured and appended to OutputFilePath when set.
+type GetAndPublishMetrics struct {
+	KubeConfigFilePath          string
+	AdditionalTelemetryProperty map[string]string
+	Labels                      map[string]string
+	OutputFilePath              string
+	stop                        chan struct{}
+	wg                          sync.WaitGroup
+	telemetryClient             *telemetry.TelemetryClient
+	appInsightsKey              string
+}
+
+// Run sets up telemetry when Prevalidate found a key, then starts a goroutine
+// that collects a sample every five minutes until Stop is called.
+func (g *GetAndPublishMetrics) Run() error {
+	if g.appInsightsKey != "" {
+		telemetry.InitAppInsights(g.appInsightsKey, g.AdditionalTelemetryProperty["retinaVersion"])
+
+		telemetryClient, err := telemetry.NewAppInsightsTelemetryClient("retina-scale-test", g.AdditionalTelemetryProperty)
+		if err != nil {
+			return errors.Wrap(err, "error creating telemetry client")
+		}
+
+		g.telemetryClient = telemetryClient
+	}
+
+	g.stop = make(chan struct{})
+	g.wg.Add(1)
+
+	go func() {
+		// Release the WaitGroup on every exit path so Stop can never hang.
+		defer g.wg.Done()
+
+		t := time.NewTicker(5 * time.Minute)
+		defer t.Stop()
+
+		for {
+			select {
+			case <-t.C:
+				// A failed sample is logged and retried on the next tick;
+				// log.Fatalf would exit the whole test binary from this
+				// goroutine and skip the caller's cleanup.
+				if err := g.getAndPublishMetrics(); err != nil {
+					log.Printf("error getting and publishing number of restarts: %v", err)
+				}
+			case <-g.stop:
+				return
+			}
+		}
+	}()
+
+	return nil
+}
+
+// Stop ends the collection goroutine, waits for it to exit and then shuts
+// telemetry down. It is safe to call when Run never started the goroutine.
+func (g *GetAndPublishMetrics) Stop() error {
+	if g.stop != nil {
+		close(g.stop)
+		g.wg.Wait()
+		g.stop = nil
+	}
+
+	// Shut down only after the goroutine is gone, so no event is tracked
+	// during shutdown, and only if Run actually created the client.
+	if g.telemetryClient != nil {
+		telemetry.ShutdownAppInsights()
+		g.telemetryClient = nil
+	}
+	return nil
+}
+
+// Prevalidate reads the Application Insights key from the environment (a
+// missing key is only logged) and requires the retinaVersion property.
+func (g *GetAndPublishMetrics) Prevalidate() error {
+	if os.Getenv(common.AzureAppInsightsKeyEnv) == "" {
+		log.Println("env ", common.AzureAppInsightsKeyEnv, " not provided")
+	}
+	g.appInsightsKey = os.Getenv(common.AzureAppInsightsKeyEnv)
+
+	if _, ok := g.AdditionalTelemetryProperty["retinaVersion"]; !ok {
+		return fmt.Errorf("retinaVersion is required in AdditionalTelemetryProperty")
+	}
+	return nil
+}
+
+// getAndPublishMetrics takes one sample: it builds the Kubernetes and metrics
+// clients from KubeConfigFilePath, collects the per-pod metrics and forwards
+// them to Application Insights and/or the output file.
+func (g *GetAndPublishMetrics) getAndPublishMetrics() error {
+	config, err := clientcmd.BuildConfigFromFlags("", g.KubeConfigFilePath)
+	if err != nil {
+		return fmt.Errorf("error building kubeconfig: %w", err)
+	}
+
+	clientset, err := kubernetes.NewForConfig(config)
+	if err != nil {
+		return fmt.Errorf("error creating Kubernetes client: %w", err)
+	}
+
+	mc, err := metrics.NewForConfig(config)
+	if err != nil {
+		return fmt.Errorf("error creating metrics client: %w", err)
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
+	defer cancel()
+
+	// Named samples, not metrics, so the imported metrics package is not shadowed.
+	samples, err := g.getMetrics(ctx, clientset, mc)
+	if err != nil {
+		return fmt.Errorf("error getting metrics: %w", err)
+	}
+
+	// Publish metrics
+	if g.telemetryClient != nil {
+		log.Println("Publishing metrics to AppInsights")
+		for _, sample := range samples {
+			g.telemetryClient.TrackEvent("scale-test", sample)
+		}
+	}
+
+	// Write metrics to file
+	if g.OutputFilePath != "" {
+		log.Println("Writing metrics to file ", g.OutputFilePath)
+		file, err := os.OpenFile(g.OutputFilePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
+		if err != nil {
+			return fmt.Errorf("error opening metrics output file: %w", err)
+		}
+		defer file.Close()
+
+		for _, sample := range samples {
+			b, err := json.Marshal(sample)
+			if err != nil {
+				return fmt.Errorf("error marshalling metric: %w", err)
+			}
+			// One JSON object per line; write errors are reported, not dropped.
+			if _, err := file.Write(append(b, '\n')); err != nil {
+				return fmt.Errorf("error writing metric to file: %w", err)
+			}
+		}
+	}
+
+	return nil
+}
+
+// metric is the flat set of string properties recorded for one pod sample.
+type metric map[string]string
+
+// getMetrics lists the kube-system pods selected by Labels and returns, for
+// each of them, its CPU/memory usage and restart count plus the CPU/memory
+// usage of the node it is scheduled on. Any lookup failure aborts the sample.
+func (g *GetAndPublishMetrics) getMetrics(ctx context.Context, k8sClient *kubernetes.Clientset, metricsClient *metrics.Clientset) ([]metric, error) {
+	labelSelector := labels.Set(g.Labels).String()
+
+	pods, err := k8sClient.CoreV1().Pods(common.KubeSystemNamespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector})
+	if err != nil {
+		return nil, errors.Wrap(err, "error getting pods")
+	}
+
+	nodesMetricsInt := metricsClient.MetricsV1beta1().NodeMetricses()
+	podMetricsInt := metricsClient.MetricsV1beta1().PodMetricses(common.KubeSystemNamespace)
+
+	allPodsHealth := make([]metric, 0, len(pods.Items))
+
+	timestamp := time.Now().UTC().Format(time.RFC3339)
+
+	for _, pod := range pods.Items {
+		podHealth := make(metric)
+
+		podMetrics, err := podMetricsInt.Get(ctx, pod.Name, metav1.GetOptions{})
+		if err != nil {
+			return nil, errors.Wrap(err, "error getting pod metrics")
+		}
+
+		// Pod usage is the sum over all of its containers.
+		podMem := resource.MustParse("0")
+		podCPU := resource.MustParse("0")
+		for _, cm := range podMetrics.Containers {
+			podMem.Add(cm.Usage["memory"])
+			podCPU.Add(cm.Usage["cpu"])
+		}
+
+		nodeMetrics, err := nodesMetricsInt.Get(ctx, pod.Spec.NodeName, metav1.GetOptions{})
+		if err != nil {
+			return nil, errors.Wrap(err, "error getting node metrics")
+		}
+
+		nodeMem := nodeMetrics.Usage["memory"]
+		nodeCPU := nodeMetrics.Usage["cpu"]
+
+		restarts := 0
+		for _, containerStatus := range pod.Status.ContainerStatuses {
+			restarts += int(containerStatus.RestartCount)
+		}
+
+		podHealth["timestamp"] = timestamp
+		podHealth["pod"] = pod.Name
+		podHealth["podCpuInMilliCore"] = fmt.Sprintf("%d", podCPU.MilliValue())
+		podHealth["podMemoryInMB"] = fmt.Sprintf("%d", podMem.Value()/(1048576))
+		podHealth["podRestarts"] = fmt.Sprintf("%d", restarts)
+		podHealth["node"] = pod.Spec.NodeName
+		podHealth["nodeCpuInMilliCore"] = fmt.Sprintf("%d", nodeCPU.MilliValue())
+		podHealth["nodeMemoryInMB"] = fmt.Sprintf("%d", nodeMem.Value()/(1048576))
+
+		allPodsHealth = append(allPodsHealth, podHealth)
+	}
+
+	return allPodsHealth, nil
+}
diff --git a/test/e2e/framework/scaletest/options.go b/test/e2e/framework/scaletest/options.go
index 8ed26b5d3c..6b5284422b 100644
--- a/test/e2e/framework/scaletest/options.go
+++ b/test/e2e/framework/scaletest/options.go
@@ -35,4 +35,6 @@ type Options struct {
 	DeleteNetworkPoliciesTimes    int
 	numKwokPods                   int
 	numRealPods                   int
+	LabelsToGetMetrics            map[string]string
+	AdditionalTelemetryProperty   map[string]string
 }
diff --git a/test/e2e/framework/scaletest/templates/networkpolicy.go b/test/e2e/framework/scaletest/templates/networkpolicy.go
index 7114fb2cad..2eef3e3161 100644
--- a/test/e2e/framework/scaletest/templates/networkpolicy.go
+++ b/test/e2e/framework/scaletest/templates/networkpolicy.go
@@ -19,6 +19,9 @@ var (
 				"Ingress",
 				"Egress",
 			},
+			PodSelector: metav1.LabelSelector{
+				MatchLabels: map[string]string{},
+			},
 		},
 	}
 )
diff --git a/test/e2e/jobs/jobs.go b/test/e2e/jobs/jobs.go
index 925df6dc01..ddc58b5daf 100644
--- a/test/e2e/jobs/jobs.go
+++ b/test/e2e/jobs/jobs.go
@@ -94,6 +94,20 @@ func InstallRetina(kubeConfigFilePath, chartPath string) *types.Job {
 	return job
 }
 
+// UninstallRetina returns a job that uninstalls the "retina" Helm release
+// from kube-system. chartPath is currently unused.
+func UninstallRetina(kubeConfigFilePath, chartPath string) *types.Job {
+	job := types.NewJob("Uninstall Retina")
+
+	job.AddStep(&kubernetes.UninstallHelmChart{
+		Namespace:          common.KubeSystemNamespace,
+		ReleaseName:        "retina",
+		KubeConfigFilePath: kubeConfigFilePath,
+	}, nil)
+
+	return job
+}
+
 func InstallAndTestRetinaBasicMetrics(kubeConfigFilePath, chartPath string, testPodNamespace string) *types.Job {
 	job := types.NewJob("Install and test Retina with basic metrics")
 
diff --git a/test/e2e/jobs/scale.go b/test/e2e/jobs/scale.go
index 0e0e48d0f8..89215785c1 100644
--- a/test/e2e/jobs/scale.go
+++ b/test/e2e/jobs/scale.go
@@ -1,6 +1,7 @@
 package retina
 
 import (
+	"os"
 	"time"
 
 	"github.com/microsoft/retina/test/e2e/framework/kubernetes"
@@ -15,14 +16,14 @@ func DefaultScaleTestOptions() scaletest.Options {
 		NumKwokDeployments:            0,
 		NumKwokReplicas:               0,
 		MaxRealPodsPerNode:            100,
-		NumRealDeployments:            3,
-		RealPodType:                   "agnhost",
-		NumRealReplicas:               2,
-		NumRealServices:               1,
+		NumRealDeployments:            1000,
+		RealPodType:                   "kapinger",
+		NumRealReplicas:               40,
+		NumRealServices:               1000,
 		NumNetworkPolicies:            10,
 		NumUnapliedNetworkPolicies:    10,
-		NumUniqueLabelsPerPod:         2,
-		NumUniqueLabelsPerDeployment:  2,
+		NumUniqueLabelsPerPod:         0,
+		NumUniqueLabelsPerDeployment:  1,
 		NumSharedLabelsPerPod:         3,
 		KubeconfigPath:                "",
 		RestartNpmPods:                false,
@@ -37,6 +38,8 @@ func DefaultScaleTestOptions() scaletest.Options {
 		DeleteNetworkPolicies:         false,
 		DeleteNetworkPoliciesInterval: 60 * time.Second,
 		DeleteNetworkPoliciesTimes:    1,
+		LabelsToGetMetrics:            map[string]string{},
+		AdditionalTelemetryProperty:   map[string]string{},
 	}
 }
 
@@ -60,6 +63,15 @@ func ScaleTest(opt *scaletest.Options) *types.Job {
 
 	job.AddStep(&kubernetes.CreateNamespace{}, nil)
 
+	job.AddStep(&scaletest.GetAndPublishMetrics{
+		Labels:                      opt.LabelsToGetMetrics,
+		AdditionalTelemetryProperty: opt.AdditionalTelemetryProperty,
+		OutputFilePath:              os.Getenv("OUTPUT_FILEPATH"),
+	}, &types.StepOptions{
+		SkipSavingParametersToJob: true,
+		RunInBackgroundWithID:     "get-metrics",
+	})
+
 	job.AddStep(&scaletest.CreateResources{
 		NumKwokDeployments:           opt.NumKwokDeployments,
 		NumKwokReplicas:              opt.NumKwokReplicas,
@@ -95,13 +107,11 @@ func ScaleTest(opt *scaletest.Options) *types.Job {
 		NumSharedLabelsPerPod: opt.NumSharedLabelsPerPod,
 	}, nil)
 
-	// TODO: Add steps to get the state of the cluster
-
-	// job.AddStep(&kubernetes.GetDeployment{})
-
-	// job.AddStep(&kubernetes.GetDaemonSet{})
+	job.AddStep(&types.Stop{
+		BackgroundID: "get-metrics",
+	}, nil)
 
-	// job.AddStep(&kubernetes.DescribePods{})
+	job.AddStep(&kubernetes.DeleteNamespace{}, nil)
 
 	return job
 }
diff --git a/test/e2e/retina_e2e_test.go b/test/e2e/retina_e2e_test.go
index d1b1b16e95..546f596bb5 100644
--- a/test/e2e/retina_e2e_test.go
+++ b/test/e2e/retina_e2e_test.go
@@ -4,7 +4,6 @@ package retina
 
 import (
 	"crypto/rand"
-	"flag"
 	"math/big"
 	"os"
 	"path/filepath"
@@ -17,18 +16,11 @@ import (
 	"github.com/stretchr/testify/require"
 )
 
-var (
-	createInfra = flag.Bool("create-infra", true, "create a Resource group, vNET and AKS cluster for testing")
-	deleteInfra = flag.Bool("delete-infra", true, "delete a Resource group, vNET and AKS cluster for testing")
-)
-
 // TestE2ERetina tests all e2e scenarios for retina
 func TestE2ERetina(t *testing.T) {
 	ctx, cancel := helpers.Context(t)
 	defer cancel()
 
-	flag.Parse()
-
 	// Truncate the username to 8 characters
 	clusterName := common.ClusterNameForE2ETest(t)
 
@@ -62,11 +54,11 @@ func TestE2ERetina(t *testing.T) {
 	kubeConfigFilePath := filepath.Join(rootDir, "test", "e2e", "test.pem")
 
 	// CreateTestInfra
-	createTestInfra := types.NewRunner(t, jobs.CreateTestInfra(subID, rg, clusterName, location, kubeConfigFilePath, *createInfra))
+	createTestInfra := types.NewRunner(t, jobs.CreateTestInfra(subID, rg, clusterName, location, kubeConfigFilePath, *common.CreateInfra))
 	createTestInfra.Run(ctx)
 
 	t.Cleanup(func() {
-		if *deleteInfra {
+		if *common.DeleteInfra {
 			_ = jobs.DeleteTestInfra(subID, rg, clusterName, location).Run()
 		}
 	})
diff --git a/test/e2e/retina_perf_test.go b/test/e2e/retina_perf_test.go
index 5c24aa24cb..38651bc0ea 100644
--- a/test/e2e/retina_perf_test.go
+++ b/test/e2e/retina_perf_test.go
@@ -46,7 +46,7 @@ func TestE2EPerfRetina(t *testing.T) {
 	cwd, err := os.Getwd()
 	require.NoError(t, err)
 
-	appInsightsKey := os.Getenv("AZURE_APP_INSIGHTS_KEY")
+	appInsightsKey := os.Getenv(common.AzureAppInsightsKeyEnv)
 	if appInsightsKey == "" {
 		t.Log("No app insights key provided, results will be saved locally at ./ as `netperf-benchmark-*`, `netperf-result-*`, and `netperf-regression-*`")
 	}
diff --git a/test/e2e/scale_test.go b/test/e2e/scale_test.go
index fe73c85f69..6769dccc09 100644
--- a/test/e2e/scale_test.go
+++ b/test/e2e/scale_test.go
@@ -7,9 +7,12 @@ import (
 	"math/big"
 	"os"
 	"path/filepath"
+	"strconv"
 	"testing"
 
 	"github.com/microsoft/retina/test/e2e/common"
+	"github.com/microsoft/retina/test/e2e/framework/azure"
+	"github.com/microsoft/retina/test/e2e/framework/generic"
 	"github.com/microsoft/retina/test/e2e/framework/helpers"
 	"github.com/microsoft/retina/test/e2e/framework/types"
 	jobs "github.com/microsoft/retina/test/e2e/jobs"
@@ -49,25 +52,66 @@ func TestE2ERetina_Scale(t *testing.T) {
 	chartPath := filepath.Join(rootDir, "deploy", "legacy", "manifests", "controller", "helm", "retina")
 	kubeConfigFilePath := filepath.Join(rootDir, "test", "e2e", "test.pem")
 
+	// Scale test parameters. The environment variable names below must match
+	// the ones exported by .github/workflows/scale-test.yaml.
+	opt := jobs.DefaultScaleTestOptions()
+	opt.KubeconfigPath = kubeConfigFilePath
+
+	NumDeployments := os.Getenv("NUM_DEPLOYMENTS")
+	NumReplicas := os.Getenv("NUM_REPLICAS")
+	NumNetworkPolicies := os.Getenv("NUM_NETPOLS")
+	CleanUp := os.Getenv("CLEANUP")
+
+	if NumDeployments != "" {
+		opt.NumRealDeployments, err = strconv.Atoi(NumDeployments)
+		require.NoError(t, err)
+		// One service per deployment, derived only once the value has parsed.
+		opt.NumRealServices = opt.NumRealDeployments
+	}
+	if NumReplicas != "" {
+		opt.NumRealReplicas, err = strconv.Atoi(NumReplicas)
+		require.NoError(t, err)
+	}
+	if NumNetworkPolicies != "" {
+		opt.NumNetworkPolicies, err = strconv.Atoi(NumNetworkPolicies)
+		require.NoError(t, err)
+	}
+	if CleanUp != "" {
+		opt.DeleteLabels, err = strconv.ParseBool(CleanUp)
+		require.NoError(t, err)
+	}
+
+	RetinaVersion := os.Getenv(generic.DefaultTagEnv)
+	require.NotEmpty(t, RetinaVersion)
+	opt.AdditionalTelemetryProperty["retinaVersion"] = RetinaVersion
+	opt.AdditionalTelemetryProperty["clusterName"] = clusterName
+
+	// AppInsightsKey is required for telemetry
+	require.NotEmpty(t, os.Getenv(common.AzureAppInsightsKeyEnv))
+
+	opt.LabelsToGetMetrics = map[string]string{"k8s-app": "retina"}
+
 	// CreateTestInfra
-	createTestInfra := types.NewRunner(t, jobs.CreateTestInfra(subID, rg, clusterName, location, kubeConfigFilePath, *createInfra))
+	createTestInfra := types.NewRunner(t, jobs.CreateTestInfra(subID, rg, clusterName, location, kubeConfigFilePath, *common.CreateInfra))
 	createTestInfra.Run(ctx)
 
 	t.Cleanup(func() {
-		if *deleteInfra {
+		if *common.DeleteInfra {
 			_ = jobs.DeleteTestInfra(subID, rg, clusterName, location).Run()
 		}
 	})
 
+	fqdn, err := azure.GetFqdnFn(subID, rg, clusterName)
+	require.NoError(t, err)
+	opt.AdditionalTelemetryProperty["clusterFqdn"] = fqdn
+
 	// Install Retina
 	installRetina := types.NewRunner(t, jobs.InstallRetina(kubeConfigFilePath, chartPath))
 	installRetina.Run(ctx)
 
-	// Scale test
-	opt := jobs.DefaultScaleTestOptions()
-	opt.KubeconfigPath = kubeConfigFilePath
-	opt.RealPodType = "kapinger"
-	opt.DeleteLabels = true
+	t.Cleanup(func() {
+		_ = jobs.UninstallRetina(kubeConfigFilePath, chartPath).Run()
+	})
 
 	scale := types.NewRunner(t, jobs.ScaleTest(&opt))
 	scale.Run(ctx)
diff --git a/test/e2e/scenarios/perf/publish-perf-results.go b/test/e2e/scenarios/perf/publish-perf-results.go
index 49d77ce0ee..4f1dd05344 100644
--- a/test/e2e/scenarios/perf/publish-perf-results.go
+++ b/test/e2e/scenarios/perf/publish-perf-results.go
@@ -8,6 +8,7 @@ import (
 	"os"
 
 	"github.com/microsoft/retina/pkg/telemetry"
+	"github.com/microsoft/retina/test/e2e/common"
 	"github.com/microsoft/retina/test/e2e/framework/generic"
 	"github.com/pkg/errors"
 )
@@ -21,7 +22,7 @@ func (v *PublishPerfResults) Prevalidate() error {
 }
 
 func (v *PublishPerfResults) Run() error {
-	appInsightsKey := os.Getenv("AZURE_APP_INSIGHTS_KEY")
+	appInsightsKey := os.Getenv(common.AzureAppInsightsKeyEnv)
 	if appInsightsKey == "" {
 		log.Println("No app insights key provided, skipping publishing results")
 		return nil