Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: automate scale test execution #1248

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .github/workflows/daily-scale-test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Daily Scale Test: invokes the reusable scale-test workflow on a schedule.
# (Indentation reconstructed; content otherwise unchanged.)
name: Daily Scale Test

on:
  push:
    branches:
      # NOTE(review): personal development branch trigger — presumably
      # temporary for testing this workflow; confirm removal before merge.
      - alexcastilio/scale-test-workflow
  schedule:
    # Run once a day at midnight UTC.
    - cron: "0 0 * * *"

# Least-privilege token: read-only contents; id-token needed for OIDC
# federated credential login (e.g. azure/login).
permissions:
  contents: read
  id-token: write

jobs:
  call-scale-test:
    # Delegate to the reusable scale-test workflow in this repository.
    uses: ./.github/workflows/scale-test.yaml
    with:
      num_deployments: 1000
      num_replicas: 20
      # TODO: Fix values
      num_netpol: 0
      num_nodes: 1000
      cleanup: false
    secrets: inherit
26 changes: 12 additions & 14 deletions .github/workflows/scale-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ on:
description: "Image Namespace (if not set, default namespace will be used)"
type: string
image_tag:
description: "Image Tag (if not set, default for this commit will be used)"
description: "Image Tag (if not set, latest commit from 'main' will be used)"
type: string
num_deployments:
description: "Number of Traffic Deployments"
Expand All @@ -36,25 +36,21 @@ on:

workflow_call:
inputs:
resource_group:
description: "Azure Resource Group"
required: true
type: string
cluster_name:
description: "AKS Cluster Name"
required: true
type: string
num_deployments:
description: "Number of Traffic Deployments"
default: 1000
default: 100
type: number
num_replicas:
description: "Number of Traffic Replicas per Deployment"
default: 40
default: 10
type: number
num_netpol:
description: "Number of Network Policies"
default: 1000
default: 100
type: number
num_nodes:
description: "Number of nodes per pool"
default: 100
type: number
cleanup:
description: "Clean up environment after test"
Expand Down Expand Up @@ -100,8 +96,10 @@ jobs:
IMAGE_NAMESPACE: ${{ inputs.image_namespace == '' && github.repository || inputs.image_namespace }}
TAG: ${{ inputs.image_tag }}
AZURE_APP_INSIGHTS_KEY: ${{ secrets.AZURE_APP_INSIGHTS_KEY }}
NODES: ${{ inputs.num_nodes }}
CREATE_INFRA: ${{ github.event_name != 'workflow_dispatch' }}
shell: bash
run: |
set -euo pipefail
[[ $TAG == "" ]] && TAG=$(make version)
go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -create-infra=false -delete-infra=false
[[ $TAG == "" ]] && TAG=$(curl -s https://api.github.com/repos/microsoft/retina/commits | jq -r '.[0].sha' | cut -c1-7)
go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -create-infra=$(echo $CREATE_INFRA) -delete-infra=$(echo $CREATE_INFRA)
53 changes: 51 additions & 2 deletions test/e2e/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ package common

import (
"flag"
"os"
"os/user"
"path/filepath"
"strconv"
"testing"
"time"

"github.com/microsoft/retina/test/e2e/framework/params"
"github.com/stretchr/testify/require"
)

Expand All @@ -31,6 +31,13 @@ var (
Architectures = []string{"amd64", "arm64"}
CreateInfra = flag.Bool("create-infra", true, "create a Resource group, vNET and AKS cluster for testing")
DeleteInfra = flag.Bool("delete-infra", true, "delete a Resource group, vNET and AKS cluster for testing")
ScaleTestInfra = ScaleTestInfraHandler{
location: params.Location,
subscriptionID: params.SubscriptionID,
resourceGroup: params.ResourceGroup,
clusterName: params.ClusterName,
nodes: params.Nodes,
}

// kubeconfig: path to kubeconfig file, in not provided,
// a new k8s cluster will be created
Expand All @@ -49,8 +56,50 @@ var (
}
)

// ScaleTestInfraHandler resolves the Azure infrastructure parameters for a
// scale-test run, substituting a sensible default for any value that was not
// supplied via the environment.
type ScaleTestInfraHandler struct {
	location       string
	subscriptionID string
	resourceGroup  string
	clusterName    string
	nodes          string
}

// GetSubscriptionID returns the configured subscription ID. It may be empty;
// no default is applied.
func (s ScaleTestInfraHandler) GetSubscriptionID() string {
	return s.subscriptionID
}

// GetLocation returns the configured Azure region, defaulting to "westus2"
// when unset.
func (s ScaleTestInfraHandler) GetLocation() string {
	if s.location != "" {
		return s.location
	}
	return "westus2"
}

// GetResourceGroup returns the configured resource group name. When unset,
// the cluster name doubles as the resource group name.
func (s ScaleTestInfraHandler) GetResourceGroup() string {
	if s.resourceGroup == "" {
		return s.GetClusterName()
	}
	return s.resourceGroup
}

// GetNodes returns the configured node count per pool as a string,
// defaulting to "100" when unset.
func (s ScaleTestInfraHandler) GetNodes() string {
	if s.nodes != "" {
		return s.nodes
	}
	return "100"
}

// GetClusterName returns the configured cluster name, defaulting to
// "retina-scale-test" when unset.
func (s ScaleTestInfraHandler) GetClusterName() string {
	if s.clusterName == "" {
		return "retina-scale-test"
	}
	return s.clusterName
}

func ClusterNameForE2ETest(t *testing.T) string {
clusterName := os.Getenv("CLUSTER_NAME")
clusterName := params.ClusterName
if clusterName == "" {
curuser, err := user.Current()
require.NoError(t, err)
Expand Down
44 changes: 43 additions & 1 deletion test/e2e/framework/azure/create-cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package azure
import (
"context"
"fmt"
"log"
"time"

"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
Expand All @@ -23,6 +24,24 @@ type CreateCluster struct {
ResourceGroupName string
Location string
ClusterName string
podCidr string
vmSize string
networkPluginMode string
Nodes int32
}

// SetPodCidr sets the pod CIDR used for the cluster's network profile and
// returns the receiver so calls can be chained.
func (c *CreateCluster) SetPodCidr(cidr string) *CreateCluster {
	c.podCidr = cidr
	return c
}

// SetVMSize sets the VM size used for the cluster's agent pool and returns
// the receiver so calls can be chained.
func (c *CreateCluster) SetVMSize(size string) *CreateCluster {
	c.vmSize = size
	return c
}

// SetNetworkPluginMode sets the network plugin mode (e.g. "overlay") and
// returns the receiver so calls can be chained.
func (c *CreateCluster) SetNetworkPluginMode(mode string) *CreateCluster {
	c.networkPluginMode = mode
	return c
}

func (c *CreateCluster) Run() error {
Expand All @@ -36,15 +55,38 @@ func (c *CreateCluster) Run() error {
if err != nil {
return fmt.Errorf("failed to create client: %w", err)
}
if c.Nodes == 0 {
c.Nodes = MaxNumberOfNodes
}

template := GetStarterClusterTemplate(c.Location)

if c.Nodes > 0 {
template.Properties.AgentPoolProfiles[0].Count = to.Ptr(c.Nodes)
}

if c.podCidr != "" {
template.Properties.NetworkProfile.PodCidr = to.Ptr(c.podCidr)
}

if c.vmSize != "" {
template.Properties.AgentPoolProfiles[0].VMSize = to.Ptr(c.vmSize)
}

if c.networkPluginMode != "" {
template.Properties.NetworkProfile.NetworkPluginMode = to.Ptr(armcontainerservice.NetworkPluginMode(c.networkPluginMode))
}

poller, err := clientFactory.NewManagedClustersClient().BeginCreateOrUpdate(ctx, c.ResourceGroupName, c.ClusterName, GetStarterClusterTemplate(c.Location), nil)
log.Printf("creating cluster %s in location %s...", c.ClusterName, c.Location)
poller, err := clientFactory.NewManagedClustersClient().BeginCreateOrUpdate(ctx, c.ResourceGroupName, c.ClusterName, template, nil)
if err != nil {
return fmt.Errorf("failed to finish the create cluster request: %w", err)
}
_, err = poller.PollUntilDone(ctx, nil)
if err != nil {
return fmt.Errorf("failed to pull the create cluster result: %w", err)
}
log.Printf("cluster created %s in location %s...", c.ClusterName, c.Location)

return nil
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ func (c *CreateKapingerDeployment) GetKapingerDeployment() *appsv1.Deployment {
Containers: []v1.Container{
{
Name: "kapinger",
Image: "acnpublic.azurecr.io/kapinger:20241014.7",
Image: "acnpublic.azurecr.io/kapinger:v0.0.23-9-g23ef222",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"memory": resource.MustParse("20Mi"),
Expand Down
76 changes: 76 additions & 0 deletions test/e2e/framework/kubernetes/label-nodes.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package kubernetes

import (
"context"
"encoding/json"
"fmt"
"log"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
)

// patchStringValue is a single JSON Patch (RFC 6902) operation carrying a
// string value; a slice of these is marshaled into the patch document sent
// to the Kubernetes API.
type patchStringValue struct {
	Op    string `json:"op"`
	Path  string `json:"path"`
	Value string `json:"value"`
}

// LabelNodes is a test-framework step that adds a fixed set of labels to
// every node in the target cluster.
type LabelNodes struct {
	KubeConfigFilePath string            // path to the kubeconfig used to reach the cluster
	Labels             map[string]string // label key/value pairs added to each node's metadata
}

// Prevalidate implements the framework step interface; this step needs no
// validation before Run.
func (l *LabelNodes) Prevalidate() error {
	return nil
}

// Run applies the configured labels to every node in the cluster by sending
// a single JSON Patch document per node.
//
// Returns an error if the kubeconfig cannot be loaded, the client cannot be
// built, nodes cannot be listed, or any individual node patch fails.
func (l *LabelNodes) Run() error {
	config, err := clientcmd.BuildConfigFromFlags("", l.KubeConfigFilePath)
	if err != nil {
		return fmt.Errorf("error building kubeconfig: %w", err)
	}

	clientset, err := kubernetes.NewForConfig(config)
	if err != nil {
		return fmt.Errorf("error creating Kubernetes client: %w", err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
	defer cancel()

	nodes, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
	if err != nil {
		return fmt.Errorf("failed to get nodes: %w", err)
	}

	// Nothing to do when no labels were configured; avoid sending an empty
	// patch to every node.
	if len(l.Labels) == 0 {
		return nil
	}

	// Build one patch document that adds every configured label.
	// NOTE(review): JSON Pointer requires '/' in a key to be escaped as "~1",
	// so a label key containing a slash (e.g. "example.com/role") would need
	// escaping here — confirm callers only pass slash-free keys.
	patch := make([]patchStringValue, 0, len(l.Labels))
	for k, v := range l.Labels {
		patch = append(patch, patchStringValue{
			Op:    "add",
			Path:  "/metadata/labels/" + k,
			Value: v,
		})
	}
	b, err := json.Marshal(patch)
	if err != nil {
		return fmt.Errorf("failed to marshal patch: %w", err)
	}

	for i := range nodes.Items {
		log.Println("Labeling node", nodes.Items[i].Name)
		_, err = clientset.CoreV1().Nodes().Patch(ctx, nodes.Items[i].Name, types.JSONPatchType, b, metav1.PatchOptions{})
		if err != nil {
			// Fixed: original message said "failed to patch pod" although this
			// step patches nodes; also include the node name for debugging.
			return fmt.Errorf("failed to patch node %s: %w", nodes.Items[i].Name, err)
		}
	}

	return nil
}

// Stop implements the framework step interface; this step has nothing to
// clean up or halt.
func (l *LabelNodes) Stop() error {
	return nil
}
17 changes: 17 additions & 0 deletions test/e2e/framework/params/params.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package params

import (
"os"
)

// Test-run parameters sourced from the process environment at package init.
// Each value is the raw string from os.Getenv ("" when the variable is
// unset); callers are responsible for parsing and defaulting.
var (
	Location           = os.Getenv("LOCATION")              // Azure region
	SubscriptionID     = os.Getenv("AZURE_SUBSCRIPTION_ID") // Azure subscription ID
	ResourceGroup      = os.Getenv("AZURE_RESOURCE_GROUP")  // Azure resource group name
	ClusterName        = os.Getenv("CLUSTER_NAME")          // AKS cluster name
	Nodes              = os.Getenv("NODES")                 // nodes per pool (numeric string)
	NumDeployments     = os.Getenv("NUM_DEPLOYMENTS")       // traffic deployments (numeric string)
	NumReplicas        = os.Getenv("NUM_REPLICAS")          // replicas per deployment (numeric string)
	NumNetworkPolicies = os.Getenv("NUM_NET_POL")           // network policies (numeric string)
	CleanUp            = os.Getenv("CLEANUP")               // whether to clean up after the test (boolean string)
)
47 changes: 47 additions & 0 deletions test/e2e/jobs/scale.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"time"

"github.com/microsoft/retina/test/e2e/common"
"github.com/microsoft/retina/test/e2e/framework/azure"
"github.com/microsoft/retina/test/e2e/framework/generic"
"github.com/microsoft/retina/test/e2e/framework/kubernetes"
"github.com/microsoft/retina/test/e2e/framework/scaletest"
"github.com/microsoft/retina/test/e2e/framework/types"
Expand Down Expand Up @@ -45,6 +47,51 @@ func DefaultScaleTestOptions() scaletest.Options {
}
}

// GetScaleTestInfra builds the job that prepares the AKS infrastructure for
// the scale test. When createInfra is true it creates a resource group and
// cluster before fetching the kubeconfig; otherwise it only fetches the
// kubeconfig for an existing cluster. In both paths the nodes are then
// labeled and image flags are loaded from the environment.
func GetScaleTestInfra(subID, rg, clusterName, location, kubeConfigFilePath string, nodes int32, createInfra bool) *types.Job {
	job := types.NewJob("Get scale test infrastructure")

	if !createInfra {
		// Existing cluster: just fetch its kubeconfig.
		job.AddStep(&azure.GetAKSKubeConfig{
			KubeConfigFilePath: kubeConfigFilePath,
			ClusterName:        clusterName,
			SubscriptionID:     subID,
			ResourceGroupName:  rg,
			Location:           location,
		}, nil)
	} else {
		job.AddStep(&azure.CreateResourceGroup{
			SubscriptionID:    subID,
			ResourceGroupName: rg,
			Location:          location,
		}, nil)

		// NOTE(review): subscription/resource-group/location are not set on
		// CreateCluster or the subsequent GetAKSKubeConfig here — presumably
		// the job framework propagates them from the previous step; confirm.
		job.AddStep((&azure.CreateCluster{
			ClusterName: clusterName,
			Nodes:       nodes,
		}).
			SetPodCidr("100.64.0.0/10").
			SetVMSize("Standard_D4_v3").
			SetNetworkPluginMode("overlay"), nil)

		job.AddStep(&azure.GetAKSKubeConfig{
			KubeConfigFilePath: kubeConfigFilePath,
		}, nil)
	}

	// Mark every node so scale-test workloads can target them.
	job.AddStep(&kubernetes.LabelNodes{
		Labels: map[string]string{"scale-test": "true"},
	}, nil)

	// Resolve image tag/namespace/registry from the environment.
	job.AddStep(&generic.LoadFlags{
		TagEnv:            generic.DefaultTagEnv,
		ImageNamespaceEnv: generic.DefaultImageNamespace,
		ImageRegistryEnv:  generic.DefaultImageRegistry,
	}, nil)

	return job
}

func ScaleTest(opt *scaletest.Options) *types.Job {
job := types.NewJob("Scale Test")

Expand Down
Loading
Loading