From e45ad213d5e26b681271fd4d898c0dcbda63715f Mon Sep 17 00:00:00 2001
From: Vipul Singh
Date: Wed, 28 Jun 2023 12:06:47 -0700
Subject: [PATCH] ci:[CNI] Windows cniv1 load test pipeline (#2024)

---
 .../cilium-overlay-load-test-template.yaml    |  12 +-
 .../create-cluster-template.yaml              |   5 +-
 .../pod-deployment-template.yaml              |   9 +-
 .../restart-node-template.yaml                |   6 +-
 .../validate-state-template.yaml              |   6 +-
 .pipelines/cni/pipeline.yaml                  |  11 +
 .../windows-cni-load-test-template.yaml       | 134 ++++++++++
 .../aks-swift/e2e-step-template.yaml          |   8 +-
 .../cilium-overlay-e2e-step-template.yaml     |   8 +-
 .../cilium/cilium-e2e-step-template.yaml      |   8 +-
 Makefile                                      |  15 +-
 cni/build/windows.Dockerfile                  |  20 ++
 hack/{swift => aks}/Makefile                  |  21 ++
 hack/{swift => aks}/README.md                 |   1 +
 hack/{swift => aks}/kube-proxy.json           |   0
 hack/manifests/windows-update.yaml            |  64 +++++
 hack/scripts/updatecni.ps1                    |  52 ++++
 test/integration/load/load_test.go            |  10 +-
 .../load/privileged-daemonset-windows.yaml    |  32 +++
 ...oyment.yaml => noop-deployment-linux.yaml} |   2 +
 .../manifests/noop-deployment-windows.yaml    |  23 ++
 test/internal/k8sutils/utils.go               |   4 +-
 test/validate/client.go                       |   2 +
 test/validate/windows_validate.go             | 228 ++++++++++++++++++
 24 files changed, 647 insertions(+), 34 deletions(-)
 create mode 100644 .pipelines/cni/singletenancy/windows-cni-load-test-template.yaml
 create mode 100644 cni/build/windows.Dockerfile
 rename hack/{swift => aks}/Makefile (91%)
 rename hack/{swift => aks}/README.md (95%)
 rename hack/{swift => aks}/kube-proxy.json (100%)
 create mode 100644 hack/manifests/windows-update.yaml
 create mode 100644 hack/scripts/updatecni.ps1
 create mode 100644 test/integration/manifests/load/privileged-daemonset-windows.yaml
 rename test/integration/manifests/{load/noop-deployment.yaml => noop-deployment-linux.yaml} (85%)
 create mode 100644 test/integration/manifests/noop-deployment-windows.yaml
 create mode 100644 test/validate/windows_validate.go

diff --git a/.pipelines/cni/cilium/cilium-overlay-load-test-template.yaml b/.pipelines/cni/cilium/cilium-overlay-load-test-template.yaml
index 9cf7a72a5c..044535bc6d 100644
--- a/.pipelines/cni/cilium/cilium-overlay-load-test-template.yaml
+++ b/.pipelines/cni/cilium/cilium-overlay-load-test-template.yaml
@@ -37,7 +37,7 @@ stages:
           inlineScript: |
             set -ex
             az extension add --name aks-preview
-            make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision)
+            make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision)
             ls -lah
             pwd
             kubectl cluster-info
@@ -70,6 +70,8 @@ stages:
           - template: ../load-test-templates/pod-deployment-template.yaml
             parameters:
               clusterName: ${{ parameters.clusterName }}
+              scaleup: 2400
+              os: linux
   - stage: validate_state
     dependsOn: pod_deployment
     displayName: "Validate State"
@@ -122,7 +124,7 @@ stages:
               addSpnToEnvironment: true
               inlineScript: |
                 set -ex
-                make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision)
+                make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision)
             name: "GetCluster"
             displayName: "Get AKS Cluster"
           - script: |
@@ -149,9 +151,9 @@ stages:
                 if [ "$(DELETE_RESOURCES)" ]
                 then
                   echo "Deleting Cluster and resource group"
-                  make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision)
-                  make -C ./hack/swift azcfg AZCLI=az REGION=$(LOCATION)
-                  make -C ./hack/swift down AZCLI=az REGION=$(LOCATION) SUB=$(SUBSCRIPTION_ID) CLUSTER=${{ parameters.clusterName }}-$(make revision)
+                  make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision)
+                  make -C ./hack/aks azcfg AZCLI=az REGION=$(LOCATION)
+                  make -C ./hack/aks down AZCLI=az REGION=$(LOCATION) SUB=$(SUBSCRIPTION_ID) CLUSTER=${{ parameters.clusterName }}-$(make revision)
                   echo "Cluster and resources down"
                 else
                   echo "Deletion of resources is False"

diff --git a/.pipelines/cni/load-test-templates/create-cluster-template.yaml b/.pipelines/cni/load-test-templates/create-cluster-template.yaml
index 251f20ce1a..de07665774 100644
--- a/.pipelines/cni/load-test-templates/create-cluster-template.yaml
+++ b/.pipelines/cni/load-test-templates/create-cluster-template.yaml
@@ -3,6 +3,7 @@ parameters:
   clusterName: ""
   nodeCount: ""
   vmSize: ""
+  windowsVMSize: ""

 steps:
   - task: AzureCLI@1
@@ -13,7 +14,7 @@ steps:
       addSpnToEnvironment: true
       inlineScript: |
         set -ex
-        make -C ./hack/swift azcfg AZCLI=az REGION=$(LOCATION)
-        make -C ./hack/swift ${{ parameters.clusterType }} AZCLI=az REGION=$(LOCATION) SUB=$(SUBSCRIPTION_ID) CLUSTER=${{ parameters.clusterName }}-$(make revision) NODE_COUNT=${{ parameters.nodeCount }} VM_SIZE=${{ parameters.vmSize }}
+        make -C ./hack/aks azcfg AZCLI=az REGION=$(LOCATION)
+        make -C ./hack/aks ${{ parameters.clusterType }} AZCLI=az REGION=$(LOCATION) SUB=$(SUBSCRIPTION_ID) CLUSTER=${{ parameters.clusterName }}-$(make revision) NODE_COUNT=${{ parameters.nodeCount }} VM_SIZE=${{ parameters.vmSize }} WINDOWS_VM_SKU=${{ parameters.windowsVMSize }} WINDOWS_USERNAME=${WINDOWS_USERNAME} WINDOWS_PASSWORD=${WINDOWS_PASSWORD}
     name: "CreateAksCluster"
     displayName: "Create AKS Cluster"

diff --git a/.pipelines/cni/load-test-templates/pod-deployment-template.yaml b/.pipelines/cni/load-test-templates/pod-deployment-template.yaml
index fd3dfca680..6032c028d2 100644
--- a/.pipelines/cni/load-test-templates/pod-deployment-template.yaml
+++ b/.pipelines/cni/load-test-templates/pod-deployment-template.yaml
@@ -1,6 +1,9 @@
 parameters:
   clusterName: ""
-
+  scaleup: 1000
+  os: ""
+  iterations: 4
+
 steps:
   - task: AzureCLI@1
     displayName: "Pod Deployment"
@@ -12,6 +15,6 @@ steps:
       inlineScript: |
         set -ex
         az extension add --name aks-preview
-        make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision)
+        make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision)
         cd test/integration/load
-        go test -timeout 30m -tags load -run ^TestLoad$ -tags=load -iterations=4 -scaleup=2400
+        go test -timeout 30m -tags load -run ^TestLoad$ -tags=load -iterations=${{ parameters.iterations }} -scaleup=${{ parameters.scaleup }} -os=${{ parameters.os }}

diff --git a/.pipelines/cni/load-test-templates/restart-node-template.yaml b/.pipelines/cni/load-test-templates/restart-node-template.yaml
index caf63196d2..4009150b8a 100644
--- a/.pipelines/cni/load-test-templates/restart-node-template.yaml
+++ b/.pipelines/cni/load-test-templates/restart-node-template.yaml
@@ -11,15 +11,15 @@ steps:
       inlineScript: |
         echo "Scale up the pods and immediately restart the nodes"
         clusterName=${{ parameters.clusterName }}-$(make revision)
-        make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${clusterName}
-        make -C ./hack/swift azcfg AZCLI=az REGION=$(LOCATION)
+        make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${clusterName}
+        make -C ./hack/aks azcfg AZCLI=az REGION=$(LOCATION)
         cd test/integration/load
         echo "Scaling the pods down to 100 per node"
         go test -count 1 -timeout 30m -tags load -run ^TestScaleDeployment$ -tags=load -scaleup=1000 -skip-wait=true
         cd ../../../
         echo "Restarting the nodes"
         vmss_name=$(az vmss list -g MC_${clusterName}_${clusterName}_$(LOCATION) --query "[].name" -o tsv)
-        make -C ./hack/swift restart-vmss AZCLI=az CLUSTER=${clusterName} REGION=$(LOCATION) VMSS_NAME=$vmss_name
+        make -C ./hack/aks restart-vmss AZCLI=az CLUSTER=${clusterName} REGION=$(LOCATION) VMSS_NAME=$vmss_name
         cd test/integration/load
         go test -count 1 -timeout 30m -tags load -run ^TestScaleDeployment$ -tags=load -replicas=1000
     name: "RestartNodes"
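Note on the new go-test arguments above: -iterations, -scaleup, -os, -skip-wait and -replicas must be registered as Go test flags in test/integration/load. The declarations themselves sit outside the hunks in this patch, so the following is only a sketch of plausible plumbing; flag names mirror the command lines above, while the defaults and variable names are assumptions:

    // Hypothetical flag declarations for test/integration/load; illustrative
    // only -- the real ones are not part of this diff.
    package load

    import "flag"

    var (
        osType      = flag.String("os", "linux", "node OS to target (linux or windows)")
        iterations  = flag.Int("iterations", 4, "number of scale up/down cycles to run")
        scaleup     = flag.Int("scaleup", 1000, "replica count for the scale-up phase")
        replicas    = flag.Int("replicas", 1, "replica count for TestScaleDeployment")
        skipWait    = flag.Bool("skip-wait", false, "skip waiting for pods to become ready")
        restartCase = flag.Bool("restart-case", false, "validate state after a node restart")
    )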
diff --git a/.pipelines/cni/load-test-templates/validate-state-template.yaml b/.pipelines/cni/load-test-templates/validate-state-template.yaml
index 8b4ab0102d..8f0ae209a2 100644
--- a/.pipelines/cni/load-test-templates/validate-state-template.yaml
+++ b/.pipelines/cni/load-test-templates/validate-state-template.yaml
@@ -1,5 +1,6 @@
 parameters:
   clusterName: ""
+  os: "linux"
   restartCase: "false"

 steps:
@@ -10,10 +11,9 @@ steps:
       scriptType: "bash"
       addSpnToEnvironment: true
       inlineScript: |
-        export RESTART_CASE=${{ parameters.restartCase }}
-        make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision)
+        make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision)
         kubectl get pods -A
-        make test-validate-state
+        make test-validate-state OS=${{ parameters.os }} RESTART_CASE=${{ parameters.restartCase }}
     name: "ValidateState"
     displayName: "Validate State"
     retryCountOnTaskFailure: 3

diff --git a/.pipelines/cni/pipeline.yaml b/.pipelines/cni/pipeline.yaml
index afad7accc1..3fda84f807 100644
--- a/.pipelines/cni/pipeline.yaml
+++ b/.pipelines/cni/pipeline.yaml
@@ -24,3 +24,14 @@ stages:
         nodeCount: 10
         vmSize: "Standard_DS4_v2"
         dependsOn: setup
+  - template: singletenancy/windows-cni-load-test-template.yaml
+    parameters:
+      name: win_cniv1
+      clusterType: "windows-cniv1-up"
+      clusterName: "win-cniv1"
+      nodeCount: 2
+      vmSize: "Standard_B2s"
+      dependsOn: setup
+      windowsVMSize: ${WINDOWS_VM_SKU}
+      os: windows
+      cni: cniv1
displayName: "Update CNI on Cluster" + jobs: + - job: deploy_pods + strategy: + matrix: + windows_cniv1_amd64: + os: windows + arch: amd64 + os_version: ltsc2022 + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + set -ex + export CNI_IMAGE=$(make acncli-image-name-and-tag OS=$(os) ARCH=$(arch) OS_VERSION=$(os_version)) + az extension add --name aks-preview + clusterName=${{ parameters.clusterName }}-$(make revision) + make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${clusterName} + make -C ./hack/aks azcfg AZCLI=az REGION=$(LOCATION) + envsubst < ./hack/manifests/windows-update.yaml | kubectl apply -f - + name: "UploadCNI" + displayName: "Upload CNI" + - script: | + set -ex + kubectl rollout status daemonset/azure-cni-windows -n kube-system + kubectl get pods -A + name: "WaitForCNI" + displayName: "Wait For CNI" + - stage: pod_deployment_windows + dependsOn: update_cni + displayName: "Pod Deployment" + jobs: + - job: deploy_pods + steps: + - template: ../load-test-templates/pod-deployment-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }} + scaleup: ${WINDOWS_SCALEUP} + os: ${{ parameters.os }} + cni: ${{ parameters.cni }} + iterations: ${WINDOWS_ITERATIONS} + - stage: validate_state_windows + dependsOn: pod_deployment_windows + displayName: "Validate State" + jobs: + - job: validate_state + steps: + - template: ../load-test-templates/validate-state-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }} + os: ${{ parameters.os }} + cni: ${{ parameters.cni }} + - stage: delete_resources + displayName: "Delete Resources" + dependsOn: + - validate_state_windows + jobs: + - job: delete_resources + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + set -ex + if [ "$(DELETE_RESOURCES)" ] + then + echo "Deleting Cluster and resource group" + make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision) + make -C ./hack/aks azcfg AZCLI=az REGION=$(LOCATION) + make -C ./hack/aks down AZCLI=az REGION=$(LOCATION) SUB=$(SUBSCRIPTION_ID) CLUSTER=${{ parameters.clusterName }}-$(make revision) + echo "Cluster and resources down" + else + echo "Deletion of resources is False" + fi + name: "CleanUpCluster" + displayName: "Cleanup cluster" + condition: always() diff --git a/.pipelines/singletenancy/aks-swift/e2e-step-template.yaml b/.pipelines/singletenancy/aks-swift/e2e-step-template.yaml index 1753774243..c5ad773e4b 100644 --- a/.pipelines/singletenancy/aks-swift/e2e-step-template.yaml +++ b/.pipelines/singletenancy/aks-swift/e2e-step-template.yaml @@ -32,8 +32,8 @@ steps: inlineScript: | mkdir -p ~/.kube/ echo "Create AKS cluster" - make -C ./hack/swift azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) - make -C ./hack/swift byocni-up AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-${{ parameters.osSku }}-$(make revision) OSSKU=${{ parameters.osSku }} + make -C ./hack/aks azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) + make -C ./hack/aks byocni-up AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-${{ parameters.osSku }}-$(make revision) OSSKU=${{ parameters.osSku }} echo "Cluster successfully created" displayName: 
diff --git a/.pipelines/singletenancy/aks-swift/e2e-step-template.yaml b/.pipelines/singletenancy/aks-swift/e2e-step-template.yaml
index 1753774243..c5ad773e4b 100644
--- a/.pipelines/singletenancy/aks-swift/e2e-step-template.yaml
+++ b/.pipelines/singletenancy/aks-swift/e2e-step-template.yaml
@@ -32,8 +32,8 @@ steps:
       inlineScript: |
         mkdir -p ~/.kube/
         echo "Create AKS cluster"
-        make -C ./hack/swift azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST)
-        make -C ./hack/swift byocni-up AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-${{ parameters.osSku }}-$(make revision) OSSKU=${{ parameters.osSku }}
+        make -C ./hack/aks azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST)
+        make -C ./hack/aks byocni-up AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-${{ parameters.osSku }}-$(make revision) OSSKU=${{ parameters.osSku }}
         echo "Cluster successfully created"
     displayName: Create test cluster
     condition: succeeded()
@@ -124,8 +124,8 @@ steps:
       addSpnToEnvironment: true
       inlineScript: |
         echo "Deleting cluster"
-        make -C ./hack/swift azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST)
-        make -C ./hack/swift down AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-${{ parameters.osSku }}-$(make revision)
+        make -C ./hack/aks azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST)
+        make -C ./hack/aks down AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-${{ parameters.osSku }}-$(make revision)
         echo "Cluster and resources down"
     name: "Cleanupcluster"
     displayName: "Cleanup cluster"

diff --git a/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-step-template.yaml b/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-step-template.yaml
index 9ed3b09a82..0fbf47af48 100644
--- a/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-step-template.yaml
+++ b/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-step-template.yaml
@@ -31,8 +31,8 @@ steps:
       inlineScript: |
         mkdir -p ~/.kube/
         echo "Create AKS Overlay cluster"
-        make -C ./hack/swift azcfg AZCLI=az REGION=$(REGION_OVERLAY_CLUSTER_TEST)
-        make -C ./hack/swift overlay-no-kube-proxy-up AZCLI=az REGION=$(REGION_OVERLAY_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-$(make revision) VM_SIZE=Standard_B2ms
+        make -C ./hack/aks azcfg AZCLI=az REGION=$(REGION_OVERLAY_CLUSTER_TEST)
+        make -C ./hack/aks overlay-no-kube-proxy-up AZCLI=az REGION=$(REGION_OVERLAY_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-$(make revision) VM_SIZE=Standard_B2ms
         echo "Cluster successfully created"
     displayName: Create Overlay cluster
     condition: succeeded()
@@ -189,8 +189,8 @@ steps:
       addSpnToEnvironment: true
       inlineScript: |
         echo "Deleting cluster"
-        make -C ./hack/swift azcfg AZCLI=az
-        make -C ./hack/swift down SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision)
+        make -C ./hack/aks azcfg AZCLI=az
+        make -C ./hack/aks down SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) AZCLI=az CLUSTER=${{ parameters.clusterName }}-$(make revision)
         echo "Cluster and resources down"
     name: "Cleanupcluster"
     displayName: "Cleanup cluster"

diff --git a/.pipelines/singletenancy/cilium/cilium-e2e-step-template.yaml b/.pipelines/singletenancy/cilium/cilium-e2e-step-template.yaml
index 3fefd72ecd..7f4404d879 100644
--- a/.pipelines/singletenancy/cilium/cilium-e2e-step-template.yaml
+++ b/.pipelines/singletenancy/cilium/cilium-e2e-step-template.yaml
@@ -31,8 +31,8 @@ steps:
       inlineScript: |
         mkdir -p ~/.kube/
         echo "Create AKS cluster"
-        make -C ./hack/swift azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST)
-        make -C ./hack/swift swift-no-kube-proxy-up AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-$(make revision) VM_SIZE=Standard_B2ms
+        make -C ./hack/aks azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST)
+        make -C ./hack/aks swift-no-kube-proxy-up AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-$(make revision) VM_SIZE=Standard_B2ms
         echo "Cluster successfully created"
     displayName: Create test cluster
     condition: succeeded()
@@ -173,8 +173,8 @@ steps:
       addSpnToEnvironment: true
       inlineScript: |
         echo "Deleting cluster"
-        make -C ./hack/swift azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST)
-        make -C ./hack/swift down AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-$(make revision)
+        make -C ./hack/aks azcfg AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST)
+        make -C ./hack/aks down AZCLI=az REGION=$(REGION_AKS_CLUSTER_TEST) SUB=$(SUB_AZURE_NETWORK_AGENT_TEST) CLUSTER=${{ parameters.clusterName }}-$(make revision)
         echo "Cluster and resources down"
     name: "Cleanupcluster"
     displayName: "Cleanup cluster"
diff --git a/Makefile b/Makefile
index 3933d0c206..6ed78f0e8b 100644
--- a/Makefile
+++ b/Makefile
@@ -438,6 +438,19 @@ npm-image-pull: ## pull cns container image.
 		IMAGE=$(NPM_IMAGE) \
 		TAG=$(NPM_PLATFORM_TAG)

+# cni-plugin
+
+cni-plugin-image: ## build cni plugin container image.
+	$(MAKE) container \
+		DOCKERFILE=cni/build/$(OS).Dockerfile \
+		IMAGE=$(ACNCLI_IMAGE) \
+		EXTRA_BUILD_ARGS='--build-arg CNI_AI_PATH=$(CNI_AI_PATH) --build-arg CNI_AI_ID=$(CNI_AI_ID) --build-arg OS_VERSION=$(OS_VERSION)' \
+		PLATFORM=$(PLATFORM) \
+		TAG=$(ACNCLI_PLATFORM_TAG) \
+		OS=$(OS) \
+		ARCH=$(ARCH) \
+		OS_VERSION=$(OS_VERSION)
+

 ## Legacy

@@ -724,7 +737,7 @@ test-integration: ## run all integration tests.
 	go test -mod=readonly -buildvcs=false -timeout 1h -coverpkg=./... -race -covermode atomic -coverprofile=coverage.out -tags=integration ./test/integration...

 test-validate-state:
-	cd test/integration/load && go test -count 1 -timeout 30m -tags load -run ^TestValidateState -tags=load -restart-case=$(RESTART_CASE)
+	cd test/integration/load && go test -count 1 -timeout 30m -tags load -run ^TestValidateState -tags=load -restart-case=$(RESTART_CASE) -os=$(OS)
 	cd ../../..

 test-cyclonus: ## run the cyclonus test for npm.

diff --git a/cni/build/windows.Dockerfile b/cni/build/windows.Dockerfile
new file mode 100644
index 0000000000..036081a6e5
--- /dev/null
+++ b/cni/build/windows.Dockerfile
@@ -0,0 +1,20 @@
+ARG OS_VERSION
+FROM --platform=linux/amd64 mcr.microsoft.com/oss/go/microsoft/golang:1.20 AS builder
+ARG VERSION
+ARG CNI_AI_PATH
+ARG CNI_AI_ID
+WORKDIR /azure-container-networking
+COPY . .
+RUN GOOS=windows CGO_ENABLED=0 go build -a -o azure-vnet.exe -trimpath -ldflags "-X main.version="$VERSION"" -gcflags="-dwarflocationlists=true" cni/network/plugin/main.go
+RUN GOOS=windows CGO_ENABLED=0 go build -a -o azure-vnet-telemetry.exe -trimpath -ldflags "-X main.version="$VERSION" -X "$CNI_AI_PATH"="$CNI_AI_ID"" -gcflags="-dwarflocationlists=true" cni/telemetry/service/telemetrymain.go
+RUN GOOS=windows CGO_ENABLED=0 go build -a -o azure-vnet-ipam.exe -trimpath -ldflags "-X main.version="$VERSION"" -gcflags="-dwarflocationlists=true" cni/ipam/plugin/main.go
+
+FROM mcr.microsoft.com/windows/servercore:${OS_VERSION}
+SHELL ["powershell", "-command"]
+COPY --from=builder /azure-container-networking/azure-vnet.exe azure-vnet.exe
+COPY --from=builder /azure-container-networking/azure-vnet-telemetry.exe azure-vnet-telemetry.exe
+COPY --from=builder /azure-container-networking/azure-vnet-ipam.exe azure-vnet-ipam.exe
+
+# This will be replaced with the dropgz version for Windows.
+COPY --from=builder /azure-container-networking/hack/scripts/updatecni.ps1 updatecni.ps1
+ENTRYPOINT ["powershell.exe", ".\\updatecni.ps1"]

diff --git a/hack/swift/Makefile b/hack/aks/Makefile
similarity index 91%
rename from hack/swift/Makefile
rename to hack/aks/Makefile
index 73d8074e1b..b16019b1dc 100644
--- a/hack/swift/Makefile
+++ b/hack/aks/Makefile
@@ -190,6 +190,27 @@ swift-up: rg-up swift-net-up ## Bring up a SWIFT AzCNI cluster
 		--yes
 	@$(MAKE) set-kubeconf

+windows-cniv1-up: rg-up overlay-net-up ## Bring up a Windows CNIv1 cluster
+	$(AZCLI) aks create -n $(CLUSTER) -g $(GROUP) -l $(REGION) \
+		--node-count $(NODE_COUNT) \
+		--node-vm-size $(VM_SIZE) \
+		--network-plugin azure \
+		--windows-admin-password $(WINDOWS_PASSWORD) \
+		--windows-admin-username $(WINDOWS_USERNAME) \
+		--vnet-subnet-id /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/nodenet \
+		--no-ssh-key \
+		--yes
+
+	$(AZCLI) aks nodepool add --resource-group $(GROUP) --cluster-name $(CLUSTER) \
+		--os-type Windows \
+		--os-sku Windows2022 \
+		--max-pods 250 \
+		--name npwin \
+		--node-count $(NODE_COUNT) \
+		-s $(WINDOWS_VM_SKU)
+
+	@$(MAKE) set-kubeconf
+
 down: ## Delete the cluster
 	$(AZCLI) aks delete -g $(GROUP) -n $(CLUSTER) --yes
 	@$(MAKE) unset-kubeconf

diff --git a/hack/swift/README.md b/hack/aks/README.md
similarity index 95%
rename from hack/swift/README.md
rename to hack/aks/README.md
index 40069d88a3..3a4e80e4f0 100644
--- a/hack/swift/README.md
+++ b/hack/aks/README.md
@@ -28,6 +28,7 @@ AKS Clusters
   swift-byocni-up      Bring up a SWIFT BYO CNI cluster
   swift-cilium-up      Bring up a SWIFT Cilium cluster
   swift-up             Bring up a SWIFT AzCNI cluster
+  windows-cniv1-up     Bring up a Windows AzCNIv1 cluster
   down                 Delete the cluster
   vmss-restart         Restart the nodes of the cluster
 ```

diff --git a/hack/swift/kube-proxy.json b/hack/aks/kube-proxy.json
similarity index 100%
rename from hack/swift/kube-proxy.json
rename to hack/aks/kube-proxy.json

diff --git a/hack/manifests/windows-update.yaml b/hack/manifests/windows-update.yaml
new file mode 100644
index 0000000000..2f21f31516
--- /dev/null
+++ b/hack/manifests/windows-update.yaml
@@ -0,0 +1,64 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: azure-cni-windows
+  labels:
+    tier: node
+    app: azure-cni
+  namespace: kube-system
+spec:
+  selector:
+    matchLabels:
+      app: azure-cni
+  template:
+    metadata:
+      labels:
+        tier: node
+        app: azure-cni
+    spec:
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+              - matchExpressions:
+                  - key: kubernetes.io/os
+                    operator: In
+                    values:
+                      - windows
+                  - key: kubernetes.io/arch
+                    operator: In
+                    values:
+                      - amd64
+      securityContext:
+        windowsOptions:
+          hostProcess: true
+          runAsUserName: "NT AUTHORITY\\system"
+      hostNetwork: true
+      serviceAccountName: azure-cni
+      tolerations:
+        - operator: Exists
+          effect: NoSchedule
+        - key: CriticalAddonsOnly
+          operator: Exists
+        - effect: NoExecute
+          operator: Exists
+      containers:
+        - name: cni-drop
+          image: ${CNI_IMAGE}
+          imagePullPolicy: Always
+          volumeMounts:
+            - name: cni-bin
+              mountPath: /k/azurecni/bin/
+      volumes:
+        - name: cni-bin
+          hostPath:
+            path: /k/azurecni/bin
+            type: DirectoryOrCreate
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: azure-cni
+  namespace: kube-system
+  labels:
+    addonmanager.kubernetes.io/mode: EnsureExists
diff --git a/hack/scripts/updatecni.ps1 b/hack/scripts/updatecni.ps1
new file mode 100644
index 0000000000..4ee26376ab
--- /dev/null
+++ b/hack/scripts/updatecni.ps1
@@ -0,0 +1,52 @@
+Write-Host $env:CONTAINER_SANDBOX_MOUNT_POINT
+$sourceCNI = $env:CONTAINER_SANDBOX_MOUNT_POINT + "azure-vnet.exe"
+$sourceIpam = $env:CONTAINER_SANDBOX_MOUNT_POINT + "azure-vnet-ipam.exe"
+$sourceTelemetry = $env:CONTAINER_SANDBOX_MOUNT_POINT + "azure-vnet-telemetry.exe"
+
+$sourceCNIVersion = & "$sourceCNI" -v
+$currentVersion = ""
+$sourceTelemetryVersion = & "$sourceTelemetry" -v
+$currentTelemetryVersion = ""
+
+$cniExists = Test-Path "C:\k\azurecni\bin\azure-vnet.exe"
+$telemetryExists = Test-Path "C:\k\azurecni\bin\azure-vnet-telemetry.exe"
+
+Write-Host "Source $sourceCNIVersion"
+Write-Host "Source Telemetry $sourceTelemetryVersion"
+
+if ($cniExists) {
+    $currentVersion = & "C:\k\azurecni\bin\azure-vnet.exe" -v
+}
+
+if ($telemetryExists) {
+    $currentTelemetryVersion = & "C:\k\azurecni\bin\azure-vnet-telemetry.exe" -v
+}
+
+Write-Host "Current Host $currentVersion"
+Write-Host "Current Telemetry $currentTelemetryVersion"
+
+## Check whether telemetry is already installed, so we don't get stuck in an infinite loop of rebooting and killing the process
+if ($currentTelemetryVersion -ne $sourceTelemetryVersion) {
+    $processes = Get-Process -Name azure-vnet-telemetry -ErrorAction SilentlyContinue
+    for ($i = 0; $i -lt $processes.Count; $i++) {
+        Write-Host "Killing azure-vnet-telemetry process..."
+        $processes[$i].Kill()
+    }
+    Write-Host "copying azure-vnet-telemetry to windows node..."
+    Remove-Item "C:\k\azurecni\bin\azure-vnet-telemetry.exe"
+    Copy-Item $sourceTelemetry -Destination "C:\k\azurecni\bin"
+}
+
+## Check whether the CNI is already installed, so we don't get stuck in an infinite loop of rebooting
+if ($currentVersion -ne $sourceCNIVersion) {
+    Write-Host "copying azure-vnet to windows node..."
+    Remove-Item "C:\k\azurecni\bin\azure-vnet.exe"
+    Copy-Item $sourceCNI -Destination "C:\k\azurecni\bin"
+
+    Write-Host "copying azure-vnet-ipam to windows node..."
+    Remove-Item "C:\k\azurecni\bin\azure-vnet-ipam.exe"
+    Copy-Item $sourceIpam -Destination "C:\k\azurecni\bin"
+}
+
+Start-Sleep -s 1000
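The version guard in updatecni.ps1 matters because the DaemonSet restarts its pod after every run: copying only when the bundled binary's -v output differs from the installed one keeps the node from cycling forever, as the script's comments note. The same decision flow, sketched in Go purely for illustration (the -v convention and destination paths come from the script; the helper itself is hypothetical):

    // updateIfChanged mirrors the updatecni.ps1 guard: compare the version
    // strings printed for -v and copy only on mismatch. Error handling trimmed.
    package main

    import (
        "os"
        "os/exec"
    )

    func updateIfChanged(source, dest string) error {
        srcVer, _ := exec.Command(source, "-v").Output() // bundled binary's version
        dstVer, _ := exec.Command(dest, "-v").Output()   // version already on the node (empty if absent)
        if string(srcVer) == string(dstVer) {
            return nil // already up to date; avoids the replace/restart loop
        }
        data, err := os.ReadFile(source)
        if err != nil {
            return err
        }
        return os.WriteFile(dest, data, 0o755) // overwrite the installed binary
    }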
diff --git a/test/integration/load/load_test.go b/test/integration/load/load_test.go
index dbceb15ed9..ae09235eac 100644
--- a/test/integration/load/load_test.go
+++ b/test/integration/load/load_test.go
@@ -14,7 +14,6 @@ import (

 const (
 	manifestDir      = "../manifests"
-	noopdeployment   = manifestDir + "/load/noop-deployment.yaml"
 	podLabelSelector = "load-test=true"
 )

@@ -31,6 +30,11 @@ var (
 	namespace = "load-test"
 )

+var noopDeploymentMap = map[string]string{
+	"windows": manifestDir + "/noop-deployment-windows.yaml",
+	"linux":   manifestDir + "/noop-deployment-linux.yaml",
+}
+
 /*
 In order to run the scale tests, you need a k8s cluster and its kubeconfig.
 If no kubeconfig is passed, the test will attempt to find one in the default location for kubectl config.
@@ -75,7 +79,7 @@ func TestLoad(t *testing.T) {
 		}
 	}

-	deployment, err := k8sutils.MustParseDeployment(noopdeployment)
+	deployment, err := k8sutils.MustParseDeployment(noopDeploymentMap[*osType])
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -166,7 +170,7 @@ func TestScaleDeployment(t *testing.T) {
 		}
 	}

-	deployment, err := k8sutils.MustParseDeployment(noopdeployment)
+	deployment, err := k8sutils.MustParseDeployment(noopDeploymentMap[*osType])
 	if err != nil {
 		t.Fatal(err)
 	}

diff --git a/test/integration/manifests/load/privileged-daemonset-windows.yaml b/test/integration/manifests/load/privileged-daemonset-windows.yaml
new file mode 100644
index 0000000000..c9a7839013
--- /dev/null
+++ b/test/integration/manifests/load/privileged-daemonset-windows.yaml
@@ -0,0 +1,32 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: privileged-daemonset
+  namespace: kube-system
+  labels:
+    app: privileged-daemonset
+spec:
+  selector:
+    matchLabels:
+      app: privileged-daemonset
+  template:
+    metadata:
+      labels:
+        app: privileged-daemonset
+    spec:
+      nodeSelector:
+        kubernetes.io/os: windows
+      containers:
+        - name: powershell
+          image: mcr.microsoft.com/powershell:lts-nanoserver-1809
+          securityContext:
+            windowsOptions:
+              hostProcess: true
+              runAsUserName: "NT AUTHORITY\\SYSTEM"
+          command:
+            - powershell.exe
+            - -command
+            - |
+              while ($true) { Start-Sleep -Seconds 2147483 }
+      hostNetwork: true
+      terminationGracePeriodSeconds: 0

diff --git a/test/integration/manifests/load/noop-deployment.yaml b/test/integration/manifests/noop-deployment-linux.yaml
similarity index 85%
rename from test/integration/manifests/load/noop-deployment.yaml
rename to test/integration/manifests/noop-deployment-linux.yaml
index 85272941c6..6b12793189 100644
--- a/test/integration/manifests/load/noop-deployment.yaml
+++ b/test/integration/manifests/noop-deployment-linux.yaml
@@ -19,3 +19,5 @@ spec:
         imagePullPolicy: Always
         securityContext:
           privileged: true
+      nodeSelector:
+        "kubernetes.io/os": linux

diff --git a/test/integration/manifests/noop-deployment-windows.yaml b/test/integration/manifests/noop-deployment-windows.yaml
new file mode 100644
index 0000000000..96619555c9
--- /dev/null
+++ b/test/integration/manifests/noop-deployment-windows.yaml
@@ -0,0 +1,23 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: win-load-test
+  namespace: load-test
+spec:
+  selector:
+    matchLabels:
+      os: windows
+      load-test: "true"
+  template:
+    metadata:
+      labels:
+        os: windows
+        load-test: "true"
+    spec:
+      containers:
+        - name: noop
+          image: mcr.microsoft.com/windows/nanoserver:ltsc2022
+          ports:
+            - containerPort: 80
+      nodeSelector:
+        "kubernetes.io/os": windows

diff --git a/test/internal/k8sutils/utils.go b/test/internal/k8sutils/utils.go
index 5b208d153a..95041ca693 100644
--- a/test/internal/k8sutils/utils.go
+++ b/test/internal/k8sutils/utils.go
@@ -340,7 +340,7 @@ func ExecCmdOnPod(ctx context.Context, clientset *kubernetes.Clientset, namespac
 		Stdin:  false,
 		Stdout: true,
 		Stderr: true,
-		TTY:    true,
+		TTY:    false,
 	}, scheme.ParameterCodec)

 	exec, err := remotecommand.NewSPDYExecutor(config, "POST", req.URL())
@@ -353,7 +353,7 @@ func ExecCmdOnPod(ctx context.Context, clientset *kubernetes.Clientset, namespac
 		Stdin:  nil,
 		Stdout: &stdout,
 		Stderr: &stderr,
-		Tty:    true,
+		Tty:    false,
 	})
 	if err != nil {
 		return []byte{}, errors.Wrapf(err, "error in executing command %s", cmd)

diff --git a/test/validate/client.go b/test/validate/client.go
index bc6e0aeb54..b4cc5f5cb8 100644
--- a/test/validate/client.go
+++ b/test/validate/client.go
@@ -32,6 +32,8 @@ func GetValidatorClient(os string) validatorClient {
 	switch os {
 	case "linux":
 		return &LinuxClient{}
+	case "windows":
+		return &WindowsClient{}
 	default:
 		return nil
 	}
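With the windows case added to GetValidatorClient, TestValidateState can pick a validator by the -os flag at runtime. A hedged sketch of the resulting call path, using the types and methods defined in the new file below; the surrounding harness (ctx, clientset, config, restartCase) is assumed from the test setup, and "cniv1" matches the cni value threaded through the pipeline:

    // Illustrative wiring only -- not taken verbatim from this diff.
    client := validate.GetValidatorClient("windows") // returns &WindowsClient{}
    validator := client.CreateClient(ctx, clientset, config, "load-test", "cniv1", restartCase)
    if err := validator.ValidateStateFile(); err != nil {
        t.Fatal(err) // HNS, azure-vnet, or azure-vnet-ipam state disagreed with pod IPs
    }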
diff --git a/test/validate/windows_validate.go b/test/validate/windows_validate.go
new file mode 100644
index 0000000000..9e54f61bef
--- /dev/null
+++ b/test/validate/windows_validate.go
@@ -0,0 +1,228 @@
+package validate
+
+import (
+	"context"
+	"encoding/json"
+	"log"
+	"net"
+
+	k8sutils "github.com/Azure/azure-container-networking/test/internal/k8sutils"
+	"github.com/pkg/errors"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/rest"
+)
+
+const (
+	privilegedWindowsDaemonSetPath = "../manifests/load/privileged-daemonset-windows.yaml"
+	windowsNodeSelector            = "kubernetes.io/os=windows"
+)
+
+var (
+	hnsEndpointCmd   = []string{"powershell", "-c", "Get-HnsEndpoint | ConvertTo-Json"}
+	azureVnetCmd     = []string{"powershell", "-c", "cat ../../k/azure-vnet.json"}
+	azureVnetIpamCmd = []string{"powershell", "-c", "cat ../../k/azure-vnet-ipam.json"}
+)
+
+type WindowsClient struct{}
+
+type WindowsValidator struct {
+	Validator
+}
+
+type HNSEndpoint struct {
+	MacAddress       string `json:"MacAddress"`
+	IPAddress        net.IP `json:"IPAddress"`
+	IPv6Address      net.IP `json:",omitempty"`
+	IsRemoteEndpoint bool   `json:",omitempty"`
+}
+
+type AzureVnet struct {
+	NetworkInfo NetworkInfo `json:"Network"`
+}
+
+type NetworkInfo struct {
+	ExternalInterfaces map[string]ExternalInterface `json:"ExternalInterfaces"`
+}
+
+type ExternalInterface struct {
+	Networks map[string]Network `json:"Networks"`
+}
+
+type Network struct {
+	Endpoints map[string]Endpoint `json:"Endpoints"`
+}
+
+type Endpoint struct {
+	IPAddresses []net.IPNet `json:"IPAddresses"`
+	IfName      string      `json:"IfName"`
+}
+
+type AzureVnetIpam struct {
+	IPAM AddressSpaces `json:"IPAM"`
+}
+
+type AddressSpaces struct {
+	AddrSpaces map[string]AddressSpace `json:"AddressSpaces"`
+}
+
+type AddressSpace struct {
+	Pools map[string]AddressPool `json:"Pools"`
+}
+
+type AddressPool struct {
+	Addresses map[string]AddressRecord `json:"Addresses"`
+}
+
+type AddressRecord struct {
+	Addr  net.IP
+	InUse bool
+}
+
+func (w *WindowsClient) CreateClient(ctx context.Context, clientset *kubernetes.Clientset, config *rest.Config, namespace, cni string, restartCase bool) IValidator {
+	// deploy privileged pod
+	privilegedDaemonSet, err := k8sutils.MustParseDaemonSet(privilegedWindowsDaemonSetPath)
+	if err != nil {
+		panic(err)
+	}
+	daemonsetClient := clientset.AppsV1().DaemonSets(privilegedNamespace)
+	err = k8sutils.MustCreateDaemonset(ctx, daemonsetClient, privilegedDaemonSet)
+	if err != nil {
+		panic(err)
+	}
+	err = k8sutils.WaitForPodsRunning(ctx, clientset, privilegedNamespace, privilegedLabelSelector)
+	if err != nil {
+		panic(err)
+	}
+	return &WindowsValidator{
+		Validator: Validator{
+			ctx:         ctx,
+			clientset:   clientset,
+			config:      config,
+			namespace:   namespace,
+			cni:         cni,
+			restartCase: restartCase,
+		},
+	}
+}
+
+func (v *WindowsValidator) ValidateStateFile() error {
+	checks := []struct {
+		name             string
+		stateFileIps     func([]byte) (map[string]string, error)
+		podLabelSelector string
+		podNamespace     string
+		cmd              []string
+	}{
+		{"hns", hnsStateFileIps, privilegedLabelSelector, privilegedNamespace, hnsEndpointCmd},
+		{"azure-vnet", azureVnetIps, privilegedLabelSelector, privilegedNamespace, azureVnetCmd},
+		{"azure-vnet-ipam", azureVnetIpamIps, privilegedLabelSelector, privilegedNamespace, azureVnetIpamCmd},
+	}
+
+	for _, check := range checks {
+		err := v.validate(check.stateFileIps, check.cmd, check.name, check.podNamespace, check.podLabelSelector)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func hnsStateFileIps(result []byte) (map[string]string, error) {
+	var hnsResult []HNSEndpoint
+	err := json.Unmarshal(result, &hnsResult)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to unmarshal hns endpoint list")
+	}
+
+	hnsPodIps := make(map[string]string)
+	for _, v := range hnsResult {
+		if !v.IsRemoteEndpoint {
+			hnsPodIps[v.IPAddress.String()] = v.MacAddress
+		}
+	}
+	return hnsPodIps, nil
+}
+
+func azureVnetIps(result []byte) (map[string]string, error) {
+	var azureVnetResult AzureVnet
+	err := json.Unmarshal(result, &azureVnetResult)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to unmarshal azure vnet")
+	}
+
+	azureVnetPodIps := make(map[string]string)
+	for _, v := range azureVnetResult.NetworkInfo.ExternalInterfaces {
+		for _, v := range v.Networks {
+			for _, e := range v.Endpoints {
+				for _, v := range e.IPAddresses {
+					azureVnetPodIps[v.IP.String()] = e.IfName
+				}
+			}
+		}
+	}
+	return azureVnetPodIps, nil
+}
+
+func azureVnetIpamIps(result []byte) (map[string]string, error) {
+	var azureVnetIpamResult AzureVnetIpam
+	err := json.Unmarshal(result, &azureVnetIpamResult)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to unmarshal azure vnet ipam")
+	}
+
+	azureVnetIpamPodIps := make(map[string]string)
+
+	for _, v := range azureVnetIpamResult.IPAM.AddrSpaces {
+		for _, v := range v.Pools {
+			for _, v := range v.Addresses {
+				if v.InUse {
+					azureVnetIpamPodIps[v.Addr.String()] = v.Addr.String()
+				}
+			}
+		}
+	}
+	return azureVnetIpamPodIps, nil
+}
+
+func (v *WindowsValidator) validate(stateFileIps stateFileIpsFunc, cmd []string, checkType, namespace, labelSelector string) error {
+	log.Println("Validating ", checkType, " state file")
+	nodes, err := k8sutils.GetNodeListByLabelSelector(v.ctx, v.clientset, windowsNodeSelector)
+	if err != nil {
+		return errors.Wrapf(err, "failed to get node list")
+	}
+	for index := range nodes.Items {
+		// get the privileged pod
+		pod, err := k8sutils.GetPodsByNode(v.ctx, v.clientset, namespace, labelSelector, nodes.Items[index].Name)
+		if err != nil {
+			return errors.Wrapf(err, "failed to get privileged pod")
+		}
+		podName := pod.Items[0].Name
+		// exec into the pod to get the state file
+		result, err := k8sutils.ExecCmdOnPod(v.ctx, v.clientset, namespace, podName, cmd, v.config)
+		if err != nil {
+			return errors.Wrapf(err, "failed to exec into privileged pod")
+		}
+		filePodIps, err := stateFileIps(result)
+		if err != nil {
+			return errors.Wrapf(err, "failed to get pod ips from state file")
+		}
+		if len(filePodIps) == 0 && v.restartCase {
+			log.Printf("No pods found on node %s", nodes.Items[index].Name)
+			continue
+		}
+		// get the pod ips
+		podIps := getPodIPsWithoutNodeIP(v.ctx, v.clientset, nodes.Items[index])
+
+		check := compareIPs(filePodIps, podIps)
+
+		if !check {
+			return errors.Wrapf(errors.New("State file validation failed"), "for %s on node %s", checkType, nodes.Items[index].Name)
+		}
+	}
+	log.Printf("State file validation for %s passed", checkType)
+	return nil
+}
+
+func (v *WindowsValidator) ValidateRestartNetwork() error {
+	return nil
+}
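For reference, hnsStateFileIps keys its result map by pod IP so compareIPs can match it against the IPs the Kubernetes API reports for the node. A worked example of the parsing (input shape inferred from the HNSEndpoint struct above; real Get-HnsEndpoint output carries many more fields, which json.Unmarshal simply ignores):

    // Two endpoints: one local, one remote. Only the local endpoint's IP
    // survives, mapped to its MAC address; remote endpoints are filtered out.
    raw := []byte(`[
        {"MacAddress": "00-15-5d-aa-bb-01", "IPAddress": "10.240.0.25", "IsRemoteEndpoint": false},
        {"MacAddress": "00-15-5d-aa-bb-02", "IPAddress": "10.240.0.99", "IsRemoteEndpoint": true}
    ]`)
    ips, err := hnsStateFileIps(raw)
    // err == nil; ips == map[string]string{"10.240.0.25": "00-15-5d-aa-bb-01"}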