From f9146068949227dd77467057246462dc82db8b71 Mon Sep 17 00:00:00 2001
From: Ellis Tarn
Date: Fri, 27 Oct 2023 11:08:20 -0700
Subject: [PATCH] fix: Ignore pods awaiting garbage collection during topology calculations

---
Review notes (commentary only; this area is not part of the commit message
and is not part of any hunk):

  * topology.go: inside countDomains, when fetching the node named by
    p.Spec.NodeName fails with a NotFound error, the current loop iteration
    is now skipped with `continue`. Any other error from kubeClient.Get is
    still wrapped and returned as "getting node %s, %w", as before.
  * provisioner_topology_test.go: the new case applies a labelled pod whose
    NodeName is "does-not-exist", provisions 4 unschedulable pods carrying
    the same labels and the same zonal spread constraint (MaxSkew 1,
    DoNotSchedule), and expects the skew to consist of 1, 1, 2.
  * release.yaml: removes the "Save tag as an environment variable" step,
    which wrote TAG (from `git describe --tags --exact-match`) to
    $GITHUB_ENV.
  * NOTE(review): the release.yaml change looks unrelated to the subject
    line -- confirm it is intentional and that no step outside this hunk
    still reads TAG.
  * NOTE(review): the new "k8s.io/apimachinery/pkg/api/errors" import is
    added as its own group rather than next to the other k8s.io imports --
    confirm the project's import linter accepts this grouping.
  * NOTE(review): line breaks, indentation and diffstat padding below were
    restored from a whitespace-collapsed copy of this patch; verify against
    the original before applying.

 .github/workflows/release.yaml              |  4 ----
 .../scheduling/provisioner_topology_test.go | 15 +++++++++++++++
 .../provisioning/scheduling/topology.go     |  5 +++++
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 3c1ea52b6c..561d9def7c 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -9,10 +9,6 @@ jobs:
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0
-      - name: Save tag as an environment variable
-        run: |
-          tag=$(git describe --tags --exact-match)
-          echo "TAG=$tag" >> $GITHUB_ENV
       - name: Create Github Release
         uses: "marvinpinto/action-automatic-releases@latest"
         with:
diff --git a/pkg/controllers/provisioning/scheduling/provisioner_topology_test.go b/pkg/controllers/provisioning/scheduling/provisioner_topology_test.go
index 831815508e..4466ac732f 100644
--- a/pkg/controllers/provisioning/scheduling/provisioner_topology_test.go
+++ b/pkg/controllers/provisioning/scheduling/provisioner_topology_test.go
@@ -78,6 +78,21 @@ var _ = Describe("Topology", func() {
 		ExpectSkew(ctx, env.Client, "default", &topology[0]).To(ConsistOf(2))
 	})
 
+	It("should ignore pods if node does not exist", func() {
+		topology := []v1.TopologySpreadConstraint{{
+			TopologyKey:       v1.LabelTopologyZone,
+			WhenUnsatisfiable: v1.DoNotSchedule,
+			LabelSelector:     &metav1.LabelSelector{MatchLabels: labels},
+			MaxSkew:           1,
+		}}
+		podAwaitingGC := test.Pod(test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: labels}, TopologySpreadConstraints: topology, NodeName: "does-not-exist"})
+		ExpectApplied(ctx, env.Client, provisioner, podAwaitingGC)
+		ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov,
+			test.UnschedulablePods(test.PodOptions{ObjectMeta: metav1.ObjectMeta{Labels: labels}, TopologySpreadConstraints: topology}, 4)...,
+		)
+		ExpectSkew(ctx, env.Client, "default", &topology[0]).To(ConsistOf(1, 1, 2))
+	})
+
 	Context("Zonal", func() {
 		It("should balance pods across zones (match labels)", func() {
 			topology := []v1.TopologySpreadConstraint{{
diff --git a/pkg/controllers/provisioning/scheduling/topology.go b/pkg/controllers/provisioning/scheduling/topology.go
index c178d37ba1..6962dad5eb 100644
--- a/pkg/controllers/provisioning/scheduling/topology.go
+++ b/pkg/controllers/provisioning/scheduling/topology.go
@@ -23,6 +23,8 @@ import (
 	"github.com/aws/karpenter-core/pkg/scheduling"
 	"github.com/aws/karpenter-core/pkg/utils/functional"
 
+	"k8s.io/apimachinery/pkg/api/errors"
+
 	"go.uber.org/multierr"
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -252,6 +254,9 @@ func (t *Topology) countDomains(ctx context.Context, tg *TopologyGroup) error {
 		}
 		node := &v1.Node{}
 		if err := t.kubeClient.Get(ctx, types.NamespacedName{Name: p.Spec.NodeName}, node); err != nil {
+			if errors.IsNotFound(err) {
+				continue
+			}
 			return fmt.Errorf("getting node %s, %w", p.Spec.NodeName, err)
 		}
 		domain, ok := node.Labels[tg.Key]