From a35f830f1d2eab86d8f399429eae7ce29b7cad2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kuba=20Tu=C5=BCnik?= Date: Thu, 26 Sep 2024 20:27:54 +0200 Subject: [PATCH 1/8] CA: extract a Handle to scheduleframework.Framework out of PredicateChecker This decouples PredicateChecker from the Framework initialization logic, and allows creating multiple PredicateChecker instances while only initializing the framework once. This commit also fixes how CA integrates with Framework metrics. Instead of Registering them they're only Initialized so that CA doesn't expose scheduler metrics. And the initialization is moved from multiple different places to the Handle constructor. --- .../context/autoscaling_context.go | 5 ++ cluster-autoscaler/core/autoscaler.go | 10 +++ .../filter_out_schedulable_test.go | 3 - .../core/scaledown/actuation/actuator_test.go | 4 +- .../scaledown/eligibility/eligibility_test.go | 9 +-- .../core/scaledown/planner/planner_test.go | 4 +- .../core/scaledown/unneeded/nodes_test.go | 7 +- .../scaleup/orchestrator/orchestrator_test.go | 21 +++--- .../core/scaleup/resource/manager_test.go | 3 - cluster-autoscaler/core/static_autoscaler.go | 2 + .../core/static_autoscaler_test.go | 28 ++++---- .../estimator/binpacking_estimator_test.go | 7 +- cluster-autoscaler/main.go | 11 ++- .../mixed_nodeinfos_processor_test.go | 12 ++-- .../nodes/scale_down_set_processor_test.go | 3 +- .../processors/provreq/processor_test.go | 7 +- .../orchestrator/orchestrator_test.go | 4 -- cluster-autoscaler/simulator/cluster_test.go | 13 ++-- .../delegating_shared_lister.go | 4 +- .../simulator/framework/handle.go | 70 +++++++++++++++++++ .../simulator/framework/test_utils.go | 30 ++++++++ .../predicatechecker/schedulerbased.go | 64 ++++------------- .../predicatechecker/schedulerbased_test.go | 7 +- .../simulator/predicatechecker/testchecker.go | 17 +++-- .../scheduling/hinting_simulator_test.go | 9 +-- 25 files changed, 192 insertions(+), 162 deletions(-) rename cluster-autoscaler/simulator/{predicatechecker => framework}/delegating_shared_lister.go (98%) create mode 100644 cluster-autoscaler/simulator/framework/handle.go diff --git a/cluster-autoscaler/context/autoscaling_context.go b/cluster-autoscaler/context/autoscaling_context.go index 1743e8c443c9..c0e3ef27226e 100644 --- a/cluster-autoscaler/context/autoscaling_context.go +++ b/cluster-autoscaler/context/autoscaling_context.go @@ -27,6 +27,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/expander" processor_callbacks "k8s.io/autoscaler/cluster-autoscaler/processors/callbacks" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" "k8s.io/client-go/informers" @@ -44,6 +45,8 @@ type AutoscalingContext struct { AutoscalingKubeClients // CloudProvider used in CA. CloudProvider cloudprovider.CloudProvider + // FrameworkHandle can be used to interact with the scheduler framework. + FrameworkHandle *framework.Handle // TODO(kgolab) - move away too as it's not config // PredicateChecker to check if a pod can fit into a node. 
PredicateChecker predicatechecker.PredicateChecker @@ -100,6 +103,7 @@ func NewResourceLimiterFromAutoscalingOptions(options config.AutoscalingOptions) // NewAutoscalingContext returns an autoscaling context from all the necessary parameters passed via arguments func NewAutoscalingContext( options config.AutoscalingOptions, + fwHandle *framework.Handle, predicateChecker predicatechecker.PredicateChecker, clusterSnapshot clustersnapshot.ClusterSnapshot, autoscalingKubeClients *AutoscalingKubeClients, @@ -114,6 +118,7 @@ func NewAutoscalingContext( AutoscalingOptions: options, CloudProvider: cloudProvider, AutoscalingKubeClients: *autoscalingKubeClients, + FrameworkHandle: fwHandle, PredicateChecker: predicateChecker, ClusterSnapshot: clusterSnapshot, ExpanderStrategy: expanderStrategy, diff --git a/cluster-autoscaler/core/autoscaler.go b/cluster-autoscaler/core/autoscaler.go index 0e63a3c85011..88d94a70a53d 100644 --- a/cluster-autoscaler/core/autoscaler.go +++ b/cluster-autoscaler/core/autoscaler.go @@ -34,6 +34,7 @@ import ( ca_processors "k8s.io/autoscaler/cluster-autoscaler/processors" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules" + "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" "k8s.io/autoscaler/cluster-autoscaler/utils/backoff" @@ -49,6 +50,7 @@ type AutoscalerOptions struct { InformerFactory informers.SharedInformerFactory AutoscalingKubeClients *context.AutoscalingKubeClients CloudProvider cloudprovider.CloudProvider + FrameworkHandle *framework.Handle PredicateChecker predicatechecker.PredicateChecker ClusterSnapshot clustersnapshot.ClusterSnapshot ExpanderStrategy expander.Strategy @@ -86,6 +88,7 @@ func NewAutoscaler(opts AutoscalerOptions, informerFactory informers.SharedInfor } return NewStaticAutoscaler( opts.AutoscalingOptions, + opts.FrameworkHandle, opts.PredicateChecker, opts.ClusterSnapshot, opts.AutoscalingKubeClients, @@ -114,6 +117,13 @@ func initializeDefaultOptions(opts *AutoscalerOptions, informerFactory informers if opts.AutoscalingKubeClients == nil { opts.AutoscalingKubeClients = context.NewAutoscalingKubeClients(opts.AutoscalingOptions, opts.KubeClient, opts.InformerFactory) } + if opts.FrameworkHandle == nil { + fwHandle, err := framework.NewHandle(opts.InformerFactory, opts.SchedulerConfig) + if err != nil { + return err + } + opts.FrameworkHandle = fwHandle + } if opts.ClusterSnapshot == nil { opts.ClusterSnapshot = clustersnapshot.NewBasicClusterSnapshot() } diff --git a/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go b/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go index 7b0054f9a2f2..728c32af9202 100644 --- a/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go +++ b/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go @@ -29,12 +29,9 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" "k8s.io/autoscaler/cluster-autoscaler/simulator/scheduling" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" - schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" ) func TestFilterOutSchedulable(t *testing.T) { - schedulermetrics.Register() - node := buildReadyTestNode("node", 2000, 100) matchesAllNodes := func(*framework.NodeInfo) bool { return true } matchesNoNodes := func(*framework.NodeInfo) bool { return false } diff --git a/cluster-autoscaler/core/scaledown/actuation/actuator_test.go b/cluster-autoscaler/core/scaledown/actuation/actuator_test.go index 6f44abaf06b6..9a278f4f6eb0 100644 --- a/cluster-autoscaler/core/scaledown/actuation/actuator_test.go +++ b/cluster-autoscaler/core/scaledown/actuation/actuator_test.go @@ -24,6 +24,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + appsv1 "k8s.io/api/apps/v1" apiv1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" @@ -50,7 +51,6 @@ import ( . "k8s.io/autoscaler/cluster-autoscaler/utils/test" "k8s.io/client-go/kubernetes/fake" core "k8s.io/client-go/testing" - schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" ) type nodeGroupViewInfo struct { @@ -1000,8 +1000,6 @@ func getStartDeletionTestCases(ignoreDaemonSetsUtilization bool, suffix string) } func TestStartDeletion(t *testing.T) { - schedulermetrics.Register() - testSets := []map[string]startDeletionTestCase{ // IgnoreDaemonSetsUtilization is false getStartDeletionTestCases(false, "testNg1"), diff --git a/cluster-autoscaler/core/scaledown/eligibility/eligibility_test.go b/cluster-autoscaler/core/scaledown/eligibility/eligibility_test.go index 23397ab4327e..b585d5951517 100644 --- a/cluster-autoscaler/core/scaledown/eligibility/eligibility_test.go +++ b/cluster-autoscaler/core/scaledown/eligibility/eligibility_test.go @@ -21,6 +21,9 @@ import ( "testing" "time" + "github.com/stretchr/testify/assert" + + apiv1 "k8s.io/api/core/v1" testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test" "k8s.io/autoscaler/cluster-autoscaler/config" "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/unremovable" @@ -29,10 +32,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/utils/taints" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" - schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" - - "github.com/stretchr/testify/assert" - apiv1 "k8s.io/api/core/v1" "k8s.io/client-go/kubernetes/fake" ) @@ -149,8 +148,6 @@ func getTestCases(ignoreDaemonSetsUtilization bool, suffix string, now time.Time } func TestFilterOutUnremovable(t *testing.T) { - schedulermetrics.Register() - now := time.Now() for _, tc := range append(getTestCases(false, "IgnoreDaemonSetUtilization=false", now), getTestCases(true, "IgnoreDaemonsetUtilization=true", now)...) { diff --git a/cluster-autoscaler/core/scaledown/planner/planner_test.go b/cluster-autoscaler/core/scaledown/planner/planner_test.go index 0aa4881a9613..fc3fe854be23 100644 --- a/cluster-autoscaler/core/scaledown/planner/planner_test.go +++ b/cluster-autoscaler/core/scaledown/planner/planner_test.go @@ -22,6 +22,7 @@ import ( "time" "github.com/stretchr/testify/assert" + appsv1 "k8s.io/api/apps/v1" apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -44,12 +45,9 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/utils/taints" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" "k8s.io/client-go/kubernetes/fake" - schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" ) func TestUpdateClusterState(t *testing.T) { - schedulermetrics.Register() - testCases := []struct { name string nodes []*apiv1.Node diff --git a/cluster-autoscaler/core/scaledown/unneeded/nodes_test.go b/cluster-autoscaler/core/scaledown/unneeded/nodes_test.go index 51048eb57df5..05f450150fe3 100644 --- a/cluster-autoscaler/core/scaledown/unneeded/nodes_test.go +++ b/cluster-autoscaler/core/scaledown/unneeded/nodes_test.go @@ -21,6 +21,8 @@ import ( "testing" "time" + "github.com/stretchr/testify/assert" + apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test" @@ -33,9 +35,6 @@ import ( kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" "k8s.io/client-go/kubernetes/fake" - schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" - - "github.com/stretchr/testify/assert" ) func TestUpdate(t *testing.T) { @@ -129,8 +128,6 @@ func version(n simulator.NodeToBeRemoved) string { } func TestRemovableAt(t *testing.T) { - schedulermetrics.Register() - testCases := []struct { name string numEmpty int diff --git a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator_test.go b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator_test.go index 4e5668b1d303..2fd2883f7434 100644 --- a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator_test.go +++ b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator_test.go @@ -26,13 +26,9 @@ import ( "testing" "time" - "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupconfig" - "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups/asyncnodegroups" - "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - kube_record "k8s.io/client-go/tools/record" - "k8s.io/component-base/metrics/legacyregistry" - schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" - + appsv1 "k8s.io/api/apps/v1" + apiv1 "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test" "k8s.io/autoscaler/cluster-autoscaler/clusterstate" @@ -44,20 +40,21 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/estimator" "k8s.io/autoscaler/cluster-autoscaler/metrics" "k8s.io/autoscaler/cluster-autoscaler/processors" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupconfig" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups/asyncnodegroups" "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" "k8s.io/autoscaler/cluster-autoscaler/processors/nodeinfosprovider" "k8s.io/autoscaler/cluster-autoscaler/processors/status" processorstest "k8s.io/autoscaler/cluster-autoscaler/processors/test" + "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" "k8s.io/autoscaler/cluster-autoscaler/utils/taints" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" "k8s.io/autoscaler/cluster-autoscaler/utils/units" - - appsv1 "k8s.io/api/apps/v1" - apiv1 "k8s.io/api/core/v1" - v1 "k8s.io/api/core/v1" "k8s.io/client-go/kubernetes/fake" + kube_record "k8s.io/client-go/tools/record" + "k8s.io/component-base/metrics/legacyregistry" "github.com/stretchr/testify/assert" ) @@ -72,8 +69,6 @@ var defaultOptions = config.AutoscalingOptions{ // Scale up scenarios. func TestScaleUpOK(t *testing.T) { - schedulermetrics.Register() - config := &ScaleUpTestConfig{ Nodes: []NodeConfig{ {Name: "n1", Cpu: 100, Memory: 100, Gpu: 0, Ready: true, Group: "ng1"}, diff --git a/cluster-autoscaler/core/scaleup/resource/manager_test.go b/cluster-autoscaler/core/scaleup/resource/manager_test.go index 3824edb4dcbc..4c7d64ad3930 100644 --- a/cluster-autoscaler/core/scaleup/resource/manager_test.go +++ b/cluster-autoscaler/core/scaleup/resource/manager_test.go @@ -36,7 +36,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/utils/taints" utils_test "k8s.io/autoscaler/cluster-autoscaler/utils/test" "k8s.io/client-go/kubernetes/fake" - schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" ) type nodeGroupConfig struct { @@ -54,8 +53,6 @@ type deltaForNodeTestCase struct { } func TestDeltaForNode(t *testing.T) { - schedulermetrics.Register() - testCases := []deltaForNodeTestCase{ { nodeGroupConfig: nodeGroupConfig{Name: "ng1", Min: 3, Max: 10, Size: 5, CPU: 8, Mem: 16}, diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go index d8f0d3070bb6..55ac45adb9f8 100644 --- a/cluster-autoscaler/core/static_autoscaler.go +++ b/cluster-autoscaler/core/static_autoscaler.go @@ -131,6 +131,7 @@ func (callbacks *staticAutoscalerProcessorCallbacks) reset() { // NewStaticAutoscaler creates an instance of Autoscaler filled with provided parameters func NewStaticAutoscaler( opts config.AutoscalingOptions, + fwHandle *framework.Handle, predicateChecker predicatechecker.PredicateChecker, clusterSnapshot clustersnapshot.ClusterSnapshot, autoscalingKubeClients *context.AutoscalingKubeClients, @@ -154,6 +155,7 @@ func NewStaticAutoscaler( processorCallbacks := newStaticAutoscalerProcessorCallbacks() autoscalingContext := context.NewAutoscalingContext( opts, + fwHandle, predicateChecker, clusterSnapshot, autoscalingKubeClients, diff --git a/cluster-autoscaler/core/static_autoscaler_test.go b/cluster-autoscaler/core/static_autoscaler_test.go index 00ff2a8c6338..ec0b084a752b 100644 --- a/cluster-autoscaler/core/static_autoscaler_test.go +++ b/cluster-autoscaler/core/static_autoscaler_test.go @@ -27,6 +27,16 @@ import ( "testing" "time" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + + appsv1 "k8s.io/api/apps/v1" + apiv1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" mockprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/mocks" testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test" @@ -63,22 +73,10 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/utils/scheduler" "k8s.io/autoscaler/cluster-autoscaler/utils/taints" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" - kube_record "k8s.io/client-go/tools/record" - "k8s.io/klog/v2" - schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" - - appsv1 "k8s.io/api/apps/v1" - apiv1 "k8s.io/api/core/v1" - policyv1 "k8s.io/api/policy/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/kubernetes/fake" v1appslister "k8s.io/client-go/listers/apps/v1" - - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" + kube_record "k8s.io/client-go/tools/record" + "k8s.io/klog/v2" ) type podListerMock struct { @@ -313,8 +311,6 @@ func setupAutoscaler(config *autoscalerSetupConfig) (*StaticAutoscaler, error) { // TODO: Refactor tests to use setupAutoscaler func TestStaticAutoscalerRunOnce(t *testing.T) { - schedulermetrics.Register() - readyNodeLister := kubernetes.NewTestNodeLister(nil) allNodeLister := kubernetes.NewTestNodeLister(nil) allPodListerMock := &podListerMock{} diff --git a/cluster-autoscaler/estimator/binpacking_estimator_test.go b/cluster-autoscaler/estimator/binpacking_estimator_test.go index e0205ffdc854..59dd9a2a95df 100644 --- a/cluster-autoscaler/estimator/binpacking_estimator_test.go +++ b/cluster-autoscaler/estimator/binpacking_estimator_test.go @@ -20,6 +20,8 @@ import ( "testing" "time" + "github.com/stretchr/testify/assert" + apiv1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -28,9 +30,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" "k8s.io/autoscaler/cluster-autoscaler/utils/units" - schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" - - "github.com/stretchr/testify/assert" ) func makePodEquivalenceGroup(pod *apiv1.Pod, podCount int) PodEquivalenceGroup { @@ -66,8 +65,6 @@ func makeNode(cpu, mem, podCount int64, name string, zone string) *apiv1.Node { } func TestBinpackingEstimate(t *testing.T) { - schedulermetrics.Register() - highResourcePodGroup := makePodEquivalenceGroup( BuildTestPod( "estimatee", diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index cf3ca31ccbe5..8fee1047f569 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -28,6 +28,7 @@ import ( "syscall" "time" + "github.com/spf13/pflag" "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/actuation" "k8s.io/autoscaler/cluster-autoscaler/core/scaleup/orchestrator" "k8s.io/autoscaler/cluster-autoscaler/debuggingsnapshot" @@ -35,12 +36,11 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/besteffortatomic" "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/checkcapacity" "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/provreqclient" + "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" "k8s.io/autoscaler/cluster-autoscaler/simulator/scheduling" kubelet_config "k8s.io/kubernetes/pkg/kubelet/apis/config" - "github.com/spf13/pflag" - "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apiserver/pkg/server/mux" @@ -88,7 +88,6 @@ import ( "k8s.io/component-base/metrics/legacyregistry" "k8s.io/klog/v2" scheduler_config "k8s.io/kubernetes/pkg/scheduler/apis/config" - schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" ) // MultiStringFlag is a flag for passing multiple parameters using same flag @@ 
-494,7 +493,7 @@ func buildAutoscaler(context ctx.Context, debuggingSnapshotter debuggingsnapshot } informerFactory := informers.NewSharedInformerFactoryWithOptions(kubeClient, 0, informers.WithTransform(trim)) - predicateChecker, err := predicatechecker.NewSchedulerBasedPredicateChecker(informerFactory, autoscalingOptions.SchedulerConfig) + fwHandle, err := framework.NewHandle(informerFactory, autoscalingOptions.SchedulerConfig) if err != nil { return nil, nil, err } @@ -503,11 +502,12 @@ func buildAutoscaler(context ctx.Context, debuggingSnapshotter debuggingsnapshot opts := core.AutoscalerOptions{ AutoscalingOptions: autoscalingOptions, + FrameworkHandle: fwHandle, ClusterSnapshot: clustersnapshot.NewDeltaClusterSnapshot(), KubeClient: kubeClient, InformerFactory: informerFactory, DebuggingSnapshotter: debuggingSnapshotter, - PredicateChecker: predicateChecker, + PredicateChecker: predicatechecker.NewSchedulerBasedPredicateChecker(fwHandle), DeleteOptions: deleteOptions, DrainabilityRules: drainabilityRules, ScaleUpOrchestrator: orchestrator.New(), @@ -632,7 +632,6 @@ func buildAutoscaler(context ctx.Context, debuggingSnapshotter debuggingsnapshot } func run(healthCheck *metrics.HealthCheck, debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter) { - schedulermetrics.Register() metrics.RegisterAll(*emitPerNodeGroupMetrics) context, cancel := ctx.WithCancel(ctx.Background()) defer cancel() diff --git a/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go b/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go index cd3eccc390af..bc2cea5516c7 100644 --- a/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go +++ b/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go @@ -20,6 +20,10 @@ import ( "testing" "time" + "github.com/stretchr/testify/assert" + + appsv1 "k8s.io/api/apps/v1" + apiv1 "k8s.io/api/core/v1" testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test" "k8s.io/autoscaler/cluster-autoscaler/context" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" @@ -28,12 +32,6 @@ import ( kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" "k8s.io/autoscaler/cluster-autoscaler/utils/taints" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" - schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" - - "github.com/stretchr/testify/assert" - - appsv1 "k8s.io/api/apps/v1" - apiv1 "k8s.io/api/core/v1" ) var ( @@ -41,8 +39,6 @@ var ( ) func TestGetNodeInfosForGroups(t *testing.T) { - schedulermetrics.Register() - now := time.Now() ready1 := BuildTestNode("n1", 1000, 1000) SetNodeReadyState(ready1, true, now.Add(-2*time.Minute)) diff --git a/cluster-autoscaler/processors/nodes/scale_down_set_processor_test.go b/cluster-autoscaler/processors/nodes/scale_down_set_processor_test.go index dc43792c7183..af18b0ef072e 100644 --- a/cluster-autoscaler/processors/nodes/scale_down_set_processor_test.go +++ b/cluster-autoscaler/processors/nodes/scale_down_set_processor_test.go @@ -20,17 +20,16 @@ import ( "testing" "github.com/stretchr/testify/assert" + testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test" "k8s.io/autoscaler/cluster-autoscaler/config" . "k8s.io/autoscaler/cluster-autoscaler/core/test" "k8s.io/autoscaler/cluster-autoscaler/simulator" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" "k8s.io/client-go/kubernetes/fake" - schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" ) func TestAtomicResizeFilterUnremovableNodes(t *testing.T) { - schedulermetrics.Register() testCases := []struct { name string nodeGroups []struct { diff --git a/cluster-autoscaler/processors/provreq/processor_test.go b/cluster-autoscaler/processors/provreq/processor_test.go index 20eaa0e1ffba..11daf0aa2590 100644 --- a/cluster-autoscaler/processors/provreq/processor_test.go +++ b/cluster-autoscaler/processors/provreq/processor_test.go @@ -22,11 +22,9 @@ import ( "time" "github.com/stretchr/testify/assert" + apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" - v1 "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1" "k8s.io/autoscaler/cluster-autoscaler/config" . "k8s.io/autoscaler/cluster-autoscaler/core/test" @@ -34,6 +32,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/provreqclient" "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/provreqwrapper" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/scheduling" ) @@ -239,8 +238,6 @@ func (f *fakeInjector) TrySchedulePods(clusterSnapshot clustersnapshot.ClusterSn } func TestBookCapacity(t *testing.T) { - schedulermetrics.Register() - testCases := []struct { name string conditions []string diff --git a/cluster-autoscaler/provisioningrequest/orchestrator/orchestrator_test.go b/cluster-autoscaler/provisioningrequest/orchestrator/orchestrator_test.go index 061d17a2f124..18b87674599f 100644 --- a/cluster-autoscaler/provisioningrequest/orchestrator/orchestrator_test.go +++ b/cluster-autoscaler/provisioningrequest/orchestrator/orchestrator_test.go @@ -49,13 +49,9 @@ import ( . "k8s.io/autoscaler/cluster-autoscaler/utils/test" "k8s.io/client-go/kubernetes/fake" clocktesting "k8s.io/utils/clock/testing" - - schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" ) func TestScaleUp(t *testing.T) { - schedulermetrics.Register() - // Set up a cluster with 200 nodes: // - 100 nodes with high cpu, low memory in autoscaled group with max 150 // - 100 nodes with high memory, low cpu not in autoscaled group diff --git a/cluster-autoscaler/simulator/cluster_test.go b/cluster-autoscaler/simulator/cluster_test.go index e493a6672b02..3612d4404772 100644 --- a/cluster-autoscaler/simulator/cluster_test.go +++ b/cluster-autoscaler/simulator/cluster_test.go @@ -21,6 +21,11 @@ import ( "testing" "time" + "github.com/stretchr/testify/assert" + + appsv1 "k8s.io/api/apps/v1" + apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" @@ -28,12 +33,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/utils/drain" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" - schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" - - "github.com/stretchr/testify/assert" - appsv1 "k8s.io/api/apps/v1" - apiv1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/kubernetes/pkg/kubelet/types" ) @@ -75,8 +74,6 @@ type findNodesToRemoveTestConfig struct { } func TestFindNodesToRemove(t *testing.T) { - schedulermetrics.Register() - emptyNode := BuildTestNode("n1", 1000, 2000000) // two small pods backed by ReplicaSet diff --git a/cluster-autoscaler/simulator/predicatechecker/delegating_shared_lister.go b/cluster-autoscaler/simulator/framework/delegating_shared_lister.go similarity index 98% rename from cluster-autoscaler/simulator/predicatechecker/delegating_shared_lister.go rename to cluster-autoscaler/simulator/framework/delegating_shared_lister.go index be66bb8bd326..a3aff6af67b0 100644 --- a/cluster-autoscaler/simulator/predicatechecker/delegating_shared_lister.go +++ b/cluster-autoscaler/simulator/framework/delegating_shared_lister.go @@ -1,5 +1,5 @@ /* -Copyright 2020 The Kubernetes Authors. +Copyright 2024 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package predicatechecker +package framework import ( "fmt" diff --git a/cluster-autoscaler/simulator/framework/handle.go b/cluster-autoscaler/simulator/framework/handle.go new file mode 100644 index 000000000000..f8b2250648af --- /dev/null +++ b/cluster-autoscaler/simulator/framework/handle.go @@ -0,0 +1,70 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package framework + +import ( + "context" + "fmt" + + "k8s.io/client-go/informers" + schedulerconfig "k8s.io/kubernetes/pkg/scheduler/apis/config" + schedulerconfiglatest "k8s.io/kubernetes/pkg/scheduler/apis/config/latest" + schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" + schedulerplugins "k8s.io/kubernetes/pkg/scheduler/framework/plugins" + schedulerframeworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime" + schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" +) + +// Handle is meant for interacting with the scheduler framework. +type Handle struct { + Framework schedulerframework.Framework + DelegatingLister *DelegatingSchedulerSharedLister +} + +// NewHandle builds a framework Handle based on the provided informers and scheduler config. 
+func NewHandle(informerFactory informers.SharedInformerFactory, schedConfig *schedulerconfig.KubeSchedulerConfiguration) (*Handle, error) { + if schedConfig == nil { + var err error + schedConfig, err = schedulerconfiglatest.Default() + if err != nil { + return nil, fmt.Errorf("couldn't create scheduler config: %v", err) + } + } + + if len(schedConfig.Profiles) != 1 { + return nil, fmt.Errorf("unexpected scheduler config: expected one scheduler profile only (found %d profiles)", len(schedConfig.Profiles)) + } + sharedLister := NewDelegatingSchedulerSharedLister() + + schedulermetrics.InitMetrics() + framework, err := schedulerframeworkruntime.NewFramework( + context.TODO(), + schedulerplugins.NewInTreeRegistry(), + &schedConfig.Profiles[0], + schedulerframeworkruntime.WithInformerFactory(informerFactory), + schedulerframeworkruntime.WithSnapshotSharedLister(sharedLister), + ) + + if err != nil { + return nil, fmt.Errorf("couldn't create scheduler framework; %v", err) + } + + return &Handle{ + Framework: framework, + DelegatingLister: sharedLister, + }, nil +} diff --git a/cluster-autoscaler/simulator/framework/test_utils.go b/cluster-autoscaler/simulator/framework/test_utils.go index 9cdfd45eccd1..e637d2e03eae 100644 --- a/cluster-autoscaler/simulator/framework/test_utils.go +++ b/cluster-autoscaler/simulator/framework/test_utils.go @@ -18,8 +18,16 @@ package framework import ( apiv1 "k8s.io/api/core/v1" + "k8s.io/client-go/informers" + clientsetfake "k8s.io/client-go/kubernetes/fake" + scheduler_config_latest "k8s.io/kubernetes/pkg/scheduler/apis/config/latest" ) +// testFailer is an abstraction that covers both *testing.T and *testing.B. +type testFailer interface { + Fatalf(format string, args ...any) +} + // NewTestNodeInfo returns a new NodeInfo without any DRA information - only to be used in test code. // Production code should always take DRA objects into account. func NewTestNodeInfo(node *apiv1.Node, pods ...*apiv1.Pod) *NodeInfo { @@ -29,3 +37,25 @@ func NewTestNodeInfo(node *apiv1.Node, pods ...*apiv1.Pod) *NodeInfo { } return nodeInfo } + +// NewTestFrameworkHandle creates a Handle that can be used in tests. +func NewTestFrameworkHandle() (*Handle, error) { + defaultConfig, err := scheduler_config_latest.Default() + if err != nil { + return nil, err + } + fwHandle, err := NewHandle(informers.NewSharedInformerFactory(clientsetfake.NewSimpleClientset(), 0), defaultConfig) + if err != nil { + return nil, err + } + return fwHandle, nil +} + +// NewTestFrameworkHandleOrDie creates a Handle that can be used in tests. 
+func NewTestFrameworkHandleOrDie(t testFailer) *Handle { + handle, err := NewTestFrameworkHandle() + if err != nil { + t.Fatalf("TestFrameworkHandleOrDie: couldn't create test framework handle: %v", err) + } + return handle +} diff --git a/cluster-autoscaler/simulator/predicatechecker/schedulerbased.go b/cluster-autoscaler/simulator/predicatechecker/schedulerbased.go index 4e37e97528a2..491e05da789a 100644 --- a/cluster-autoscaler/simulator/predicatechecker/schedulerbased.go +++ b/cluster-autoscaler/simulator/predicatechecker/schedulerbased.go @@ -24,59 +24,23 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" apiv1 "k8s.io/api/core/v1" - "k8s.io/client-go/informers" v1listers "k8s.io/client-go/listers/core/v1" klog "k8s.io/klog/v2" - "k8s.io/kubernetes/pkg/scheduler/apis/config" - scheduler_config "k8s.io/kubernetes/pkg/scheduler/apis/config/latest" schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" - scheduler_plugins "k8s.io/kubernetes/pkg/scheduler/framework/plugins" - schedulerframeworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime" ) // SchedulerBasedPredicateChecker checks whether all required predicates pass for given Pod and Node. // The verification is done by calling out to scheduler code. type SchedulerBasedPredicateChecker struct { - framework schedulerframework.Framework - delegatingSharedLister *DelegatingSchedulerSharedLister - nodeLister v1listers.NodeLister - podLister v1listers.PodLister - lastIndex int + fwHandle *framework.Handle + nodeLister v1listers.NodeLister + podLister v1listers.PodLister + lastIndex int } // NewSchedulerBasedPredicateChecker builds scheduler based PredicateChecker. -func NewSchedulerBasedPredicateChecker(informerFactory informers.SharedInformerFactory, schedConfig *config.KubeSchedulerConfiguration) (*SchedulerBasedPredicateChecker, error) { - if schedConfig == nil { - var err error - schedConfig, err = scheduler_config.Default() - if err != nil { - return nil, fmt.Errorf("couldn't create scheduler config: %v", err) - } - } - - if len(schedConfig.Profiles) != 1 { - return nil, fmt.Errorf("unexpected scheduler config: expected one scheduler profile only (found %d profiles)", len(schedConfig.Profiles)) - } - sharedLister := NewDelegatingSchedulerSharedLister() - - framework, err := schedulerframeworkruntime.NewFramework( - context.TODO(), - scheduler_plugins.NewInTreeRegistry(), - &schedConfig.Profiles[0], - schedulerframeworkruntime.WithInformerFactory(informerFactory), - schedulerframeworkruntime.WithSnapshotSharedLister(sharedLister), - ) - - if err != nil { - return nil, fmt.Errorf("couldn't create scheduler framework; %v", err) - } - - checker := &SchedulerBasedPredicateChecker{ - framework: framework, - delegatingSharedLister: sharedLister, - } - - return checker, nil +func NewSchedulerBasedPredicateChecker(fwHandle *framework.Handle) *SchedulerBasedPredicateChecker { + return &SchedulerBasedPredicateChecker{fwHandle: fwHandle} } // FitsAnyNode checks if the given pod can be placed on any of the given nodes. 
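With the constructor change above, the scheduler framework is initialized once inside framework.NewHandle and any number of predicate checkers can then share that handle. A minimal wiring sketch, not part of the patch (the helper name and the fake clientset are assumptions purely for illustration):

import (
	"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
	"k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker"
	"k8s.io/client-go/informers"
	clientsetfake "k8s.io/client-go/kubernetes/fake"
)

// newSharedPredicateCheckers is a hypothetical helper illustrating the intended wiring:
// the framework (and scheduler metrics initialization) happens once in NewHandle, and
// multiple PredicateChecker instances can share the resulting handle.
func newSharedPredicateCheckers() ([]*predicatechecker.SchedulerBasedPredicateChecker, error) {
	informerFactory := informers.NewSharedInformerFactory(clientsetfake.NewSimpleClientset(), 0)
	// A nil scheduler config makes NewHandle fall back to the default scheduler profile.
	fwHandle, err := framework.NewHandle(informerFactory, nil)
	if err != nil {
		return nil, err
	}
	checkerA := predicatechecker.NewSchedulerBasedPredicateChecker(fwHandle)
	checkerB := predicatechecker.NewSchedulerBasedPredicateChecker(fwHandle)
	return []*predicatechecker.SchedulerBasedPredicateChecker{checkerA, checkerB}, nil
}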
@@ -102,11 +66,11 @@ func (p *SchedulerBasedPredicateChecker) FitsAnyNodeMatching(clusterSnapshot clu return "", fmt.Errorf("error obtaining nodeInfos from schedulerLister") } - p.delegatingSharedLister.UpdateDelegate(clusterSnapshot) - defer p.delegatingSharedLister.ResetDelegate() + p.fwHandle.DelegatingLister.UpdateDelegate(clusterSnapshot) + defer p.fwHandle.DelegatingLister.ResetDelegate() state := schedulerframework.NewCycleState() - preFilterResult, preFilterStatus, _ := p.framework.RunPreFilterPlugins(context.TODO(), state, pod) + preFilterResult, preFilterStatus, _ := p.fwHandle.Framework.RunPreFilterPlugins(context.TODO(), state, pod) if !preFilterStatus.IsSuccess() { return "", fmt.Errorf("error running pre filter plugins for pod %s; %s", pod.Name, preFilterStatus.Message()) } @@ -126,7 +90,7 @@ func (p *SchedulerBasedPredicateChecker) FitsAnyNodeMatching(clusterSnapshot clu continue } - filterStatus := p.framework.RunFilterPlugins(context.TODO(), state, pod, nodeInfo.ToScheduler()) + filterStatus := p.fwHandle.Framework.RunFilterPlugins(context.TODO(), state, pod, nodeInfo.ToScheduler()) if filterStatus.IsSuccess() { p.lastIndex = (p.lastIndex + i + 1) % len(nodeInfosList) return nodeInfo.Node().Name, nil @@ -146,11 +110,11 @@ func (p *SchedulerBasedPredicateChecker) CheckPredicates(clusterSnapshot cluster return NewPredicateError(InternalPredicateError, "", errorMessage, nil, emptyString) } - p.delegatingSharedLister.UpdateDelegate(clusterSnapshot) - defer p.delegatingSharedLister.ResetDelegate() + p.fwHandle.DelegatingLister.UpdateDelegate(clusterSnapshot) + defer p.fwHandle.DelegatingLister.ResetDelegate() state := schedulerframework.NewCycleState() - _, preFilterStatus, _ := p.framework.RunPreFilterPlugins(context.TODO(), state, pod) + _, preFilterStatus, _ := p.fwHandle.Framework.RunPreFilterPlugins(context.TODO(), state, pod) if !preFilterStatus.IsSuccess() { return NewPredicateError( InternalPredicateError, @@ -160,7 +124,7 @@ func (p *SchedulerBasedPredicateChecker) CheckPredicates(clusterSnapshot cluster emptyString) } - filterStatus := p.framework.RunFilterPlugins(context.TODO(), state, pod, nodeInfo.ToScheduler()) + filterStatus := p.fwHandle.Framework.RunFilterPlugins(context.TODO(), state, pod, nodeInfo.ToScheduler()) if !filterStatus.IsSuccess() { filterName := filterStatus.Plugin() diff --git a/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go b/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go index d5423777f711..aa06ec7c159f 100644 --- a/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go +++ b/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go @@ -22,21 +22,18 @@ import ( "testing" "time" + "github.com/stretchr/testify/assert" + testconfig "k8s.io/autoscaler/cluster-autoscaler/config/test" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" scheduler "k8s.io/autoscaler/cluster-autoscaler/utils/scheduler" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" - schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" - - "github.com/stretchr/testify/assert" apiv1 "k8s.io/api/core/v1" ) func TestCheckPredicate(t *testing.T) { - schedulermetrics.Register() - p450 := BuildTestPod("p450", 450, 500000) p600 := BuildTestPod("p600", 600, 500000) p8000 := BuildTestPod("p8000", 8000, 0) diff --git a/cluster-autoscaler/simulator/predicatechecker/testchecker.go b/cluster-autoscaler/simulator/predicatechecker/testchecker.go index dd9e1745acac..5178fc0c4233 100644 --- a/cluster-autoscaler/simulator/predicatechecker/testchecker.go +++ b/cluster-autoscaler/simulator/predicatechecker/testchecker.go @@ -17,6 +17,7 @@ limitations under the License. package predicatechecker import ( + "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/client-go/informers" clientsetfake "k8s.io/client-go/kubernetes/fake" "k8s.io/kubernetes/pkg/scheduler/apis/config" @@ -25,21 +26,19 @@ import ( // NewTestPredicateChecker builds test version of PredicateChecker. func NewTestPredicateChecker() (PredicateChecker, error) { - schedConfig, err := scheduler_config_latest.Default() + defaultConfig, err := scheduler_config_latest.Default() if err != nil { return nil, err } - - // just call out to NewSchedulerBasedPredicateChecker but use fake kubeClient - return NewSchedulerBasedPredicateChecker(informers.NewSharedInformerFactory(clientsetfake.NewSimpleClientset(), 0), schedConfig) + return NewTestPredicateCheckerWithCustomConfig(defaultConfig) } // NewTestPredicateCheckerWithCustomConfig builds test version of PredicateChecker with custom scheduler config. func NewTestPredicateCheckerWithCustomConfig(schedConfig *config.KubeSchedulerConfiguration) (PredicateChecker, error) { - if schedConfig != nil { - // just call out to NewSchedulerBasedPredicateChecker but use fake kubeClient - return NewSchedulerBasedPredicateChecker(informers.NewSharedInformerFactory(clientsetfake.NewSimpleClientset(), 0), schedConfig) + // just call out to NewSchedulerBasedPredicateChecker but use fake kubeClient + fwHandle, err := framework.NewHandle(informers.NewSharedInformerFactory(clientsetfake.NewSimpleClientset(), 0), schedConfig) + if err != nil { + return nil, err } - - return NewTestPredicateChecker() + return NewSchedulerBasedPredicateChecker(fwHandle), nil } diff --git a/cluster-autoscaler/simulator/scheduling/hinting_simulator_test.go b/cluster-autoscaler/simulator/scheduling/hinting_simulator_test.go index 7e3ec8cb3d11..7cec8b6018c6 100644 --- a/cluster-autoscaler/simulator/scheduling/hinting_simulator_test.go +++ b/cluster-autoscaler/simulator/scheduling/hinting_simulator_test.go @@ -20,19 +20,16 @@ import ( "testing" "time" + "github.com/stretchr/testify/assert" + + apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" - schedulermetrics "k8s.io/kubernetes/pkg/scheduler/metrics" - - "github.com/stretchr/testify/assert" - apiv1 "k8s.io/api/core/v1" ) func TestTrySchedulePods(t *testing.T) { - schedulermetrics.Register() - testCases := []struct { desc string nodes []*apiv1.Node From ce185226d160f35b4018efbb4cc76400df804a0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kuba=20Tu=C5=BCnik?= Date: Wed, 13 Nov 2024 21:36:24 +0100 Subject: [PATCH 2/8] CA: extend ClusterSnapshot interface with predicate-checking methods To handle DRA properly, scheduling predicates will need to be run whenever Pods are scheduled in the snapshot. PredicateChecker always needs a ClusterSnapshot to work, and ClusterSnapshot scheduling methods need to run the predicates first. So it makes most sense to have PredicateChecker be a dependency for ClusterSnapshot implementations, and move the PredicateChecker methods to ClusterSnapshot. This commit mirrors PredicateChecker methods in ClusterSnapshot (with the exception of FitsAnyNode which isn't used anywhere and is trivial to do via FitsAnyNodeMatching). Further commits will remove the PredicateChecker interface and move the implementation under clustersnapshot. Dummy methods are added to current ClusterSnapshot implementations to get the tests to pass. Further commits will actually implement them. PredicateError is refactored into a broader SchedulingError so that the ClusterSnapshot methods can return a single error that the callers can use to distinguish between a failing predicate and other, unexpected errors. --- .../core/scaleup/orchestrator/orchestrator.go | 2 +- .../simulator/clustersnapshot/basic.go | 20 +++ .../clustersnapshot/clustersnapshot.go | 35 +++- .../simulator/clustersnapshot/delta.go | 20 +++ .../clustersnapshot/scheduling_error.go | 149 ++++++++++++++++++ .../simulator/predicatechecker/error.go | 107 ------------- .../simulator/predicatechecker/interface.go | 6 +- .../predicatechecker/schedulerbased.go | 60 +++---- .../predicatechecker/schedulerbased_test.go | 15 +- 9 files changed, 256 insertions(+), 158 deletions(-) create mode 100644 cluster-autoscaler/simulator/clustersnapshot/scheduling_error.go delete mode 100644 cluster-autoscaler/simulator/predicatechecker/error.go diff --git a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go index f8b5073b48ba..3b27f6b467a4 100644 --- a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go +++ b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go @@ -586,7 +586,7 @@ func (o *ScaleUpOrchestrator) SchedulablePodGroups( eg.Schedulable = true eg.SchedulableGroups = append(eg.SchedulableGroups, nodeGroup.Id()) } else { - klog.V(2).Infof("Pod %s/%s can't be scheduled on %s, predicate checking error: %v", samplePod.Namespace, samplePod.Name, nodeGroup.Id(), err.VerboseMessage()) + klog.V(2).Infof("Pod %s/%s can't be scheduled on %s, predicate checking error: %v", samplePod.Namespace, samplePod.Name, nodeGroup.Id(), err) if podCount := len(eg.Pods); podCount > 1 { klog.V(2).Infof("%d other pods similar to %s can't be scheduled on %s", podCount-1, samplePod.Name, nodeGroup.Id()) } diff --git a/cluster-autoscaler/simulator/clustersnapshot/basic.go b/cluster-autoscaler/simulator/clustersnapshot/basic.go index be8388c5ce8b..5eae2a5a892a 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/basic.go +++ b/cluster-autoscaler/simulator/clustersnapshot/basic.go @@ -31,6 +31,26 @@ type 
BasicClusterSnapshot struct { data []*internalBasicSnapshotData } +func (snapshot *BasicClusterSnapshot) SchedulePod(pod *apiv1.Pod, nodeName string) SchedulingError { + //TODO implement me + panic("implement me") +} + +func (snapshot *BasicClusterSnapshot) SchedulePodOnAnyNodeMatching(pod *apiv1.Pod, nodeMatches func(*framework.NodeInfo) bool) (matchingNode string, err SchedulingError) { + //TODO implement me + panic("implement me") +} + +func (snapshot *BasicClusterSnapshot) CheckPredicates(pod *apiv1.Pod, nodeName string) SchedulingError { + //TODO implement me + panic("implement me") +} + +func (snapshot *BasicClusterSnapshot) UnschedulePod(namespace string, podName string, nodeName string) error { + //TODO implement me + panic("implement me") +} + type internalBasicSnapshotData struct { nodeInfoMap map[string]*schedulerframework.NodeInfo pvcNamespacePodMap map[string]map[string]bool diff --git a/cluster-autoscaler/simulator/clustersnapshot/clustersnapshot.go b/cluster-autoscaler/simulator/clustersnapshot/clustersnapshot.go index 1c60fcc0b730..55efbb985539 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/clustersnapshot.go +++ b/cluster-autoscaler/simulator/clustersnapshot/clustersnapshot.go @@ -28,19 +28,48 @@ import ( // ClusterSnapshot is abstraction of cluster state used for predicate simulations. // It exposes mutation methods and can be viewed as scheduler's SharedLister. type ClusterSnapshot interface { + ClusterSnapshotStore + + // SchedulePod tries to schedule the given Pod on the Node with the given name inside the snapshot, + // checking scheduling predicates. The pod is only scheduled if the predicates pass. If the pod is scheduled, + // all relevant DRA objects are modified to reflect that. Returns nil if the pod got scheduled, and a non-nil + // error explaining why not otherwise. The error Type() can be checked against SchedulingInternalError to distinguish + // failing predicates from unexpected errors. + SchedulePod(pod *apiv1.Pod, nodeName string) SchedulingError + // SchedulePodOnAnyNodeMatching tries to schedule the given Pod on any Node for which nodeMatches returns + // true. Scheduling predicates are checked, and the pod is scheduled only if there is a matching Node with passing + // predicates. If the pod is scheduled, all relevant DRA objects are modified to reflect that, and the name of the + // Node its scheduled on and nil are returned. If the pod can't be scheduled on any Node, an empty string and a non-nil + // error explaining why are returned. The error Type() can be checked against SchedulingInternalError to distinguish + // failing predicates from unexpected errors. + SchedulePodOnAnyNodeMatching(pod *apiv1.Pod, nodeMatches func(*framework.NodeInfo) bool) (matchingNode string, err SchedulingError) + // UnschedulePod removes the given Pod from the given Node inside the snapshot, and modifies all relevant DRA objects + // to reflect the removal. The pod can then be scheduled on another Node in the snapshot using the Schedule methods. + UnschedulePod(namespace string, podName string, nodeName string) error + + // CheckPredicates runs scheduler predicates to check if the given Pod would be able to schedule on the Node with the given + // name. Returns nil if predicates pass, or a non-nil error specifying why they didn't otherwise. The error Type() can be + // checked against SchedulingInternalError to distinguish failing predicates from unexpected errors. Doesn't mutate the snapshot. 
+ CheckPredicates(pod *apiv1.Pod, nodeName string) SchedulingError +} + +// ClusterSnapshotStore is the "low-level" part of ClusterSnapshot, responsible for storing the snapshot state and mutating it directly, +// without going through scheduler predicates. ClusterSnapshotStore shouldn't be directly used outside the clustersnapshot pkg, its methods +// should be accessed via ClusterSnapshot. +type ClusterSnapshotStore interface { schedulerframework.SharedLister // SetClusterState resets the snapshot to an unforked state and replaces the contents of the snapshot // with the provided data. scheduledPods are correlated to their Nodes based on spec.NodeName. SetClusterState(nodes []*apiv1.Node, scheduledPods []*apiv1.Pod) error - // ForceAddPod adds the given Pod to the Node with the given nodeName inside the snapshot. + // ForceAddPod adds the given Pod to the Node with the given nodeName inside the snapshot without checking scheduler predicates. ForceAddPod(pod *apiv1.Pod, nodeName string) error // ForceRemovePod removes the given Pod (and all DRA objects it owns) from the snapshot. ForceRemovePod(namespace string, podName string, nodeName string) error - // AddNodeInfo adds the given NodeInfo to the snapshot. The Node and the Pods are added, as well as - // any DRA objects passed along them. + // AddNodeInfo adds the given NodeInfo to the snapshot without checking scheduler predicates. The Node and the Pods are added, + // as well as any DRA objects passed along them. AddNodeInfo(nodeInfo *framework.NodeInfo) error // RemoveNodeInfo removes the given NodeInfo from the snapshot The Node and the Pods are removed, as well as // any DRA objects owned by them. diff --git a/cluster-autoscaler/simulator/clustersnapshot/delta.go b/cluster-autoscaler/simulator/clustersnapshot/delta.go index 869e494e0226..46f89ade0e3c 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/delta.go +++ b/cluster-autoscaler/simulator/clustersnapshot/delta.go @@ -46,6 +46,26 @@ type DeltaClusterSnapshot struct { data *internalDeltaSnapshotData } +func (snapshot *DeltaClusterSnapshot) SchedulePod(pod *apiv1.Pod, nodeName string) SchedulingError { + //TODO implement me + panic("implement me") +} + +func (snapshot *DeltaClusterSnapshot) SchedulePodOnAnyNodeMatching(pod *apiv1.Pod, nodeMatches func(*framework.NodeInfo) bool) (matchingNode string, err SchedulingError) { + //TODO implement me + panic("implement me") +} + +func (snapshot *DeltaClusterSnapshot) CheckPredicates(pod *apiv1.Pod, nodeName string) SchedulingError { + //TODO implement me + panic("implement me") +} + +func (snapshot *DeltaClusterSnapshot) UnschedulePod(namespace string, podName string, nodeName string) error { + //TODO implement me + panic("implement me") +} + type deltaSnapshotNodeLister DeltaClusterSnapshot type deltaSnapshotStorageLister DeltaClusterSnapshot diff --git a/cluster-autoscaler/simulator/clustersnapshot/scheduling_error.go b/cluster-autoscaler/simulator/clustersnapshot/scheduling_error.go new file mode 100644 index 000000000000..e958b75c0213 --- /dev/null +++ b/cluster-autoscaler/simulator/clustersnapshot/scheduling_error.go @@ -0,0 +1,149 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package clustersnapshot + +import ( + "fmt" + "strings" + + apiv1 "k8s.io/api/core/v1" +) + +// SchedulingErrorType represents different possible schedulingError types. +type SchedulingErrorType int + +const ( + // SchedulingInternalError denotes internal unexpected error while trying to schedule a pod + SchedulingInternalError SchedulingErrorType = iota + // FailingPredicateError means that a pod couldn't be scheduled on a particular node because of a failing scheduler predicate + FailingPredicateError + // NoNodesPassingPredicatesFoundError means that a pod couldn't be scheduled on any Node because of failing scheduler predicates + NoNodesPassingPredicatesFoundError +) + +// SchedulingError represents an error encountered while trying to schedule a Pod inside ClusterSnapshot. +// An interface is exported instead of the concrete type to avoid the dreaded https://go.dev/doc/faq#nil_error. +type SchedulingError interface { + error + + // Type can be used to distinguish between different SchedulingError types. + Type() SchedulingErrorType + // Reasons provides a list of human-readable reasons explaining the error. + Reasons() []string + + // FailingPredicateName returns the name of the predicate that failed. Only applicable to the FailingPredicateError type. + FailingPredicateName() string + // FailingPredicateReasons returns a list of human-readable reasons explaining why the predicate failed. Only applicable to the FailingPredicateError type. + FailingPredicateReasons() []string +} + +type schedulingError struct { + errorType SchedulingErrorType + pod *apiv1.Pod + + // Only applicable to SchedulingInternalError: + internalErrorMsg string + + // Only applicable to FailingPredicateError: + failingPredicateName string + failingPredicateReasons []string + failingPredicateUnexpectedErrMsg string + // debugInfo contains additional info that predicate doesn't include, + // but may be useful for debugging (e.g. taints on node blocking scale-up) + failingPredicateDebugInfo string +} + +// Type returns if error was internal of names predicate failure. +func (se *schedulingError) Type() SchedulingErrorType { + return se.errorType +} + +// Error satisfies the builtin error interface. 
+func (se *schedulingError) Error() string { + msg := "" + + switch se.errorType { + case SchedulingInternalError: + msg = fmt.Sprintf("unexpected error: %s", se.internalErrorMsg) + case FailingPredicateError: + details := []string{ + fmt.Sprintf("predicateReasons=[%s]", strings.Join(se.FailingPredicateReasons(), ", ")), + } + if se.failingPredicateDebugInfo != "" { + details = append(details, fmt.Sprintf("debugInfo=%s", se.failingPredicateDebugInfo)) + } + if se.failingPredicateUnexpectedErrMsg != "" { + details = append(details, fmt.Sprintf("unexpectedError=%s", se.failingPredicateUnexpectedErrMsg)) + } + msg = fmt.Sprintf("predicate %q didn't pass (%s)", se.FailingPredicateName(), strings.Join(details, "; ")) + case NoNodesPassingPredicatesFoundError: + msg = fmt.Sprintf("couldn't find a matching Node with passing predicates") + default: + msg = fmt.Sprintf("SchedulingErrorType type %q unknown - this shouldn't happen", se.errorType) + } + + return fmt.Sprintf("can't schedule pod %s/%s: %s", se.pod.Namespace, se.pod.Name, msg) +} + +// Reasons returns a list of human-readable reasons for the error. +func (se *schedulingError) Reasons() []string { + switch se.errorType { + case FailingPredicateError: + return se.FailingPredicateReasons() + default: + return []string{se.Error()} + } +} + +// FailingPredicateName returns the name of the predicate which failed. +func (se *schedulingError) FailingPredicateName() string { + return se.failingPredicateName +} + +// FailingPredicateReasons returns the failure reasons from the failed predicate as a slice of strings. +func (se *schedulingError) FailingPredicateReasons() []string { + return se.failingPredicateReasons +} + +// NewSchedulingInternalError creates a new schedulingError with SchedulingInternalError type. +func NewSchedulingInternalError(pod *apiv1.Pod, errMsg string) SchedulingError { + return &schedulingError{ + errorType: SchedulingInternalError, + pod: pod, + internalErrorMsg: errMsg, + } +} + +// NewFailingPredicateError creates a new schedulingError with FailingPredicateError type. +func NewFailingPredicateError(pod *apiv1.Pod, predicateName string, predicateReasons []string, unexpectedErrMsg string, debugInfo string) SchedulingError { + return &schedulingError{ + errorType: FailingPredicateError, + pod: pod, + failingPredicateName: predicateName, + failingPredicateReasons: predicateReasons, + failingPredicateUnexpectedErrMsg: unexpectedErrMsg, + failingPredicateDebugInfo: debugInfo, + } +} + +// NewNoNodesPassingPredicatesFoundError creates a new schedulingError with NoNodesPassingPredicatesFoundError type. +func NewNoNodesPassingPredicatesFoundError(pod *apiv1.Pod) SchedulingError { + return &schedulingError{ + errorType: NoNodesPassingPredicatesFoundError, + pod: pod, + } +} diff --git a/cluster-autoscaler/simulator/predicatechecker/error.go b/cluster-autoscaler/simulator/predicatechecker/error.go deleted file mode 100644 index 9e4d6de29d71..000000000000 --- a/cluster-autoscaler/simulator/predicatechecker/error.go +++ /dev/null @@ -1,107 +0,0 @@ -/* -Copyright 2019 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -*/ - -package predicatechecker - -import ( - "fmt" - "strings" -) - -// PredicateErrorType is type of predicate error -type PredicateErrorType int - -const ( - // NotSchedulablePredicateError means that one of the filters returned that pod does not fit a node - NotSchedulablePredicateError PredicateErrorType = iota - // InternalPredicateError denotes internal unexpected error while calling PredicateChecker - InternalPredicateError -) - -// PredicateError is a structure representing error returned from predicate checking simulation. -type PredicateError struct { - errorType PredicateErrorType - predicateName string - errorMessage string - reasons []string - // debugInfo contains additional info that predicate doesn't include, - // but may be useful for debugging (e.g. taints on node blocking scale-up) - debugInfo func() string -} - -// ErrorType returns if error was internal of names predicate failure. -func (pe *PredicateError) ErrorType() PredicateErrorType { - return pe.errorType -} - -// PredicateName return name of predicate which failed. -func (pe *PredicateError) PredicateName() string { - return pe.predicateName -} - -// Message returns error message. -func (pe *PredicateError) Message() string { - if pe.errorMessage == "" { - return "unknown error" - } - return pe.errorMessage -} - -// VerboseMessage generates verbose error message. Building verbose message may be expensive so number of calls should be -// limited. -func (pe *PredicateError) VerboseMessage() string { - return fmt.Sprintf( - "%s; predicateName=%s; reasons: %s; debugInfo=%s", - pe.Message(), - pe.predicateName, - strings.Join(pe.reasons, ", "), - pe.debugInfo()) -} - -// Reasons returns failure reasons from failed predicate as a slice of strings. -func (pe *PredicateError) Reasons() []string { - return pe.reasons -} - -// NewPredicateError creates a new predicate error from error and reasons. -func NewPredicateError( - errorType PredicateErrorType, - predicateName string, - errorMessage string, - reasons []string, - debugInfo func() string, -) *PredicateError { - return &PredicateError{ - errorType: errorType, - predicateName: predicateName, - errorMessage: errorMessage, - reasons: reasons, - debugInfo: debugInfo, - } -} - -// GenericPredicateError return a generic instance of PredicateError to be used in context where predicate name is not -// know. -func GenericPredicateError() *PredicateError { - return &PredicateError{ - errorType: NotSchedulablePredicateError, - errorMessage: "generic predicate failure", - } -} - -func emptyString() string { - return "" -} diff --git a/cluster-autoscaler/simulator/predicatechecker/interface.go b/cluster-autoscaler/simulator/predicatechecker/interface.go index 2d35b779172c..2236bfaa4907 100644 --- a/cluster-autoscaler/simulator/predicatechecker/interface.go +++ b/cluster-autoscaler/simulator/predicatechecker/interface.go @@ -25,7 +25,7 @@ import ( // PredicateChecker checks whether all required predicates pass for given Pod and Node. 
type PredicateChecker interface { - FitsAnyNode(clusterSnapshot clustersnapshot.ClusterSnapshot, pod *apiv1.Pod) (string, error) - FitsAnyNodeMatching(clusterSnapshot clustersnapshot.ClusterSnapshot, pod *apiv1.Pod, nodeMatches func(*framework.NodeInfo) bool) (string, error) - CheckPredicates(clusterSnapshot clustersnapshot.ClusterSnapshot, pod *apiv1.Pod, nodeName string) *PredicateError + FitsAnyNode(clusterSnapshot clustersnapshot.ClusterSnapshotStore, pod *apiv1.Pod) (string, clustersnapshot.SchedulingError) + FitsAnyNodeMatching(clusterSnapshot clustersnapshot.ClusterSnapshotStore, pod *apiv1.Pod, nodeMatches func(*framework.NodeInfo) bool) (string, clustersnapshot.SchedulingError) + CheckPredicates(clusterSnapshot clustersnapshot.ClusterSnapshotStore, pod *apiv1.Pod, nodeName string) clustersnapshot.SchedulingError } diff --git a/cluster-autoscaler/simulator/predicatechecker/schedulerbased.go b/cluster-autoscaler/simulator/predicatechecker/schedulerbased.go index 491e05da789a..6672d0c9f6e4 100644 --- a/cluster-autoscaler/simulator/predicatechecker/schedulerbased.go +++ b/cluster-autoscaler/simulator/predicatechecker/schedulerbased.go @@ -19,13 +19,14 @@ package predicatechecker import ( "context" "fmt" + "strings" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" apiv1 "k8s.io/api/core/v1" v1listers "k8s.io/client-go/listers/core/v1" - klog "k8s.io/klog/v2" + "k8s.io/klog/v2" schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" ) @@ -44,16 +45,16 @@ func NewSchedulerBasedPredicateChecker(fwHandle *framework.Handle) *SchedulerBas } // FitsAnyNode checks if the given pod can be placed on any of the given nodes. -func (p *SchedulerBasedPredicateChecker) FitsAnyNode(clusterSnapshot clustersnapshot.ClusterSnapshot, pod *apiv1.Pod) (string, error) { +func (p *SchedulerBasedPredicateChecker) FitsAnyNode(clusterSnapshot clustersnapshot.ClusterSnapshotStore, pod *apiv1.Pod) (string, clustersnapshot.SchedulingError) { return p.FitsAnyNodeMatching(clusterSnapshot, pod, func(*framework.NodeInfo) bool { return true }) } // FitsAnyNodeMatching checks if the given pod can be placed on any of the given nodes matching the provided function. -func (p *SchedulerBasedPredicateChecker) FitsAnyNodeMatching(clusterSnapshot clustersnapshot.ClusterSnapshot, pod *apiv1.Pod, nodeMatches func(*framework.NodeInfo) bool) (string, error) { +func (p *SchedulerBasedPredicateChecker) FitsAnyNodeMatching(clusterSnapshot clustersnapshot.ClusterSnapshotStore, pod *apiv1.Pod, nodeMatches func(*framework.NodeInfo) bool) (string, clustersnapshot.SchedulingError) { if clusterSnapshot == nil { - return "", fmt.Errorf("ClusterSnapshot not provided") + return "", clustersnapshot.NewSchedulingInternalError(pod, "ClusterSnapshot not provided") } nodeInfosList, err := clusterSnapshot.ListNodeInfos() @@ -63,7 +64,7 @@ func (p *SchedulerBasedPredicateChecker) FitsAnyNodeMatching(clusterSnapshot clu // Scheduler requires interface returning error, but no implementation // of ClusterSnapshot ever does it. 
klog.Errorf("Error obtaining nodeInfos from schedulerLister") - return "", fmt.Errorf("error obtaining nodeInfos from schedulerLister") + return "", clustersnapshot.NewSchedulingInternalError(pod, "error obtaining nodeInfos from schedulerLister") } p.fwHandle.DelegatingLister.UpdateDelegate(clusterSnapshot) @@ -72,7 +73,7 @@ func (p *SchedulerBasedPredicateChecker) FitsAnyNodeMatching(clusterSnapshot clu state := schedulerframework.NewCycleState() preFilterResult, preFilterStatus, _ := p.fwHandle.Framework.RunPreFilterPlugins(context.TODO(), state, pod) if !preFilterStatus.IsSuccess() { - return "", fmt.Errorf("error running pre filter plugins for pod %s; %s", pod.Name, preFilterStatus.Message()) + return "", clustersnapshot.NewFailingPredicateError(pod, preFilterStatus.Plugin(), preFilterStatus.Reasons(), "PreFilter failed", "") } for i := range nodeInfosList { @@ -96,18 +97,17 @@ func (p *SchedulerBasedPredicateChecker) FitsAnyNodeMatching(clusterSnapshot clu return nodeInfo.Node().Name, nil } } - return "", fmt.Errorf("cannot put pod %s on any node", pod.Name) + return "", clustersnapshot.NewNoNodesPassingPredicatesFoundError(pod) } // CheckPredicates checks if the given pod can be placed on the given node. -func (p *SchedulerBasedPredicateChecker) CheckPredicates(clusterSnapshot clustersnapshot.ClusterSnapshot, pod *apiv1.Pod, nodeName string) *PredicateError { +func (p *SchedulerBasedPredicateChecker) CheckPredicates(clusterSnapshot clustersnapshot.ClusterSnapshotStore, pod *apiv1.Pod, nodeName string) clustersnapshot.SchedulingError { if clusterSnapshot == nil { - return NewPredicateError(InternalPredicateError, "", "ClusterSnapshot not provided", nil, emptyString) + return clustersnapshot.NewSchedulingInternalError(pod, "ClusterSnapshot not provided") } nodeInfo, err := clusterSnapshot.GetNodeInfo(nodeName) if err != nil { - errorMessage := fmt.Sprintf("Error obtaining NodeInfo for name %s; %v", nodeName, err) - return NewPredicateError(InternalPredicateError, "", errorMessage, nil, emptyString) + return clustersnapshot.NewSchedulingInternalError(pod, fmt.Sprintf("error obtaining NodeInfo for name %q: %v", nodeName, err)) } p.fwHandle.DelegatingLister.UpdateDelegate(clusterSnapshot) @@ -116,47 +116,31 @@ func (p *SchedulerBasedPredicateChecker) CheckPredicates(clusterSnapshot cluster state := schedulerframework.NewCycleState() _, preFilterStatus, _ := p.fwHandle.Framework.RunPreFilterPlugins(context.TODO(), state, pod) if !preFilterStatus.IsSuccess() { - return NewPredicateError( - InternalPredicateError, - "", - preFilterStatus.Message(), - preFilterStatus.Reasons(), - emptyString) + return clustersnapshot.NewFailingPredicateError(pod, preFilterStatus.Plugin(), preFilterStatus.Reasons(), "PreFilter failed", "") } filterStatus := p.fwHandle.Framework.RunFilterPlugins(context.TODO(), state, pod, nodeInfo.ToScheduler()) if !filterStatus.IsSuccess() { filterName := filterStatus.Plugin() - filterMessage := filterStatus.Message() filterReasons := filterStatus.Reasons() - if filterStatus.IsRejected() { - return NewPredicateError( - NotSchedulablePredicateError, - filterName, - filterMessage, - filterReasons, - p.buildDebugInfo(filterName, nodeInfo)) + unexpectedErrMsg := "" + if !filterStatus.IsRejected() { + unexpectedErrMsg = fmt.Sprintf("unexpected filter status %q", filterStatus.Code().String()) } - return NewPredicateError( - InternalPredicateError, - filterName, - filterMessage, - filterReasons, - p.buildDebugInfo(filterName, nodeInfo)) + return 
clustersnapshot.NewFailingPredicateError(pod, filterName, filterReasons, unexpectedErrMsg, p.failingFilterDebugInfo(filterName, nodeInfo)) } return nil } -func (p *SchedulerBasedPredicateChecker) buildDebugInfo(filterName string, nodeInfo *framework.NodeInfo) func() string { +func (p *SchedulerBasedPredicateChecker) failingFilterDebugInfo(filterName string, nodeInfo *framework.NodeInfo) string { + infoParts := []string{fmt.Sprintf("nodeName: %q", nodeInfo.Node().Name)} + switch filterName { case "TaintToleration": - taints := nodeInfo.Node().Spec.Taints - return func() string { - return fmt.Sprintf("taints on node: %#v", taints) - } - default: - return emptyString + infoParts = append(infoParts, fmt.Sprintf("nodeTaints: %#v", nodeInfo.Node().Spec.Taints)) } + + return strings.Join(infoParts, ", ") } diff --git a/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go b/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go index aa06ec7c159f..a454e7684f35 100644 --- a/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go +++ b/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go @@ -27,7 +27,7 @@ import ( testconfig "k8s.io/autoscaler/cluster-autoscaler/config/test" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - scheduler "k8s.io/autoscaler/cluster-autoscaler/utils/scheduler" + "k8s.io/autoscaler/cluster-autoscaler/utils/scheduler" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" apiv1 "k8s.io/api/core/v1" @@ -151,9 +151,11 @@ func TestCheckPredicate(t *testing.T) { predicateError := tt.predicateChecker.CheckPredicates(clusterSnapshot, tt.testPod, tt.node.Name) if tt.expectError { assert.NotNil(t, predicateError) - assert.Equal(t, NotSchedulablePredicateError, predicateError.ErrorType()) - assert.Equal(t, "Insufficient cpu", predicateError.Message()) - assert.Contains(t, predicateError.VerboseMessage(), "Insufficient cpu; predicateName=NodeResourcesFit") + assert.Equal(t, clustersnapshot.FailingPredicateError, predicateError.Type()) + assert.Equal(t, "NodeResourcesFit", predicateError.FailingPredicateName()) + assert.Equal(t, []string{"Insufficient cpu"}, predicateError.FailingPredicateReasons()) + assert.Contains(t, predicateError.Error(), "NodeResourcesFit") + assert.Contains(t, predicateError.Error(), "Insufficient cpu") } else { assert.Nil(t, predicateError) } @@ -291,8 +293,9 @@ func TestDebugInfo(t *testing.T) { assert.NoError(t, err) predicateErr := defaultPredicateChecker.CheckPredicates(clusterSnapshot, p1, "n1") assert.NotNil(t, predicateErr) - assert.Equal(t, "node(s) had untolerated taint {SomeTaint: WhyNot?}", predicateErr.Message()) - assert.Contains(t, predicateErr.VerboseMessage(), "RandomTaint") + assert.Contains(t, predicateErr.FailingPredicateReasons(), "node(s) had untolerated taint {SomeTaint: WhyNot?}") + assert.Contains(t, predicateErr.Error(), "node(s) had untolerated taint {SomeTaint: WhyNot?}") + assert.Contains(t, predicateErr.Error(), "RandomTaint") // with custom predicate checker From 46e9f398fe02413cc0f506139bac7d2cc93eb844 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kuba=20Tu=C5=BCnik?= Date: Wed, 13 Nov 2024 22:06:36 +0100 Subject: [PATCH 3/8] CA: introduce PredicateSnapshot PredicateSnapshot implements the ClusterSnapshot methods that need to run predicates on top of a ClusterSnapshotStore. testsnapshot pkg is introduced, providing functions abstracting away the snapshot creation for tests. 
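
For illustration, a rough sketch of how these pieces are meant to fit together
in a test. It assumes the constructors added in this series
(framework.NewTestFrameworkHandle, predicate.NewPredicateSnapshot,
testsnapshot.NewTestSnapshotOrDie) plus the dot-imported utils/test helpers,
and is not taken verbatim from any file in the patch:

  // Hypothetical test; imports assumed: testing, time, simulator/framework,
  // simulator/clustersnapshot, clustersnapshot/predicate,
  // clustersnapshot/testsnapshot, and dot-imported utils/test.
  func TestPredicateSnapshotSketch(t *testing.T) {
    // A ClusterSnapshot that runs real scheduler predicates on top of a
    // basic snapshot store.
    fwHandle, err := framework.NewTestFrameworkHandle()
    if err != nil {
      t.Fatalf("couldn't create test framework handle: %v", err)
    }
    snapshot := predicate.NewPredicateSnapshot(clustersnapshot.NewBasicClusterSnapshot(), fwHandle)

    // Adding a node goes straight to the store; scheduling a pod first checks
    // scheduler predicates and only then force-adds the pod to the store.
    node := BuildTestNode("n1", 1000, 1000)
    SetNodeReadyState(node, true, time.Time{})
    if err := snapshot.AddNodeInfo(framework.NewTestNodeInfo(node)); err != nil {
      t.Fatal(err)
    }
    if schedErr := snapshot.SchedulePod(BuildTestPod("p1", 100, 100), "n1"); schedErr != nil {
      t.Fatalf("pod couldn't be scheduled: %v", schedErr)
    }

    // Tests that don't care about the wiring can use the testsnapshot helpers.
    _ = testsnapshot.NewTestSnapshotOrDie(t)
  }

Non-test code builds the snapshot the same way, just passing the real
framework.Handle and whichever ClusterSnapshotStore it needs (basic or delta).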
ClusterSnapshot tests are moved near PredicateSnapshot, as it'll be the only "full" implementation. --- .../clustersnapshot/delta_benchmark_test.go | 80 ++++++++++ .../predicate/predicate_snapshot.go | 72 +++++++++ .../predicate_snapshot_benchmark_test.go} | 140 +++--------------- .../predicate_snapshot_test.go} | 134 ++++++++++------- .../simulator/clustersnapshot/test_utils.go | 43 ++++++ .../testsnapshot/test_snapshot.go | 64 ++++++++ 6 files changed, 360 insertions(+), 173 deletions(-) create mode 100644 cluster-autoscaler/simulator/clustersnapshot/delta_benchmark_test.go create mode 100644 cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot.go rename cluster-autoscaler/simulator/clustersnapshot/{clustersnapshot_benchmark_test.go => predicate/predicate_snapshot_benchmark_test.go} (55%) rename cluster-autoscaler/simulator/clustersnapshot/{clustersnapshot_test.go => predicate/predicate_snapshot_test.go} (82%) create mode 100644 cluster-autoscaler/simulator/clustersnapshot/testsnapshot/test_snapshot.go diff --git a/cluster-autoscaler/simulator/clustersnapshot/delta_benchmark_test.go b/cluster-autoscaler/simulator/clustersnapshot/delta_benchmark_test.go new file mode 100644 index 000000000000..5c1ce21a984d --- /dev/null +++ b/cluster-autoscaler/simulator/clustersnapshot/delta_benchmark_test.go @@ -0,0 +1,80 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package clustersnapshot + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + + "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" +) + +func BenchmarkBuildNodeInfoList(b *testing.B) { + testCases := []struct { + nodeCount int + }{ + { + nodeCount: 1000, + }, + { + nodeCount: 5000, + }, + { + nodeCount: 15000, + }, + { + nodeCount: 100000, + }, + } + + for _, tc := range testCases { + b.Run(fmt.Sprintf("fork add 1000 to %d", tc.nodeCount), func(b *testing.B) { + nodes := CreateTestNodes(tc.nodeCount + 1000) + clusterSnapshot := NewDeltaClusterSnapshot() + if err := clusterSnapshot.SetClusterState(nodes[:tc.nodeCount], nil); err != nil { + assert.NoError(b, err) + } + clusterSnapshot.Fork() + for _, node := range nodes[tc.nodeCount:] { + if err := clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(node)); err != nil { + assert.NoError(b, err) + } + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + list := clusterSnapshot.data.buildNodeInfoList() + assert.Equal(b, tc.nodeCount+1000, len(list)) + } + }) + } + for _, tc := range testCases { + b.Run(fmt.Sprintf("base %d", tc.nodeCount), func(b *testing.B) { + nodes := CreateTestNodes(tc.nodeCount) + clusterSnapshot := NewDeltaClusterSnapshot() + if err := clusterSnapshot.SetClusterState(nodes, nil); err != nil { + assert.NoError(b, err) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + list := clusterSnapshot.data.buildNodeInfoList() + assert.Equal(b, tc.nodeCount, len(list)) + } + }) + } +} diff --git a/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot.go b/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot.go new file mode 100644 index 000000000000..9542b7a06535 --- /dev/null +++ b/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot.go @@ -0,0 +1,72 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package predicate + +import ( + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" + "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" +) + +// PredicateSnapshot implements ClusterSnapshot on top of a ClusterSnapshotStore by using +// SchedulerBasedPredicateChecker to check scheduler predicates. +type PredicateSnapshot struct { + clustersnapshot.ClusterSnapshotStore + predicateChecker *predicatechecker.SchedulerBasedPredicateChecker +} + +// NewPredicateSnapshot builds a PredicateSnapshot. +func NewPredicateSnapshot(snapshotStore clustersnapshot.ClusterSnapshotStore, fwHandle *framework.Handle) *PredicateSnapshot { + return &PredicateSnapshot{ + ClusterSnapshotStore: snapshotStore, + predicateChecker: predicatechecker.NewSchedulerBasedPredicateChecker(fwHandle), + } +} + +// SchedulePod adds pod to the snapshot and schedules it to given node. 
+func (s *PredicateSnapshot) SchedulePod(pod *apiv1.Pod, nodeName string) clustersnapshot.SchedulingError { + if schedErr := s.predicateChecker.CheckPredicates(s, pod, nodeName); schedErr != nil { + return schedErr + } + if err := s.ClusterSnapshotStore.ForceAddPod(pod, nodeName); err != nil { + return clustersnapshot.NewSchedulingInternalError(pod, err.Error()) + } + return nil +} + +// SchedulePodOnAnyNodeMatching adds pod to the snapshot and schedules it to any node matching the provided function. +func (s *PredicateSnapshot) SchedulePodOnAnyNodeMatching(pod *apiv1.Pod, anyNodeMatching func(*framework.NodeInfo) bool) (string, clustersnapshot.SchedulingError) { + nodeName, schedErr := s.predicateChecker.FitsAnyNodeMatching(s, pod, anyNodeMatching) + if schedErr != nil { + return "", schedErr + } + if err := s.ClusterSnapshotStore.ForceAddPod(pod, nodeName); err != nil { + return "", clustersnapshot.NewSchedulingInternalError(pod, err.Error()) + } + return nodeName, nil +} + +// UnschedulePod removes the given Pod from the given Node inside the snapshot. +func (s *PredicateSnapshot) UnschedulePod(namespace string, podName string, nodeName string) error { + return s.ClusterSnapshotStore.ForceRemovePod(namespace, podName, nodeName) +} + +// CheckPredicates checks whether scheduler predicates pass for the given pod on the given node. +func (s *PredicateSnapshot) CheckPredicates(pod *apiv1.Pod, nodeName string) clustersnapshot.SchedulingError { + return s.predicateChecker.CheckPredicates(s, pod, nodeName) +} diff --git a/cluster-autoscaler/simulator/clustersnapshot/clustersnapshot_benchmark_test.go b/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot_benchmark_test.go similarity index 55% rename from cluster-autoscaler/simulator/clustersnapshot/clustersnapshot_benchmark_test.go rename to cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot_benchmark_test.go index fb6468adad6f..1a267dda9deb 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/clustersnapshot_benchmark_test.go +++ b/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot_benchmark_test.go @@ -1,5 +1,5 @@ /* -Copyright 2020 The Kubernetes Authors. +Copyright 2024 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,68 +14,27 @@ See the License for the specific language governing permissions and limitations under the License. */ -package clustersnapshot +package predicate import ( "fmt" "testing" - "time" "github.com/stretchr/testify/assert" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" - - apiv1 "k8s.io/api/core/v1" ) -func createTestNodesWithPrefix(prefix string, n int) []*apiv1.Node { - nodes := make([]*apiv1.Node, n, n) - for i := 0; i < n; i++ { - nodes[i] = BuildTestNode(fmt.Sprintf("%s-%d", prefix, i), 2000, 2000000) - SetNodeReadyState(nodes[i], true, time.Time{}) - } - return nodes -} - -func createTestNodes(n int) []*apiv1.Node { - return createTestNodesWithPrefix("n", n) -} - -func createTestPodsWithPrefix(prefix string, n int) []*apiv1.Pod { - pods := make([]*apiv1.Pod, n, n) - for i := 0; i < n; i++ { - pods[i] = BuildTestPod(fmt.Sprintf("%s-%d", prefix, i), 1000, 2000000) - } - return pods -} - -func createTestPods(n int) []*apiv1.Pod { - return createTestPodsWithPrefix("p", n) -} - -func assignPodsToNodes(pods []*apiv1.Pod, nodes []*apiv1.Node) { - if len(nodes) == 0 { - return - } - - j := 0 - for i := 0; i < len(pods); i++ { - if j >= len(nodes) { - j = 0 - } - pods[i].Spec.NodeName = nodes[j].Name - j++ - } -} - func BenchmarkAddNodeInfo(b *testing.B) { testCases := []int{1, 10, 100, 1000, 5000, 15000, 100000} for snapshotName, snapshotFactory := range snapshots { for _, tc := range testCases { - nodes := createTestNodes(tc) - clusterSnapshot := snapshotFactory() + nodes := clustersnapshot.CreateTestNodes(tc) + clusterSnapshot, err := snapshotFactory() + assert.NoError(b, err) b.ResetTimer() b.Run(fmt.Sprintf("%s: AddNodeInfo() %d", snapshotName, tc), func(b *testing.B) { for i := 0; i < b.N; i++ { @@ -99,9 +58,10 @@ func BenchmarkListNodeInfos(b *testing.B) { for snapshotName, snapshotFactory := range snapshots { for _, tc := range testCases { - nodes := createTestNodes(tc) - clusterSnapshot := snapshotFactory() - err := clusterSnapshot.SetClusterState(nodes, nil) + nodes := clustersnapshot.CreateTestNodes(tc) + clusterSnapshot, err := snapshotFactory() + assert.NoError(b, err) + err = clusterSnapshot.SetClusterState(nodes, nil) if err != nil { assert.NoError(b, err) } @@ -126,11 +86,12 @@ func BenchmarkAddPods(b *testing.B) { for snapshotName, snapshotFactory := range snapshots { for _, tc := range testCases { - nodes := createTestNodes(tc) - pods := createTestPods(tc * 30) - assignPodsToNodes(pods, nodes) - clusterSnapshot := snapshotFactory() - err := clusterSnapshot.SetClusterState(nodes, nil) + nodes := clustersnapshot.CreateTestNodes(tc) + pods := clustersnapshot.CreateTestPods(tc * 30) + clustersnapshot.AssignTestPodsToNodes(pods, nodes) + clusterSnapshot, err := snapshotFactory() + assert.NoError(b, err) + err = clusterSnapshot.SetClusterState(nodes, nil) assert.NoError(b, err) b.ResetTimer() b.Run(fmt.Sprintf("%s: ForceAddPod() 30*%d", snapshotName, tc), func(b *testing.B) { @@ -160,12 +121,13 @@ func BenchmarkForkAddRevert(b *testing.B) { for snapshotName, snapshotFactory := range snapshots { for _, ntc := range nodeTestCases { - nodes := createTestNodes(ntc) + nodes := clustersnapshot.CreateTestNodes(ntc) for _, ptc := range podTestCases { - pods := createTestPods(ntc * ptc) - assignPodsToNodes(pods, nodes) - clusterSnapshot := snapshotFactory() - err := clusterSnapshot.SetClusterState(nodes, pods) + pods := clustersnapshot.CreateTestPods(ntc * ptc) + clustersnapshot.AssignTestPodsToNodes(pods, nodes) + clusterSnapshot, err := snapshotFactory() + assert.NoError(b, err) + err = clusterSnapshot.SetClusterState(nodes, pods) assert.NoError(b, err) tmpNode1 := BuildTestNode("tmp-1", 2000, 2000000) tmpNode2 := BuildTestNode("tmp-2", 2000, 2000000) @@ -190,61 +152,3 @@ func BenchmarkForkAddRevert(b 
*testing.B) { } } } - -func BenchmarkBuildNodeInfoList(b *testing.B) { - testCases := []struct { - nodeCount int - }{ - { - nodeCount: 1000, - }, - { - nodeCount: 5000, - }, - { - nodeCount: 15000, - }, - { - nodeCount: 100000, - }, - } - - for _, tc := range testCases { - b.Run(fmt.Sprintf("fork add 1000 to %d", tc.nodeCount), func(b *testing.B) { - nodes := createTestNodes(tc.nodeCount + 1000) - snapshot := NewDeltaClusterSnapshot() - if err := snapshot.SetClusterState(nodes[:tc.nodeCount], nil); err != nil { - assert.NoError(b, err) - } - snapshot.Fork() - for _, node := range nodes[tc.nodeCount:] { - if err := snapshot.AddNodeInfo(framework.NewTestNodeInfo(node)); err != nil { - assert.NoError(b, err) - } - } - b.ResetTimer() - for i := 0; i < b.N; i++ { - list := snapshot.data.buildNodeInfoList() - if len(list) != tc.nodeCount+1000 { - assert.Equal(b, len(list), tc.nodeCount+1000) - } - } - }) - } - for _, tc := range testCases { - b.Run(fmt.Sprintf("base %d", tc.nodeCount), func(b *testing.B) { - nodes := createTestNodes(tc.nodeCount) - snapshot := NewDeltaClusterSnapshot() - if err := snapshot.SetClusterState(nodes, nil); err != nil { - assert.NoError(b, err) - } - b.ResetTimer() - for i := 0; i < b.N; i++ { - list := snapshot.data.buildNodeInfoList() - if len(list) != tc.nodeCount { - assert.Equal(b, len(list), tc.nodeCount) - } - } - }) - } -} diff --git a/cluster-autoscaler/simulator/clustersnapshot/clustersnapshot_test.go b/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot_test.go similarity index 82% rename from cluster-autoscaler/simulator/clustersnapshot/clustersnapshot_test.go rename to cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot_test.go index 4eeb67253558..f65c3166cdd5 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/clustersnapshot_test.go +++ b/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot_test.go @@ -1,5 +1,5 @@ /* -Copyright 2020 The Kubernetes Authors. +Copyright 2024 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package clustersnapshot +package predicate import ( "fmt" @@ -23,6 +23,7 @@ import ( "time" apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" @@ -30,9 +31,21 @@ import ( "github.com/stretchr/testify/assert" ) -var snapshots = map[string]func() ClusterSnapshot{ - "basic": func() ClusterSnapshot { return NewBasicClusterSnapshot() }, - "delta": func() ClusterSnapshot { return NewDeltaClusterSnapshot() }, +var snapshots = map[string]func() (clustersnapshot.ClusterSnapshot, error){ + "basic": func() (clustersnapshot.ClusterSnapshot, error) { + fwHandle, err := framework.NewTestFrameworkHandle() + if err != nil { + return nil, err + } + return NewPredicateSnapshot(clustersnapshot.NewBasicClusterSnapshot(), fwHandle), nil + }, + "delta": func() (clustersnapshot.ClusterSnapshot, error) { + fwHandle, err := framework.NewTestFrameworkHandle() + if err != nil { + return nil, err + } + return NewPredicateSnapshot(clustersnapshot.NewDeltaClusterSnapshot(), fwHandle), nil + }, } func nodeNames(nodes []*apiv1.Node) []string { @@ -61,7 +74,7 @@ func compareStates(t *testing.T, a, b snapshotState) { assert.ElementsMatch(t, a.pods, b.pods) } -func getSnapshotState(t *testing.T, snapshot ClusterSnapshot) snapshotState { +func getSnapshotState(t *testing.T, snapshot clustersnapshot.ClusterSnapshot) snapshotState { nodes, err := snapshot.ListNodeInfos() assert.NoError(t, err) var pods []*apiv1.Pod @@ -73,16 +86,17 @@ func getSnapshotState(t *testing.T, snapshot ClusterSnapshot) snapshotState { return snapshotState{extractNodes(nodes), pods} } -func startSnapshot(t *testing.T, snapshotFactory func() ClusterSnapshot, state snapshotState) ClusterSnapshot { - snapshot := snapshotFactory() - err := snapshot.SetClusterState(state.nodes, state.pods) +func startSnapshot(t *testing.T, snapshotFactory func() (clustersnapshot.ClusterSnapshot, error), state snapshotState) clustersnapshot.ClusterSnapshot { + snapshot, err := snapshotFactory() + assert.NoError(t, err) + err = snapshot.SetClusterState(state.nodes, state.pods) assert.NoError(t, err) return snapshot } type modificationTestCase struct { name string - op func(ClusterSnapshot) + op func(clustersnapshot.ClusterSnapshot) state snapshotState modifiedState snapshotState } @@ -95,7 +109,7 @@ func validTestCases(t *testing.T) []modificationTestCase { testCases := []modificationTestCase{ { name: "add empty nodeInfo", - op: func(snapshot ClusterSnapshot) { + op: func(snapshot clustersnapshot.ClusterSnapshot) { err := snapshot.AddNodeInfo(framework.NewTestNodeInfo(node)) assert.NoError(t, err) }, @@ -105,7 +119,7 @@ func validTestCases(t *testing.T) []modificationTestCase { }, { name: "add nodeInfo", - op: func(snapshot ClusterSnapshot) { + op: func(snapshot clustersnapshot.ClusterSnapshot) { err := snapshot.AddNodeInfo(framework.NewTestNodeInfo(node, pod)) assert.NoError(t, err) }, @@ -119,7 +133,7 @@ func validTestCases(t *testing.T) []modificationTestCase { state: snapshotState{ nodes: []*apiv1.Node{node}, }, - op: func(snapshot ClusterSnapshot) { + op: func(snapshot clustersnapshot.ClusterSnapshot) { err := snapshot.RemoveNodeInfo(node.Name) assert.NoError(t, err) }, @@ -129,7 +143,7 @@ func validTestCases(t *testing.T) []modificationTestCase { state: snapshotState{ nodes: []*apiv1.Node{node}, }, - op: func(snapshot ClusterSnapshot) { + op: func(snapshot clustersnapshot.ClusterSnapshot) { err := snapshot.RemoveNodeInfo(node.Name) assert.NoError(t, err) @@ -145,10 +159,10 @@ func validTestCases(t *testing.T) []modificationTestCase { state: snapshotState{ nodes: []*apiv1.Node{node}, }, - op: 
func(snapshot ClusterSnapshot) { - err := snapshot.ForceAddPod(pod, node.Name) - assert.NoError(t, err) - err = snapshot.RemoveNodeInfo(node.Name) + op: func(snapshot clustersnapshot.ClusterSnapshot) { + schedErr := snapshot.ForceAddPod(pod, node.Name) + assert.NoError(t, schedErr) + err := snapshot.RemoveNodeInfo(node.Name) assert.NoError(t, err) }, }, @@ -284,20 +298,20 @@ func TestSetClusterState(t *testing.T) { extraNodeCount := localRand.Intn(100) extraPodCount := localRand.Intn(1000) - nodes := createTestNodes(nodeCount) - pods := createTestPods(podCount) - assignPodsToNodes(pods, nodes) + nodes := clustersnapshot.CreateTestNodes(nodeCount) + pods := clustersnapshot.CreateTestPods(podCount) + clustersnapshot.AssignTestPodsToNodes(pods, nodes) state := snapshotState{nodes, pods} - extraNodes := createTestNodesWithPrefix("extra", extraNodeCount) + extraNodes := clustersnapshot.CreateTestNodesWithPrefix("extra", extraNodeCount) allNodes := make([]*apiv1.Node, len(nodes)+len(extraNodes), len(nodes)+len(extraNodes)) copy(allNodes, nodes) copy(allNodes[len(nodes):], extraNodes) - extraPods := createTestPodsWithPrefix("extra", extraPodCount) - assignPodsToNodes(extraPods, allNodes) + extraPods := clustersnapshot.CreateTestPodsWithPrefix("extra", extraPodCount) + clustersnapshot.AssignTestPodsToNodes(extraPods, allNodes) allPods := make([]*apiv1.Pod, len(pods)+len(extraPods), len(pods)+len(extraPods)) copy(allPods, pods) @@ -318,8 +332,8 @@ func TestSetClusterState(t *testing.T) { snapshot := startSnapshot(t, snapshotFactory, state) compareStates(t, state, getSnapshotState(t, snapshot)) - newNodes, newPods := createTestNodes(13), createTestPods(37) - assignPodsToNodes(newPods, newNodes) + newNodes, newPods := clustersnapshot.CreateTestNodes(13), clustersnapshot.CreateTestPods(37) + clustersnapshot.AssignTestPodsToNodes(newPods, newNodes) assert.NoError(t, snapshot.SetClusterState(newNodes, newPods)) compareStates(t, snapshotState{nodes: newNodes, pods: newPods}, getSnapshotState(t, snapshot)) @@ -357,19 +371,19 @@ func TestNode404(t *testing.T) { // Anything and everything that returns errNodeNotFound should be tested here. ops := []struct { name string - op func(ClusterSnapshot) error + op func(clustersnapshot.ClusterSnapshot) error }{ - {"add pod", func(snapshot ClusterSnapshot) error { + {"add pod", func(snapshot clustersnapshot.ClusterSnapshot) error { return snapshot.ForceAddPod(BuildTestPod("p1", 0, 0), "node") }}, - {"remove pod", func(snapshot ClusterSnapshot) error { + {"remove pod", func(snapshot clustersnapshot.ClusterSnapshot) error { return snapshot.ForceRemovePod("default", "p1", "node") }}, - {"get node", func(snapshot ClusterSnapshot) error { + {"get node", func(snapshot clustersnapshot.ClusterSnapshot) error { _, err := snapshot.NodeInfos().Get("node") return err }}, - {"remove nodeInfo", func(snapshot ClusterSnapshot) error { + {"remove nodeInfo", func(snapshot clustersnapshot.ClusterSnapshot) error { return snapshot.RemoveNodeInfo("node") }}, } @@ -378,19 +392,21 @@ func TestNode404(t *testing.T) { for _, op := range ops { t.Run(fmt.Sprintf("%s: %s empty", name, op.name), func(t *testing.T) { - snapshot := snapshotFactory() + snapshot, err := snapshotFactory() + assert.NoError(t, err) // Empty snapshot - shouldn't be able to operate on nodes that are not here. 
- err := op.op(snapshot) + err = op.op(snapshot) assert.Error(t, err) }) t.Run(fmt.Sprintf("%s: %s fork", name, op.name), func(t *testing.T) { - snapshot := snapshotFactory() + snapshot, err := snapshotFactory() + assert.NoError(t, err) node := BuildTestNode("node", 10, 100) - err := snapshot.AddNodeInfo(framework.NewTestNodeInfo(node)) + err = snapshot.AddNodeInfo(framework.NewTestNodeInfo(node)) assert.NoError(t, err) snapshot.Fork() @@ -413,10 +429,11 @@ func TestNode404(t *testing.T) { t.Run(fmt.Sprintf("%s: %s base", name, op.name), func(t *testing.T) { - snapshot := snapshotFactory() + snapshot, err := snapshotFactory() + assert.NoError(t, err) node := BuildTestNode("node", 10, 100) - err := snapshot.AddNodeInfo(framework.NewTestNodeInfo(node)) + err = snapshot.AddNodeInfo(framework.NewTestNodeInfo(node)) assert.NoError(t, err) err = snapshot.RemoveNodeInfo("node") @@ -437,9 +454,9 @@ func TestNodeAlreadyExists(t *testing.T) { ops := []struct { name string - op func(ClusterSnapshot) error + op func(clustersnapshot.ClusterSnapshot) error }{ - {"add nodeInfo", func(snapshot ClusterSnapshot) error { + {"add nodeInfo", func(snapshot clustersnapshot.ClusterSnapshot) error { return snapshot.AddNodeInfo(framework.NewTestNodeInfo(node, pod)) }}, } @@ -448,9 +465,10 @@ func TestNodeAlreadyExists(t *testing.T) { for _, op := range ops { t.Run(fmt.Sprintf("%s: %s base", name, op.name), func(t *testing.T) { - snapshot := snapshotFactory() + snapshot, err := snapshotFactory() + assert.NoError(t, err) - err := snapshot.AddNodeInfo(framework.NewTestNodeInfo(node)) + err = snapshot.AddNodeInfo(framework.NewTestNodeInfo(node)) assert.NoError(t, err) // Node already in base. @@ -460,9 +478,10 @@ func TestNodeAlreadyExists(t *testing.T) { t.Run(fmt.Sprintf("%s: %s base, forked", name, op.name), func(t *testing.T) { - snapshot := snapshotFactory() + snapshot, err := snapshotFactory() + assert.NoError(t, err) - err := snapshot.AddNodeInfo(framework.NewTestNodeInfo(node)) + err = snapshot.AddNodeInfo(framework.NewTestNodeInfo(node)) assert.NoError(t, err) snapshot.Fork() @@ -475,11 +494,12 @@ func TestNodeAlreadyExists(t *testing.T) { t.Run(fmt.Sprintf("%s: %s fork", name, op.name), func(t *testing.T) { - snapshot := snapshotFactory() + snapshot, err := snapshotFactory() + assert.NoError(t, err) snapshot.Fork() - err := snapshot.AddNodeInfo(framework.NewTestNodeInfo(node)) + err = snapshot.AddNodeInfo(framework.NewTestNodeInfo(node)) assert.NoError(t, err) // Node already in fork. 
@@ -488,11 +508,12 @@ func TestNodeAlreadyExists(t *testing.T) { }) t.Run(fmt.Sprintf("%s: %s committed", name, op.name), func(t *testing.T) { - snapshot := snapshotFactory() + snapshot, err := snapshotFactory() + assert.NoError(t, err) snapshot.Fork() - err := snapshot.AddNodeInfo(framework.NewTestNodeInfo(node)) + err = snapshot.AddNodeInfo(framework.NewTestNodeInfo(node)) assert.NoError(t, err) err = snapshot.Commit() @@ -628,8 +649,9 @@ func TestPVCUsedByPods(t *testing.T) { for snapshotName, snapshotFactory := range snapshots { for _, tc := range testcase { t.Run(fmt.Sprintf("%s with snapshot (%s)", tc.desc, snapshotName), func(t *testing.T) { - snapshot := snapshotFactory() - err := snapshot.AddNodeInfo(framework.NewTestNodeInfo(tc.node, tc.pods...)) + snapshot, err := snapshotFactory() + assert.NoError(t, err) + err = snapshot.AddNodeInfo(framework.NewTestNodeInfo(tc.node, tc.pods...)) assert.NoError(t, err) volumeExists := snapshot.StorageInfos().IsPVCUsedByPods(schedulerframework.GetNamespacedName("default", tc.claimName)) @@ -698,8 +720,9 @@ func TestPVCClearAndFork(t *testing.T) { for snapshotName, snapshotFactory := range snapshots { t.Run(fmt.Sprintf("fork and revert snapshot with pvc pods with snapshot: %s", snapshotName), func(t *testing.T) { - snapshot := snapshotFactory() - err := snapshot.AddNodeInfo(framework.NewTestNodeInfo(node, pod1)) + snapshot, err := snapshotFactory() + assert.NoError(t, err) + err = snapshot.AddNodeInfo(framework.NewTestNodeInfo(node, pod1)) assert.NoError(t, err) volumeExists := snapshot.StorageInfos().IsPVCUsedByPods(schedulerframework.GetNamespacedName("default", "claim1")) assert.Equal(t, true, volumeExists) @@ -723,8 +746,9 @@ func TestPVCClearAndFork(t *testing.T) { }) t.Run(fmt.Sprintf("clear snapshot with pvc pods with snapshot: %s", snapshotName), func(t *testing.T) { - snapshot := snapshotFactory() - err := snapshot.AddNodeInfo(framework.NewTestNodeInfo(node, pod1)) + snapshot, err := snapshotFactory() + assert.NoError(t, err) + err = snapshot.AddNodeInfo(framework.NewTestNodeInfo(node, pod1)) assert.NoError(t, err) volumeExists := snapshot.StorageInfos().IsPVCUsedByPods(schedulerframework.GetNamespacedName("default", "claim1")) assert.Equal(t, true, volumeExists) @@ -752,7 +776,7 @@ func TestWithForkedSnapshot(t *testing.T) { return false, err } t.Run(fmt.Sprintf("%s: %s WithForkedSnapshot for failed function", name, tc.name), func(t *testing.T) { - err1, err2 := WithForkedSnapshot(snapshot, failedFunc) + err1, err2 := clustersnapshot.WithForkedSnapshot(snapshot, failedFunc) assert.Error(t, err1) assert.NoError(t, err2) @@ -760,7 +784,7 @@ func TestWithForkedSnapshot(t *testing.T) { compareStates(t, tc.state, getSnapshotState(t, snapshot)) }) t.Run(fmt.Sprintf("%s: %s WithForkedSnapshot for success function", name, tc.name), func(t *testing.T) { - err1, err2 := WithForkedSnapshot(snapshot, successFunc) + err1, err2 := clustersnapshot.WithForkedSnapshot(snapshot, successFunc) assert.Error(t, err1) assert.NoError(t, err2) diff --git a/cluster-autoscaler/simulator/clustersnapshot/test_utils.go b/cluster-autoscaler/simulator/clustersnapshot/test_utils.go index f0cd8c67546e..bd7c62408135 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/test_utils.go +++ b/cluster-autoscaler/simulator/clustersnapshot/test_utils.go @@ -17,12 +17,16 @@ limitations under the License. 
package clustersnapshot import ( + "fmt" + "math" "testing" + "time" "github.com/stretchr/testify/assert" apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" + "k8s.io/autoscaler/cluster-autoscaler/utils/test" ) // InitializeClusterSnapshotOrDie clears cluster snapshot and then initializes it with given set of nodes and pods. @@ -53,3 +57,42 @@ func InitializeClusterSnapshotOrDie( } } } + +// CreateTestNodesWithPrefix creates n test Nodes with the given name prefix. +func CreateTestNodesWithPrefix(prefix string, n int) []*apiv1.Node { + nodes := make([]*apiv1.Node, n, n) + for i := 0; i < n; i++ { + nodes[i] = test.BuildTestNode(fmt.Sprintf("%s-%d", prefix, i), math.MaxInt, math.MaxInt) + test.SetNodeReadyState(nodes[i], true, time.Time{}) + } + return nodes +} + +// CreateTestNodes creates n test Nodes. +func CreateTestNodes(n int) []*apiv1.Node { + return CreateTestNodesWithPrefix("n", n) +} + +// CreateTestPodsWithPrefix creates n test Pods with the given name prefix. +func CreateTestPodsWithPrefix(prefix string, n int) []*apiv1.Pod { + pods := make([]*apiv1.Pod, n, n) + for i := 0; i < n; i++ { + pods[i] = test.BuildTestPod(fmt.Sprintf("%s-%d", prefix, i), 1, 1) + } + return pods +} + +// CreateTestPods creates n test Pods. +func CreateTestPods(n int) []*apiv1.Pod { + return CreateTestPodsWithPrefix("p", n) +} + +// AssignTestPodsToNodes distributes test pods evenly across test nodes. +func AssignTestPodsToNodes(pods []*apiv1.Pod, nodes []*apiv1.Node) { + if len(nodes) == 0 { + return + } + for i := 0; i < len(pods); i++ { + pods[i].Spec.NodeName = nodes[i%len(nodes)].Name + } +} diff --git a/cluster-autoscaler/simulator/clustersnapshot/testsnapshot/test_snapshot.go b/cluster-autoscaler/simulator/clustersnapshot/testsnapshot/test_snapshot.go new file mode 100644 index 000000000000..91a37a04d71d --- /dev/null +++ b/cluster-autoscaler/simulator/clustersnapshot/testsnapshot/test_snapshot.go @@ -0,0 +1,64 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package testsnapshot + +import ( + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/predicate" + "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" +) + +// testFailer is an abstraction that covers both *testing.T and *testing.B. +type testFailer interface { + Fatalf(format string, args ...any) +} + +// NewTestSnapshot returns an instance of ClusterSnapshot that can be used in tests. +func NewTestSnapshot() (clustersnapshot.ClusterSnapshot, error) { + testFwHandle, err := framework.NewTestFrameworkHandle() + if err != nil { + return nil, err + } + return predicate.NewPredicateSnapshot(clustersnapshot.NewBasicClusterSnapshot(), testFwHandle), nil +} + +// NewTestSnapshotOrDie returns an instance of ClusterSnapshot that can be used in tests. 
+func NewTestSnapshotOrDie(t testFailer) clustersnapshot.ClusterSnapshot { + snapshot, err := NewTestSnapshot() + if err != nil { + t.Fatalf("NewTestSnapshotOrDie: couldn't create test ClusterSnapshot: %v", err) + } + return snapshot +} + +// NewCustomTestSnapshot returns an instance of ClusterSnapshot with a specific ClusterSnapshotStore that can be used in tests. +func NewCustomTestSnapshot(snapshotStore clustersnapshot.ClusterSnapshotStore) (clustersnapshot.ClusterSnapshot, error) { + testFwHandle, err := framework.NewTestFrameworkHandle() + if err != nil { + return nil, err + } + return predicate.NewPredicateSnapshot(snapshotStore, testFwHandle), nil +} + +// NewCustomTestSnapshotOrDie returns an instance of ClusterSnapshot with a specific ClusterSnapshotStore that can be used in tests. +func NewCustomTestSnapshotOrDie(t testFailer, snapshotStore clustersnapshot.ClusterSnapshotStore) clustersnapshot.ClusterSnapshot { + result, err := NewCustomTestSnapshot(snapshotStore) + if err != nil { + t.Fatalf("NewCustomTestSnapshotOrDie: couldn't create test ClusterSnapshot: %v", err) + } + return result +} From 540725286f08e1a89d9fca3631b023005d8362f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kuba=20Tu=C5=BCnik?= Date: Thu, 14 Nov 2024 14:27:21 +0100 Subject: [PATCH 4/8] CA: migrate the codebase to use PredicateSnapshot --- cluster-autoscaler/core/autoscaler.go | 3 ++- .../currently_drained_nodes_test.go | 4 +++- .../filter_out_expendable_test.go | 4 ++-- .../filter_out_schedulable_test.go | 11 +++++++--- .../core/scaledown/actuation/actuator.go | 3 ++- .../core/scaledown/actuation/drain_test.go | 4 +++- cluster-autoscaler/core/test/common.go | 7 +++++-- .../estimator/binpacking_estimator_test.go | 6 +++--- cluster-autoscaler/main.go | 3 ++- .../mixed_nodeinfos_processor_test.go | 10 +++++----- .../pod_injection_processor_test.go | 3 ++- cluster-autoscaler/simulator/cluster_test.go | 5 +++-- .../simulator/clustersnapshot/basic.go | 20 ------------------- .../simulator/clustersnapshot/delta.go | 20 ------------------- .../predicatechecker/schedulerbased_test.go | 1 - .../scheduling/hinting_simulator_test.go | 5 +++-- 16 files changed, 43 insertions(+), 66 deletions(-) diff --git a/cluster-autoscaler/core/autoscaler.go b/cluster-autoscaler/core/autoscaler.go index 88d94a70a53d..bd1e1545f84a 100644 --- a/cluster-autoscaler/core/autoscaler.go +++ b/cluster-autoscaler/core/autoscaler.go @@ -33,6 +33,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/observers/loopstart" ca_processors "k8s.io/autoscaler/cluster-autoscaler/processors" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/predicate" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" @@ -125,7 +126,7 @@ func initializeDefaultOptions(opts *AutoscalerOptions, informerFactory informers opts.FrameworkHandle = fwHandle } if opts.ClusterSnapshot == nil { - opts.ClusterSnapshot = clustersnapshot.NewBasicClusterSnapshot() + opts.ClusterSnapshot = predicate.NewPredicateSnapshot(clustersnapshot.NewBasicClusterSnapshot(), opts.FrameworkHandle) } if opts.RemainingPdbTracker == nil { opts.RemainingPdbTracker = pdb.NewBasicRemainingPdbTracker() diff --git a/cluster-autoscaler/core/podlistprocessor/currently_drained_nodes_test.go b/cluster-autoscaler/core/podlistprocessor/currently_drained_nodes_test.go index 0aebcbd4d671..cf5051897eca 100644 --- 
a/cluster-autoscaler/core/podlistprocessor/currently_drained_nodes_test.go +++ b/cluster-autoscaler/core/podlistprocessor/currently_drained_nodes_test.go @@ -21,11 +21,13 @@ import ( "time" "github.com/stretchr/testify/assert" + apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/context" "k8s.io/autoscaler/cluster-autoscaler/core/scaledown" "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/status" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" ) @@ -267,7 +269,7 @@ func TestCurrentlyDrainedNodesPodListProcessor(t *testing.T) { t.Run(tc.name, func(t *testing.T) { ctx := context.AutoscalingContext{ ScaleDownActuator: &mockActuator{&mockActuationStatus{tc.drainedNodes}}, - ClusterSnapshot: clustersnapshot.NewBasicClusterSnapshot(), + ClusterSnapshot: testsnapshot.NewTestSnapshotOrDie(t), } clustersnapshot.InitializeClusterSnapshotOrDie(t, ctx.ClusterSnapshot, tc.nodes, tc.pods) diff --git a/cluster-autoscaler/core/podlistprocessor/filter_out_expendable_test.go b/cluster-autoscaler/core/podlistprocessor/filter_out_expendable_test.go index 94f6915e3028..280b53768636 100644 --- a/cluster-autoscaler/core/podlistprocessor/filter_out_expendable_test.go +++ b/cluster-autoscaler/core/podlistprocessor/filter_out_expendable_test.go @@ -25,7 +25,7 @@ import ( apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/config" "k8s.io/autoscaler/cluster-autoscaler/context" - "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" "k8s.io/autoscaler/cluster-autoscaler/utils/test" ) @@ -109,7 +109,7 @@ func TestFilterOutExpendable(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { processor := NewFilterOutExpendablePodListProcessor() - snapshot := clustersnapshot.NewBasicClusterSnapshot() + snapshot := testsnapshot.NewTestSnapshotOrDie(t) err := snapshot.SetClusterState(tc.nodes, nil) assert.NoError(t, err) diff --git a/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go b/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go index 728c32af9202..9c40769e2e24 100644 --- a/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go +++ b/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go @@ -25,6 +25,7 @@ import ( apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" "k8s.io/autoscaler/cluster-autoscaler/simulator/scheduling" @@ -173,7 +174,7 @@ func TestFilterOutSchedulable(t *testing.T) { for tn, tc := range testCases { t.Run(tn, func(t *testing.T) { - clusterSnapshot := clustersnapshot.NewBasicClusterSnapshot() + clusterSnapshot := testsnapshot.NewTestSnapshotOrDie(t) predicateChecker, err := predicatechecker.NewTestPredicateChecker() assert.NoError(t, err) @@ -250,8 +251,12 @@ func BenchmarkFilterOutSchedulable(b *testing.B) { }, } snapshots := map[string]func() clustersnapshot.ClusterSnapshot{ - "basic": func() clustersnapshot.ClusterSnapshot { return clustersnapshot.NewBasicClusterSnapshot() 
}, - "delta": func() clustersnapshot.ClusterSnapshot { return clustersnapshot.NewDeltaClusterSnapshot() }, + "basic": func() clustersnapshot.ClusterSnapshot { + return testsnapshot.NewCustomTestSnapshotOrDie(b, clustersnapshot.NewBasicClusterSnapshot()) + }, + "delta": func() clustersnapshot.ClusterSnapshot { + return testsnapshot.NewCustomTestSnapshotOrDie(b, clustersnapshot.NewDeltaClusterSnapshot()) + }, } for snapshotName, snapshotFactory := range snapshots { for _, tc := range tests { diff --git a/cluster-autoscaler/core/scaledown/actuation/actuator.go b/cluster-autoscaler/core/scaledown/actuation/actuator.go index a85410172684..232bde8f8f46 100644 --- a/cluster-autoscaler/core/scaledown/actuation/actuator.go +++ b/cluster-autoscaler/core/scaledown/actuation/actuator.go @@ -33,6 +33,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/observers/nodegroupchange" "k8s.io/autoscaler/cluster-autoscaler/simulator" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/predicate" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" "k8s.io/autoscaler/cluster-autoscaler/simulator/utilization" @@ -356,7 +357,7 @@ func (a *Actuator) taintNode(node *apiv1.Node) error { } func (a *Actuator) createSnapshot(nodes []*apiv1.Node) (clustersnapshot.ClusterSnapshot, error) { - snapshot := clustersnapshot.NewBasicClusterSnapshot() + snapshot := predicate.NewPredicateSnapshot(clustersnapshot.NewBasicClusterSnapshot(), a.ctx.FrameworkHandle) pods, err := a.ctx.AllPodLister().List() if err != nil { return nil, err diff --git a/cluster-autoscaler/core/scaledown/actuation/drain_test.go b/cluster-autoscaler/core/scaledown/actuation/drain_test.go index 6ba905761db5..962a707990dc 100644 --- a/cluster-autoscaler/core/scaledown/actuation/drain_test.go +++ b/cluster-autoscaler/core/scaledown/actuation/drain_test.go @@ -26,6 +26,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/stretchr/testify/assert" + apiv1 "k8s.io/api/core/v1" policyv1beta1 "k8s.io/api/policy/v1beta1" "k8s.io/apimachinery/pkg/api/errors" @@ -37,6 +38,7 @@ import ( . 
"k8s.io/autoscaler/cluster-autoscaler/core/test" "k8s.io/autoscaler/cluster-autoscaler/core/utils" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/utils/daemonset" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" @@ -611,7 +613,7 @@ func TestPodsToEvict(t *testing.T) { }, } { t.Run(tn, func(t *testing.T) { - snapshot := clustersnapshot.NewBasicClusterSnapshot() + snapshot := testsnapshot.NewTestSnapshotOrDie(t) node := BuildTestNode("test-node", 1000, 1000) err := snapshot.AddNodeInfo(framework.NewTestNodeInfo(node, tc.pods...)) if err != nil { diff --git a/cluster-autoscaler/core/test/common.go b/cluster-autoscaler/core/test/common.go index 7dfc57b765ae..990525c5dc9a 100644 --- a/cluster-autoscaler/core/test/common.go +++ b/cluster-autoscaler/core/test/common.go @@ -36,7 +36,7 @@ import ( processor_callbacks "k8s.io/autoscaler/cluster-autoscaler/processors/callbacks" "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups" "k8s.io/autoscaler/cluster-autoscaler/processors/status" - "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" "k8s.io/autoscaler/cluster-autoscaler/utils/backoff" @@ -183,7 +183,10 @@ func NewScaleTestAutoscalingContext( if debuggingSnapshotter == nil { debuggingSnapshotter = debuggingsnapshot.NewDebuggingSnapshotter(false) } - clusterSnapshot := clustersnapshot.NewBasicClusterSnapshot() + clusterSnapshot, err := testsnapshot.NewTestSnapshot() + if err != nil { + return context.AutoscalingContext{}, err + } return context.AutoscalingContext{ AutoscalingOptions: options, AutoscalingKubeClients: context.AutoscalingKubeClients{ diff --git a/cluster-autoscaler/estimator/binpacking_estimator_test.go b/cluster-autoscaler/estimator/binpacking_estimator_test.go index 59dd9a2a95df..a0250e8a61e9 100644 --- a/cluster-autoscaler/estimator/binpacking_estimator_test.go +++ b/cluster-autoscaler/estimator/binpacking_estimator_test.go @@ -25,7 +25,7 @@ import ( apiv1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" @@ -209,7 +209,7 @@ func TestBinpackingEstimate(t *testing.T) { } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - clusterSnapshot := clustersnapshot.NewBasicClusterSnapshot() + clusterSnapshot := testsnapshot.NewTestSnapshotOrDie(t) // Add one node in different zone to trigger topology spread constraints err := clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(makeNode(100, 100, 10, "oldnode", "zone-jupiter"))) assert.NoError(t, err) @@ -265,7 +265,7 @@ func BenchmarkBinpackingEstimate(b *testing.B) { } for i := 0; i < b.N; i++ { - clusterSnapshot := clustersnapshot.NewBasicClusterSnapshot() + clusterSnapshot := testsnapshot.NewTestSnapshotOrDie(b) err := clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(makeNode(100, 100, 10, "oldnode", "zone-jupiter"))) assert.NoError(b, err) diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index 8fee1047f569..109ad776da02 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -36,6 +36,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/besteffortatomic" "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/checkcapacity" "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/provreqclient" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/predicate" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" "k8s.io/autoscaler/cluster-autoscaler/simulator/scheduling" @@ -503,7 +504,7 @@ func buildAutoscaler(context ctx.Context, debuggingSnapshotter debuggingsnapshot opts := core.AutoscalerOptions{ AutoscalingOptions: autoscalingOptions, FrameworkHandle: fwHandle, - ClusterSnapshot: clustersnapshot.NewDeltaClusterSnapshot(), + ClusterSnapshot: predicate.NewPredicateSnapshot(clustersnapshot.NewDeltaClusterSnapshot(), fwHandle), KubeClient: kubeClient, InformerFactory: informerFactory, DebuggingSnapshotter: debuggingSnapshotter, diff --git a/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go b/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go index bc2cea5516c7..2b9e0ef7bd52 100644 --- a/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go +++ b/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go @@ -26,7 +26,7 @@ import ( apiv1 "k8s.io/api/core/v1" testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test" "k8s.io/autoscaler/cluster-autoscaler/context" - "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" @@ -80,7 +80,7 @@ func TestGetNodeInfosForGroups(t *testing.T) { assert.NoError(t, err) nodes := []*apiv1.Node{justReady5, unready4, unready3, ready2, ready1} - snapshot := clustersnapshot.NewBasicClusterSnapshot() + snapshot := testsnapshot.NewTestSnapshotOrDie(t) err = snapshot.SetClusterState(nodes, nil) assert.NoError(t, err) @@ -114,7 +114,7 @@ func TestGetNodeInfosForGroups(t *testing.T) { // Test for a nodegroup without nodes and TemplateNodeInfo not implemented by cloud proivder ctx = context.AutoscalingContext{ CloudProvider: provider2, - ClusterSnapshot: clustersnapshot.NewBasicClusterSnapshot(), + 
ClusterSnapshot: testsnapshot.NewTestSnapshotOrDie(t), PredicateChecker: predicateChecker, AutoscalingKubeClients: context.AutoscalingKubeClients{ ListerRegistry: registry, @@ -171,7 +171,7 @@ func TestGetNodeInfosForGroupsCache(t *testing.T) { assert.NoError(t, err) nodes := []*apiv1.Node{unready4, unready3, ready2, ready1} - snapshot := clustersnapshot.NewBasicClusterSnapshot() + snapshot := testsnapshot.NewTestSnapshotOrDie(t) err = snapshot.SetClusterState(nodes, nil) assert.NoError(t, err) @@ -265,7 +265,7 @@ func TestGetNodeInfosCacheExpired(t *testing.T) { assert.NoError(t, err) nodes := []*apiv1.Node{ready1} - snapshot := clustersnapshot.NewBasicClusterSnapshot() + snapshot := testsnapshot.NewTestSnapshotOrDie(t) err = snapshot.SetClusterState(nodes, nil) assert.NoError(t, err) diff --git a/cluster-autoscaler/processors/podinjection/pod_injection_processor_test.go b/cluster-autoscaler/processors/podinjection/pod_injection_processor_test.go index 13a98c8d78c8..f2f578570841 100644 --- a/cluster-autoscaler/processors/podinjection/pod_injection_processor_test.go +++ b/cluster-autoscaler/processors/podinjection/pod_injection_processor_test.go @@ -29,6 +29,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/context" podinjectionbackoff "k8s.io/autoscaler/cluster-autoscaler/processors/podinjection/backoff" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" @@ -112,7 +113,7 @@ func TestTargetCountInjectionPodListProcessor(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { p := NewPodInjectionPodListProcessor(podinjectionbackoff.NewFakePodControllerRegistry()) - clusterSnapshot := clustersnapshot.NewDeltaClusterSnapshot() + clusterSnapshot := testsnapshot.NewCustomTestSnapshotOrDie(t, clustersnapshot.NewDeltaClusterSnapshot()) err := clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(node, tc.scheduledPods...)) assert.NoError(t, err) ctx := context.AutoscalingContext{ diff --git a/cluster-autoscaler/simulator/cluster_test.go b/cluster-autoscaler/simulator/cluster_test.go index 3612d4404772..eaee19b59924 100644 --- a/cluster-autoscaler/simulator/cluster_test.go +++ b/cluster-autoscaler/simulator/cluster_test.go @@ -27,6 +27,7 @@ import ( apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" @@ -56,7 +57,7 @@ func TestFindEmptyNodes(t *testing.T) { types.ConfigMirrorAnnotationKey: "", } - clusterSnapshot := clustersnapshot.NewBasicClusterSnapshot() + clusterSnapshot := testsnapshot.NewTestSnapshotOrDie(t) clustersnapshot.InitializeClusterSnapshotOrDie(t, clusterSnapshot, []*apiv1.Node{nodes[0], nodes[1], nodes[2], nodes[3]}, []*apiv1.Pod{pod1, pod2}) testTime := time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) r := NewRemovalSimulator(nil, clusterSnapshot, nil, testDeleteOptions(), nil, false) @@ -138,7 +139,7 @@ func TestFindNodesToRemove(t *testing.T) { PodsToReschedule: []*apiv1.Pod{pod1, pod2}, } - clusterSnapshot := 
clustersnapshot.NewBasicClusterSnapshot() + clusterSnapshot := testsnapshot.NewTestSnapshotOrDie(t) predicateChecker, err := predicatechecker.NewTestPredicateChecker() assert.NoError(t, err) diff --git a/cluster-autoscaler/simulator/clustersnapshot/basic.go b/cluster-autoscaler/simulator/clustersnapshot/basic.go index 5eae2a5a892a..be8388c5ce8b 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/basic.go +++ b/cluster-autoscaler/simulator/clustersnapshot/basic.go @@ -31,26 +31,6 @@ type BasicClusterSnapshot struct { data []*internalBasicSnapshotData } -func (snapshot *BasicClusterSnapshot) SchedulePod(pod *apiv1.Pod, nodeName string) SchedulingError { - //TODO implement me - panic("implement me") -} - -func (snapshot *BasicClusterSnapshot) SchedulePodOnAnyNodeMatching(pod *apiv1.Pod, nodeMatches func(*framework.NodeInfo) bool) (matchingNode string, err SchedulingError) { - //TODO implement me - panic("implement me") -} - -func (snapshot *BasicClusterSnapshot) CheckPredicates(pod *apiv1.Pod, nodeName string) SchedulingError { - //TODO implement me - panic("implement me") -} - -func (snapshot *BasicClusterSnapshot) UnschedulePod(namespace string, podName string, nodeName string) error { - //TODO implement me - panic("implement me") -} - type internalBasicSnapshotData struct { nodeInfoMap map[string]*schedulerframework.NodeInfo pvcNamespacePodMap map[string]map[string]bool diff --git a/cluster-autoscaler/simulator/clustersnapshot/delta.go b/cluster-autoscaler/simulator/clustersnapshot/delta.go index 46f89ade0e3c..869e494e0226 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/delta.go +++ b/cluster-autoscaler/simulator/clustersnapshot/delta.go @@ -46,26 +46,6 @@ type DeltaClusterSnapshot struct { data *internalDeltaSnapshotData } -func (snapshot *DeltaClusterSnapshot) SchedulePod(pod *apiv1.Pod, nodeName string) SchedulingError { - //TODO implement me - panic("implement me") -} - -func (snapshot *DeltaClusterSnapshot) SchedulePodOnAnyNodeMatching(pod *apiv1.Pod, nodeMatches func(*framework.NodeInfo) bool) (matchingNode string, err SchedulingError) { - //TODO implement me - panic("implement me") -} - -func (snapshot *DeltaClusterSnapshot) CheckPredicates(pod *apiv1.Pod, nodeName string) SchedulingError { - //TODO implement me - panic("implement me") -} - -func (snapshot *DeltaClusterSnapshot) UnschedulePod(namespace string, podName string, nodeName string) error { - //TODO implement me - panic("implement me") -} - type deltaSnapshotNodeLister DeltaClusterSnapshot type deltaSnapshotStorageLister DeltaClusterSnapshot diff --git a/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go b/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go index a454e7684f35..8e2ed4d94084 100644 --- a/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go +++ b/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go @@ -284,7 +284,6 @@ func TestDebugInfo(t *testing.T) { SetNodeReadyState(node1, true, time.Time{}) clusterSnapshot := clustersnapshot.NewBasicClusterSnapshot() - err := clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(node1)) assert.NoError(t, err) diff --git a/cluster-autoscaler/simulator/scheduling/hinting_simulator_test.go b/cluster-autoscaler/simulator/scheduling/hinting_simulator_test.go index 7cec8b6018c6..717fca941394 100644 --- a/cluster-autoscaler/simulator/scheduling/hinting_simulator_test.go +++ b/cluster-autoscaler/simulator/scheduling/hinting_simulator_test.go @@ -24,6 +24,7 @@ import ( apiv1 "k8s.io/api/core/v1" 
"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" @@ -133,7 +134,7 @@ func TestTrySchedulePods(t *testing.T) { tc := tc t.Run(tc.desc, func(t *testing.T) { t.Parallel() - clusterSnapshot := clustersnapshot.NewBasicClusterSnapshot() + clusterSnapshot := testsnapshot.NewTestSnapshotOrDie(t) predicateChecker, err := predicatechecker.NewTestPredicateChecker() assert.NoError(t, err) clustersnapshot.InitializeClusterSnapshotOrDie(t, clusterSnapshot, tc.nodes, tc.pods) @@ -210,7 +211,7 @@ func TestPodSchedulesOnHintedNode(t *testing.T) { tc := tc t.Run(tc.desc, func(t *testing.T) { t.Parallel() - clusterSnapshot := clustersnapshot.NewBasicClusterSnapshot() + clusterSnapshot := testsnapshot.NewTestSnapshotOrDie(t) predicateChecker, err := predicatechecker.NewTestPredicateChecker() assert.NoError(t, err) nodes := make([]*apiv1.Node, 0, len(tc.nodeNames)) From 67773a5509a111d372a7032a7e2500d4a28d74a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kuba=20Tu=C5=BCnik?= Date: Thu, 14 Nov 2024 14:40:19 +0100 Subject: [PATCH 5/8] CA: move BasicClusterSnapshot and DeltaClusterSnapshot to a dedicated subpkg --- cluster-autoscaler/core/autoscaler.go | 3 ++- .../filter_out_schedulable_test.go | 5 +++-- .../core/scaledown/actuation/actuator.go | 3 ++- cluster-autoscaler/main.go | 5 +++-- .../podinjection/pod_injection_processor_test.go | 5 +++-- .../predicate/predicate_snapshot_test.go | 5 +++-- .../clustersnapshot/{ => store}/basic.go | 13 +++++++------ .../clustersnapshot/{ => store}/delta.go | 15 ++++++++------- .../{ => store}/delta_benchmark_test.go | 7 ++++--- .../clustersnapshot/testsnapshot/test_snapshot.go | 3 ++- .../predicatechecker/schedulerbased_test.go | 7 ++++--- 11 files changed, 41 insertions(+), 30 deletions(-) rename cluster-autoscaler/simulator/clustersnapshot/{ => store}/basic.go (97%) rename cluster-autoscaler/simulator/clustersnapshot/{ => store}/delta.go (97%) rename cluster-autoscaler/simulator/clustersnapshot/{ => store}/delta_benchmark_test.go (90%) diff --git a/cluster-autoscaler/core/autoscaler.go b/cluster-autoscaler/core/autoscaler.go index bd1e1545f84a..229a6104c877 100644 --- a/cluster-autoscaler/core/autoscaler.go +++ b/cluster-autoscaler/core/autoscaler.go @@ -34,6 +34,7 @@ import ( ca_processors "k8s.io/autoscaler/cluster-autoscaler/processors" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/predicate" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/store" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" @@ -126,7 +127,7 @@ func initializeDefaultOptions(opts *AutoscalerOptions, informerFactory informers opts.FrameworkHandle = fwHandle } if opts.ClusterSnapshot == nil { - opts.ClusterSnapshot = predicate.NewPredicateSnapshot(clustersnapshot.NewBasicClusterSnapshot(), opts.FrameworkHandle) + opts.ClusterSnapshot = predicate.NewPredicateSnapshot(store.NewBasicClusterSnapshot(), opts.FrameworkHandle) } if opts.RemainingPdbTracker == nil { opts.RemainingPdbTracker = pdb.NewBasicRemainingPdbTracker() diff --git a/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go 
b/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go index 9c40769e2e24..361211f6a83a 100644 --- a/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go +++ b/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go @@ -25,6 +25,7 @@ import ( apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/store" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" @@ -252,10 +253,10 @@ func BenchmarkFilterOutSchedulable(b *testing.B) { } snapshots := map[string]func() clustersnapshot.ClusterSnapshot{ "basic": func() clustersnapshot.ClusterSnapshot { - return testsnapshot.NewCustomTestSnapshotOrDie(b, clustersnapshot.NewBasicClusterSnapshot()) + return testsnapshot.NewCustomTestSnapshotOrDie(b, store.NewBasicClusterSnapshot()) }, "delta": func() clustersnapshot.ClusterSnapshot { - return testsnapshot.NewCustomTestSnapshotOrDie(b, clustersnapshot.NewDeltaClusterSnapshot()) + return testsnapshot.NewCustomTestSnapshotOrDie(b, store.NewDeltaClusterSnapshot()) }, } for snapshotName, snapshotFactory := range snapshots { diff --git a/cluster-autoscaler/core/scaledown/actuation/actuator.go b/cluster-autoscaler/core/scaledown/actuation/actuator.go index 232bde8f8f46..f6aa898afa71 100644 --- a/cluster-autoscaler/core/scaledown/actuation/actuator.go +++ b/cluster-autoscaler/core/scaledown/actuation/actuator.go @@ -34,6 +34,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/predicate" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/store" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" "k8s.io/autoscaler/cluster-autoscaler/simulator/utilization" @@ -357,7 +358,7 @@ func (a *Actuator) taintNode(node *apiv1.Node) error { } func (a *Actuator) createSnapshot(nodes []*apiv1.Node) (clustersnapshot.ClusterSnapshot, error) { - snapshot := predicate.NewPredicateSnapshot(clustersnapshot.NewBasicClusterSnapshot(), a.ctx.FrameworkHandle) + snapshot := predicate.NewPredicateSnapshot(store.NewBasicClusterSnapshot(), a.ctx.FrameworkHandle) pods, err := a.ctx.AllPodLister().List() if err != nil { return nil, err diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index 109ad776da02..bc9235e6be3a 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -29,6 +29,7 @@ import ( "time" "github.com/spf13/pflag" + "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/actuation" "k8s.io/autoscaler/cluster-autoscaler/core/scaleup/orchestrator" "k8s.io/autoscaler/cluster-autoscaler/debuggingsnapshot" @@ -37,6 +38,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/checkcapacity" "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/provreqclient" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/predicate" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/store" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" "k8s.io/autoscaler/cluster-autoscaler/simulator/scheduling" @@ -69,7 +71,6 @@ import ( 
"k8s.io/autoscaler/cluster-autoscaler/processors/scaledowncandidates/previouscandidates" "k8s.io/autoscaler/cluster-autoscaler/processors/status" provreqorchestrator "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/orchestrator" - "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" @@ -504,7 +505,7 @@ func buildAutoscaler(context ctx.Context, debuggingSnapshotter debuggingsnapshot opts := core.AutoscalerOptions{ AutoscalingOptions: autoscalingOptions, FrameworkHandle: fwHandle, - ClusterSnapshot: predicate.NewPredicateSnapshot(clustersnapshot.NewDeltaClusterSnapshot(), fwHandle), + ClusterSnapshot: predicate.NewPredicateSnapshot(store.NewDeltaClusterSnapshot(), fwHandle), KubeClient: kubeClient, InformerFactory: informerFactory, DebuggingSnapshotter: debuggingSnapshotter, diff --git a/cluster-autoscaler/processors/podinjection/pod_injection_processor_test.go b/cluster-autoscaler/processors/podinjection/pod_injection_processor_test.go index f2f578570841..3bdf0374e1b2 100644 --- a/cluster-autoscaler/processors/podinjection/pod_injection_processor_test.go +++ b/cluster-autoscaler/processors/podinjection/pod_injection_processor_test.go @@ -21,6 +21,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + appsv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" apiv1 "k8s.io/api/core/v1" @@ -28,7 +29,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/autoscaler/cluster-autoscaler/context" podinjectionbackoff "k8s.io/autoscaler/cluster-autoscaler/processors/podinjection/backoff" - "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/store" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" @@ -113,7 +114,7 @@ func TestTargetCountInjectionPodListProcessor(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { p := NewPodInjectionPodListProcessor(podinjectionbackoff.NewFakePodControllerRegistry()) - clusterSnapshot := testsnapshot.NewCustomTestSnapshotOrDie(t, clustersnapshot.NewDeltaClusterSnapshot()) + clusterSnapshot := testsnapshot.NewCustomTestSnapshotOrDie(t, store.NewDeltaClusterSnapshot()) err := clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(node, tc.scheduledPods...)) assert.NoError(t, err) ctx := context.AutoscalingContext{ diff --git a/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot_test.go b/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot_test.go index f65c3166cdd5..f7ea217be590 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot_test.go +++ b/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot_test.go @@ -24,6 +24,7 @@ import ( apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/store" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" @@ -37,14 +38,14 @@ var snapshots = map[string]func() (clustersnapshot.ClusterSnapshot, error){ if err != nil { return nil, err } - return NewPredicateSnapshot(clustersnapshot.NewBasicClusterSnapshot(), fwHandle), nil + return NewPredicateSnapshot(store.NewBasicClusterSnapshot(), fwHandle), nil }, "delta": func() (clustersnapshot.ClusterSnapshot, error) { fwHandle, err := framework.NewTestFrameworkHandle() if err != nil { return nil, err } - return NewPredicateSnapshot(clustersnapshot.NewDeltaClusterSnapshot(), fwHandle), nil + return NewPredicateSnapshot(store.NewDeltaClusterSnapshot(), fwHandle), nil }, } diff --git a/cluster-autoscaler/simulator/clustersnapshot/basic.go b/cluster-autoscaler/simulator/clustersnapshot/store/basic.go similarity index 97% rename from cluster-autoscaler/simulator/clustersnapshot/basic.go rename to cluster-autoscaler/simulator/clustersnapshot/store/basic.go index be8388c5ce8b..bc9032a5e2bf 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/basic.go +++ b/cluster-autoscaler/simulator/clustersnapshot/store/basic.go @@ -1,5 +1,5 @@ /* -Copyright 2020 The Kubernetes Authors. +Copyright 2024 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,12 +14,13 @@ See the License for the specific language governing permissions and limitations under the License. */ -package clustersnapshot +package store import ( "fmt" apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/klog/v2" schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" @@ -70,7 +71,7 @@ func (data *internalBasicSnapshotData) getNodeInfo(nodeName string) (*schedulerf if v, ok := data.nodeInfoMap[nodeName]; ok { return v, nil } - return nil, ErrNodeNotFound + return nil, clustersnapshot.ErrNodeNotFound } func (data *internalBasicSnapshotData) isPVCUsedByPods(key string) bool { @@ -155,7 +156,7 @@ func (data *internalBasicSnapshotData) addNode(node *apiv1.Node) error { func (data *internalBasicSnapshotData) removeNodeInfo(nodeName string) error { if _, found := data.nodeInfoMap[nodeName]; !found { - return ErrNodeNotFound + return clustersnapshot.ErrNodeNotFound } for _, pod := range data.nodeInfoMap[nodeName].Pods { data.removePvcUsedByPod(pod.Pod) @@ -166,7 +167,7 @@ func (data *internalBasicSnapshotData) removeNodeInfo(nodeName string) error { func (data *internalBasicSnapshotData) addPod(pod *apiv1.Pod, nodeName string) error { if _, found := data.nodeInfoMap[nodeName]; !found { - return ErrNodeNotFound + return clustersnapshot.ErrNodeNotFound } data.nodeInfoMap[nodeName].AddPod(pod) data.addPvcUsedByPod(pod) @@ -176,7 +177,7 @@ func (data *internalBasicSnapshotData) addPod(pod *apiv1.Pod, nodeName string) e func (data *internalBasicSnapshotData) removePod(namespace, podName, nodeName string) error { nodeInfo, found := data.nodeInfoMap[nodeName] if !found { - return ErrNodeNotFound + return clustersnapshot.ErrNodeNotFound } logger := klog.Background() for _, podInfo := range nodeInfo.Pods { diff --git a/cluster-autoscaler/simulator/clustersnapshot/delta.go b/cluster-autoscaler/simulator/clustersnapshot/store/delta.go similarity index 97% rename from cluster-autoscaler/simulator/clustersnapshot/delta.go rename to cluster-autoscaler/simulator/clustersnapshot/store/delta.go 
index 869e494e0226..6baf1b0ff6e9 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/delta.go +++ b/cluster-autoscaler/simulator/clustersnapshot/store/delta.go @@ -1,5 +1,5 @@ /* -Copyright 2020 The Kubernetes Authors. +Copyright 2024 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,12 +14,13 @@ See the License for the specific language governing permissions and limitations under the License. */ -package clustersnapshot +package store import ( "fmt" apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/klog/v2" schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" @@ -191,7 +192,7 @@ func (data *internalDeltaSnapshotData) removeNodeInfo(nodeName string) error { if _, deleted := data.deletedNodeInfos[nodeName]; deleted { // If node was deleted within this delta, fail with error. - return ErrNodeNotFound + return clustersnapshot.ErrNodeNotFound } _, foundInBase := data.baseData.getNodeInfo(nodeName) @@ -202,7 +203,7 @@ func (data *internalDeltaSnapshotData) removeNodeInfo(nodeName string) error { if !foundInBase && !foundInDelta { // Node not found in the chain. - return ErrNodeNotFound + return clustersnapshot.ErrNodeNotFound } // Maybe consider deleting from the lists instead. Maybe not. @@ -230,7 +231,7 @@ func (data *internalDeltaSnapshotData) nodeInfoToModify(nodeName string) (*sched func (data *internalDeltaSnapshotData) addPod(pod *apiv1.Pod, nodeName string) error { ni, found := data.nodeInfoToModify(nodeName) if !found { - return ErrNodeNotFound + return clustersnapshot.ErrNodeNotFound } ni.AddPod(pod) @@ -246,7 +247,7 @@ func (data *internalDeltaSnapshotData) removePod(namespace, name, nodeName strin // probably means things are very bad anyway. ni, found := data.nodeInfoToModify(nodeName) if !found { - return ErrNodeNotFound + return clustersnapshot.ErrNodeNotFound } podFound := false @@ -371,7 +372,7 @@ func (snapshot *DeltaClusterSnapshot) getNodeInfo(nodeName string) (*schedulerfr data := snapshot.data node, found := data.getNodeInfo(nodeName) if !found { - return nil, ErrNodeNotFound + return nil, clustersnapshot.ErrNodeNotFound } return node, nil } diff --git a/cluster-autoscaler/simulator/clustersnapshot/delta_benchmark_test.go b/cluster-autoscaler/simulator/clustersnapshot/store/delta_benchmark_test.go similarity index 90% rename from cluster-autoscaler/simulator/clustersnapshot/delta_benchmark_test.go rename to cluster-autoscaler/simulator/clustersnapshot/store/delta_benchmark_test.go index 5c1ce21a984d..efef5aaff92a 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/delta_benchmark_test.go +++ b/cluster-autoscaler/simulator/clustersnapshot/store/delta_benchmark_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package clustersnapshot +package store import ( "fmt" @@ -22,6 +22,7 @@ import ( "github.com/stretchr/testify/assert" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" ) @@ -45,7 +46,7 @@ func BenchmarkBuildNodeInfoList(b *testing.B) { for _, tc := range testCases { b.Run(fmt.Sprintf("fork add 1000 to %d", tc.nodeCount), func(b *testing.B) { - nodes := CreateTestNodes(tc.nodeCount + 1000) + nodes := clustersnapshot.CreateTestNodes(tc.nodeCount + 1000) clusterSnapshot := NewDeltaClusterSnapshot() if err := clusterSnapshot.SetClusterState(nodes[:tc.nodeCount], nil); err != nil { assert.NoError(b, err) @@ -65,7 +66,7 @@ func BenchmarkBuildNodeInfoList(b *testing.B) { } for _, tc := range testCases { b.Run(fmt.Sprintf("base %d", tc.nodeCount), func(b *testing.B) { - nodes := CreateTestNodes(tc.nodeCount) + nodes := clustersnapshot.CreateTestNodes(tc.nodeCount) clusterSnapshot := NewDeltaClusterSnapshot() if err := clusterSnapshot.SetClusterState(nodes, nil); err != nil { assert.NoError(b, err) diff --git a/cluster-autoscaler/simulator/clustersnapshot/testsnapshot/test_snapshot.go b/cluster-autoscaler/simulator/clustersnapshot/testsnapshot/test_snapshot.go index 91a37a04d71d..2854447b9294 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/testsnapshot/test_snapshot.go +++ b/cluster-autoscaler/simulator/clustersnapshot/testsnapshot/test_snapshot.go @@ -19,6 +19,7 @@ package testsnapshot import ( "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/predicate" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/store" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" ) @@ -33,7 +34,7 @@ func NewTestSnapshot() (clustersnapshot.ClusterSnapshot, error) { if err != nil { return nil, err } - return predicate.NewPredicateSnapshot(clustersnapshot.NewBasicClusterSnapshot(), testFwHandle), nil + return predicate.NewPredicateSnapshot(store.NewBasicClusterSnapshot(), testFwHandle), nil } // NewTestSnapshotOrDie returns an instance of ClusterSnapshot that can be used in tests. diff --git a/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go b/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go index 8e2ed4d94084..dc2682d927a2 100644 --- a/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go +++ b/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go @@ -26,6 +26,7 @@ import ( testconfig "k8s.io/autoscaler/cluster-autoscaler/config/test" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/store" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/utils/scheduler" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" @@ -144,7 +145,7 @@ func TestCheckPredicate(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { var err error - clusterSnapshot := clustersnapshot.NewBasicClusterSnapshot() + clusterSnapshot := store.NewBasicClusterSnapshot() err = clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(tt.node, tt.scheduledPods...)) assert.NoError(t, err) @@ -246,7 +247,7 @@ func TestFitsAnyNode(t *testing.T) { }, } - clusterSnapshot := clustersnapshot.NewBasicClusterSnapshot() + clusterSnapshot := store.NewBasicClusterSnapshot() err = clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(n1000)) assert.NoError(t, err) err = clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(n2000)) @@ -283,7 +284,7 @@ func TestDebugInfo(t *testing.T) { } SetNodeReadyState(node1, true, time.Time{}) - clusterSnapshot := clustersnapshot.NewBasicClusterSnapshot() + clusterSnapshot := store.NewBasicClusterSnapshot() err := clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(node1)) assert.NoError(t, err) From 0ace148d3d2ea4214d4f1fedd89cec290fc62735 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kuba=20Tu=C5=BCnik?= Date: Thu, 14 Nov 2024 14:47:16 +0100 Subject: [PATCH 6/8] CA: rename BasicClusterSnapshot and DeltaClusterSnapshot to reflect the ClusterSnapshotStore change --- cluster-autoscaler/core/autoscaler.go | 2 +- .../filter_out_schedulable_test.go | 4 +- .../core/scaledown/actuation/actuator.go | 2 +- cluster-autoscaler/main.go | 2 +- .../pod_injection_processor_test.go | 2 +- .../predicate/predicate_snapshot_test.go | 4 +- .../simulator/clustersnapshot/store/basic.go | 68 +++++++++---------- .../simulator/clustersnapshot/store/delta.go | 62 ++++++++--------- .../store/delta_benchmark_test.go | 16 ++--- .../testsnapshot/test_snapshot.go | 2 +- .../predicatechecker/schedulerbased_test.go | 6 +- 11 files changed, 85 insertions(+), 85 deletions(-) diff --git a/cluster-autoscaler/core/autoscaler.go b/cluster-autoscaler/core/autoscaler.go index 229a6104c877..225bf4776da1 100644 --- a/cluster-autoscaler/core/autoscaler.go +++ b/cluster-autoscaler/core/autoscaler.go @@ -127,7 +127,7 @@ func initializeDefaultOptions(opts *AutoscalerOptions, informerFactory informers opts.FrameworkHandle = fwHandle } if opts.ClusterSnapshot == nil { - opts.ClusterSnapshot = predicate.NewPredicateSnapshot(store.NewBasicClusterSnapshot(), opts.FrameworkHandle) + opts.ClusterSnapshot = predicate.NewPredicateSnapshot(store.NewBasicSnapshotStore(), opts.FrameworkHandle) } if opts.RemainingPdbTracker == nil { opts.RemainingPdbTracker = pdb.NewBasicRemainingPdbTracker() diff --git a/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go b/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go index 361211f6a83a..a811a438b8c4 100644 --- a/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go +++ b/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go @@ -253,10 +253,10 @@ func BenchmarkFilterOutSchedulable(b *testing.B) { } snapshots := map[string]func() clustersnapshot.ClusterSnapshot{ "basic": func() clustersnapshot.ClusterSnapshot { - return testsnapshot.NewCustomTestSnapshotOrDie(b, store.NewBasicClusterSnapshot()) + return testsnapshot.NewCustomTestSnapshotOrDie(b, store.NewBasicSnapshotStore()) }, "delta": func() clustersnapshot.ClusterSnapshot { - return testsnapshot.NewCustomTestSnapshotOrDie(b, store.NewDeltaClusterSnapshot()) + return testsnapshot.NewCustomTestSnapshotOrDie(b, 
store.NewDeltaSnapshotStore()) }, } for snapshotName, snapshotFactory := range snapshots { diff --git a/cluster-autoscaler/core/scaledown/actuation/actuator.go b/cluster-autoscaler/core/scaledown/actuation/actuator.go index f6aa898afa71..14b0382359b7 100644 --- a/cluster-autoscaler/core/scaledown/actuation/actuator.go +++ b/cluster-autoscaler/core/scaledown/actuation/actuator.go @@ -358,7 +358,7 @@ func (a *Actuator) taintNode(node *apiv1.Node) error { } func (a *Actuator) createSnapshot(nodes []*apiv1.Node) (clustersnapshot.ClusterSnapshot, error) { - snapshot := predicate.NewPredicateSnapshot(store.NewBasicClusterSnapshot(), a.ctx.FrameworkHandle) + snapshot := predicate.NewPredicateSnapshot(store.NewBasicSnapshotStore(), a.ctx.FrameworkHandle) pods, err := a.ctx.AllPodLister().List() if err != nil { return nil, err diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index bc9235e6be3a..23efbc54edfb 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -505,7 +505,7 @@ func buildAutoscaler(context ctx.Context, debuggingSnapshotter debuggingsnapshot opts := core.AutoscalerOptions{ AutoscalingOptions: autoscalingOptions, FrameworkHandle: fwHandle, - ClusterSnapshot: predicate.NewPredicateSnapshot(store.NewDeltaClusterSnapshot(), fwHandle), + ClusterSnapshot: predicate.NewPredicateSnapshot(store.NewDeltaSnapshotStore(), fwHandle), KubeClient: kubeClient, InformerFactory: informerFactory, DebuggingSnapshotter: debuggingSnapshotter, diff --git a/cluster-autoscaler/processors/podinjection/pod_injection_processor_test.go b/cluster-autoscaler/processors/podinjection/pod_injection_processor_test.go index 3bdf0374e1b2..5ef275fd3dcf 100644 --- a/cluster-autoscaler/processors/podinjection/pod_injection_processor_test.go +++ b/cluster-autoscaler/processors/podinjection/pod_injection_processor_test.go @@ -114,7 +114,7 @@ func TestTargetCountInjectionPodListProcessor(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { p := NewPodInjectionPodListProcessor(podinjectionbackoff.NewFakePodControllerRegistry()) - clusterSnapshot := testsnapshot.NewCustomTestSnapshotOrDie(t, store.NewDeltaClusterSnapshot()) + clusterSnapshot := testsnapshot.NewCustomTestSnapshotOrDie(t, store.NewDeltaSnapshotStore()) err := clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(node, tc.scheduledPods...)) assert.NoError(t, err) ctx := context.AutoscalingContext{ diff --git a/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot_test.go b/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot_test.go index f7ea217be590..1cb7036cb844 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot_test.go +++ b/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot_test.go @@ -38,14 +38,14 @@ var snapshots = map[string]func() (clustersnapshot.ClusterSnapshot, error){ if err != nil { return nil, err } - return NewPredicateSnapshot(store.NewBasicClusterSnapshot(), fwHandle), nil + return NewPredicateSnapshot(store.NewBasicSnapshotStore(), fwHandle), nil }, "delta": func() (clustersnapshot.ClusterSnapshot, error) { fwHandle, err := framework.NewTestFrameworkHandle() if err != nil { return nil, err } - return NewPredicateSnapshot(store.NewDeltaClusterSnapshot(), fwHandle), nil + return NewPredicateSnapshot(store.NewDeltaSnapshotStore(), fwHandle), nil }, } diff --git a/cluster-autoscaler/simulator/clustersnapshot/store/basic.go 
b/cluster-autoscaler/simulator/clustersnapshot/store/basic.go index bc9032a5e2bf..6a0159a2e5d9 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/store/basic.go +++ b/cluster-autoscaler/simulator/clustersnapshot/store/basic.go @@ -26,9 +26,9 @@ import ( schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" ) -// BasicClusterSnapshot is simple, reference implementation of ClusterSnapshot. +// BasicSnapshotStore is simple, reference implementation of ClusterSnapshotStore. // It is inefficient. But hopefully bug-free and good for initial testing. -type BasicClusterSnapshot struct { +type BasicSnapshotStore struct { data []*internalBasicSnapshotData } @@ -194,19 +194,19 @@ func (data *internalBasicSnapshotData) removePod(namespace, podName, nodeName st return fmt.Errorf("pod %s/%s not in snapshot", namespace, podName) } -// NewBasicClusterSnapshot creates instances of BasicClusterSnapshot. -func NewBasicClusterSnapshot() *BasicClusterSnapshot { - snapshot := &BasicClusterSnapshot{} +// NewBasicSnapshotStore creates instances of BasicSnapshotStore. +func NewBasicSnapshotStore() *BasicSnapshotStore { + snapshot := &BasicSnapshotStore{} snapshot.clear() return snapshot } -func (snapshot *BasicClusterSnapshot) getInternalData() *internalBasicSnapshotData { +func (snapshot *BasicSnapshotStore) getInternalData() *internalBasicSnapshotData { return snapshot.data[len(snapshot.data)-1] } // GetNodeInfo gets a NodeInfo. -func (snapshot *BasicClusterSnapshot) GetNodeInfo(nodeName string) (*framework.NodeInfo, error) { +func (snapshot *BasicSnapshotStore) GetNodeInfo(nodeName string) (*framework.NodeInfo, error) { schedNodeInfo, err := snapshot.getInternalData().getNodeInfo(nodeName) if err != nil { return nil, err @@ -215,13 +215,13 @@ func (snapshot *BasicClusterSnapshot) GetNodeInfo(nodeName string) (*framework.N } // ListNodeInfos lists NodeInfos. -func (snapshot *BasicClusterSnapshot) ListNodeInfos() ([]*framework.NodeInfo, error) { +func (snapshot *BasicSnapshotStore) ListNodeInfos() ([]*framework.NodeInfo, error) { schedNodeInfos := snapshot.getInternalData().listNodeInfos() return framework.WrapSchedulerNodeInfos(schedNodeInfos), nil } // AddNodeInfo adds a NodeInfo. -func (snapshot *BasicClusterSnapshot) AddNodeInfo(nodeInfo *framework.NodeInfo) error { +func (snapshot *BasicSnapshotStore) AddNodeInfo(nodeInfo *framework.NodeInfo) error { if err := snapshot.getInternalData().addNode(nodeInfo.Node()); err != nil { return err } @@ -234,7 +234,7 @@ func (snapshot *BasicClusterSnapshot) AddNodeInfo(nodeInfo *framework.NodeInfo) } // SetClusterState sets the cluster state. -func (snapshot *BasicClusterSnapshot) SetClusterState(nodes []*apiv1.Node, scheduledPods []*apiv1.Pod) error { +func (snapshot *BasicSnapshotStore) SetClusterState(nodes []*apiv1.Node, scheduledPods []*apiv1.Pod) error { snapshot.clear() knownNodes := make(map[string]bool) @@ -255,33 +255,33 @@ func (snapshot *BasicClusterSnapshot) SetClusterState(nodes []*apiv1.Node, sched } // RemoveNodeInfo removes nodes (and pods scheduled to it) from the snapshot. -func (snapshot *BasicClusterSnapshot) RemoveNodeInfo(nodeName string) error { +func (snapshot *BasicSnapshotStore) RemoveNodeInfo(nodeName string) error { return snapshot.getInternalData().removeNodeInfo(nodeName) } // ForceAddPod adds pod to the snapshot and schedules it to given node. 
-func (snapshot *BasicClusterSnapshot) ForceAddPod(pod *apiv1.Pod, nodeName string) error { +func (snapshot *BasicSnapshotStore) ForceAddPod(pod *apiv1.Pod, nodeName string) error { return snapshot.getInternalData().addPod(pod, nodeName) } // ForceRemovePod removes pod from the snapshot. -func (snapshot *BasicClusterSnapshot) ForceRemovePod(namespace, podName, nodeName string) error { +func (snapshot *BasicSnapshotStore) ForceRemovePod(namespace, podName, nodeName string) error { return snapshot.getInternalData().removePod(namespace, podName, nodeName) } // IsPVCUsedByPods returns if the pvc is used by any pod -func (snapshot *BasicClusterSnapshot) IsPVCUsedByPods(key string) bool { +func (snapshot *BasicSnapshotStore) IsPVCUsedByPods(key string) bool { return snapshot.getInternalData().isPVCUsedByPods(key) } // Fork creates a fork of snapshot state. All modifications can later be reverted to moment of forking via Revert() -func (snapshot *BasicClusterSnapshot) Fork() { +func (snapshot *BasicSnapshotStore) Fork() { forkData := snapshot.getInternalData().clone() snapshot.data = append(snapshot.data, forkData) } // Revert reverts snapshot state to moment of forking. -func (snapshot *BasicClusterSnapshot) Revert() { +func (snapshot *BasicSnapshotStore) Revert() { if len(snapshot.data) == 1 { return } @@ -289,7 +289,7 @@ func (snapshot *BasicClusterSnapshot) Revert() { } // Commit commits changes done after forking. -func (snapshot *BasicClusterSnapshot) Commit() error { +func (snapshot *BasicSnapshotStore) Commit() error { if len(snapshot.data) <= 1 { // do nothing return nil @@ -299,47 +299,47 @@ func (snapshot *BasicClusterSnapshot) Commit() error { } // clear reset cluster snapshot to empty, unforked state -func (snapshot *BasicClusterSnapshot) clear() { +func (snapshot *BasicSnapshotStore) clear() { baseData := newInternalBasicSnapshotData() snapshot.data = []*internalBasicSnapshotData{baseData} } // implementation of SharedLister interface -type basicClusterSnapshotNodeLister BasicClusterSnapshot -type basicClusterSnapshotStorageLister BasicClusterSnapshot +type basicSnapshotStoreNodeLister BasicSnapshotStore +type basicSnapshotStoreStorageLister BasicSnapshotStore // NodeInfos exposes snapshot as NodeInfoLister. -func (snapshot *BasicClusterSnapshot) NodeInfos() schedulerframework.NodeInfoLister { - return (*basicClusterSnapshotNodeLister)(snapshot) +func (snapshot *BasicSnapshotStore) NodeInfos() schedulerframework.NodeInfoLister { + return (*basicSnapshotStoreNodeLister)(snapshot) } // StorageInfos exposes snapshot as StorageInfoLister. -func (snapshot *BasicClusterSnapshot) StorageInfos() schedulerframework.StorageInfoLister { - return (*basicClusterSnapshotStorageLister)(snapshot) +func (snapshot *BasicSnapshotStore) StorageInfos() schedulerframework.StorageInfoLister { + return (*basicSnapshotStoreStorageLister)(snapshot) } // List returns the list of nodes in the snapshot. 
-func (snapshot *basicClusterSnapshotNodeLister) List() ([]*schedulerframework.NodeInfo, error) { - return (*BasicClusterSnapshot)(snapshot).getInternalData().listNodeInfos(), nil +func (snapshot *basicSnapshotStoreNodeLister) List() ([]*schedulerframework.NodeInfo, error) { + return (*BasicSnapshotStore)(snapshot).getInternalData().listNodeInfos(), nil } // HavePodsWithAffinityList returns the list of nodes with at least one pods with inter-pod affinity -func (snapshot *basicClusterSnapshotNodeLister) HavePodsWithAffinityList() ([]*schedulerframework.NodeInfo, error) { - return (*BasicClusterSnapshot)(snapshot).getInternalData().listNodeInfosThatHavePodsWithAffinityList() +func (snapshot *basicSnapshotStoreNodeLister) HavePodsWithAffinityList() ([]*schedulerframework.NodeInfo, error) { + return (*BasicSnapshotStore)(snapshot).getInternalData().listNodeInfosThatHavePodsWithAffinityList() } // HavePodsWithRequiredAntiAffinityList returns the list of NodeInfos of nodes with pods with required anti-affinity terms. -func (snapshot *basicClusterSnapshotNodeLister) HavePodsWithRequiredAntiAffinityList() ([]*schedulerframework.NodeInfo, error) { - return (*BasicClusterSnapshot)(snapshot).getInternalData().listNodeInfosThatHavePodsWithRequiredAntiAffinityList() +func (snapshot *basicSnapshotStoreNodeLister) HavePodsWithRequiredAntiAffinityList() ([]*schedulerframework.NodeInfo, error) { + return (*BasicSnapshotStore)(snapshot).getInternalData().listNodeInfosThatHavePodsWithRequiredAntiAffinityList() } // Returns the NodeInfo of the given node name. -func (snapshot *basicClusterSnapshotNodeLister) Get(nodeName string) (*schedulerframework.NodeInfo, error) { - return (*BasicClusterSnapshot)(snapshot).getInternalData().getNodeInfo(nodeName) +func (snapshot *basicSnapshotStoreNodeLister) Get(nodeName string) (*schedulerframework.NodeInfo, error) { + return (*BasicSnapshotStore)(snapshot).getInternalData().getNodeInfo(nodeName) } // Returns the IsPVCUsedByPods in a given key. -func (snapshot *basicClusterSnapshotStorageLister) IsPVCUsedByPods(key string) bool { - return (*BasicClusterSnapshot)(snapshot).getInternalData().isPVCUsedByPods(key) +func (snapshot *basicSnapshotStoreStorageLister) IsPVCUsedByPods(key string) bool { + return (*BasicSnapshotStore)(snapshot).getInternalData().isPVCUsedByPods(key) } diff --git a/cluster-autoscaler/simulator/clustersnapshot/store/delta.go b/cluster-autoscaler/simulator/clustersnapshot/store/delta.go index 6baf1b0ff6e9..63951104ecf0 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/store/delta.go +++ b/cluster-autoscaler/simulator/clustersnapshot/store/delta.go @@ -26,7 +26,7 @@ import ( schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" ) -// DeltaClusterSnapshot is an implementation of ClusterSnapshot optimized for typical Cluster Autoscaler usage - (fork, add stuff, revert), repeated many times per loop. +// DeltaSnapshotStore is an implementation of ClusterSnapshotStore optimized for typical Cluster Autoscaler usage - (fork, add stuff, revert), repeated many times per loop. // // Complexity of some notable operations: // @@ -43,12 +43,12 @@ import ( // (when forked affects delta, but not base.) // pod affinity - causes scheduler framework to list pods with non-empty selector, // so basic caching doesn't help. 
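[Editorial note — not part of the patch] The doc comment above describes the fork/add-stuff/revert cycle that DeltaSnapshotStore (renamed from DeltaClusterSnapshot in this commit) is optimized for. A minimal sketch of that cycle, using only calls that appear in this series (NewDeltaSnapshotStore, SetClusterState, Fork, AddNodeInfo, Revert, framework.NewTestNodeInfo, clustersnapshot.CreateTestNodes); the node counts and variable names are illustrative:

    package main

    import (
        "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot"
        "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/store"
        "k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
    )

    func main() {
        deltaStore := store.NewDeltaSnapshotStore()
        // Seed the base cluster state once.
        base := clustersnapshot.CreateTestNodes(100)
        if err := deltaStore.SetClusterState(base, nil); err != nil {
            panic(err)
        }
        // Fork (O(1)), simulate adding nodes on top of the base state,
        // then throw the simulation away with Revert (O(1)).
        deltaStore.Fork()
        for _, n := range clustersnapshot.CreateTestNodes(10) {
            if err := deltaStore.AddNodeInfo(framework.NewTestNodeInfo(n)); err != nil {
                panic(err)
            }
        }
        deltaStore.Revert()
    }

This is the pattern the autoscaler repeats many times per loop, which is why Fork and Revert are constant-time here while Commit is proportional to the delta size.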
-type DeltaClusterSnapshot struct { +type DeltaSnapshotStore struct { data *internalDeltaSnapshotData } -type deltaSnapshotNodeLister DeltaClusterSnapshot -type deltaSnapshotStorageLister DeltaClusterSnapshot +type deltaSnapshotStoreNodeLister DeltaSnapshotStore +type deltaSnapshotStoreStorageLister DeltaSnapshotStore type internalDeltaSnapshotData struct { baseData *internalDeltaSnapshotData @@ -318,12 +318,12 @@ func (data *internalDeltaSnapshotData) commit() (*internalDeltaSnapshotData, err } // List returns list of all node infos. -func (snapshot *deltaSnapshotNodeLister) List() ([]*schedulerframework.NodeInfo, error) { +func (snapshot *deltaSnapshotStoreNodeLister) List() ([]*schedulerframework.NodeInfo, error) { return snapshot.data.getNodeInfoList(), nil } // HavePodsWithAffinityList returns list of all node infos with pods that have affinity constrints. -func (snapshot *deltaSnapshotNodeLister) HavePodsWithAffinityList() ([]*schedulerframework.NodeInfo, error) { +func (snapshot *deltaSnapshotStoreNodeLister) HavePodsWithAffinityList() ([]*schedulerframework.NodeInfo, error) { data := snapshot.data if data.havePodsWithAffinity != nil { return data.havePodsWithAffinity, nil @@ -341,7 +341,7 @@ func (snapshot *deltaSnapshotNodeLister) HavePodsWithAffinityList() ([]*schedule } // HavePodsWithRequiredAntiAffinityList returns the list of NodeInfos of nodes with pods with required anti-affinity terms. -func (snapshot *deltaSnapshotNodeLister) HavePodsWithRequiredAntiAffinityList() ([]*schedulerframework.NodeInfo, error) { +func (snapshot *deltaSnapshotStoreNodeLister) HavePodsWithRequiredAntiAffinityList() ([]*schedulerframework.NodeInfo, error) { data := snapshot.data if data.havePodsWithRequiredAntiAffinity != nil { return data.havePodsWithRequiredAntiAffinity, nil @@ -359,16 +359,16 @@ func (snapshot *deltaSnapshotNodeLister) HavePodsWithRequiredAntiAffinityList() } // Get returns node info by node name. -func (snapshot *deltaSnapshotNodeLister) Get(nodeName string) (*schedulerframework.NodeInfo, error) { - return (*DeltaClusterSnapshot)(snapshot).getNodeInfo(nodeName) +func (snapshot *deltaSnapshotStoreNodeLister) Get(nodeName string) (*schedulerframework.NodeInfo, error) { + return (*DeltaSnapshotStore)(snapshot).getNodeInfo(nodeName) } // IsPVCUsedByPods returns if PVC is used by pods -func (snapshot *deltaSnapshotStorageLister) IsPVCUsedByPods(key string) bool { - return (*DeltaClusterSnapshot)(snapshot).IsPVCUsedByPods(key) +func (snapshot *deltaSnapshotStoreStorageLister) IsPVCUsedByPods(key string) bool { + return (*DeltaSnapshotStore)(snapshot).IsPVCUsedByPods(key) } -func (snapshot *DeltaClusterSnapshot) getNodeInfo(nodeName string) (*schedulerframework.NodeInfo, error) { +func (snapshot *DeltaSnapshotStore) getNodeInfo(nodeName string) (*schedulerframework.NodeInfo, error) { data := snapshot.data node, found := data.getNodeInfo(nodeName) if !found { @@ -378,24 +378,24 @@ func (snapshot *DeltaClusterSnapshot) getNodeInfo(nodeName string) (*schedulerfr } // NodeInfos returns node lister. 
-func (snapshot *DeltaClusterSnapshot) NodeInfos() schedulerframework.NodeInfoLister { - return (*deltaSnapshotNodeLister)(snapshot) +func (snapshot *DeltaSnapshotStore) NodeInfos() schedulerframework.NodeInfoLister { + return (*deltaSnapshotStoreNodeLister)(snapshot) } // StorageInfos returns storage lister -func (snapshot *DeltaClusterSnapshot) StorageInfos() schedulerframework.StorageInfoLister { - return (*deltaSnapshotStorageLister)(snapshot) +func (snapshot *DeltaSnapshotStore) StorageInfos() schedulerframework.StorageInfoLister { + return (*deltaSnapshotStoreStorageLister)(snapshot) } -// NewDeltaClusterSnapshot creates instances of DeltaClusterSnapshot. -func NewDeltaClusterSnapshot() *DeltaClusterSnapshot { - snapshot := &DeltaClusterSnapshot{} +// NewDeltaSnapshotStore creates instances of DeltaSnapshotStore. +func NewDeltaSnapshotStore() *DeltaSnapshotStore { + snapshot := &DeltaSnapshotStore{} snapshot.clear() return snapshot } // GetNodeInfo gets a NodeInfo. -func (snapshot *DeltaClusterSnapshot) GetNodeInfo(nodeName string) (*framework.NodeInfo, error) { +func (snapshot *DeltaSnapshotStore) GetNodeInfo(nodeName string) (*framework.NodeInfo, error) { schedNodeInfo, err := snapshot.getNodeInfo(nodeName) if err != nil { return nil, err @@ -404,13 +404,13 @@ func (snapshot *DeltaClusterSnapshot) GetNodeInfo(nodeName string) (*framework.N } // ListNodeInfos lists NodeInfos. -func (snapshot *DeltaClusterSnapshot) ListNodeInfos() ([]*framework.NodeInfo, error) { +func (snapshot *DeltaSnapshotStore) ListNodeInfos() ([]*framework.NodeInfo, error) { schedNodeInfos := snapshot.data.getNodeInfoList() return framework.WrapSchedulerNodeInfos(schedNodeInfos), nil } // AddNodeInfo adds a NodeInfo. -func (snapshot *DeltaClusterSnapshot) AddNodeInfo(nodeInfo *framework.NodeInfo) error { +func (snapshot *DeltaSnapshotStore) AddNodeInfo(nodeInfo *framework.NodeInfo) error { if err := snapshot.data.addNode(nodeInfo.Node()); err != nil { return err } @@ -423,7 +423,7 @@ func (snapshot *DeltaClusterSnapshot) AddNodeInfo(nodeInfo *framework.NodeInfo) } // SetClusterState sets the cluster state. -func (snapshot *DeltaClusterSnapshot) SetClusterState(nodes []*apiv1.Node, scheduledPods []*apiv1.Pod) error { +func (snapshot *DeltaSnapshotStore) SetClusterState(nodes []*apiv1.Node, scheduledPods []*apiv1.Pod) error { snapshot.clear() knownNodes := make(map[string]bool) @@ -444,34 +444,34 @@ func (snapshot *DeltaClusterSnapshot) SetClusterState(nodes []*apiv1.Node, sched } // RemoveNodeInfo removes nodes (and pods scheduled to it) from the snapshot. -func (snapshot *DeltaClusterSnapshot) RemoveNodeInfo(nodeName string) error { +func (snapshot *DeltaSnapshotStore) RemoveNodeInfo(nodeName string) error { return snapshot.data.removeNodeInfo(nodeName) } // ForceAddPod adds pod to the snapshot and schedules it to given node. -func (snapshot *DeltaClusterSnapshot) ForceAddPod(pod *apiv1.Pod, nodeName string) error { +func (snapshot *DeltaSnapshotStore) ForceAddPod(pod *apiv1.Pod, nodeName string) error { return snapshot.data.addPod(pod, nodeName) } // ForceRemovePod removes pod from the snapshot. 
-func (snapshot *DeltaClusterSnapshot) ForceRemovePod(namespace, podName, nodeName string) error { +func (snapshot *DeltaSnapshotStore) ForceRemovePod(namespace, podName, nodeName string) error { return snapshot.data.removePod(namespace, podName, nodeName) } // IsPVCUsedByPods returns if the pvc is used by any pod -func (snapshot *DeltaClusterSnapshot) IsPVCUsedByPods(key string) bool { +func (snapshot *DeltaSnapshotStore) IsPVCUsedByPods(key string) bool { return snapshot.data.isPVCUsedByPods(key) } // Fork creates a fork of snapshot state. All modifications can later be reverted to moment of forking via Revert() // Time: O(1) -func (snapshot *DeltaClusterSnapshot) Fork() { +func (snapshot *DeltaSnapshotStore) Fork() { snapshot.data = snapshot.data.fork() } // Revert reverts snapshot state to moment of forking. // Time: O(1) -func (snapshot *DeltaClusterSnapshot) Revert() { +func (snapshot *DeltaSnapshotStore) Revert() { if snapshot.data.baseData != nil { snapshot.data = snapshot.data.baseData } @@ -479,7 +479,7 @@ func (snapshot *DeltaClusterSnapshot) Revert() { // Commit commits changes done after forking. // Time: O(n), where n = size of delta (number of nodes added, modified or deleted since forking) -func (snapshot *DeltaClusterSnapshot) Commit() error { +func (snapshot *DeltaSnapshotStore) Commit() error { newData, err := snapshot.data.commit() if err != nil { return err @@ -490,6 +490,6 @@ func (snapshot *DeltaClusterSnapshot) Commit() error { // Clear reset cluster snapshot to empty, unforked state // Time: O(1) -func (snapshot *DeltaClusterSnapshot) clear() { +func (snapshot *DeltaSnapshotStore) clear() { snapshot.data = newInternalDeltaSnapshotData() } diff --git a/cluster-autoscaler/simulator/clustersnapshot/store/delta_benchmark_test.go b/cluster-autoscaler/simulator/clustersnapshot/store/delta_benchmark_test.go index efef5aaff92a..9fd9765733a9 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/store/delta_benchmark_test.go +++ b/cluster-autoscaler/simulator/clustersnapshot/store/delta_benchmark_test.go @@ -47,19 +47,19 @@ func BenchmarkBuildNodeInfoList(b *testing.B) { for _, tc := range testCases { b.Run(fmt.Sprintf("fork add 1000 to %d", tc.nodeCount), func(b *testing.B) { nodes := clustersnapshot.CreateTestNodes(tc.nodeCount + 1000) - clusterSnapshot := NewDeltaClusterSnapshot() - if err := clusterSnapshot.SetClusterState(nodes[:tc.nodeCount], nil); err != nil { + deltaStore := NewDeltaSnapshotStore() + if err := deltaStore.SetClusterState(nodes[:tc.nodeCount], nil); err != nil { assert.NoError(b, err) } - clusterSnapshot.Fork() + deltaStore.Fork() for _, node := range nodes[tc.nodeCount:] { - if err := clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(node)); err != nil { + if err := deltaStore.AddNodeInfo(framework.NewTestNodeInfo(node)); err != nil { assert.NoError(b, err) } } b.ResetTimer() for i := 0; i < b.N; i++ { - list := clusterSnapshot.data.buildNodeInfoList() + list := deltaStore.data.buildNodeInfoList() assert.Equal(b, tc.nodeCount+1000, len(list)) } }) @@ -67,13 +67,13 @@ func BenchmarkBuildNodeInfoList(b *testing.B) { for _, tc := range testCases { b.Run(fmt.Sprintf("base %d", tc.nodeCount), func(b *testing.B) { nodes := clustersnapshot.CreateTestNodes(tc.nodeCount) - clusterSnapshot := NewDeltaClusterSnapshot() - if err := clusterSnapshot.SetClusterState(nodes, nil); err != nil { + deltaStore := NewDeltaSnapshotStore() + if err := deltaStore.SetClusterState(nodes, nil); err != nil { assert.NoError(b, err) } b.ResetTimer() for i := 0; i < b.N; i++ 
{ - list := clusterSnapshot.data.buildNodeInfoList() + list := deltaStore.data.buildNodeInfoList() assert.Equal(b, tc.nodeCount, len(list)) } }) diff --git a/cluster-autoscaler/simulator/clustersnapshot/testsnapshot/test_snapshot.go b/cluster-autoscaler/simulator/clustersnapshot/testsnapshot/test_snapshot.go index 2854447b9294..c974e38c71de 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/testsnapshot/test_snapshot.go +++ b/cluster-autoscaler/simulator/clustersnapshot/testsnapshot/test_snapshot.go @@ -34,7 +34,7 @@ func NewTestSnapshot() (clustersnapshot.ClusterSnapshot, error) { if err != nil { return nil, err } - return predicate.NewPredicateSnapshot(store.NewBasicClusterSnapshot(), testFwHandle), nil + return predicate.NewPredicateSnapshot(store.NewBasicSnapshotStore(), testFwHandle), nil } // NewTestSnapshotOrDie returns an instance of ClusterSnapshot that can be used in tests. diff --git a/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go b/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go index dc2682d927a2..5fa51f14b5ff 100644 --- a/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go +++ b/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go @@ -145,7 +145,7 @@ func TestCheckPredicate(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { var err error - clusterSnapshot := store.NewBasicClusterSnapshot() + clusterSnapshot := store.NewBasicSnapshotStore() err = clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(tt.node, tt.scheduledPods...)) assert.NoError(t, err) @@ -247,7 +247,7 @@ func TestFitsAnyNode(t *testing.T) { }, } - clusterSnapshot := store.NewBasicClusterSnapshot() + clusterSnapshot := store.NewBasicSnapshotStore() err = clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(n1000)) assert.NoError(t, err) err = clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(n2000)) @@ -284,7 +284,7 @@ func TestDebugInfo(t *testing.T) { } SetNodeReadyState(node1, true, time.Time{}) - clusterSnapshot := store.NewBasicClusterSnapshot() + clusterSnapshot := store.NewBasicSnapshotStore() err := clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(node1)) assert.NoError(t, err) From 68762892280dd0c3c98e9df1611949ab8e5ab3d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kuba=20Tu=C5=BCnik?= Date: Thu, 14 Nov 2024 15:06:41 +0100 Subject: [PATCH 7/8] CA: remove PredicateChecker, use the new ClusterSnapshot methods instead --- .../context/autoscaling_context.go | 6 -- cluster-autoscaler/core/autoscaler.go | 3 - .../podlistprocessor/filter_out_expendable.go | 1 + .../filter_out_schedulable.go | 5 +- .../filter_out_schedulable_test.go | 10 +-- .../podlistprocessor/pod_list_processor.go | 5 +- .../core/scaledown/planner/planner.go | 4 +- .../core/scaleup/orchestrator/orchestrator.go | 3 +- cluster-autoscaler/core/static_autoscaler.go | 3 - cluster-autoscaler/core/test/common.go | 6 -- .../estimator/binpacking_estimator.go | 62 +++++++-------- .../estimator/binpacking_estimator_test.go | 9 +-- cluster-autoscaler/estimator/estimator.go | 6 +- cluster-autoscaler/main.go | 6 +- .../mixed_nodeinfos_processor_test.go | 35 +++------ .../processors/provreq/processor.go | 5 +- cluster-autoscaler/processors/test/common.go | 2 +- .../orchestrator/orchestrator.go | 2 +- cluster-autoscaler/simulator/cluster.go | 10 +-- cluster-autoscaler/simulator/cluster_test.go | 7 +- .../simulator/predicatechecker/interface.go | 31 -------- .../predicatechecker/schedulerbased_test.go | 39 ++++++++-- 
.../simulator/predicatechecker/testchecker.go | 44 ----------- .../simulator/scheduling/hinting_simulator.go | 77 +++++++++++-------- .../scheduling/hinting_simulator_test.go | 9 +-- 25 files changed, 143 insertions(+), 247 deletions(-) delete mode 100644 cluster-autoscaler/simulator/predicatechecker/interface.go delete mode 100644 cluster-autoscaler/simulator/predicatechecker/testchecker.go diff --git a/cluster-autoscaler/context/autoscaling_context.go b/cluster-autoscaler/context/autoscaling_context.go index c0e3ef27226e..31e2af8f4fce 100644 --- a/cluster-autoscaler/context/autoscaling_context.go +++ b/cluster-autoscaler/context/autoscaling_context.go @@ -28,7 +28,6 @@ import ( processor_callbacks "k8s.io/autoscaler/cluster-autoscaler/processors/callbacks" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" "k8s.io/client-go/informers" kube_client "k8s.io/client-go/kubernetes" @@ -47,9 +46,6 @@ type AutoscalingContext struct { CloudProvider cloudprovider.CloudProvider // FrameworkHandle can be used to interact with the scheduler framework. FrameworkHandle *framework.Handle - // TODO(kgolab) - move away too as it's not config - // PredicateChecker to check if a pod can fit into a node. - PredicateChecker predicatechecker.PredicateChecker // ClusterSnapshot denotes cluster snapshot used for predicate checking. ClusterSnapshot clustersnapshot.ClusterSnapshot // ExpanderStrategy is the strategy used to choose which node group to expand when scaling up @@ -104,7 +100,6 @@ func NewResourceLimiterFromAutoscalingOptions(options config.AutoscalingOptions) func NewAutoscalingContext( options config.AutoscalingOptions, fwHandle *framework.Handle, - predicateChecker predicatechecker.PredicateChecker, clusterSnapshot clustersnapshot.ClusterSnapshot, autoscalingKubeClients *AutoscalingKubeClients, cloudProvider cloudprovider.CloudProvider, @@ -119,7 +114,6 @@ func NewAutoscalingContext( CloudProvider: cloudProvider, AutoscalingKubeClients: *autoscalingKubeClients, FrameworkHandle: fwHandle, - PredicateChecker: predicateChecker, ClusterSnapshot: clusterSnapshot, ExpanderStrategy: expanderStrategy, ProcessorCallbacks: processorCallbacks, diff --git a/cluster-autoscaler/core/autoscaler.go b/cluster-autoscaler/core/autoscaler.go index 225bf4776da1..352ceaabfc9c 100644 --- a/cluster-autoscaler/core/autoscaler.go +++ b/cluster-autoscaler/core/autoscaler.go @@ -38,7 +38,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" - "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" "k8s.io/autoscaler/cluster-autoscaler/utils/backoff" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" "k8s.io/client-go/informers" @@ -53,7 +52,6 @@ type AutoscalerOptions struct { AutoscalingKubeClients *context.AutoscalingKubeClients CloudProvider cloudprovider.CloudProvider FrameworkHandle *framework.Handle - PredicateChecker predicatechecker.PredicateChecker ClusterSnapshot clustersnapshot.ClusterSnapshot ExpanderStrategy expander.Strategy EstimatorBuilder estimator.EstimatorBuilder @@ -91,7 +89,6 @@ func NewAutoscaler(opts AutoscalerOptions, informerFactory informers.SharedInfor return NewStaticAutoscaler( opts.AutoscalingOptions, opts.FrameworkHandle, - 
opts.PredicateChecker, opts.ClusterSnapshot, opts.AutoscalingKubeClients, opts.Processors, diff --git a/cluster-autoscaler/core/podlistprocessor/filter_out_expendable.go b/cluster-autoscaler/core/podlistprocessor/filter_out_expendable.go index 550f8a10520f..faba2261ba60 100644 --- a/cluster-autoscaler/core/podlistprocessor/filter_out_expendable.go +++ b/cluster-autoscaler/core/podlistprocessor/filter_out_expendable.go @@ -56,6 +56,7 @@ func (p *filterOutExpendable) Process(context *context.AutoscalingContext, pods // CA logic from before migration to scheduler framework. So let's keep it for now func (p *filterOutExpendable) addPreemptingPodsToSnapshot(pods []*apiv1.Pod, ctx *context.AutoscalingContext) error { for _, p := range pods { + // TODO(DRA): Figure out if/how to use the predicate-checking SchedulePod() here instead - otherwise this doesn't work with DRA pods. if err := ctx.ClusterSnapshot.ForceAddPod(p, p.Status.NominatedNodeName); err != nil { klog.Errorf("Failed to update snapshot with pod %s/%s waiting for preemption: %v", p.Namespace, p.Name, err) return caerrors.ToAutoscalerError(caerrors.InternalError, err) diff --git a/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable.go b/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable.go index f56fb19d98c0..3283cdeda2d8 100644 --- a/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable.go +++ b/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable.go @@ -26,7 +26,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/metrics" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" "k8s.io/autoscaler/cluster-autoscaler/simulator/scheduling" corev1helpers "k8s.io/component-helpers/scheduling/corev1" klog "k8s.io/klog/v2" @@ -38,9 +37,9 @@ type filterOutSchedulablePodListProcessor struct { } // NewFilterOutSchedulablePodListProcessor creates a PodListProcessor filtering out schedulable pods -func NewFilterOutSchedulablePodListProcessor(predicateChecker predicatechecker.PredicateChecker, nodeFilter func(*framework.NodeInfo) bool) *filterOutSchedulablePodListProcessor { +func NewFilterOutSchedulablePodListProcessor(nodeFilter func(*framework.NodeInfo) bool) *filterOutSchedulablePodListProcessor { return &filterOutSchedulablePodListProcessor{ - schedulingSimulator: scheduling.NewHintingSimulator(predicateChecker), + schedulingSimulator: scheduling.NewHintingSimulator(), nodeFilter: nodeFilter, } } diff --git a/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go b/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go index a811a438b8c4..ced543db51b2 100644 --- a/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go +++ b/cluster-autoscaler/core/podlistprocessor/filter_out_schedulable_test.go @@ -28,7 +28,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/store" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" "k8s.io/autoscaler/cluster-autoscaler/simulator/scheduling" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" ) @@ -176,8 +175,6 @@ func TestFilterOutSchedulable(t *testing.T) { for tn, tc := range testCases { t.Run(tn, func(t *testing.T) { clusterSnapshot := testsnapshot.NewTestSnapshotOrDie(t) - predicateChecker, err := predicatechecker.NewTestPredicateChecker() - assert.NoError(t, err) var allExpectedScheduledPods []*apiv1.Pod allExpectedScheduledPods = append(allExpectedScheduledPods, tc.expectedScheduledPods...) @@ -193,7 +190,7 @@ func TestFilterOutSchedulable(t *testing.T) { clusterSnapshot.Fork() - processor := NewFilterOutSchedulablePodListProcessor(predicateChecker, tc.nodeFilter) + processor := NewFilterOutSchedulablePodListProcessor(tc.nodeFilter) unschedulablePods, err := processor.filterOutSchedulableByPacking(tc.unschedulableCandidates, clusterSnapshot) assert.NoError(t, err) @@ -282,9 +279,6 @@ func BenchmarkFilterOutSchedulable(b *testing.B) { } } - predicateChecker, err := predicatechecker.NewTestPredicateChecker() - assert.NoError(b, err) - clusterSnapshot := snapshotFactory() if err := clusterSnapshot.SetClusterState(nodes, scheduledPods); err != nil { assert.NoError(b, err) @@ -293,7 +287,7 @@ func BenchmarkFilterOutSchedulable(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - processor := NewFilterOutSchedulablePodListProcessor(predicateChecker, scheduling.ScheduleAnywhere) + processor := NewFilterOutSchedulablePodListProcessor(scheduling.ScheduleAnywhere) if stillPending, err := processor.filterOutSchedulableByPacking(pendingPods, clusterSnapshot); err != nil { assert.NoError(b, err) } else if len(stillPending) < tc.pendingPods { diff --git a/cluster-autoscaler/core/podlistprocessor/pod_list_processor.go b/cluster-autoscaler/core/podlistprocessor/pod_list_processor.go index 9557b134c2cc..cff1f6587d04 100644 --- a/cluster-autoscaler/core/podlistprocessor/pod_list_processor.go +++ b/cluster-autoscaler/core/podlistprocessor/pod_list_processor.go @@ -19,17 +19,16 @@ package podlistprocessor import ( "k8s.io/autoscaler/cluster-autoscaler/processors/pods" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" ) // NewDefaultPodListProcessor returns a default implementation of the pod list // processor, which wraps and sequentially runs other sub-processors. 
-func NewDefaultPodListProcessor(predicateChecker predicatechecker.PredicateChecker, nodeFilter func(*framework.NodeInfo) bool) *pods.CombinedPodListProcessor { +func NewDefaultPodListProcessor(nodeFilter func(*framework.NodeInfo) bool) *pods.CombinedPodListProcessor { return pods.NewCombinedPodListProcessor([]pods.PodListProcessor{ NewClearTPURequestsPodListProcessor(), NewFilterOutExpendablePodListProcessor(), NewCurrentlyDrainedNodesPodListProcessor(), - NewFilterOutSchedulablePodListProcessor(predicateChecker, nodeFilter), + NewFilterOutSchedulablePodListProcessor(nodeFilter), NewFilterOutDaemonSetPodListProcessor(), }) } diff --git a/cluster-autoscaler/core/scaledown/planner/planner.go b/cluster-autoscaler/core/scaledown/planner/planner.go index 2898e240cb05..32be506ca7ab 100644 --- a/cluster-autoscaler/core/scaledown/planner/planner.go +++ b/cluster-autoscaler/core/scaledown/planner/planner.go @@ -89,8 +89,8 @@ func New(context *context.AutoscalingContext, processors *processors.Autoscaling context: context, unremovableNodes: unremovable.NewNodes(), unneededNodes: unneeded.NewNodes(processors.NodeGroupConfigProcessor, resourceLimitsFinder), - rs: simulator.NewRemovalSimulator(context.ListerRegistry, context.ClusterSnapshot, context.PredicateChecker, deleteOptions, drainabilityRules, true), - actuationInjector: scheduling.NewHintingSimulator(context.PredicateChecker), + rs: simulator.NewRemovalSimulator(context.ListerRegistry, context.ClusterSnapshot, deleteOptions, drainabilityRules, true), + actuationInjector: scheduling.NewHintingSimulator(), eligibilityChecker: eligibility.NewChecker(processors.NodeGroupConfigProcessor), nodeUtilizationMap: make(map[string]utilization.Info), resourceLimitsFinder: resourceLimitsFinder, diff --git a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go index 3b27f6b467a4..133804084d4c 100644 --- a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go +++ b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go @@ -465,7 +465,6 @@ func (o *ScaleUpOrchestrator) ComputeExpansionOption( estimateStart := time.Now() expansionEstimator := o.estimatorBuilder( - o.autoscalingContext.PredicateChecker, o.autoscalingContext.ClusterSnapshot, estimator.NewEstimationContext(o.autoscalingContext.MaxNodesTotal, option.SimilarNodeGroups, currentNodeCount), ) @@ -577,7 +576,7 @@ func (o *ScaleUpOrchestrator) SchedulablePodGroups( var schedulablePodGroups []estimator.PodEquivalenceGroup for _, eg := range podEquivalenceGroups { samplePod := eg.Pods[0] - if err := o.autoscalingContext.PredicateChecker.CheckPredicates(o.autoscalingContext.ClusterSnapshot, samplePod, nodeInfo.Node().Name); err == nil { + if err := o.autoscalingContext.ClusterSnapshot.CheckPredicates(samplePod, nodeInfo.Node().Name); err == nil { // Add pods to option. 
schedulablePodGroups = append(schedulablePodGroups, estimator.PodEquivalenceGroup{ Pods: eg.Pods, diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go index 55ac45adb9f8..2e2ae109991e 100644 --- a/cluster-autoscaler/core/static_autoscaler.go +++ b/cluster-autoscaler/core/static_autoscaler.go @@ -48,7 +48,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" - "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" "k8s.io/autoscaler/cluster-autoscaler/utils/backoff" caerrors "k8s.io/autoscaler/cluster-autoscaler/utils/errors" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" @@ -132,7 +131,6 @@ func (callbacks *staticAutoscalerProcessorCallbacks) reset() { func NewStaticAutoscaler( opts config.AutoscalingOptions, fwHandle *framework.Handle, - predicateChecker predicatechecker.PredicateChecker, clusterSnapshot clustersnapshot.ClusterSnapshot, autoscalingKubeClients *context.AutoscalingKubeClients, processors *ca_processors.AutoscalingProcessors, @@ -156,7 +154,6 @@ func NewStaticAutoscaler( autoscalingContext := context.NewAutoscalingContext( opts, fwHandle, - predicateChecker, clusterSnapshot, autoscalingKubeClients, cloudProvider, diff --git a/cluster-autoscaler/core/test/common.go b/cluster-autoscaler/core/test/common.go index 990525c5dc9a..5abeffa27e85 100644 --- a/cluster-autoscaler/core/test/common.go +++ b/cluster-autoscaler/core/test/common.go @@ -38,7 +38,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/processors/status" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" "k8s.io/autoscaler/cluster-autoscaler/utils/backoff" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" @@ -175,10 +174,6 @@ func NewScaleTestAutoscalingContext( if err != nil { return context.AutoscalingContext{}, err } - predicateChecker, err := predicatechecker.NewTestPredicateChecker() - if err != nil { - return context.AutoscalingContext{}, err - } remainingPdbTracker := pdb.NewBasicRemainingPdbTracker() if debuggingSnapshotter == nil { debuggingSnapshotter = debuggingsnapshot.NewDebuggingSnapshotter(false) @@ -196,7 +191,6 @@ func NewScaleTestAutoscalingContext( ListerRegistry: listers, }, CloudProvider: provider, - PredicateChecker: predicateChecker, ClusterSnapshot: clusterSnapshot, ExpanderStrategy: random.NewStrategy(), ProcessorCallbacks: processorCallbacks, diff --git a/cluster-autoscaler/estimator/binpacking_estimator.go b/cluster-autoscaler/estimator/binpacking_estimator.go index cca580addd3f..c0154155379d 100644 --- a/cluster-autoscaler/estimator/binpacking_estimator.go +++ b/cluster-autoscaler/estimator/binpacking_estimator.go @@ -24,13 +24,11 @@ import ( core_utils "k8s.io/autoscaler/cluster-autoscaler/simulator" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" "k8s.io/klog/v2" ) // BinpackingNodeEstimator estimates the number of needed nodes to handle the given amount of pods. 
type BinpackingNodeEstimator struct { - predicateChecker predicatechecker.PredicateChecker clusterSnapshot clustersnapshot.ClusterSnapshot limiter EstimationLimiter podOrderer EstimationPodOrderer @@ -48,9 +46,13 @@ type estimationState struct { newNodesWithPods map[string]bool } +func (s *estimationState) trackScheduledPod(pod *apiv1.Pod, nodeName string) { + s.newNodesWithPods[nodeName] = true + s.scheduledPods = append(s.scheduledPods, pod) +} + // NewBinpackingNodeEstimator builds a new BinpackingNodeEstimator. func NewBinpackingNodeEstimator( - predicateChecker predicatechecker.PredicateChecker, clusterSnapshot clustersnapshot.ClusterSnapshot, limiter EstimationLimiter, podOrderer EstimationPodOrderer, @@ -58,7 +60,6 @@ func NewBinpackingNodeEstimator( estimationAnalyserFunc EstimationAnalyserFunc, ) *BinpackingNodeEstimator { return &BinpackingNodeEstimator{ - predicateChecker: predicateChecker, clusterSnapshot: clusterSnapshot, limiter: limiter, podOrderer: podOrderer, @@ -135,17 +136,19 @@ func (e *BinpackingNodeEstimator) tryToScheduleOnExistingNodes( for index = 0; index < len(pods); index++ { pod := pods[index] - // Check schedulability on all nodes created during simulation - nodeName, err := e.predicateChecker.FitsAnyNodeMatching(e.clusterSnapshot, pod, func(nodeInfo *framework.NodeInfo) bool { + // Try to schedule the pod on all nodes created during simulation + nodeName, err := e.clusterSnapshot.SchedulePodOnAnyNodeMatching(pod, func(nodeInfo *framework.NodeInfo) bool { return estimationState.newNodeNames[nodeInfo.Node().Name] }) - if err != nil { - break - } - - if err := e.tryToAddNode(estimationState, pod, nodeName); err != nil { + if err != nil && err.Type() == clustersnapshot.SchedulingInternalError { + // Unexpected error. return nil, err + } else if err != nil { + // The pod couldn't be scheduled on any Node because of scheduling predicates. + break } + // The pod was scheduled on nodeName. + estimationState.trackScheduledPod(pod, nodeName) } return pods[index:], nil } @@ -159,13 +162,16 @@ func (e *BinpackingNodeEstimator) tryToScheduleOnNewNodes( found := false if estimationState.lastNodeName != "" { - // Check schedulability on only newly created node - if err := e.predicateChecker.CheckPredicates(e.clusterSnapshot, pod, estimationState.lastNodeName); err == nil { + // Try to schedule the pod on only newly created node. + if err := e.clusterSnapshot.SchedulePod(pod, estimationState.lastNodeName); err == nil { + // The pod was scheduled on the newly created node. found = true - if err := e.tryToAddNode(estimationState, pod, estimationState.lastNodeName); err != nil { - return err - } + estimationState.trackScheduledPod(pod, estimationState.lastNodeName) + } else if err.Type() == clustersnapshot.SchedulingInternalError { + // Unexpected error. + return err } + // The pod can't be scheduled on the newly created node because of scheduling predicates. } if !found { @@ -195,12 +201,15 @@ func (e *BinpackingNodeEstimator) tryToScheduleOnNewNodes( // Note that this may still fail (ex. if topology spreading with zonal topologyKey is used); // in this case we can't help the pending pod. We keep the node in clusterSnapshot to avoid // adding and removing node to snapshot for each such pod. 
- if err := e.predicateChecker.CheckPredicates(e.clusterSnapshot, pod, estimationState.lastNodeName); err != nil { - break - } - if err := e.tryToAddNode(estimationState, pod, estimationState.lastNodeName); err != nil { + if err := e.clusterSnapshot.SchedulePod(pod, estimationState.lastNodeName); err != nil && err.Type() == clustersnapshot.SchedulingInternalError { + // Unexpected error. return err + } else if err != nil { + // The pod can't be scheduled on the new node because of scheduling predicates. + break } + // The pod got scheduled on the new node. + estimationState.trackScheduledPod(pod, estimationState.lastNodeName) } } return nil @@ -219,16 +228,3 @@ func (e *BinpackingNodeEstimator) addNewNodeToSnapshot( estimationState.newNodeNames[estimationState.lastNodeName] = true return nil } - -func (e *BinpackingNodeEstimator) tryToAddNode( - estimationState *estimationState, - pod *apiv1.Pod, - nodeName string, -) error { - if err := e.clusterSnapshot.ForceAddPod(pod, nodeName); err != nil { - return fmt.Errorf("Error adding pod %v.%v to node %v in ClusterSnapshot; %v", pod.Namespace, pod.Name, nodeName, err) - } - estimationState.newNodesWithPods[nodeName] = true - estimationState.scheduledPods = append(estimationState.scheduledPods, pod) - return nil -} diff --git a/cluster-autoscaler/estimator/binpacking_estimator_test.go b/cluster-autoscaler/estimator/binpacking_estimator_test.go index a0250e8a61e9..ac205f16ba46 100644 --- a/cluster-autoscaler/estimator/binpacking_estimator_test.go +++ b/cluster-autoscaler/estimator/binpacking_estimator_test.go @@ -27,7 +27,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" "k8s.io/autoscaler/cluster-autoscaler/utils/units" ) @@ -214,11 +213,9 @@ func TestBinpackingEstimate(t *testing.T) { err := clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(makeNode(100, 100, 10, "oldnode", "zone-jupiter"))) assert.NoError(t, err) - predicateChecker, err := predicatechecker.NewTestPredicateChecker() - assert.NoError(t, err) limiter := NewThresholdBasedEstimationLimiter([]Threshold{NewStaticThreshold(tc.maxNodes, time.Duration(0))}) processor := NewDecreasingPodOrderer() - estimator := NewBinpackingNodeEstimator(predicateChecker, clusterSnapshot, limiter, processor, nil /* EstimationContext */, nil /* EstimationAnalyserFunc */) + estimator := NewBinpackingNodeEstimator(clusterSnapshot, limiter, processor, nil /* EstimationContext */, nil /* EstimationAnalyserFunc */) node := makeNode(tc.millicores, tc.memory, 10, "template", "zone-mars") nodeInfo := framework.NewTestNodeInfo(node) @@ -269,11 +266,9 @@ func BenchmarkBinpackingEstimate(b *testing.B) { err := clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(makeNode(100, 100, 10, "oldnode", "zone-jupiter"))) assert.NoError(b, err) - predicateChecker, err := predicatechecker.NewTestPredicateChecker() - assert.NoError(b, err) limiter := NewThresholdBasedEstimationLimiter([]Threshold{NewStaticThreshold(maxNodes, time.Duration(0))}) processor := NewDecreasingPodOrderer() - estimator := NewBinpackingNodeEstimator(predicateChecker, clusterSnapshot, limiter, processor, nil /* EstimationContext */, nil /* EstimationAnalyserFunc */) + estimator := NewBinpackingNodeEstimator(clusterSnapshot, limiter, processor, nil /* EstimationContext */, nil /* EstimationAnalyserFunc */) node := makeNode(millicores, memory, podsPerNode, "template", "zone-mars") nodeInfo := framework.NewTestNodeInfo(node) diff --git a/cluster-autoscaler/estimator/estimator.go b/cluster-autoscaler/estimator/estimator.go index b8e3db070349..19752115a3e6 100644 --- a/cluster-autoscaler/estimator/estimator.go +++ b/cluster-autoscaler/estimator/estimator.go @@ -23,7 +23,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" ) const ( @@ -57,7 +56,7 @@ type Estimator interface { } // EstimatorBuilder creates a new estimator object. -type EstimatorBuilder func(predicatechecker.PredicateChecker, clustersnapshot.ClusterSnapshot, EstimationContext) Estimator +type EstimatorBuilder func(clustersnapshot.ClusterSnapshot, EstimationContext) Estimator // EstimationAnalyserFunc to be run at the end of the estimation logic. 
type EstimationAnalyserFunc func(clustersnapshot.ClusterSnapshot, cloudprovider.NodeGroup, map[string]bool) @@ -67,10 +66,9 @@ func NewEstimatorBuilder(name string, limiter EstimationLimiter, orderer Estimat switch name { case BinpackingEstimatorName: return func( - predicateChecker predicatechecker.PredicateChecker, clusterSnapshot clustersnapshot.ClusterSnapshot, context EstimationContext) Estimator { - return NewBinpackingNodeEstimator(predicateChecker, clusterSnapshot, limiter, orderer, context, estimationAnalyserFunc) + return NewBinpackingNodeEstimator(clusterSnapshot, limiter, orderer, context, estimationAnalyserFunc) }, nil } return nil, fmt.Errorf("unknown estimator: %s", name) diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index 23efbc54edfb..1927c8df248d 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -40,7 +40,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/predicate" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/store" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" "k8s.io/autoscaler/cluster-autoscaler/simulator/scheduling" kubelet_config "k8s.io/kubernetes/pkg/kubelet/apis/config" @@ -509,7 +508,6 @@ func buildAutoscaler(context ctx.Context, debuggingSnapshotter debuggingsnapshot KubeClient: kubeClient, InformerFactory: informerFactory, DebuggingSnapshotter: debuggingSnapshotter, - PredicateChecker: predicatechecker.NewSchedulerBasedPredicateChecker(fwHandle), DeleteOptions: deleteOptions, DrainabilityRules: drainabilityRules, ScaleUpOrchestrator: orchestrator.New(), @@ -517,7 +515,7 @@ func buildAutoscaler(context ctx.Context, debuggingSnapshotter debuggingsnapshot opts.Processors = ca_processors.DefaultProcessors(autoscalingOptions) opts.Processors.TemplateNodeInfoProvider = nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nodeInfoCacheExpireTime, *forceDaemonSets) - podListProcessor := podlistprocessor.NewDefaultPodListProcessor(opts.PredicateChecker, scheduling.ScheduleAnywhere) + podListProcessor := podlistprocessor.NewDefaultPodListProcessor(scheduling.ScheduleAnywhere) var ProvisioningRequestInjector *provreq.ProvisioningRequestPodsInjector if autoscalingOptions.ProvisioningRequestEnabled { @@ -548,7 +546,7 @@ func buildAutoscaler(context ctx.Context, debuggingSnapshotter debuggingsnapshot scaleUpOrchestrator := provreqorchestrator.NewWrapperOrchestrator(provreqOrchestrator) opts.ScaleUpOrchestrator = scaleUpOrchestrator - provreqProcesor := provreq.NewProvReqProcessor(client, opts.PredicateChecker) + provreqProcesor := provreq.NewProvReqProcessor(client) opts.LoopStartNotifier = loopstart.NewObserversList([]loopstart.Observer{provreqProcesor}) podListProcessor.AddProcessor(provreqProcesor) diff --git a/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go b/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go index 2b9e0ef7bd52..172b756e6875 100644 --- a/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go +++ b/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go @@ -28,7 +28,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/context" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" kube_util 
"k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" "k8s.io/autoscaler/cluster-autoscaler/utils/taints" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" @@ -76,18 +75,14 @@ func TestGetNodeInfosForGroups(t *testing.T) { podLister := kube_util.NewTestPodLister([]*apiv1.Pod{}) registry := kube_util.NewListerRegistry(nil, nil, podLister, nil, nil, nil, nil, nil, nil) - predicateChecker, err := predicatechecker.NewTestPredicateChecker() - assert.NoError(t, err) - nodes := []*apiv1.Node{justReady5, unready4, unready3, ready2, ready1} snapshot := testsnapshot.NewTestSnapshotOrDie(t) - err = snapshot.SetClusterState(nodes, nil) + err := snapshot.SetClusterState(nodes, nil) assert.NoError(t, err) ctx := context.AutoscalingContext{ - CloudProvider: provider1, - ClusterSnapshot: snapshot, - PredicateChecker: predicateChecker, + CloudProvider: provider1, + ClusterSnapshot: snapshot, AutoscalingKubeClients: context.AutoscalingKubeClients{ ListerRegistry: registry, }, @@ -113,9 +108,8 @@ func TestGetNodeInfosForGroups(t *testing.T) { // Test for a nodegroup without nodes and TemplateNodeInfo not implemented by cloud proivder ctx = context.AutoscalingContext{ - CloudProvider: provider2, - ClusterSnapshot: testsnapshot.NewTestSnapshotOrDie(t), - PredicateChecker: predicateChecker, + CloudProvider: provider2, + ClusterSnapshot: testsnapshot.NewTestSnapshotOrDie(t), AutoscalingKubeClients: context.AutoscalingKubeClients{ ListerRegistry: registry, }, @@ -167,19 +161,15 @@ func TestGetNodeInfosForGroupsCache(t *testing.T) { podLister := kube_util.NewTestPodLister([]*apiv1.Pod{}) registry := kube_util.NewListerRegistry(nil, nil, podLister, nil, nil, nil, nil, nil, nil) - predicateChecker, err := predicatechecker.NewTestPredicateChecker() - assert.NoError(t, err) - nodes := []*apiv1.Node{unready4, unready3, ready2, ready1} snapshot := testsnapshot.NewTestSnapshotOrDie(t) - err = snapshot.SetClusterState(nodes, nil) + err := snapshot.SetClusterState(nodes, nil) assert.NoError(t, err) // Fill cache ctx := context.AutoscalingContext{ - CloudProvider: provider1, - ClusterSnapshot: snapshot, - PredicateChecker: predicateChecker, + CloudProvider: provider1, + ClusterSnapshot: snapshot, AutoscalingKubeClients: context.AutoscalingKubeClients{ ListerRegistry: registry, }, @@ -261,18 +251,15 @@ func TestGetNodeInfosCacheExpired(t *testing.T) { provider := testprovider.NewTestAutoprovisioningCloudProvider(nil, nil, nil, nil, nil, nil) podLister := kube_util.NewTestPodLister([]*apiv1.Pod{}) registry := kube_util.NewListerRegistry(nil, nil, podLister, nil, nil, nil, nil, nil, nil) - predicateChecker, err := predicatechecker.NewTestPredicateChecker() - assert.NoError(t, err) nodes := []*apiv1.Node{ready1} snapshot := testsnapshot.NewTestSnapshotOrDie(t) - err = snapshot.SetClusterState(nodes, nil) + err := snapshot.SetClusterState(nodes, nil) assert.NoError(t, err) ctx := context.AutoscalingContext{ - CloudProvider: provider, - ClusterSnapshot: snapshot, - PredicateChecker: predicateChecker, + CloudProvider: provider, + ClusterSnapshot: snapshot, AutoscalingKubeClients: context.AutoscalingKubeClients{ ListerRegistry: registry, }, diff --git a/cluster-autoscaler/processors/provreq/processor.go b/cluster-autoscaler/processors/provreq/processor.go index 56f52257547c..1463b1e9f6db 100644 --- a/cluster-autoscaler/processors/provreq/processor.go +++ b/cluster-autoscaler/processors/provreq/processor.go @@ -32,7 +32,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/provreqwrapper" 
"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" "k8s.io/autoscaler/cluster-autoscaler/simulator/scheduling" "k8s.io/autoscaler/cluster-autoscaler/utils/klogx" "k8s.io/klog/v2" @@ -58,8 +57,8 @@ type provReqProcessor struct { } // NewProvReqProcessor return ProvisioningRequestProcessor. -func NewProvReqProcessor(client *provreqclient.ProvisioningRequestClient, predicateChecker predicatechecker.PredicateChecker) *provReqProcessor { - return &provReqProcessor{now: time.Now, maxUpdated: defaultMaxUpdated, client: client, injector: scheduling.NewHintingSimulator(predicateChecker)} +func NewProvReqProcessor(client *provreqclient.ProvisioningRequestClient) *provReqProcessor { + return &provReqProcessor{now: time.Now, maxUpdated: defaultMaxUpdated, client: client, injector: scheduling.NewHintingSimulator()} } // Refresh implements loop.Observer interface and will be run at the start diff --git a/cluster-autoscaler/processors/test/common.go b/cluster-autoscaler/processors/test/common.go index dd45d6569f91..07f69c16a346 100644 --- a/cluster-autoscaler/processors/test/common.go +++ b/cluster-autoscaler/processors/test/common.go @@ -39,7 +39,7 @@ import ( // NewTestProcessors returns a set of simple processors for use in tests. func NewTestProcessors(context *context.AutoscalingContext) *processors.AutoscalingProcessors { return &processors.AutoscalingProcessors{ - PodListProcessor: podlistprocessor.NewDefaultPodListProcessor(context.PredicateChecker, scheduling.ScheduleAnywhere), + PodListProcessor: podlistprocessor.NewDefaultPodListProcessor(scheduling.ScheduleAnywhere), NodeGroupListProcessor: &nodegroups.NoOpNodeGroupListProcessor{}, BinpackingLimiter: binpacking.NewTimeLimiter(context.MaxNodeGroupBinpackingDuration), NodeGroupSetProcessor: nodegroupset.NewDefaultNodeGroupSetProcessor([]string{}, config.NodeGroupDifferenceRatios{}), diff --git a/cluster-autoscaler/provisioningrequest/orchestrator/orchestrator.go b/cluster-autoscaler/provisioningrequest/orchestrator/orchestrator.go index 4008b5992246..6174749c46d7 100644 --- a/cluster-autoscaler/provisioningrequest/orchestrator/orchestrator.go +++ b/cluster-autoscaler/provisioningrequest/orchestrator/orchestrator.go @@ -69,7 +69,7 @@ func (o *provReqOrchestrator) Initialize( ) { o.initialized = true o.context = autoscalingContext - o.injector = scheduling.NewHintingSimulator(autoscalingContext.PredicateChecker) + o.injector = scheduling.NewHintingSimulator() for _, mode := range o.provisioningClasses { mode.Initialize(autoscalingContext, processors, clusterStateRegistry, estimatorBuilder, taintConfig, o.injector) } diff --git a/cluster-autoscaler/simulator/cluster.go b/cluster-autoscaler/simulator/cluster.go index e81d9ecea0ff..cf96e1fcc6b9 100644 --- a/cluster-autoscaler/simulator/cluster.go +++ b/cluster-autoscaler/simulator/cluster.go @@ -26,7 +26,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" - "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" "k8s.io/autoscaler/cluster-autoscaler/simulator/scheduling" "k8s.io/autoscaler/cluster-autoscaler/utils/drain" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" @@ -105,15 +104,14 @@ type RemovalSimulator struct { } // NewRemovalSimulator returns a new RemovalSimulator. 
-func NewRemovalSimulator(listers kube_util.ListerRegistry, clusterSnapshot clustersnapshot.ClusterSnapshot, predicateChecker predicatechecker.PredicateChecker, - deleteOptions options.NodeDeleteOptions, drainabilityRules rules.Rules, persistSuccessfulSimulations bool) *RemovalSimulator { +func NewRemovalSimulator(listers kube_util.ListerRegistry, clusterSnapshot clustersnapshot.ClusterSnapshot, deleteOptions options.NodeDeleteOptions, drainabilityRules rules.Rules, persistSuccessfulSimulations bool) *RemovalSimulator { return &RemovalSimulator{ listers: listers, clusterSnapshot: clusterSnapshot, canPersist: persistSuccessfulSimulations, deleteOptions: deleteOptions, drainabilityRules: drainabilityRules, - schedulingSimulator: scheduling.NewHintingSimulator(predicateChecker), + schedulingSimulator: scheduling.NewHintingSimulator(), } } @@ -221,9 +219,9 @@ func (r *RemovalSimulator) findPlaceFor(removedNode string, pods []*apiv1.Pod, n pods = tpu.ClearTPURequests(pods) - // remove pods from clusterSnapshot first + // Unschedule the pods from the Node in the snapshot first, so that they can be scheduled elsewhere by TrySchedulePods(). for _, pod := range pods { - if err := r.clusterSnapshot.ForceRemovePod(pod.Namespace, pod.Name, removedNode); err != nil { + if err := r.clusterSnapshot.UnschedulePod(pod.Namespace, pod.Name, removedNode); err != nil { // just log error klog.Errorf("Simulating removal of %s/%s return error; %v", pod.Namespace, pod.Name, err) } diff --git a/cluster-autoscaler/simulator/cluster_test.go b/cluster-autoscaler/simulator/cluster_test.go index eaee19b59924..2fbf1f55973f 100644 --- a/cluster-autoscaler/simulator/cluster_test.go +++ b/cluster-autoscaler/simulator/cluster_test.go @@ -30,7 +30,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" - "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" "k8s.io/autoscaler/cluster-autoscaler/utils/drain" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" @@ -60,7 +59,7 @@ func TestFindEmptyNodes(t *testing.T) { clusterSnapshot := testsnapshot.NewTestSnapshotOrDie(t) clustersnapshot.InitializeClusterSnapshotOrDie(t, clusterSnapshot, []*apiv1.Node{nodes[0], nodes[1], nodes[2], nodes[3]}, []*apiv1.Pod{pod1, pod2}) testTime := time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) - r := NewRemovalSimulator(nil, clusterSnapshot, nil, testDeleteOptions(), nil, false) + r := NewRemovalSimulator(nil, clusterSnapshot, testDeleteOptions(), nil, false) emptyNodes := r.FindEmptyNodesToRemove(nodeNames, testTime) assert.Equal(t, []string{nodeNames[0], nodeNames[2], nodeNames[3]}, emptyNodes) } @@ -140,8 +139,6 @@ func TestFindNodesToRemove(t *testing.T) { } clusterSnapshot := testsnapshot.NewTestSnapshotOrDie(t) - predicateChecker, err := predicatechecker.NewTestPredicateChecker() - assert.NoError(t, err) tests := []findNodesToRemoveTestConfig{ { @@ -188,7 +185,7 @@ func TestFindNodesToRemove(t *testing.T) { destinations = append(destinations, node.Name) } clustersnapshot.InitializeClusterSnapshotOrDie(t, clusterSnapshot, test.allNodes, test.pods) - r := NewRemovalSimulator(registry, clusterSnapshot, predicateChecker, testDeleteOptions(), nil, false) + r := NewRemovalSimulator(registry, clusterSnapshot, testDeleteOptions(), nil, false) toRemove, unremovable := r.FindNodesToRemove(test.candidates, destinations, time.Now(), nil) fmt.Printf("Test scenario: %s, found len(toRemove)=%v, expected len(test.toRemove)=%v\n", test.name, len(toRemove), len(test.toRemove)) assert.Equal(t, test.toRemove, toRemove) diff --git a/cluster-autoscaler/simulator/predicatechecker/interface.go b/cluster-autoscaler/simulator/predicatechecker/interface.go deleted file mode 100644 index 2236bfaa4907..000000000000 --- a/cluster-autoscaler/simulator/predicatechecker/interface.go +++ /dev/null @@ -1,31 +0,0 @@ -/* -Copyright 2020 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package predicatechecker - -import ( - "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" - "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - - apiv1 "k8s.io/api/core/v1" -) - -// PredicateChecker checks whether all required predicates pass for given Pod and Node. 
-type PredicateChecker interface { - FitsAnyNode(clusterSnapshot clustersnapshot.ClusterSnapshotStore, pod *apiv1.Pod) (string, clustersnapshot.SchedulingError) - FitsAnyNodeMatching(clusterSnapshot clustersnapshot.ClusterSnapshotStore, pod *apiv1.Pod, nodeMatches func(*framework.NodeInfo) bool) (string, clustersnapshot.SchedulingError) - CheckPredicates(clusterSnapshot clustersnapshot.ClusterSnapshotStore, pod *apiv1.Pod, nodeName string) clustersnapshot.SchedulingError -} diff --git a/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go b/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go index 5fa51f14b5ff..d023b5846a26 100644 --- a/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go +++ b/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go @@ -23,6 +23,10 @@ import ( "time" "github.com/stretchr/testify/assert" + "k8s.io/client-go/informers" + clientsetfake "k8s.io/client-go/kubernetes/fake" + "k8s.io/kubernetes/pkg/scheduler/apis/config" + scheduler_config_latest "k8s.io/kubernetes/pkg/scheduler/apis/config/latest" testconfig "k8s.io/autoscaler/cluster-autoscaler/config/test" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" @@ -45,7 +49,7 @@ func TestCheckPredicate(t *testing.T) { n1000Unschedulable := BuildTestNode("n1000", 1000, 2000000) SetNodeReadyState(n1000Unschedulable, true, time.Time{}) - defaultPredicateChecker, err := NewTestPredicateChecker() + defaultPredicateChecker, err := newTestPredicateChecker() assert.NoError(t, err) // temp dir @@ -64,7 +68,7 @@ func TestCheckPredicate(t *testing.T) { customConfig, err := scheduler.ConfigFromPath(customConfigFile) assert.NoError(t, err) - customPredicateChecker, err := NewTestPredicateCheckerWithCustomConfig(customConfig) + customPredicateChecker, err := newTestPredicateCheckerWithCustomConfig(customConfig) assert.NoError(t, err) tests := []struct { @@ -72,7 +76,7 @@ func TestCheckPredicate(t *testing.T) { node *apiv1.Node scheduledPods []*apiv1.Pod testPod *apiv1.Pod - predicateChecker PredicateChecker + predicateChecker *SchedulerBasedPredicateChecker expectError bool }{ // default predicate checker test cases @@ -172,7 +176,7 @@ func TestFitsAnyNode(t *testing.T) { n1000 := BuildTestNode("n1000", 1000, 2000000) n2000 := BuildTestNode("n2000", 2000, 2000000) - defaultPredicateChecker, err := NewTestPredicateChecker() + defaultPredicateChecker, err := newTestPredicateChecker() assert.NoError(t, err) // temp dir @@ -191,12 +195,12 @@ func TestFitsAnyNode(t *testing.T) { customConfig, err := scheduler.ConfigFromPath(customConfigFile) assert.NoError(t, err) - customPredicateChecker, err := NewTestPredicateCheckerWithCustomConfig(customConfig) + customPredicateChecker, err := newTestPredicateCheckerWithCustomConfig(customConfig) assert.NoError(t, err) testCases := []struct { name string - predicateChecker PredicateChecker + predicateChecker *SchedulerBasedPredicateChecker pod *apiv1.Pod expectedNodes []string expectError bool @@ -289,7 +293,7 @@ func TestDebugInfo(t *testing.T) { assert.NoError(t, err) // with default predicate checker - defaultPredicateChecker, err := NewTestPredicateChecker() + defaultPredicateChecker, err := newTestPredicateChecker() assert.NoError(t, err) predicateErr := defaultPredicateChecker.CheckPredicates(clusterSnapshot, p1, "n1") assert.NotNil(t, predicateErr) @@ -315,8 +319,27 @@ func TestDebugInfo(t *testing.T) { customConfig, err := scheduler.ConfigFromPath(customConfigFile) assert.NoError(t, err) - customPredicateChecker, 
err := NewTestPredicateCheckerWithCustomConfig(customConfig) + customPredicateChecker, err := newTestPredicateCheckerWithCustomConfig(customConfig) assert.NoError(t, err) predicateErr = customPredicateChecker.CheckPredicates(clusterSnapshot, p1, "n1") assert.Nil(t, predicateErr) } + +// newTestPredicateChecker builds test version of PredicateChecker. +func newTestPredicateChecker() (*SchedulerBasedPredicateChecker, error) { + defaultConfig, err := scheduler_config_latest.Default() + if err != nil { + return nil, err + } + return newTestPredicateCheckerWithCustomConfig(defaultConfig) +} + +// newTestPredicateCheckerWithCustomConfig builds test version of PredicateChecker with custom scheduler config. +func newTestPredicateCheckerWithCustomConfig(schedConfig *config.KubeSchedulerConfiguration) (*SchedulerBasedPredicateChecker, error) { + // just call out to NewSchedulerBasedPredicateChecker but use fake kubeClient + fwHandle, err := framework.NewHandle(informers.NewSharedInformerFactory(clientsetfake.NewSimpleClientset(), 0), schedConfig) + if err != nil { + return nil, err + } + return NewSchedulerBasedPredicateChecker(fwHandle), nil +} diff --git a/cluster-autoscaler/simulator/predicatechecker/testchecker.go b/cluster-autoscaler/simulator/predicatechecker/testchecker.go deleted file mode 100644 index 5178fc0c4233..000000000000 --- a/cluster-autoscaler/simulator/predicatechecker/testchecker.go +++ /dev/null @@ -1,44 +0,0 @@ -/* -Copyright 2020 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package predicatechecker - -import ( - "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - "k8s.io/client-go/informers" - clientsetfake "k8s.io/client-go/kubernetes/fake" - "k8s.io/kubernetes/pkg/scheduler/apis/config" - scheduler_config_latest "k8s.io/kubernetes/pkg/scheduler/apis/config/latest" -) - -// NewTestPredicateChecker builds test version of PredicateChecker. -func NewTestPredicateChecker() (PredicateChecker, error) { - defaultConfig, err := scheduler_config_latest.Default() - if err != nil { - return nil, err - } - return NewTestPredicateCheckerWithCustomConfig(defaultConfig) -} - -// NewTestPredicateCheckerWithCustomConfig builds test version of PredicateChecker with custom scheduler config. -func NewTestPredicateCheckerWithCustomConfig(schedConfig *config.KubeSchedulerConfiguration) (PredicateChecker, error) { - // just call out to NewSchedulerBasedPredicateChecker but use fake kubeClient - fwHandle, err := framework.NewHandle(informers.NewSharedInformerFactory(clientsetfake.NewSimpleClientset(), 0), schedConfig) - if err != nil { - return nil, err - } - return NewSchedulerBasedPredicateChecker(fwHandle), nil -} diff --git a/cluster-autoscaler/simulator/scheduling/hinting_simulator.go b/cluster-autoscaler/simulator/scheduling/hinting_simulator.go index 2f24bb8bf4ba..8a35877a2c44 100644 --- a/cluster-autoscaler/simulator/scheduling/hinting_simulator.go +++ b/cluster-autoscaler/simulator/scheduling/hinting_simulator.go @@ -17,11 +17,8 @@ limitations under the License. 
package scheduling import ( - "fmt" - "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" "k8s.io/autoscaler/cluster-autoscaler/utils/klogx" apiv1 "k8s.io/api/core/v1" @@ -35,15 +32,13 @@ type Status struct { // HintingSimulator is a helper object for simulating scheduler behavior. type HintingSimulator struct { - predicateChecker predicatechecker.PredicateChecker - hints *Hints + hints *Hints } // NewHintingSimulator returns a new HintingSimulator. -func NewHintingSimulator(predicateChecker predicatechecker.PredicateChecker) *HintingSimulator { +func NewHintingSimulator() *HintingSimulator { return &HintingSimulator{ - predicateChecker: predicateChecker, - hints: NewHints(), + hints: NewHints(), } } @@ -62,20 +57,20 @@ func (s *HintingSimulator) TrySchedulePods(clusterSnapshot clustersnapshot.Clust loggingQuota := klogx.PodsLoggingQuota() for _, pod := range pods { klogx.V(5).UpTo(loggingQuota).Infof("Looking for place for %s/%s", pod.Namespace, pod.Name) - nodeName, err := s.findNodeWithHints(clusterSnapshot, pod, isNodeAcceptable) + nodeName, err := s.tryScheduleUsingHints(clusterSnapshot, pod, isNodeAcceptable) if err != nil { return nil, 0, err } if nodeName == "" { - nodeName = s.findNode(similarPods, clusterSnapshot, pod, loggingQuota, isNodeAcceptable) + nodeName, err = s.trySchedule(similarPods, clusterSnapshot, pod, loggingQuota, isNodeAcceptable) + if err != nil { + return nil, 0, err + } } if nodeName != "" { klogx.V(4).UpTo(loggingQuota).Infof("Pod %s/%s can be moved to %s", pod.Namespace, pod.Name, nodeName) - if err := clusterSnapshot.ForceAddPod(pod, nodeName); err != nil { - return nil, 0, fmt.Errorf("simulating scheduling of %s/%s to %s return error; %v", pod.Namespace, pod.Name, nodeName, err) - } statuses = append(statuses, Status{Pod: pod, NodeName: nodeName}) } else if breakOnFailure { break @@ -85,40 +80,56 @@ func (s *HintingSimulator) TrySchedulePods(clusterSnapshot clustersnapshot.Clust return statuses, similarPods.OverflowingControllerCount(), nil } -func (s *HintingSimulator) findNodeWithHints(clusterSnapshot clustersnapshot.ClusterSnapshot, pod *apiv1.Pod, isNodeAcceptable func(*framework.NodeInfo) bool) (string, error) { +// tryScheduleUsingHints tries to schedule the provided Pod in the provided clusterSnapshot using hints. If the pod is scheduled, the name of its Node is returned. If the +// pod couldn't be scheduled using hints, an empty string and nil error is returned. Error is only returned for unexpected errors. 
+func (s *HintingSimulator) tryScheduleUsingHints(clusterSnapshot clustersnapshot.ClusterSnapshot, pod *apiv1.Pod, isNodeAcceptable func(*framework.NodeInfo) bool) (string, error) { hk := HintKeyFromPod(pod) - if hintedNode, hasHint := s.hints.Get(hk); hasHint { - if err := s.predicateChecker.CheckPredicates(clusterSnapshot, pod, hintedNode); err == nil { - s.hints.Set(hk, hintedNode) + hintedNode, hasHint := s.hints.Get(hk) + if !hasHint { + return "", nil + } - nodeInfo, err := clusterSnapshot.GetNodeInfo(hintedNode) - if err != nil { - return "", err - } + nodeInfo, err := clusterSnapshot.GetNodeInfo(hintedNode) + if err != nil { + return "", err + } + if !isNodeAcceptable(nodeInfo) { + return "", nil + } - if isNodeAcceptable(nodeInfo) { - return hintedNode, nil - } - } + if err := clusterSnapshot.SchedulePod(pod, hintedNode); err != nil && err.Type() == clustersnapshot.SchedulingInternalError { + // Unexpected error. + return "", err + } else if err != nil { + // The pod can't be scheduled on the hintedNode because of scheduling predicates. + return "", nil } - return "", nil + // The pod was scheduled on hintedNode. + s.hints.Set(hk, hintedNode) + return hintedNode, nil } -func (s *HintingSimulator) findNode(similarPods *SimilarPodsScheduling, clusterSnapshot clustersnapshot.ClusterSnapshot, pod *apiv1.Pod, loggingQuota *klogx.Quota, isNodeAcceptable func(*framework.NodeInfo) bool) string { +// trySchedule tries to schedule the provided Pod in the provided clusterSnapshot on any Node passing isNodeAcceptable. If the pod is scheduled, the name of its Node is returned. If no Node +// with passing scheduling predicates could be found, an empty string and nil error is returned. Error is only returned for unexpected errors. +func (s *HintingSimulator) trySchedule(similarPods *SimilarPodsScheduling, clusterSnapshot clustersnapshot.ClusterSnapshot, pod *apiv1.Pod, loggingQuota *klogx.Quota, isNodeAcceptable func(*framework.NodeInfo) bool) (string, error) { if similarPods.IsSimilarUnschedulable(pod) { klogx.V(4).UpTo(loggingQuota).Infof("failed to find place for %s/%s based on similar pods scheduling", pod.Namespace, pod.Name) - return "" + return "", nil } - newNodeName, err := s.predicateChecker.FitsAnyNodeMatching(clusterSnapshot, pod, isNodeAcceptable) - if err != nil { + newNodeName, err := clusterSnapshot.SchedulePodOnAnyNodeMatching(pod, isNodeAcceptable) + if err != nil && err.Type() == clustersnapshot.SchedulingInternalError { + // Unexpected error. + return "", err + } else if err != nil { + // The pod couldn't be scheduled on any Node because of scheduling predicates. klogx.V(4).UpTo(loggingQuota).Infof("failed to find place for %s/%s: %v", pod.Namespace, pod.Name, err) similarPods.SetUnschedulable(pod) - return "" + return "", nil } - + // The pod was scheduled on newNodeName. s.hints.Set(HintKeyFromPod(pod), newNodeName) - return newNodeName + return newNodeName, nil } // DropOldHints drops old scheduling hints. 
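The estimator and hinting-simulator hunks above all follow the same convention when calling the new ClusterSnapshot scheduling methods: a SchedulingInternalError aborts the whole simulation, while any other SchedulingError just means the pod does not fit and the caller moves on. A minimal sketch of that convention for a caller being migrated off PredicateChecker (the helper name tryPlacePod and its signature are illustrative, not part of this series):

package example // illustrative only; not a file touched by this series

import (
	apiv1 "k8s.io/api/core/v1"

	"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot"
	"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
)

// tryPlacePod schedules pod on any node in the snapshot that passes the
// acceptable filter. It returns the chosen node name, whether the pod was
// placed, and a SchedulingError only for unexpected internal failures.
func tryPlacePod(snapshot clustersnapshot.ClusterSnapshot, pod *apiv1.Pod, acceptable func(*framework.NodeInfo) bool) (string, bool, clustersnapshot.SchedulingError) {
	nodeName, err := snapshot.SchedulePodOnAnyNodeMatching(pod, acceptable)
	if err != nil && err.Type() == clustersnapshot.SchedulingInternalError {
		// Unexpected error - abort the simulation.
		return "", false, err
	} else if err != nil {
		// No node passed the scheduling predicates; not a failure for the caller.
		return "", false, nil
	}
	// Unlike the removed FitsAnyNodeMatching, SchedulePodOnAnyNodeMatching also
	// adds the pod to the snapshot, so no separate ForceAddPod call is needed.
	return nodeName, true, nil
}

Keeping the distinction in the error's Type() lets callers like the binpacking estimator preserve their old control flow (break on a predicate failure, return on an internal error) without inspecting scheduler plugin statuses directly.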
diff --git a/cluster-autoscaler/simulator/scheduling/hinting_simulator_test.go b/cluster-autoscaler/simulator/scheduling/hinting_simulator_test.go index 717fca941394..d4b86d256862 100644 --- a/cluster-autoscaler/simulator/scheduling/hinting_simulator_test.go +++ b/cluster-autoscaler/simulator/scheduling/hinting_simulator_test.go @@ -26,7 +26,6 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" ) @@ -135,10 +134,8 @@ func TestTrySchedulePods(t *testing.T) { t.Run(tc.desc, func(t *testing.T) { t.Parallel() clusterSnapshot := testsnapshot.NewTestSnapshotOrDie(t) - predicateChecker, err := predicatechecker.NewTestPredicateChecker() - assert.NoError(t, err) clustersnapshot.InitializeClusterSnapshotOrDie(t, clusterSnapshot, tc.nodes, tc.pods) - s := NewHintingSimulator(predicateChecker) + s := NewHintingSimulator() statuses, _, err := s.TrySchedulePods(clusterSnapshot, tc.newPods, tc.acceptableNodes, false) if tc.wantErr { assert.Error(t, err) @@ -212,15 +209,13 @@ func TestPodSchedulesOnHintedNode(t *testing.T) { t.Run(tc.desc, func(t *testing.T) { t.Parallel() clusterSnapshot := testsnapshot.NewTestSnapshotOrDie(t) - predicateChecker, err := predicatechecker.NewTestPredicateChecker() - assert.NoError(t, err) nodes := make([]*apiv1.Node, 0, len(tc.nodeNames)) for _, n := range tc.nodeNames { nodes = append(nodes, buildReadyNode(n, 9999, 9999)) } clustersnapshot.InitializeClusterSnapshotOrDie(t, clusterSnapshot, nodes, []*apiv1.Pod{}) pods := make([]*apiv1.Pod, 0, len(tc.podNodes)) - s := NewHintingSimulator(predicateChecker) + s := NewHintingSimulator() var expectedStatuses []Status for p, n := range tc.podNodes { pod := BuildTestPod(p, 1, 1) From 054d5d2e7c7cf28ca83d2eb872977624f7b4fcd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kuba=20Tu=C5=BCnik?= Date: Thu, 14 Nov 2024 15:45:49 +0100 Subject: [PATCH 8/8] CA: refactor SchedulerBasedPredicateChecker into SchedulerPluginRunner For DRA, this component will have to call the Reserve phase in addition to just checking predicates/filters. The new version also makes more sense in the context of PredicateSnapshot, which is the only context now. While refactoring, I noticed that CheckPredicates for some reason doesn't check the provided Node against the eligible Nodes returned from PreFilter (while FitsAnyNodeMatching does do that). This seems like a bug, so the check is added. The checks in FitsAnyNodeMatching are also reordered so that the cheapest ones are checked earliest. 
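PredicateSnapshot itself is only a small delta in this series, but the intended wiring follows directly from the runner API below: the snapshot runs the Filter plugins through SchedulerPluginRunner and only persists the pod in its underlying store once they pass. A hedged sketch of that wiring, assuming a pluginRunner field and an embedded ClusterSnapshotStore (the real predicate_snapshot.go may differ):

package predicate // sketch only; the concrete fields of PredicateSnapshot are assumptions

import (
	apiv1 "k8s.io/api/core/v1"

	"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot"
	"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
)

// PredicateSnapshot as assumed for this sketch; the real type and its
// ForceAddPod wiring may differ.
type PredicateSnapshot struct {
	clustersnapshot.ClusterSnapshotStore
	pluginRunner *SchedulerPluginRunner
}

// SchedulePod checks the scheduling filters for a single node and, if they
// pass, adds the pod to that node in the underlying snapshot store.
func (s *PredicateSnapshot) SchedulePod(pod *apiv1.Pod, nodeName string) clustersnapshot.SchedulingError {
	if schedErr := s.pluginRunner.RunFiltersOnNode(pod, nodeName); schedErr != nil {
		return schedErr
	}
	if err := s.ClusterSnapshotStore.ForceAddPod(pod, nodeName); err != nil {
		return clustersnapshot.NewSchedulingInternalError(pod, err.Error())
	}
	return nil
}

// SchedulePodOnAnyNodeMatching lets the runner iterate over nodes and then
// records the pod on whichever node passed the filters.
func (s *PredicateSnapshot) SchedulePodOnAnyNodeMatching(pod *apiv1.Pod, matches func(*framework.NodeInfo) bool) (string, clustersnapshot.SchedulingError) {
	nodeName, schedErr := s.pluginRunner.RunFiltersUntilPassingNode(pod, matches)
	if schedErr != nil {
		return "", schedErr
	}
	if err := s.ClusterSnapshotStore.ForceAddPod(pod, nodeName); err != nil {
		return "", clustersnapshot.NewSchedulingInternalError(pod, err.Error())
	}
	return nodeName, nil
}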
--- .../predicate/plugin_runner.go | 147 +++++++++++ .../predicate/plugin_runner_test.go} | 240 ++++++++---------- .../predicate/predicate_snapshot.go | 11 +- .../predicatechecker/schedulerbased.go | 146 ----------- 4 files changed, 264 insertions(+), 280 deletions(-) create mode 100644 cluster-autoscaler/simulator/clustersnapshot/predicate/plugin_runner.go rename cluster-autoscaler/simulator/{predicatechecker/schedulerbased_test.go => clustersnapshot/predicate/plugin_runner_test.go} (51%) delete mode 100644 cluster-autoscaler/simulator/predicatechecker/schedulerbased.go diff --git a/cluster-autoscaler/simulator/clustersnapshot/predicate/plugin_runner.go b/cluster-autoscaler/simulator/clustersnapshot/predicate/plugin_runner.go new file mode 100644 index 000000000000..150b245fdd71 --- /dev/null +++ b/cluster-autoscaler/simulator/clustersnapshot/predicate/plugin_runner.go @@ -0,0 +1,147 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package predicate + +import ( + "context" + "fmt" + "strings" + + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" + + apiv1 "k8s.io/api/core/v1" + schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" +) + +// SchedulerPluginRunner can be used to run various phases of scheduler plugins through the scheduler framework. +type SchedulerPluginRunner struct { + fwHandle *framework.Handle + snapshotStore clustersnapshot.ClusterSnapshotStore + lastIndex int +} + +// NewSchedulerPluginRunner builds a SchedulerPluginRunner. +func NewSchedulerPluginRunner(fwHandle *framework.Handle, snapshotStore clustersnapshot.ClusterSnapshotStore) *SchedulerPluginRunner { + return &SchedulerPluginRunner{fwHandle: fwHandle, snapshotStore: snapshotStore} +} + +// RunFiltersUntilPassingNode runs the scheduler framework PreFilter phase once, and then keeps running the Filter phase for all nodes in the cluster that match the provided +// function - until a Node where the Filters pass is found. Filters are only run for matching Nodes. If no matching Node with passing Filters is found, an error is returned. +// +// The node iteration always starts from the next Node from the last Node that was found by this method. TODO: Extract the iteration strategy out of SchedulerPluginRunner. +func (p *SchedulerPluginRunner) RunFiltersUntilPassingNode(pod *apiv1.Pod, nodeMatches func(*framework.NodeInfo) bool) (string, clustersnapshot.SchedulingError) { + nodeInfosList, err := p.snapshotStore.ListNodeInfos() + if err != nil { + return "", clustersnapshot.NewSchedulingInternalError(pod, fmt.Sprintf("error listing NodeInfos: %v", err)) + } + + p.fwHandle.DelegatingLister.UpdateDelegate(p.snapshotStore) + defer p.fwHandle.DelegatingLister.ResetDelegate() + + state := schedulerframework.NewCycleState() + // Run the PreFilter phase of the framework for the Pod. This allows plugins to precompute some things (for all Nodes in the cluster at once) and + // save them in the CycleState. 
During the Filter phase, plugins can retrieve the precomputes from the CycleState and use them for answering the Filter + // for a given Node. + preFilterResult, preFilterStatus, _ := p.fwHandle.Framework.RunPreFilterPlugins(context.TODO(), state, pod) + if !preFilterStatus.IsSuccess() { + // If any of the plugin PreFilter methods isn't successful, the corresponding Filter method can't be run, so the whole scheduling cycle is aborted. + // Match that behavior here. + return "", clustersnapshot.NewFailingPredicateError(pod, preFilterStatus.Plugin(), preFilterStatus.Reasons(), "PreFilter failed", "") + } + + for i := range nodeInfosList { + // Determine which NodeInfo to check next. + nodeInfo := nodeInfosList[(p.lastIndex+i)%len(nodeInfosList)] + + // Plugins can filter some Nodes out during the PreFilter phase, if they're sure the Nodes won't work for the Pod at that stage. + // Filters are only run for Nodes that haven't been filtered out during the PreFilter phase. Match that behavior here - skip such Nodes. + if !preFilterResult.AllNodes() && !preFilterResult.NodeNames.Has(nodeInfo.Node().Name) { + continue + } + + // Nodes with the Unschedulable bit set will be rejected by one of the plugins during the Filter phase below. We can check that quickly here + // and short-circuit to avoid running the expensive Filter phase at all in this case. + if nodeInfo.Node().Spec.Unschedulable { + continue + } + + // Check if the NodeInfo matches the provided filtering condition. This should be less expensive than running the Filter phase below, so + // check this first. + if !nodeMatches(nodeInfo) { + continue + } + + // Run the Filter phase of the framework. Plugins retrieve the state they saved during PreFilter from CycleState, and answer whether the + // given Pod can be scheduled on the given Node. + filterStatus := p.fwHandle.Framework.RunFilterPlugins(context.TODO(), state, pod, nodeInfo.ToScheduler()) + if filterStatus.IsSuccess() { + // Filter passed for all plugins, so this pod can be scheduled on this Node. + p.lastIndex = (p.lastIndex + i + 1) % len(nodeInfosList) + return nodeInfo.Node().Name, nil + } + // Filter didn't pass for some plugin, so this Node won't work - move on to the next one. + } + return "", clustersnapshot.NewNoNodesPassingPredicatesFoundError(pod) +} + +// RunFiltersOnNode runs the scheduler framework PreFilter and Filter phases to check if the given pod can be scheduled on the given node. +func (p *SchedulerPluginRunner) RunFiltersOnNode(pod *apiv1.Pod, nodeName string) clustersnapshot.SchedulingError { + nodeInfo, err := p.snapshotStore.GetNodeInfo(nodeName) + if err != nil { + return clustersnapshot.NewSchedulingInternalError(pod, fmt.Sprintf("error obtaining NodeInfo for name %q: %v", nodeName, err)) + } + + p.fwHandle.DelegatingLister.UpdateDelegate(p.snapshotStore) + defer p.fwHandle.DelegatingLister.ResetDelegate() + + state := schedulerframework.NewCycleState() + // Run the PreFilter phase of the framework for the Pod and check the results. See the corresponding comments in RunFiltersUntilPassingNode() for more info. 
+ preFilterResult, preFilterStatus, _ := p.fwHandle.Framework.RunPreFilterPlugins(context.TODO(), state, pod) + if !preFilterStatus.IsSuccess() { + return clustersnapshot.NewFailingPredicateError(pod, preFilterStatus.Plugin(), preFilterStatus.Reasons(), "PreFilter failed", "") + } + if !preFilterResult.AllNodes() && !preFilterResult.NodeNames.Has(nodeInfo.Node().Name) { + return clustersnapshot.NewFailingPredicateError(pod, preFilterStatus.Plugin(), preFilterStatus.Reasons(), "PreFilter filtered the Node out", "") + } + + // Run the Filter phase of the framework for the Pod and the Node and check the results. See the corresponding comments in RunFiltersUntilPassingNode() for more info. + filterStatus := p.fwHandle.Framework.RunFilterPlugins(context.TODO(), state, pod, nodeInfo.ToScheduler()) + if !filterStatus.IsSuccess() { + filterName := filterStatus.Plugin() + filterReasons := filterStatus.Reasons() + unexpectedErrMsg := "" + if !filterStatus.IsRejected() { + unexpectedErrMsg = fmt.Sprintf("unexpected filter status %q", filterStatus.Code().String()) + } + return clustersnapshot.NewFailingPredicateError(pod, filterName, filterReasons, unexpectedErrMsg, p.failingFilterDebugInfo(filterName, nodeInfo)) + } + + // PreFilter and Filter phases checked, this Pod can be scheduled on this Node. + return nil +} + +func (p *SchedulerPluginRunner) failingFilterDebugInfo(filterName string, nodeInfo *framework.NodeInfo) string { + infoParts := []string{fmt.Sprintf("nodeName: %q", nodeInfo.Node().Name)} + + switch filterName { + case "TaintToleration": + infoParts = append(infoParts, fmt.Sprintf("nodeTaints: %#v", nodeInfo.Node().Spec.Taints)) + } + + return strings.Join(infoParts, ", ") +} diff --git a/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go b/cluster-autoscaler/simulator/clustersnapshot/predicate/plugin_runner_test.go similarity index 51% rename from cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go rename to cluster-autoscaler/simulator/clustersnapshot/predicate/plugin_runner_test.go index d023b5846a26..3e5323d0a4d5 100644 --- a/cluster-autoscaler/simulator/predicatechecker/schedulerbased_test.go +++ b/cluster-autoscaler/simulator/clustersnapshot/predicate/plugin_runner_test.go @@ -1,5 +1,5 @@ /* -Copyright 2020 The Kubernetes Authors. +Copyright 2024 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package predicatechecker +package predicate import ( "os" @@ -23,6 +23,7 @@ import ( "time" "github.com/stretchr/testify/assert" + "k8s.io/client-go/informers" clientsetfake "k8s.io/client-go/kubernetes/fake" "k8s.io/kubernetes/pkg/scheduler/apis/config" @@ -38,7 +39,7 @@ import ( apiv1 "k8s.io/api/core/v1" ) -func TestCheckPredicate(t *testing.T) { +func TestRunFiltersOnNode(t *testing.T) { p450 := BuildTestPod("p450", 450, 500000) p600 := BuildTestPod("p600", 600, 500000) p8000 := BuildTestPod("p8000", 8000, 0) @@ -49,9 +50,6 @@ func TestCheckPredicate(t *testing.T) { n1000Unschedulable := BuildTestNode("n1000", 1000, 2000000) SetNodeReadyState(n1000Unschedulable, true, time.Time{}) - defaultPredicateChecker, err := newTestPredicateChecker() - assert.NoError(t, err) - // temp dir tmpDir, err := os.MkdirTemp("", "scheduler-configs") if err != nil { @@ -65,95 +63,90 @@ func TestCheckPredicate(t *testing.T) { os.FileMode(0600)); err != nil { t.Fatal(err) } - customConfig, err := scheduler.ConfigFromPath(customConfigFile) assert.NoError(t, err) - customPredicateChecker, err := newTestPredicateCheckerWithCustomConfig(customConfig) - assert.NoError(t, err) tests := []struct { - name string - node *apiv1.Node - scheduledPods []*apiv1.Pod - testPod *apiv1.Pod - predicateChecker *SchedulerBasedPredicateChecker - expectError bool + name string + customConfig *config.KubeSchedulerConfiguration + node *apiv1.Node + scheduledPods []*apiv1.Pod + testPod *apiv1.Pod + expectError bool }{ // default predicate checker test cases { - name: "default - other pod - insuficient cpu", - node: n1000, - scheduledPods: []*apiv1.Pod{p450}, - testPod: p600, - expectError: true, - predicateChecker: defaultPredicateChecker, + name: "default - other pod - insuficient cpu", + node: n1000, + scheduledPods: []*apiv1.Pod{p450}, + testPod: p600, + expectError: true, }, { - name: "default - other pod - ok", - node: n1000, - scheduledPods: []*apiv1.Pod{p450}, - testPod: p500, - expectError: false, - predicateChecker: defaultPredicateChecker, + name: "default - other pod - ok", + node: n1000, + scheduledPods: []*apiv1.Pod{p450}, + testPod: p500, + expectError: false, }, { - name: "default - empty - insuficient cpu", - node: n1000, - scheduledPods: []*apiv1.Pod{}, - testPod: p8000, - expectError: true, - predicateChecker: defaultPredicateChecker, + name: "default - empty - insuficient cpu", + node: n1000, + scheduledPods: []*apiv1.Pod{}, + testPod: p8000, + expectError: true, }, { - name: "default - empty - ok", - node: n1000, - scheduledPods: []*apiv1.Pod{}, - testPod: p600, - expectError: false, - predicateChecker: defaultPredicateChecker, + name: "default - empty - ok", + node: n1000, + scheduledPods: []*apiv1.Pod{}, + testPod: p600, + expectError: false, }, // custom predicate checker test cases { - name: "custom - other pod - ok", - node: n1000, - scheduledPods: []*apiv1.Pod{p450}, - testPod: p600, - expectError: false, - predicateChecker: customPredicateChecker, + name: "custom - other pod - ok", + node: n1000, + scheduledPods: []*apiv1.Pod{p450}, + testPod: p600, + expectError: false, + customConfig: customConfig, }, { - name: "custom -other pod - ok", - node: n1000, - scheduledPods: []*apiv1.Pod{p450}, - testPod: p500, - expectError: false, - predicateChecker: customPredicateChecker, + name: "custom -other pod - ok", + node: n1000, + scheduledPods: []*apiv1.Pod{p450}, + testPod: p500, + expectError: false, + customConfig: customConfig, }, { - name: "custom -empty - ok", - node: n1000, - scheduledPods: 
[]*apiv1.Pod{}, - testPod: p8000, - expectError: false, - predicateChecker: customPredicateChecker, + name: "custom -empty - ok", + node: n1000, + scheduledPods: []*apiv1.Pod{}, + testPod: p8000, + expectError: false, + customConfig: customConfig, }, { - name: "custom -empty - ok", - node: n1000, - scheduledPods: []*apiv1.Pod{}, - testPod: p600, - expectError: false, - predicateChecker: customPredicateChecker, + name: "custom -empty - ok", + node: n1000, + scheduledPods: []*apiv1.Pod{}, + testPod: p600, + expectError: false, + customConfig: customConfig, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - var err error - clusterSnapshot := store.NewBasicSnapshotStore() - err = clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(tt.node, tt.scheduledPods...)) + snapshotStore := store.NewBasicSnapshotStore() + err := snapshotStore.AddNodeInfo(framework.NewTestNodeInfo(tt.node, tt.scheduledPods...)) assert.NoError(t, err) - predicateError := tt.predicateChecker.CheckPredicates(clusterSnapshot, tt.testPod, tt.node.Name) + pluginRunner, err := newTestPluginRunner(snapshotStore, tt.customConfig) + assert.NoError(t, err) + + predicateError := pluginRunner.RunFiltersOnNode(tt.testPod, tt.node.Name) if tt.expectError { assert.NotNil(t, predicateError) assert.Equal(t, clustersnapshot.FailingPredicateError, predicateError.Type()) @@ -168,7 +161,7 @@ func TestCheckPredicate(t *testing.T) { } } -func TestFitsAnyNode(t *testing.T) { +func TestRunFilterUntilPassingNode(t *testing.T) { p900 := BuildTestPod("p900", 900, 1000) p1900 := BuildTestPod("p1900", 1900, 1000) p2100 := BuildTestPod("p2100", 2100, 1000) @@ -176,9 +169,6 @@ func TestFitsAnyNode(t *testing.T) { n1000 := BuildTestNode("n1000", 1000, 2000000) n2000 := BuildTestNode("n2000", 2000, 2000000) - defaultPredicateChecker, err := newTestPredicateChecker() - assert.NoError(t, err) - // temp dir tmpDir, err := os.MkdirTemp("", "scheduler-configs") if err != nil { @@ -192,74 +182,71 @@ func TestFitsAnyNode(t *testing.T) { os.FileMode(0600)); err != nil { t.Fatal(err) } - customConfig, err := scheduler.ConfigFromPath(customConfigFile) assert.NoError(t, err) - customPredicateChecker, err := newTestPredicateCheckerWithCustomConfig(customConfig) - assert.NoError(t, err) testCases := []struct { - name string - predicateChecker *SchedulerBasedPredicateChecker - pod *apiv1.Pod - expectedNodes []string - expectError bool + name string + customConfig *config.KubeSchedulerConfiguration + pod *apiv1.Pod + expectedNodes []string + expectError bool }{ // default predicate checker test cases { - name: "default - small pod - no error", - predicateChecker: defaultPredicateChecker, - pod: p900, - expectedNodes: []string{"n1000", "n2000"}, - expectError: false, + name: "default - small pod - no error", + pod: p900, + expectedNodes: []string{"n1000", "n2000"}, + expectError: false, }, { - name: "default - medium pod - no error", - predicateChecker: defaultPredicateChecker, - pod: p1900, - expectedNodes: []string{"n2000"}, - expectError: false, + name: "default - medium pod - no error", + pod: p1900, + expectedNodes: []string{"n2000"}, + expectError: false, }, { - name: "default - large pod - insufficient cpu", - predicateChecker: defaultPredicateChecker, - pod: p2100, - expectError: true, + name: "default - large pod - insufficient cpu", + pod: p2100, + expectError: true, }, // custom predicate checker test cases { - name: "custom - small pod - no error", - predicateChecker: customPredicateChecker, - pod: p900, - expectedNodes: []string{"n1000", 
"n2000"}, - expectError: false, + name: "custom - small pod - no error", + customConfig: customConfig, + pod: p900, + expectedNodes: []string{"n1000", "n2000"}, + expectError: false, }, { - name: "custom - medium pod - no error", - predicateChecker: customPredicateChecker, - pod: p1900, - expectedNodes: []string{"n1000", "n2000"}, - expectError: false, + name: "custom - medium pod - no error", + customConfig: customConfig, + pod: p1900, + expectedNodes: []string{"n1000", "n2000"}, + expectError: false, }, { - name: "custom - large pod - insufficient cpu", - predicateChecker: customPredicateChecker, - pod: p2100, - expectedNodes: []string{"n1000", "n2000"}, - expectError: false, + name: "custom - large pod - insufficient cpu", + customConfig: customConfig, + pod: p2100, + expectedNodes: []string{"n1000", "n2000"}, + expectError: false, }, } - clusterSnapshot := store.NewBasicSnapshotStore() - err = clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(n1000)) + snapshotStore := store.NewBasicSnapshotStore() + err = snapshotStore.AddNodeInfo(framework.NewTestNodeInfo(n1000)) assert.NoError(t, err) - err = clusterSnapshot.AddNodeInfo(framework.NewTestNodeInfo(n2000)) + err = snapshotStore.AddNodeInfo(framework.NewTestNodeInfo(n2000)) assert.NoError(t, err) for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - nodeName, err := tc.predicateChecker.FitsAnyNode(clusterSnapshot, tc.pod) + pluginRunner, err := newTestPluginRunner(snapshotStore, tc.customConfig) + assert.NoError(t, err) + + nodeName, err := pluginRunner.RunFiltersUntilPassingNode(tc.pod, func(info *framework.NodeInfo) bool { return true }) if tc.expectError { assert.Error(t, err) } else { @@ -268,7 +255,6 @@ func TestFitsAnyNode(t *testing.T) { } }) } - } func TestDebugInfo(t *testing.T) { @@ -293,9 +279,9 @@ func TestDebugInfo(t *testing.T) { assert.NoError(t, err) // with default predicate checker - defaultPredicateChecker, err := newTestPredicateChecker() + defaultPluginRunner, err := newTestPluginRunner(clusterSnapshot, nil) assert.NoError(t, err) - predicateErr := defaultPredicateChecker.CheckPredicates(clusterSnapshot, p1, "n1") + predicateErr := defaultPluginRunner.RunFiltersOnNode(p1, "n1") assert.NotNil(t, predicateErr) assert.Contains(t, predicateErr.FailingPredicateReasons(), "node(s) had untolerated taint {SomeTaint: WhyNot?}") assert.Contains(t, predicateErr.Error(), "node(s) had untolerated taint {SomeTaint: WhyNot?}") @@ -319,27 +305,25 @@ func TestDebugInfo(t *testing.T) { customConfig, err := scheduler.ConfigFromPath(customConfigFile) assert.NoError(t, err) - customPredicateChecker, err := newTestPredicateCheckerWithCustomConfig(customConfig) + customPluginRunner, err := newTestPluginRunner(clusterSnapshot, customConfig) assert.NoError(t, err) - predicateErr = customPredicateChecker.CheckPredicates(clusterSnapshot, p1, "n1") + predicateErr = customPluginRunner.RunFiltersOnNode(p1, "n1") assert.Nil(t, predicateErr) } -// newTestPredicateChecker builds test version of PredicateChecker. -func newTestPredicateChecker() (*SchedulerBasedPredicateChecker, error) { - defaultConfig, err := scheduler_config_latest.Default() - if err != nil { - return nil, err +// newTestPluginRunner builds test version of SchedulerPluginRunner. 
+func newTestPluginRunner(snapshotStore clustersnapshot.ClusterSnapshotStore, schedConfig *config.KubeSchedulerConfiguration) (*SchedulerPluginRunner, error) { + if schedConfig == nil { + defaultConfig, err := scheduler_config_latest.Default() + if err != nil { + return nil, err + } + schedConfig = defaultConfig } - return newTestPredicateCheckerWithCustomConfig(defaultConfig) -} -// newTestPredicateCheckerWithCustomConfig builds test version of PredicateChecker with custom scheduler config. -func newTestPredicateCheckerWithCustomConfig(schedConfig *config.KubeSchedulerConfiguration) (*SchedulerBasedPredicateChecker, error) { - // just call out to NewSchedulerBasedPredicateChecker but use fake kubeClient fwHandle, err := framework.NewHandle(informers.NewSharedInformerFactory(clientsetfake.NewSimpleClientset(), 0), schedConfig) if err != nil { return nil, err } - return NewSchedulerBasedPredicateChecker(fwHandle), nil + return NewSchedulerPluginRunner(fwHandle, snapshotStore), nil } diff --git a/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot.go b/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot.go index 9542b7a06535..7d6e62f7dea2 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot.go +++ b/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot.go @@ -20,27 +20,26 @@ import ( apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" ) // PredicateSnapshot implements ClusterSnapshot on top of a ClusterSnapshotStore by using // SchedulerBasedPredicateChecker to check scheduler predicates. type PredicateSnapshot struct { clustersnapshot.ClusterSnapshotStore - predicateChecker *predicatechecker.SchedulerBasedPredicateChecker + pluginRunner *SchedulerPluginRunner } // NewPredicateSnapshot builds a PredicateSnapshot. func NewPredicateSnapshot(snapshotStore clustersnapshot.ClusterSnapshotStore, fwHandle *framework.Handle) *PredicateSnapshot { return &PredicateSnapshot{ ClusterSnapshotStore: snapshotStore, - predicateChecker: predicatechecker.NewSchedulerBasedPredicateChecker(fwHandle), + pluginRunner: NewSchedulerPluginRunner(fwHandle, snapshotStore), } } // SchedulePod adds pod to the snapshot and schedules it to given node. func (s *PredicateSnapshot) SchedulePod(pod *apiv1.Pod, nodeName string) clustersnapshot.SchedulingError { - if schedErr := s.predicateChecker.CheckPredicates(s, pod, nodeName); schedErr != nil { + if schedErr := s.pluginRunner.RunFiltersOnNode(pod, nodeName); schedErr != nil { return schedErr } if err := s.ClusterSnapshotStore.ForceAddPod(pod, nodeName); err != nil { @@ -51,7 +50,7 @@ func (s *PredicateSnapshot) SchedulePod(pod *apiv1.Pod, nodeName string) cluster // SchedulePodOnAnyNodeMatching adds pod to the snapshot and schedules it to any node matching the provided function. 
func (s *PredicateSnapshot) SchedulePodOnAnyNodeMatching(pod *apiv1.Pod, anyNodeMatching func(*framework.NodeInfo) bool) (string, clustersnapshot.SchedulingError) { - nodeName, schedErr := s.predicateChecker.FitsAnyNodeMatching(s, pod, anyNodeMatching) + nodeName, schedErr := s.pluginRunner.RunFiltersUntilPassingNode(pod, anyNodeMatching) if schedErr != nil { return "", schedErr } @@ -68,5 +67,5 @@ func (s *PredicateSnapshot) UnschedulePod(namespace string, podName string, node // CheckPredicates checks whether scheduler predicates pass for the given pod on the given node. func (s *PredicateSnapshot) CheckPredicates(pod *apiv1.Pod, nodeName string) clustersnapshot.SchedulingError { - return s.predicateChecker.CheckPredicates(s, pod, nodeName) + return s.pluginRunner.RunFiltersOnNode(pod, nodeName) } diff --git a/cluster-autoscaler/simulator/predicatechecker/schedulerbased.go b/cluster-autoscaler/simulator/predicatechecker/schedulerbased.go deleted file mode 100644 index 6672d0c9f6e4..000000000000 --- a/cluster-autoscaler/simulator/predicatechecker/schedulerbased.go +++ /dev/null @@ -1,146 +0,0 @@ -/* -Copyright 2016 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package predicatechecker - -import ( - "context" - "fmt" - "strings" - - "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" - "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" - - apiv1 "k8s.io/api/core/v1" - v1listers "k8s.io/client-go/listers/core/v1" - "k8s.io/klog/v2" - schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" -) - -// SchedulerBasedPredicateChecker checks whether all required predicates pass for given Pod and Node. -// The verification is done by calling out to scheduler code. -type SchedulerBasedPredicateChecker struct { - fwHandle *framework.Handle - nodeLister v1listers.NodeLister - podLister v1listers.PodLister - lastIndex int -} - -// NewSchedulerBasedPredicateChecker builds scheduler based PredicateChecker. -func NewSchedulerBasedPredicateChecker(fwHandle *framework.Handle) *SchedulerBasedPredicateChecker { - return &SchedulerBasedPredicateChecker{fwHandle: fwHandle} -} - -// FitsAnyNode checks if the given pod can be placed on any of the given nodes. -func (p *SchedulerBasedPredicateChecker) FitsAnyNode(clusterSnapshot clustersnapshot.ClusterSnapshotStore, pod *apiv1.Pod) (string, clustersnapshot.SchedulingError) { - return p.FitsAnyNodeMatching(clusterSnapshot, pod, func(*framework.NodeInfo) bool { - return true - }) -} - -// FitsAnyNodeMatching checks if the given pod can be placed on any of the given nodes matching the provided function. 
-func (p *SchedulerBasedPredicateChecker) FitsAnyNodeMatching(clusterSnapshot clustersnapshot.ClusterSnapshotStore, pod *apiv1.Pod, nodeMatches func(*framework.NodeInfo) bool) (string, clustersnapshot.SchedulingError) { - if clusterSnapshot == nil { - return "", clustersnapshot.NewSchedulingInternalError(pod, "ClusterSnapshot not provided") - } - - nodeInfosList, err := clusterSnapshot.ListNodeInfos() - if err != nil { - // This should never happen. - // - // Scheduler requires interface returning error, but no implementation - // of ClusterSnapshot ever does it. - klog.Errorf("Error obtaining nodeInfos from schedulerLister") - return "", clustersnapshot.NewSchedulingInternalError(pod, "error obtaining nodeInfos from schedulerLister") - } - - p.fwHandle.DelegatingLister.UpdateDelegate(clusterSnapshot) - defer p.fwHandle.DelegatingLister.ResetDelegate() - - state := schedulerframework.NewCycleState() - preFilterResult, preFilterStatus, _ := p.fwHandle.Framework.RunPreFilterPlugins(context.TODO(), state, pod) - if !preFilterStatus.IsSuccess() { - return "", clustersnapshot.NewFailingPredicateError(pod, preFilterStatus.Plugin(), preFilterStatus.Reasons(), "PreFilter failed", "") - } - - for i := range nodeInfosList { - nodeInfo := nodeInfosList[(p.lastIndex+i)%len(nodeInfosList)] - if !nodeMatches(nodeInfo) { - continue - } - - if !preFilterResult.AllNodes() && !preFilterResult.NodeNames.Has(nodeInfo.Node().Name) { - continue - } - - // Be sure that the node is schedulable. - if nodeInfo.Node().Spec.Unschedulable { - continue - } - - filterStatus := p.fwHandle.Framework.RunFilterPlugins(context.TODO(), state, pod, nodeInfo.ToScheduler()) - if filterStatus.IsSuccess() { - p.lastIndex = (p.lastIndex + i + 1) % len(nodeInfosList) - return nodeInfo.Node().Name, nil - } - } - return "", clustersnapshot.NewNoNodesPassingPredicatesFoundError(pod) -} - -// CheckPredicates checks if the given pod can be placed on the given node. 
-func (p *SchedulerBasedPredicateChecker) CheckPredicates(clusterSnapshot clustersnapshot.ClusterSnapshotStore, pod *apiv1.Pod, nodeName string) clustersnapshot.SchedulingError { - if clusterSnapshot == nil { - return clustersnapshot.NewSchedulingInternalError(pod, "ClusterSnapshot not provided") - } - nodeInfo, err := clusterSnapshot.GetNodeInfo(nodeName) - if err != nil { - return clustersnapshot.NewSchedulingInternalError(pod, fmt.Sprintf("error obtaining NodeInfo for name %q: %v", nodeName, err)) - } - - p.fwHandle.DelegatingLister.UpdateDelegate(clusterSnapshot) - defer p.fwHandle.DelegatingLister.ResetDelegate() - - state := schedulerframework.NewCycleState() - _, preFilterStatus, _ := p.fwHandle.Framework.RunPreFilterPlugins(context.TODO(), state, pod) - if !preFilterStatus.IsSuccess() { - return clustersnapshot.NewFailingPredicateError(pod, preFilterStatus.Plugin(), preFilterStatus.Reasons(), "PreFilter failed", "") - } - - filterStatus := p.fwHandle.Framework.RunFilterPlugins(context.TODO(), state, pod, nodeInfo.ToScheduler()) - - if !filterStatus.IsSuccess() { - filterName := filterStatus.Plugin() - filterReasons := filterStatus.Reasons() - unexpectedErrMsg := "" - if !filterStatus.IsRejected() { - unexpectedErrMsg = fmt.Sprintf("unexpected filter status %q", filterStatus.Code().String()) - } - return clustersnapshot.NewFailingPredicateError(pod, filterName, filterReasons, unexpectedErrMsg, p.failingFilterDebugInfo(filterName, nodeInfo)) - } - - return nil -} - -func (p *SchedulerBasedPredicateChecker) failingFilterDebugInfo(filterName string, nodeInfo *framework.NodeInfo) string { - infoParts := []string{fmt.Sprintf("nodeName: %q", nodeInfo.Node().Name)} - - switch filterName { - case "TaintToleration": - infoParts = append(infoParts, fmt.Sprintf("nodeTaints: %#v", nodeInfo.Node().Spec.Taints)) - } - - return strings.Join(infoParts, ", ") -}
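For orientation, below is a minimal sketch (not part of the patch) of how the pieces introduced above are expected to fit together after this change: a framework.Handle is built once, a snapshot store is wrapped by SchedulerPluginRunner (directly, or indirectly via PredicateSnapshot), and the PreFilter/Filter phases are driven through RunFiltersOnNode / RunFiltersUntilPassingNode. The wiring mirrors newTestPluginRunner and the plugin_runner tests; the import paths for the test helpers (BuildTestNode, BuildTestPod, the default scheduler config) are assumptions, not confirmed by this patch.

// Sketch only: assumed import paths for the helpers are marked below.
package example

import (
	"fmt"

	"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/predicate"
	"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/store"
	"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
	. "k8s.io/autoscaler/cluster-autoscaler/utils/test" // assumed home of BuildTestNode / BuildTestPod

	"k8s.io/client-go/informers"
	clientsetfake "k8s.io/client-go/kubernetes/fake"
	scheduler_config_latest "k8s.io/kubernetes/pkg/scheduler/apis/config/latest" // assumed path
)

func runExample() error {
	// Initialize the scheduler framework once, via the Handle extracted in the first commit.
	schedConfig, err := scheduler_config_latest.Default()
	if err != nil {
		return err
	}
	fwHandle, err := framework.NewHandle(informers.NewSharedInformerFactory(clientsetfake.NewSimpleClientset(), 0), schedConfig)
	if err != nil {
		return err
	}

	// Populate a snapshot store with one Node, then run PreFilter+Filter for a Pod against it.
	snapshotStore := store.NewBasicSnapshotStore()
	if err := snapshotStore.AddNodeInfo(framework.NewTestNodeInfo(BuildTestNode("n1000", 1000, 2000000))); err != nil {
		return err
	}
	runner := predicate.NewSchedulerPluginRunner(fwHandle, snapshotStore)
	if schedErr := runner.RunFiltersOnNode(BuildTestPod("p600", 600, 500000), "n1000"); schedErr != nil {
		return fmt.Errorf("pod does not fit: %v", schedErr)
	}

	// The same runner backs PredicateSnapshot, which both checks the filters and mutates the store.
	snapshot := predicate.NewPredicateSnapshot(snapshotStore, fwHandle)
	nodeName, schedErr := snapshot.SchedulePodOnAnyNodeMatching(BuildTestPod("p450", 450, 500000), func(*framework.NodeInfo) bool { return true })
	if schedErr != nil {
		return fmt.Errorf("no node found: %v", schedErr)
	}
	fmt.Println("scheduled on", nodeName)
	return nil
}

The sketch reflects the division of responsibilities after this commit: callers no longer hold a PredicateChecker; they either drive the plugin phases directly through SchedulerPluginRunner or go through PredicateSnapshot, which combines the filter checks with the snapshot mutation.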