Commit c808ce8

galal-hussein authored and brandond committed
Fixes for 1.32
Signed-off-by: galal-hussein <[email protected]>
Signed-off-by: Brad Davidson <[email protected]>
1 parent 92892cb commit c808ce8

11 files changed: +137 -111 lines

manifests/rolebindings.yaml (+8)

@@ -23,6 +23,14 @@ rules:
       - nodes
     verbs:
       - get
+      - list
+      - watch
+  - apiGroups:
+      - ""
+    resources:
+      - nodes/status
+    verbs:
+      - patch
   - apiGroups:
       - ""
     resources:
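These new rules grant the agent's k3s-controller identity list and watch on nodes, plus patch on nodes/status, which the flannel changes below rely on. As an illustration (not part of the commit), the grants can be confirmed with a client-go SelfSubjectAccessReview; the kubeconfig path below is an assumed placeholder:

package main

import (
	"context"
	"fmt"

	authorizationv1 "k8s.io/api/authorization/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	// Assumed path to the agent's k3s-controller kubeconfig.
	config, err := clientcmd.BuildConfigFromFlags("", "/var/lib/rancher/k3s/agent/k3scontroller.kubeconfig")
	if err != nil {
		panic(err)
	}
	client := kubernetes.NewForConfigOrDie(config)

	// Check each verb the updated role should now grant.
	for _, attrs := range []authorizationv1.ResourceAttributes{
		{Verb: "list", Resource: "nodes"},
		{Verb: "watch", Resource: "nodes"},
		{Verb: "patch", Resource: "nodes", Subresource: "status"},
	} {
		attrs := attrs
		sar := &authorizationv1.SelfSubjectAccessReview{
			Spec: authorizationv1.SelfSubjectAccessReviewSpec{ResourceAttributes: &attrs},
		}
		resp, err := client.AuthorizationV1().SelfSubjectAccessReviews().Create(context.Background(), sar, metav1.CreateOptions{})
		if err != nil {
			panic(err)
		}
		fmt.Printf("%s nodes/%s allowed=%v\n", attrs.Verb, attrs.Subresource, resp.Status.Allowed)
	}
}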

pkg/agent/flannel/setup.go (+22 -7)

@@ -9,10 +9,12 @@ import (
 	goruntime "runtime"
 	"strings"
 
-	"github.com/k3s-io/k3s/pkg/agent/util"
+	agentutil "github.com/k3s-io/k3s/pkg/agent/util"
 	"github.com/k3s-io/k3s/pkg/daemons/config"
+	"github.com/k3s-io/k3s/pkg/util"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
+	authorizationv1 "k8s.io/api/authorization/v1"
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/fields"
@@ -64,9 +66,22 @@ func Prepare(ctx context.Context, nodeConfig *config.Node) error {
 	return createFlannelConf(nodeConfig)
 }
 
-func Run(ctx context.Context, nodeConfig *config.Node, nodes typedcorev1.NodeInterface) error {
+func Run(ctx context.Context, nodeConfig *config.Node) error {
 	logrus.Infof("Starting flannel with backend %s", nodeConfig.FlannelBackend)
-	if err := waitForPodCIDR(ctx, nodeConfig.AgentConfig.NodeName, nodes); err != nil {
+
+	if err := util.WaitForRBACReady(ctx, nodeConfig.AgentConfig.KubeConfigK3sController, util.DefaultAPIServerReadyTimeout, authorizationv1.ResourceAttributes{
+		Verb:     "list",
+		Resource: "nodes",
+	}, ""); err != nil {
+		return errors.Wrap(err, "flannel failed to wait for RBAC")
+	}
+
+	coreClient, err := util.GetClientSet(nodeConfig.AgentConfig.KubeConfigK3sController)
+	if err != nil {
+		return err
+	}
+
+	if err := waitForPodCIDR(ctx, nodeConfig.AgentConfig.NodeName, coreClient.CoreV1().Nodes()); err != nil {
 		return errors.Wrap(err, "flannel failed to wait for PodCIDR assignment")
 	}
 
@@ -75,7 +90,7 @@ func Run(ctx context.Context, nodeConfig *config.Node, nodes typedcorev1.NodeInterface) error {
 		return errors.Wrap(err, "failed to check netMode for flannel")
 	}
 	go func() {
-		err := flannel(ctx, nodeConfig.FlannelIface, nodeConfig.FlannelConfFile, nodeConfig.AgentConfig.KubeConfigKubelet, nodeConfig.FlannelIPv6Masq, netMode)
+		err := flannel(ctx, nodeConfig.FlannelIface, nodeConfig.FlannelConfFile, nodeConfig.AgentConfig.KubeConfigK3sController, nodeConfig.FlannelIPv6Masq, netMode)
 		if err != nil && !errors.Is(err, context.Canceled) {
 			logrus.Errorf("flannel exited: %v", err)
 			os.Exit(1)
@@ -123,7 +138,7 @@ func createCNIConf(dir string, nodeConfig *config.Node) error {
 
 	if nodeConfig.AgentConfig.FlannelCniConfFile != "" {
 		logrus.Debugf("Using %s as the flannel CNI conf", nodeConfig.AgentConfig.FlannelCniConfFile)
-		return util.CopyFile(nodeConfig.AgentConfig.FlannelCniConfFile, p, false)
+		return agentutil.CopyFile(nodeConfig.AgentConfig.FlannelCniConfFile, p, false)
 	}
 
 	cniConfJSON := cniConf
@@ -138,7 +153,7 @@ func createCNIConf(dir string, nodeConfig *config.Node) error {
 		cniConfJSON = strings.ReplaceAll(cniConfJSON, "%SERVICE_CIDR%", nodeConfig.AgentConfig.ServiceCIDR.String())
 	}
 
-	return util.WriteFile(p, cniConfJSON)
+	return agentutil.WriteFile(p, cniConfJSON)
 }
 
 func createFlannelConf(nodeConfig *config.Node) error {
@@ -235,7 +250,7 @@ func createFlannelConf(nodeConfig *config.Node) error {
 	confJSON = strings.ReplaceAll(confJSON, "%backend%", backendConf)
 
 	logrus.Debugf("The flannel configuration is %s", confJSON)
-	return util.WriteFile(nodeConfig.FlannelConfFile, confJSON)
+	return agentutil.WriteFile(nodeConfig.FlannelConfFile, confJSON)
 }
 
 // findNetMode returns the mode (ipv4, ipv6 or dual-stack) in which flannel is operating
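Run now blocks on util.WaitForRBACReady before building its client, so flannel no longer races the RBAC grant added in manifests/rolebindings.yaml above. That helper and DefaultAPIServerReadyTimeout live in pkg/util and are not part of this diff; the following is only a sketch of the general shape such a helper could take, not the actual implementation:

package util

import (
	"context"
	"time"

	authorizationv1 "k8s.io/api/authorization/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

// WaitForRBACReady polls a SelfSubjectAccessReview built from attrs until the
// identity in kubeConfig is allowed to perform the action, or timeout expires.
// Sketch only; the real helper in pkg/util may differ.
func WaitForRBACReady(ctx context.Context, kubeConfig string, timeout time.Duration, attrs authorizationv1.ResourceAttributes, namespace string) error {
	restConfig, err := clientcmd.BuildConfigFromFlags("", kubeConfig)
	if err != nil {
		return err
	}
	client, err := kubernetes.NewForConfig(restConfig)
	if err != nil {
		return err
	}
	if namespace != "" {
		attrs.Namespace = namespace
	}
	return wait.PollUntilContextTimeout(ctx, time.Second, timeout, true, func(ctx context.Context) (bool, error) {
		sar := &authorizationv1.SelfSubjectAccessReview{
			Spec: authorizationv1.SelfSubjectAccessReviewSpec{ResourceAttributes: &attrs},
		}
		resp, err := client.AuthorizationV1().SelfSubjectAccessReviews().Create(ctx, sar, metav1.CreateOptions{})
		if err != nil {
			return false, nil // transient API error: keep polling
		}
		return resp.Status.Allowed, nil
	})
}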

pkg/agent/run.go (+4 -3)

@@ -177,17 +177,18 @@ func run(ctx context.Context, cfg cmds.Agent, proxy proxy.Proxy) error {
 		return errors.Wrap(err, "failed to wait for apiserver ready")
 	}
 
-	coreClient, err := util.GetClientSet(nodeConfig.AgentConfig.KubeConfigKubelet)
+	// Use the kubelet kubeconfig to update annotations on the local node
+	kubeletClient, err := util.GetClientSet(nodeConfig.AgentConfig.KubeConfigKubelet)
 	if err != nil {
 		return err
 	}
 
-	if err := configureNode(ctx, nodeConfig, coreClient.CoreV1().Nodes()); err != nil {
+	if err := configureNode(ctx, nodeConfig, kubeletClient.CoreV1().Nodes()); err != nil {
 		return err
 	}
 
 	if !nodeConfig.NoFlannel {
-		if err := flannel.Run(ctx, nodeConfig, coreClient.CoreV1().Nodes()); err != nil {
+		if err := flannel.Run(ctx, nodeConfig); err != nil {
 			return err
 		}
 	}
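The agent now separates its credentials: configureNode keeps using the kubelet kubeconfig (a kubelet may update its own Node object), while flannel.Run builds its own client from the k3s-controller kubeconfig, whose permissions come from the rolebindings.yaml change above. For reference, a GetClientSet helper is typically a thin wrapper like this sketch (assumed; the real util.GetClientSet may differ):

package util

import (
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

// GetClientSet builds a typed clientset from a kubeconfig file path.
// Sketch only; the real helper in pkg/util may differ.
func GetClientSet(kubeConfigPath string) (kubernetes.Interface, error) {
	restConfig, err := clientcmd.BuildConfigFromFlags("", kubeConfigPath)
	if err != nil {
		return nil, err
	}
	return kubernetes.NewForConfig(restConfig)
}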

pkg/cloudprovider/servicelb.go (+5 -13)

@@ -2,12 +2,13 @@ package cloudprovider
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"sort"
 	"strconv"
 	"strings"
 	"time"
-	"encoding/json"
+
 	"sigs.k8s.io/yaml"
 
 	"github.com/k3s-io/k3s/pkg/util"
@@ -27,11 +28,9 @@
 	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/apimachinery/pkg/util/wait"
-	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	"k8s.io/client-go/util/retry"
 	"k8s.io/cloud-provider/names"
 	servicehelper "k8s.io/cloud-provider/service/helpers"
-	"k8s.io/kubernetes/pkg/features"
 	utilsnet "k8s.io/utils/net"
 	utilsptr "k8s.io/utils/ptr"
 )
@@ -563,7 +562,7 @@ func (k *k3s) newDaemonSet(svc *core.Service) (*apps.DaemonSet, error) {
 					Name: "DEST_IPS",
 					ValueFrom: &core.EnvVarSource{
 						FieldRef: &core.ObjectFieldSelector{
-							FieldPath: getHostIPsFieldPath(),
+							FieldPath: "status.hostIPs",
 						},
 					},
 				},
@@ -710,8 +709,8 @@ func (k *k3s) getPriorityClassName(svc *core.Service) string {
 	return k.LBDefaultPriorityClassName
 }
 
-// getTolerations retrieves the tolerations from a service's annotations. 
-// It parses the tolerations from a JSON or YAML string stored in the annotations. 
+// getTolerations retrieves the tolerations from a service's annotations.
+// It parses the tolerations from a JSON or YAML string stored in the annotations.
 func (k *k3s) getTolerations(svc *core.Service) ([]core.Toleration, error) {
 	tolerationsStr, ok := svc.Annotations[tolerationsAnnotation]
 	if !ok {
@@ -778,10 +777,3 @@ func ingressToString(ingresses []core.LoadBalancerIngress) []string {
 	}
 	return parts
 }
-
-func getHostIPsFieldPath() string {
-	if utilfeature.DefaultFeatureGate.Enabled(features.PodHostIPs) {
-		return "status.hostIPs"
-	}
-	return "status.hostIP"
-}
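The deleted getHostIPsFieldPath helper chose between status.hostIP and status.hostIPs depending on the PodHostIPs feature gate; with that gate GA in the 1.32 timeframe, status.hostIPs can be used unconditionally. For context, a sketch of how a consumer of the resulting DEST_IPS value might parse it; the comma-separated rendering of status.hostIPs by the downward API is an assumption here:

package main

import (
	"fmt"
	"os"
	"strings"
)

func main() {
	// Assumed: the downward API renders status.hostIPs as a comma-separated
	// list, e.g. "10.0.0.5,fd00::5" on a dual-stack node.
	for _, ip := range strings.Split(os.Getenv("DEST_IPS"), ",") {
		fmt.Println("forwarding to host IP:", strings.TrimSpace(ip))
	}
}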

pkg/daemons/agent/agent_linux.go (-2)

@@ -141,8 +141,6 @@ func kubeletArgs(cfg *config.Agent) map[string]string {
 			argsMap["node-ip"] = cfg.NodeIP
 		}
 	} else {
-		// Cluster is using the embedded CCM, we know that the feature-gate will be enabled there as well.
-		argsMap["feature-gates"] = util.AddFeatureGate(argsMap["feature-gates"], "CloudDualStackNodeIPs=true")
 		if nodeIPs := util.JoinIPs(cfg.NodeIPs); nodeIPs != "" {
 			argsMap["node-ip"] = util.JoinIPs(cfg.NodeIPs)
 		}

pkg/daemons/agent/agent_windows.go (-2)

@@ -106,8 +106,6 @@ func kubeletArgs(cfg *config.Agent) map[string]string {
 			argsMap["node-ip"] = cfg.NodeIP
 		}
 	} else {
-		// Cluster is using the embedded CCM, we know that the feature-gate will be enabled there as well.
-		argsMap["feature-gates"] = util.AddFeatureGate(argsMap["feature-gates"], "CloudDualStackNodeIPs=true")
 		if nodeIPs := util.JoinIPs(cfg.NodeIPs); nodeIPs != "" {
 			argsMap["node-ip"] = util.JoinIPs(cfg.NodeIPs)
 		}
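Both kubelet argument builders stop injecting CloudDualStackNodeIPs=true: the gate has graduated and no longer needs to be set as of the 1.32 rebase, and passing an unrecognized gate can prevent the kubelet from starting. For reference, an AddFeatureGate-style helper typically just appends to the comma-separated feature-gates value; this is a sketch, not the real util.AddFeatureGate:

package util

// AddFeatureGate appends a gate to a comma-separated feature-gates value.
// Sketch only; the real helper in pkg/util may differ.
func AddFeatureGate(current, gate string) string {
	if current == "" {
		return gate
	}
	return current + "," + gate
}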

pkg/daemons/control/deps/deps.go (+11 -11)

@@ -29,8 +29,8 @@ import (
 	"github.com/sirupsen/logrus"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/sets"
-	"k8s.io/apiserver/pkg/apis/apiserver"
 	apiserverconfigv1 "k8s.io/apiserver/pkg/apis/apiserver/v1"
+	apiserverv1beta1 "k8s.io/apiserver/pkg/apis/apiserver/v1beta1"
 	"k8s.io/apiserver/pkg/authentication/user"
 	"k8s.io/client-go/util/keyutil"
 )
@@ -785,19 +785,19 @@ func genEncryptionConfigAndState(controlConfig *config.Control) error {
 }
 
 func genEgressSelectorConfig(controlConfig *config.Control) error {
-	var clusterConn apiserver.Connection
+	var clusterConn apiserverv1beta1.Connection
 
 	if controlConfig.EgressSelectorMode == config.EgressSelectorModeDisabled {
-		clusterConn = apiserver.Connection{
-			ProxyProtocol: apiserver.ProtocolDirect,
+		clusterConn = apiserverv1beta1.Connection{
+			ProxyProtocol: apiserverv1beta1.ProtocolDirect,
 		}
 	} else {
-		clusterConn = apiserver.Connection{
-			ProxyProtocol: apiserver.ProtocolHTTPConnect,
-			Transport: &apiserver.Transport{
-				TCP: &apiserver.TCPTransport{
+		clusterConn = apiserverv1beta1.Connection{
+			ProxyProtocol: apiserverv1beta1.ProtocolHTTPConnect,
+			Transport: &apiserverv1beta1.Transport{
+				TCP: &apiserverv1beta1.TCPTransport{
 					URL: fmt.Sprintf("https://%s:%d", controlConfig.BindAddressOrLoopback(false, true), controlConfig.SupervisorPort),
-					TLSConfig: &apiserver.TLSConfig{
+					TLSConfig: &apiserverv1beta1.TLSConfig{
 						CABundle:   controlConfig.Runtime.ServerCA,
 						ClientKey:  controlConfig.Runtime.ClientKubeAPIKey,
 						ClientCert: controlConfig.Runtime.ClientKubeAPICert,
@@ -807,12 +807,12 @@ func genEgressSelectorConfig(controlConfig *config.Control) error {
 		}
 	}
 
-	egressConfig := apiserver.EgressSelectorConfiguration{
+	egressConfig := apiserverv1beta1.EgressSelectorConfiguration{
 		TypeMeta: metav1.TypeMeta{
 			Kind:       "EgressSelectorConfiguration",
 			APIVersion: "apiserver.k8s.io/v1beta1",
 		},
-		EgressSelections: []apiserver.EgressSelection{
+		EgressSelections: []apiserverv1beta1.EgressSelection{
 			{
 				Name:       "cluster",
 				Connection: clusterConn,
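Replacing the internal k8s.io/apiserver/pkg/apis/apiserver types with the versioned v1beta1 ones keeps the Go structs aligned with the apiserver.k8s.io/v1beta1 API version declared in TypeMeta, so the generated file matches the wire format. A self-contained sketch of serializing such a config with sigs.k8s.io/yaml, roughly what deps.go presumably does when writing the egress selector config file:

package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	apiserverv1beta1 "k8s.io/apiserver/pkg/apis/apiserver/v1beta1"
	"sigs.k8s.io/yaml"
)

func main() {
	egressConfig := apiserverv1beta1.EgressSelectorConfiguration{
		TypeMeta: metav1.TypeMeta{
			Kind:       "EgressSelectorConfiguration",
			APIVersion: "apiserver.k8s.io/v1beta1",
		},
		EgressSelections: []apiserverv1beta1.EgressSelection{{
			Name: "cluster",
			// Direct connection, as used when the egress selector is disabled.
			Connection: apiserverv1beta1.Connection{
				ProxyProtocol: apiserverv1beta1.ProtocolDirect,
			},
		}},
	}
	b, err := yaml.Marshal(egressConfig)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(b))
}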

pkg/daemons/control/server.go (+84 -3)

@@ -19,7 +19,14 @@ import (
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 	authorizationv1 "k8s.io/api/authorization/v1"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	k8sruntime "k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/watch"
+	typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
+	"k8s.io/client-go/tools/cache"
+	toolswatch "k8s.io/client-go/tools/watch"
+	cloudproviderapi "k8s.io/cloud-provider/api"
 	logsapi "k8s.io/component-base/logs/api/v1"
 	"k8s.io/kubernetes/pkg/kubeapiserver/authorizer/modes"
 	"k8s.io/kubernetes/pkg/registry/core/node"
@@ -157,8 +164,36 @@ func scheduler(ctx context.Context, cfg *config.Control) error {
 
 	args := config.GetArgs(argsMap, cfg.ExtraSchedulerAPIArgs)
 
+	schedulerNodeReady := make(chan struct{})
+
+	go func() {
+		defer close(schedulerNodeReady)
+
+	apiReadyLoop:
+		for {
+			select {
+			case <-ctx.Done():
+				return
+			case <-cfg.Runtime.APIServerReady:
+				break apiReadyLoop
+			case <-time.After(30 * time.Second):
+				logrus.Infof("Waiting for API server to become available to start kube-scheduler")
+			}
+		}
+
+		// If we're running the embedded cloud controller, wait for it to untaint at least one
+		// node (usually, the local node) before starting the scheduler to ensure that it
+		// finds a node that is ready to run pods during its initial scheduling loop.
+		if !cfg.DisableCCM {
+			logrus.Infof("Waiting for untainted node")
+			if err := waitForUntaintedNode(ctx, runtime.KubeConfigScheduler); err != nil {
+				logrus.Fatalf("failed to wait for untainted node: %v", err)
+			}
+		}
+	}()
+
 	logrus.Infof("Running kube-scheduler %s", config.ArgString(args))
-	return executor.Scheduler(ctx, cfg.Runtime.APIServerReady, args)
+	return executor.Scheduler(ctx, schedulerNodeReady, args)
 }
 
 func apiServer(ctx context.Context, cfg *config.Control) error {
@@ -323,7 +358,6 @@ func cloudControllerManager(ctx context.Context, cfg *config.Control) error {
 		"authentication-kubeconfig":    runtime.KubeConfigCloudController,
 		"node-status-update-frequency": "1m0s",
 		"bind-address":                 cfg.Loopback(false),
-		"feature-gates":                "CloudDualStackNodeIPs=true",
 	}
 	if cfg.NoLeaderElect {
 		argsMap["leader-elect"] = "false"
@@ -359,7 +393,7 @@
 		case <-cfg.Runtime.APIServerReady:
 			break apiReadyLoop
 		case <-time.After(30 * time.Second):
-			logrus.Infof("Waiting for API server to become available")
+			logrus.Infof("Waiting for API server to become available to start cloud-controller-manager")
 		}
 	}
 
@@ -449,3 +483,50 @@ func promise(f func() error) <-chan error {
 	}()
 	return c
 }
+
+// waitForUntaintedNode watches nodes, waiting to find one not tainted as
+// uninitialized by the external cloud provider.
+func waitForUntaintedNode(ctx context.Context, kubeConfig string) error {
+
+	restConfig, err := util.GetRESTConfig(kubeConfig)
+	if err != nil {
+		return err
+	}
+	coreClient, err := typedcorev1.NewForConfig(restConfig)
+	if err != nil {
+		return err
+	}
+	nodes := coreClient.Nodes()
+
+	lw := &cache.ListWatch{
+		ListFunc: func(options metav1.ListOptions) (object k8sruntime.Object, e error) {
+			return nodes.List(ctx, options)
+		},
+		WatchFunc: func(options metav1.ListOptions) (i watch.Interface, e error) {
+			return nodes.Watch(ctx, options)
+		},
+	}
+
+	condition := func(ev watch.Event) (bool, error) {
+		if node, ok := ev.Object.(*v1.Node); ok {
+			return getCloudTaint(node.Spec.Taints) == nil, nil
+		}
+		return false, errors.New("event object not of type v1.Node")
+	}
+
+	if _, err := toolswatch.UntilWithSync(ctx, lw, &v1.Node{}, nil, condition); err != nil {
+		return errors.Wrap(err, "failed to wait for untainted node")
+	}
+	return nil
+}
+
+// getCloudTaint returns the external cloud provider taint, if present.
+// Cribbed from k8s.io/cloud-provider/controllers/node/node_controller.go
+func getCloudTaint(taints []v1.Taint) *v1.Taint {
+	for _, taint := range taints {
+		if taint.Key == cloudproviderapi.TaintExternalCloudProvider {
+			return &taint
+		}
+	}
+	return nil
+}
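The scheduler changes rely on the external cloud provider convention: newly registered nodes carry the node.cloudprovider.kubernetes.io/uninitialized taint (the cloudproviderapi.TaintExternalCloudProvider key) until a cloud controller initializes them. A small self-contained example of the getCloudTaint check added above:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	cloudproviderapi "k8s.io/cloud-provider/api"
)

// getCloudTaint mirrors the helper added in this commit.
func getCloudTaint(taints []v1.Taint) *v1.Taint {
	for _, taint := range taints {
		if taint.Key == cloudproviderapi.TaintExternalCloudProvider {
			return &taint
		}
	}
	return nil
}

func main() {
	// A freshly registered node is tainted until the cloud controller
	// initializes it and removes the taint.
	tainted := []v1.Taint{{
		Key:    cloudproviderapi.TaintExternalCloudProvider,
		Value:  "true",
		Effect: v1.TaintEffectNoSchedule,
	}}
	fmt.Println("tainted node schedulable:", getCloudTaint(tainted) == nil) // false
	fmt.Println("untainted node schedulable:", getCloudTaint(nil) == nil)   // true
}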
