@@ -15,34 +15,37 @@ import (
15
15
"github.com/microsoft/retina/pkg/log"
16
16
fm "github.com/microsoft/retina/pkg/managers/filtermanager"
17
17
"github.com/microsoft/retina/pkg/pubsub"
18
+ "github.com/microsoft/retina/pkg/utils"
18
19
"go.uber.org/zap"
20
+ "k8s.io/client-go/rest"
19
21
kcfg "sigs.k8s.io/controller-runtime/pkg/client/config"
20
22
)
21
23
22
24
// filterManagerRetries is the retry count handed to fm.Init when the watcher
// obtains its filter manager.
const filterManagerRetries = 3

// hostLookupRetries is the retry count handed to utils.Retry when resolving
// the API server host name: 6 retries for a total of 63 seconds.
const hostLookupRetries = 6
25
28
26
29
type ApiServerWatcher struct {
27
- isRunning bool
28
- l * log.ZapLogger
29
- current cache
30
- new cache
31
- apiServerUrl string
32
- hostResolver IHostResolver
33
- filtermanager fm.IFilterManager
30
+ isRunning bool
31
+ l * log.ZapLogger
32
+ current cache
33
+ new cache
34
+ apiServerHostName string
35
+ hostResolver IHostResolver
36
+ filterManager fm.IFilterManager
37
+ restConfig * rest.Config
34
38
}
35
39
36
40
// a is the package-level ApiServerWatcher instance; Watcher creates it on
// first use.
var a *ApiServerWatcher
37
41
38
- // Watcher creates a new apiserver watcher .
42
+ // Watcher creates a new ApiServerWatcher instance .
39
43
func Watcher () * ApiServerWatcher {
40
44
if a == nil {
41
45
a = & ApiServerWatcher {
42
46
isRunning : false ,
43
47
l : log .Logger ().Named ("apiserver-watcher" ),
44
48
current : make (cache ),
45
- apiServerUrl : getHostURL (),
46
49
hostResolver : net .DefaultResolver ,
47
50
}
48
51
}
@@ -56,12 +59,39 @@ func (a *ApiServerWatcher) Init(ctx context.Context) error {
56
59
return nil
57
60
}
58
61
59
- a .filtermanager = getFilterManager ()
62
+ // Get filter manager.
63
+ if a .filterManager == nil {
64
+ var err error
65
+ a .filterManager , err = fm .Init (filterManagerRetries )
66
+ if err != nil {
67
+ a .l .Error ("failed to init filter manager" , zap .Error (err ))
68
+ return fmt .Errorf ("failed to init filter manager: %w" , err )
69
+ }
70
+ }
71
+
72
+ // Get kubeconfig.
73
+ if a .restConfig == nil {
74
+ config , err := kcfg .GetConfig ()
75
+ if err != nil {
76
+ a .l .Error ("failed to get kubeconfig" , zap .Error (err ))
77
+ return fmt .Errorf ("failed to get kubeconfig: %w" , err )
78
+ }
79
+ a .restConfig = config
80
+ }
81
+
82
+ hostName , err := a .getHostName ()
83
+ if err != nil {
84
+ a .l .Error ("failed to get host name" , zap .Error (err ))
85
+ return fmt .Errorf ("failed to get host name: %w" , err )
86
+ }
87
+ a .apiServerHostName = hostName
88
+
60
89
a .isRunning = true
90
+
61
91
return nil
62
92
}
63
93
64
- // Stop the apiserver watcher .
94
+ // Stop stops the ApiServerWatcher .
65
95
func (a * ApiServerWatcher ) Stop (ctx context.Context ) error {
66
96
if ! a .isRunning {
67
97
a .l .Info ("apiserver watcher is not running" )
@@ -74,61 +104,57 @@ func (a *ApiServerWatcher) Stop(ctx context.Context) error {
74
104
func (a * ApiServerWatcher ) Refresh (ctx context.Context ) error {
75
105
err := a .initNewCache (ctx )
76
106
if err != nil {
107
+ a .l .Error ("failed to initialize new cache" , zap .Error (err ))
77
108
return err
78
109
}
79
- // Compare the new ips with the old ones.
110
+
111
+ // Compare the new IPs with the old ones.
80
112
created , deleted := a .diffCache ()
81
113
82
- // Publish the new ips.
83
- createdIps := []net.IP {}
84
- deletedIps := []net.IP {}
114
+ createdIPs := []net.IP {}
115
+ deletedIPs := []net.IP {}
85
116
86
117
for _ , v := range created {
87
- a .l .Info ("New Apiserver ips :" , zap .Any ("ip" , v ))
118
+ a .l .Info ("New Apiserver IPs :" , zap .Any ("ip" , v ))
88
119
ip := net .ParseIP (v .(string )).To4 ()
89
- createdIps = append (createdIps , ip )
120
+ createdIPs = append (createdIPs , ip )
90
121
}
91
122
92
123
for _ , v := range deleted {
93
- a .l .Info ("Deleted Apiserver ips :" , zap .Any ("ip" , v ))
124
+ a .l .Info ("Deleted Apiserver IPs :" , zap .Any ("ip" , v ))
94
125
ip := net .ParseIP (v .(string )).To4 ()
95
- deletedIps = append (deletedIps , ip )
126
+ deletedIPs = append (deletedIPs , ip )
96
127
}
97
128
98
- if len (createdIps ) > 0 {
99
- // Publish the new ips.
100
- a .publish (createdIps , cc .EventTypeAddAPIServerIPs )
101
- // Add ips to filter manager if any.
102
- err := a .filtermanager .AddIPs (createdIps , "apiserver-watcher" , fm.RequestMetadata {RuleID : "apiserver-watcher" })
129
+ if len (createdIPs ) > 0 {
130
+ a .publish (createdIPs , cc .EventTypeAddAPIServerIPs )
131
+ err := a .filterManager .AddIPs (createdIPs , "apiserver-watcher" , fm.RequestMetadata {RuleID : "apiserver-watcher" })
103
132
if err != nil {
104
- a .l .Error ("Failed to add ips to filter manager" , zap .Error (err ))
133
+ a .l .Error ("Failed to add IPs to filter manager" , zap .Error (err ))
105
134
}
106
135
}
107
136
108
- if len (deletedIps ) > 0 {
109
- // Publish the deleted ips.
110
- a .publish (deletedIps , cc .EventTypeDeleteAPIServerIPs )
111
- // Delete ips from filter manager if any.
112
- err := a .filtermanager .DeleteIPs (deletedIps , "apiserver-watcher" , fm.RequestMetadata {RuleID : "apiserver-watcher" })
137
+ if len (deletedIPs ) > 0 {
138
+ a .publish (deletedIPs , cc .EventTypeDeleteAPIServerIPs )
139
+ err := a .filterManager .DeleteIPs (deletedIPs , "apiserver-watcher" , fm.RequestMetadata {RuleID : "apiserver-watcher" })
113
140
if err != nil {
114
- a .l .Error ("Failed to delete ips from filter manager" , zap .Error (err ))
141
+ a .l .Error ("Failed to delete IPs from filter manager" , zap .Error (err ))
115
142
}
116
143
}
117
144
118
- // update the current cache and reset the new cache
119
145
a .current = a .new .deepcopy ()
120
146
a .new = nil
121
147
122
148
return nil
123
149
}
124
150
125
151
func (a * ApiServerWatcher ) initNewCache (ctx context.Context ) error {
126
- ips , err := a .getApiServerIPs (ctx )
152
+ ips , err := a .resolveIPs (ctx , a . apiServerHostName )
127
153
if err != nil {
128
- return err
154
+ return fmt . Errorf ( "failed to resolve IPs: %w" , err )
129
155
}
130
156
131
- // Reset the new cache.
157
+ // Reset new cache.
132
158
a .new = make (cache )
133
159
for _ , ip := range ips {
134
160
a .new [ip ] = struct {}{}
@@ -137,14 +163,14 @@ func (a *ApiServerWatcher) initNewCache(ctx context.Context) error {
137
163
}
138
164
139
165
func (a * ApiServerWatcher ) diffCache () (created , deleted []interface {}) {
140
- // check if there are new ips
166
+ // Check if there are any new IPs.
141
167
for k := range a .new {
142
168
if _ , ok := a .current [k ]; ! ok {
143
169
created = append (created , k )
144
170
}
145
171
}
146
172
147
- // check if there are deleted ips
173
+ // Check if there are any deleted IPs.
148
174
for k := range a .current {
149
175
if _ , ok := a .new [k ]; ! ok {
150
176
deleted = append (deleted , k )
@@ -153,53 +179,35 @@ func (a *ApiServerWatcher) diffCache() (created, deleted []interface{}) {
153
179
return
154
180
}
155
181
156
- func (a * ApiServerWatcher ) getApiServerIPs (ctx context.Context ) ([]string , error ) {
157
- // Parse the URL
158
- host , err := a .retrieveApiServerHostname ()
159
- if err != nil {
160
- return nil , err
161
- }
162
-
163
- // Get the ips for the host
164
- ips , err := a .resolveIPs (ctx , host )
165
- if err != nil {
166
- return nil , err
167
- }
168
-
169
- return ips , nil
170
- }
171
-
172
- // parse url to extract hostname
173
- func (a * ApiServerWatcher ) retrieveApiServerHostname () (string , error ) {
174
- // Parse the URL
175
- url , err := url .Parse (a .apiServerUrl )
176
- if err != nil {
177
- fmt .Println ("Failed to parse URL:" , err )
178
- return "" , err
179
- }
180
-
181
- // Remove the scheme (http:// or https://) and port from the host
182
- host := strings .TrimPrefix (url .Host , "www." )
183
- colonIndex := strings .IndexByte (host , ':' )
184
- if colonIndex != - 1 {
185
- host = host [:colonIndex ]
182
+ func (a * ApiServerWatcher ) resolveIPs (ctx context.Context , host string ) ([]string , error ) {
183
+ // perform a DNS lookup for the host URL using the net.DefaultResolver which uses the local resolver.
184
+ // Possible errors here are:
185
+ // - Canceled context: The context was canceled before the lookup completed.
186
+ // -DNS server errors ie NXDOMAIN, SERVFAIL.
187
+ // - Network errors ie timeout, unreachable DNS server.
188
+ // -Other DNS-related errors encapsulated in a DNSError.
189
+ var hostIPs []string
190
+ var err error
191
+
192
+ retryFunc := func () error {
193
+ hostIPs , err = a .hostResolver .LookupHost (ctx , host )
194
+ if err != nil {
195
+ return fmt .Errorf ("APIServer LookupHost failed: %w" , err )
196
+ }
197
+ return nil
186
198
}
187
- return host , nil
188
- }
189
199
190
- // Resolve the list of ips for the given host
191
- func (a * ApiServerWatcher ) resolveIPs (ctx context.Context , host string ) ([]string , error ) {
192
- hostIps , err := a .hostResolver .LookupHost (ctx , host )
200
+ // Retry the lookup for hostIPs in case of failure.
201
+ err = utils .Retry (retryFunc , hostLookupRetries )
193
202
if err != nil {
194
203
return nil , err
195
204
}
196
205
197
- if len (hostIps ) == 0 {
198
- a .l .Error ("no ips found for host" , zap .String ("host" , host ))
199
- return nil , fmt .Errorf ("no ips found for host %s" , host )
206
+ if len (hostIPs ) == 0 {
207
+ a .l .Debug ("no IPs found for host" , zap .String ("host" , host ))
200
208
}
201
209
202
- return hostIps , nil
210
+ return hostIPs , nil
203
211
}
204
212
205
213
func (a * ApiServerWatcher ) publish (netIPs []net.IP , eventType cc.EventType ) {
@@ -212,30 +220,23 @@ func (a *ApiServerWatcher) publish(netIPs []net.IP, eventType cc.EventType) {
212
220
ipsToPublish = append (ipsToPublish , ip .String ())
213
221
}
214
222
ps := pubsub .New ()
215
- ps .Publish (common .PubSubAPIServer ,
216
- cc .NewCacheEvent (
217
- eventType ,
218
- common .NewAPIServerObject (ipsToPublish ),
219
- ),
220
- )
223
+ ps .Publish (common .PubSubAPIServer , cc .NewCacheEvent (eventType , common .NewAPIServerObject (ipsToPublish )))
221
224
a .l .Debug ("Published event" , zap .Any ("eventType" , eventType ), zap .Any ("netIPs" , ipsToPublish ))
222
225
}
223
226
224
- // getHostURL returns the host url from the config.
225
- func getHostURL () string {
226
- config , err := kcfg .GetConfig ()
227
+ func (a * ApiServerWatcher ) getHostName () (string , error ) {
228
+ // Parse the host URL.
229
+ hostURL := a .restConfig .Host
230
+ parsedURL , err := url .ParseRequestURI (hostURL )
227
231
if err != nil {
228
- log .Logger ().Error ("failed to get config" , zap .Error (err ))
229
- return ""
232
+ log .Logger ().Error ("failed to parse URL" , zap . String ( "url" , hostURL ) , zap .Error (err ))
233
+ return "" , fmt . Errorf ( "failed to parse URL: %w" , err )
230
234
}
231
- return config .Host
232
- }
233
235
234
- // Get FilterManager
235
- func getFilterManager () * fm.FilterManager {
236
- f , err := fm .Init (filterManagerRetries )
237
- if err != nil {
238
- a .l .Error ("failed to init filter manager" , zap .Error (err ))
236
+ // Extract the host name from the URL.
237
+ host := strings .TrimPrefix (parsedURL .Host , "www." )
238
+ if colonIndex := strings .IndexByte (host , ':' ); colonIndex != - 1 {
239
+ host = host [:colonIndex ]
239
240
}
240
- return f
241
+ return host , nil
241
242
}
0 commit comments