Skip to content

Commit 65b6244

Browse files
fix: fix ethtool issue when trying to skip unsupported interface (#1296)
# Description This PR addresses an issue where the plugin repeatedly logged the same operation not supported error for interfaces on every metrics collection cycle. The root cause ethtool handle is created and closed during each interval, which invalidated any cached (LRU) information on unsupported interfaces. Now, once an interface is identified as unsupported, it won’t repeatedly log the same error. ## Related Issue If this pull request is related to any issue, please mention it here. Additionally, make sure that the issue is assigned to you before submitting this pull request. #1280 ## Checklist - [x] I have read the [contributing documentation](https://retina.sh/docs/contributing). - [x] I signed and signed-off the commits (`git commit -S -s ...`). See [this documentation](https://docs.github.com/en/authentication/managing-commit-signature-verification/about-commit-signature-verification) on signing commits. - [x] I have correctly attributed the author(s) of the code. - [x] I have tested the changes locally. - [x] I have followed the project's style guidelines. - [x] I have updated the documentation, if necessary. - [x] I have added tests, if applicable. ## Screenshots (if applicable) or Testing Completed Please add any relevant screenshots or GIFs to showcase the changes made. Logs before fixing the issue: <img width="851" alt="Before" src="https://github.com/user-attachments/assets/edae5f1e-275e-4de6-9f74-23100dc49600" /> Logs after fixing the issue, the unsupported cache will only log once now (add debug message for test and validate behaviour of LRU, which has been removed in PR): <img width="848" alt="After" src="https://github.com/user-attachments/assets/ffbff4da-431b-4349-bc67-bdb4d7818fb9" /> ## Additional Notes Add any additional notes or context about the pull request here. --- Please refer to the [CONTRIBUTING.md](../CONTRIBUTING.md) file for more information on how to contribute to this project.
1 parent 2600b27 commit 65b6244

5 files changed

+44
-45
lines changed

pkg/plugin/linuxutil/ethtool_handle_linux.go

+2-8
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ import (
88
lru "github.com/hashicorp/golang-lru/v2"
99

1010
"github.com/microsoft/retina/pkg/log"
11-
"go.uber.org/zap"
1211
)
1312

1413
type CachedEthtool struct {
@@ -17,15 +16,10 @@ type CachedEthtool struct {
1716
l *log.ZapLogger
1817
}
1918

20-
func NewCachedEthtool(ethHandle EthtoolInterface, opts *EthtoolOpts) *CachedEthtool {
21-
cache, err := lru.New[string, struct{}](int(opts.limit))
22-
if err != nil {
23-
log.Logger().Error("failed to create LRU cache: ", zap.Error(err))
24-
}
25-
19+
func NewCachedEthtool(ethHandle EthtoolInterface, unsupportedInterfacesCache *lru.Cache[string, struct{}]) *CachedEthtool {
2620
return &CachedEthtool{
2721
EthtoolInterface: ethHandle,
28-
unsupported: cache,
22+
unsupported: unsupportedInterfacesCache,
2923
l: log.Logger().Named(string("EthtoolReader")),
3024
}
3125
}

pkg/plugin/linuxutil/ethtool_stats_linux.go

+6-5
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"net"
88
"strings"
99

10+
lru "github.com/hashicorp/golang-lru/v2"
1011
"github.com/microsoft/retina/pkg/log"
1112
"github.com/microsoft/retina/pkg/metrics"
1213
"github.com/microsoft/retina/pkg/utils"
@@ -21,7 +22,7 @@ type EthtoolReader struct {
2122
ethHandle EthtoolInterface
2223
}
2324

24-
func NewEthtoolReader(opts *EthtoolOpts, ethHandle EthtoolInterface) *EthtoolReader {
25+
func NewEthtoolReader(opts *EthtoolOpts, ethHandle EthtoolInterface, unsupportedInterfacesCache *lru.Cache[string, struct{}]) *EthtoolReader {
2526
if ethHandle == nil {
2627
var err error
2728
ethHandle, err = ethtool.NewEthtool()
@@ -31,7 +32,7 @@ func NewEthtoolReader(opts *EthtoolOpts, ethHandle EthtoolInterface) *EthtoolRea
3132
}
3233
}
3334
// Construct a cached ethtool handle
34-
CachedEthHandle := NewCachedEthtool(ethHandle, opts)
35+
CachedEthHandle := NewCachedEthtool(ethHandle, unsupportedInterfacesCache)
3536
return &EthtoolReader{
3637
l: log.Logger().Named(string("EthtoolReader")),
3738
opts: opts,
@@ -58,8 +59,6 @@ func (er *EthtoolReader) readInterfaceStats() error {
5859
return err
5960
}
6061

61-
defer er.ethHandle.Close()
62-
6362
er.data = &EthtoolStats{
6463
stats: make(map[string]uint64),
6564
}
@@ -78,8 +77,10 @@ func (er *EthtoolReader) readInterfaceStats() error {
7877
if err != nil {
7978
if errors.Is(err, errskip) {
8079
er.l.Debug("Skipping unsupported interface", zap.String("ifacename", i.Name))
80+
} else if strings.Contains(err.Error(), "interface not supported while retrieving stats") {
81+
er.l.Warn("Unsupported interface detected:", zap.String("ifacename", i.Name), zap.Error(err))
8182
} else {
82-
er.l.Error("Error while getting ethtool:", zap.String("ifacename", i.Name), zap.Error(err))
83+
er.l.Error("Error while retrieving stats:", zap.String("ifacename", i.Name), zap.Error(err))
8384
}
8485
continue
8586
}

pkg/plugin/linuxutil/ethtool_stats_linux_test.go

+20-23
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,17 @@ func TestNewEthtool(t *testing.T) {
2929
opts := &EthtoolOpts{
3030
errOrDropKeysOnly: false,
3131
addZeroVal: false,
32-
limit: 10,
3332
}
3433
ctrl := gomock.NewController(t)
3534
defer ctrl.Finish()
3635

36+
unsupportedInterfacesCache, err := lru.New[string, struct{}](10)
37+
if err != nil {
38+
t.Fatal("failed to create cache:", err)
39+
}
40+
3741
ethHandle := NewMockEthtoolInterface(ctrl)
38-
ethReader := NewEthtoolReader(opts, ethHandle)
42+
ethReader := NewEthtoolReader(opts, ethHandle, unsupportedInterfacesCache)
3943
assert.NotNil(t, ethReader)
4044
}
4145

@@ -45,15 +49,19 @@ func TestNewEthtoolWithNil(t *testing.T) {
4549
opts := &EthtoolOpts{
4650
errOrDropKeysOnly: false,
4751
addZeroVal: false,
48-
limit: 10,
4952
}
5053

51-
ethReader := NewEthtoolReader(opts, nil)
54+
unsupportedInterfacesCache, err := lru.New[string, struct{}](10)
55+
if err != nil {
56+
t.Fatal("failed to create cache:", err)
57+
}
58+
59+
ethReader := NewEthtoolReader(opts, nil, unsupportedInterfacesCache)
5260
assert.NotNil(t, ethReader)
5361
}
5462

5563
func TestReadInterfaceStats(t *testing.T) {
56-
globalCache, err := lru.New[string, struct{}](10)
64+
unsupportedInterfacesCache, err := lru.New[string, struct{}](10)
5765
if err != nil {
5866
t.Fatal("failed to create LRU cache: ", err)
5967
}
@@ -74,7 +82,6 @@ func TestReadInterfaceStats(t *testing.T) {
7482
opts: &EthtoolOpts{
7583
errOrDropKeysOnly: false,
7684
addZeroVal: false,
77-
limit: 10,
7885
},
7986
statsReturn: map[string]uint64{
8087
"rx_packets": 1,
@@ -90,32 +97,28 @@ func TestReadInterfaceStats(t *testing.T) {
9097
opts: &EthtoolOpts{
9198
errOrDropKeysOnly: false,
9299
addZeroVal: false,
93-
limit: 10,
94100
},
95101
statsReturn: nil,
96102
statErr: errOther,
97103
result: nil,
98104
wantErr: true,
99105
},
100106
{
101-
name: "test unsported interface",
107+
name: "test unsupported interface",
102108
opts: &EthtoolOpts{
103109
errOrDropKeysOnly: false,
104110
addZeroVal: false,
105-
limit: 10,
106111
},
107112
statsReturn: nil,
108113
statErr: errInterfaceNotSupported,
109-
110-
result: nil,
111-
wantErr: false,
114+
result: nil,
115+
wantErr: false,
112116
},
113117
{
114118
name: "test skipped interface",
115119
opts: &EthtoolOpts{
116120
errOrDropKeysOnly: false,
117121
addZeroVal: false,
118-
limit: 10,
119122
},
120123
statsReturn: nil,
121124
statErr: errInterfaceNotSupported,
@@ -132,15 +135,11 @@ func TestReadInterfaceStats(t *testing.T) {
132135

133136
ethHandle := NewMockEthtoolInterface(ctrl)
134137

135-
cachedEthHandle := NewCachedEthtool(ethHandle, tt.opts)
136-
cachedEthHandle.unsupported = globalCache
137-
138-
ethReader := NewEthtoolReader(tt.opts, cachedEthHandle)
138+
ethReader := NewEthtoolReader(tt.opts, ethHandle, unsupportedInterfacesCache)
139139

140140
assert.NotNil(t, ethReader)
141141

142142
ethHandle.EXPECT().Stats(gomock.Any()).Return(tt.statsReturn, tt.statErr).AnyTimes()
143-
ethHandle.EXPECT().Close().Times(1)
144143
InitalizeMetricsForTesting(ctrl)
145144

146145
if tt.statErr == nil {
@@ -160,13 +159,11 @@ func TestReadInterfaceStats(t *testing.T) {
160159
}
161160

162161
if tt.statErr != nil && errors.Is(tt.statErr, errInterfaceNotSupported) {
163-
assert.NotNil(t, cachedEthHandle.unsupported, "cache should not be nil")
164-
assert.NotEqual(t, 0, cachedEthHandle.unsupported.Len(), "cache should contain interface")
162+
assert.NotNil(t, unsupportedInterfacesCache, "cache should not be nil")
163+
assert.NotEqual(t, 0, unsupportedInterfacesCache.Len(), "cache should contain interface")
165164
} else if tt.statErr != nil && !errors.Is(tt.statErr, errInterfaceNotSupported) {
166-
assert.Equal(t, 0, cachedEthHandle.unsupported.Len(), "cache should not add interface for other errors")
165+
assert.Equal(t, 0, unsupportedInterfacesCache.Len(), "cache should not add interface for other errors")
167166
}
168-
169-
globalCache = cachedEthHandle.unsupported
170167
}
171168
}
172169

pkg/plugin/linuxutil/linuxutil_linux.go

+15-6
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,15 @@ import (
1111
"time"
1212

1313
hubblev1 "github.com/cilium/cilium/pkg/hubble/api/v1"
14+
lru "github.com/hashicorp/golang-lru/v2"
1415
kcfg "github.com/microsoft/retina/pkg/config"
1516
"github.com/microsoft/retina/pkg/log"
1617
"github.com/microsoft/retina/pkg/plugin/registry"
18+
"github.com/pkg/errors"
1719
"github.com/safchain/ethtool"
1820
"go.uber.org/zap"
1921
)
2022

21-
const defaultLimit = 2000
22-
2323
func init() {
2424
registry.Add(name, New)
2525
}
@@ -61,6 +61,14 @@ func (lu *linuxUtil) SetupChannel(chan *hubblev1.Event) error {
6161

6262
func (lu *linuxUtil) run(ctx context.Context) error {
6363
lu.l.Info("Running linuxutil plugin...")
64+
65+
// create a LRU cache to skip unsupported interfaces
66+
unsupportedInterfacesCache, err := lru.New[string, struct{}](int(defaultLimit))
67+
if err != nil {
68+
lu.l.Error("failed to create global LRU cache", zap.Error(err))
69+
return err
70+
}
71+
6472
ticker := time.NewTicker(lu.cfg.MetricsInterval)
6573
defer ticker.Stop()
6674

@@ -93,20 +101,20 @@ func (lu *linuxUtil) run(ctx context.Context) error {
93101
ethtoolOpts := &EthtoolOpts{
94102
errOrDropKeysOnly: true,
95103
addZeroVal: false,
96-
limit: defaultLimit,
97104
}
98105

99106
ethHandle, err := ethtool.NewEthtool()
100107
if err != nil {
101108
lu.l.Error("Error while creating ethHandle: %v\n", zap.Error(err))
102-
return err
109+
return fmt.Errorf("failed to create ethHandle: %w", err)
103110
}
104111

105-
ethReader := NewEthtoolReader(ethtoolOpts, ethHandle)
112+
ethReader := NewEthtoolReader(ethtoolOpts, ethHandle, unsupportedInterfacesCache)
106113
if ethReader == nil {
107114
lu.l.Error("Error while creating ethReader")
108-
return fmt.Errorf("error while creating ethReader")
115+
return errors.New("error while creating ethReader")
109116
}
117+
110118
wg.Add(1)
111119
go func() {
112120
defer wg.Done()
@@ -117,6 +125,7 @@ func (lu *linuxUtil) run(ctx context.Context) error {
117125
}()
118126

119127
wg.Wait()
128+
ethHandle.Close()
120129
}
121130
}
122131
}

pkg/plugin/linuxutil/types_linux.go

+1-3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
)
1010

1111
const name = "linuxutil"
12+
const defaultLimit = 2000
1213

1314
//go:generate go run go.uber.org/mock/[email protected] -source=types_linux.go -destination=linuxutil_mock_generated_linux.go -package=linuxutil
1415
type linuxUtil struct {
@@ -102,9 +103,6 @@ type EthtoolOpts struct {
102103

103104
// when true will include all keys with value 0
104105
addZeroVal bool
105-
106-
// Configurable limit for unsupported interfaces cache
107-
limit uint
108106
}
109107

110108
type EthtoolInterface interface {

0 commit comments

Comments
 (0)