
Commit 248495c

structlogging: restructure hot range logger for testability
This change does a few things to improve the testability of the hot
ranges logger. This includes:

__Logger__:
* The introduction of a shouldLog function, which determines whether
  the system should log or not.
* The breakout of the logging action into its own function.

__Tests__:
* The addition of a setup and teardown utility for the hot range
  logger.
* The breakout of the default case and a timed case.
* Tests for hot ranges which exist in the system from the start.

Fixes: cockroachdb#142995
Epic: CRDB-43150

Release note: None
1 parent 8c96475 commit 248495c
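The payoff of the restructure is easiest to see from the testing side: once the decision to log (shouldLog) is separated from the action (logHotRanges), the decision can be exercised in isolation. Below is a minimal Go sketch of that idea using simplified stand-in types, not the real structlogging scheduler; settings, scheduler, and the logFn callback are all illustrative.

package main

import "fmt"

// Simplified stand-ins for the scheduler in the diffs below. The real
// types live in pkg/server/structlogging; these names are illustrative.
type settings struct{ enabled bool }

type scheduler struct{ st *settings }

// shouldLog mirrors the new predicate: the decision to log is a pure
// function of settings, so it can be tested without starting a cluster.
func (s *scheduler) shouldLog() bool { return s.st.enabled }

// maybeLogHotRanges couples the check with the action, as in the diff.
func (s *scheduler) maybeLogHotRanges(logFn func()) {
	if s.shouldLog() {
		logFn()
	}
}

func main() {
	for _, enabled := range []bool{true, false} {
		s := &scheduler{st: &settings{enabled: enabled}}
		logged := false
		s.maybeLogHotRanges(func() { logged = true })
		fmt.Printf("enabled=%v -> logged=%v\n", enabled, logged)
	}
}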

3 files changed: +96 -119 lines changed

pkg/server/structlogging/BUILD.bazel (-5)

@@ -32,25 +32,20 @@ go_test(
     deps = [
         ":structlogging",
         "//pkg/base",
-        "//pkg/keys",
         "//pkg/kv/kvserver",
         "//pkg/kv/kvserver/allocator/plan",
         "//pkg/security/securityassets",
         "//pkg/security/securitytest",
         "//pkg/server",
-        "//pkg/sql/catalog/desctestutils",
         "//pkg/testutils",
         "//pkg/testutils/serverutils",
         "//pkg/testutils/skip",
-        "//pkg/testutils/sqlutils",
         "//pkg/testutils/testcluster",
         "//pkg/util/leaktest",
         "//pkg/util/log",
         "//pkg/util/log/eventpb",
         "//pkg/util/log/logpb",
         "//pkg/util/randutil",
         "//pkg/util/syncutil",
-        "@com_github_cockroachdb_errors//:errors",
-        "@com_github_stretchr_testify//assert",
     ],
 )

pkg/server/structlogging/hot_ranges_log.go (+51 -35)

@@ -84,7 +84,6 @@ func (s *hotRangesLoggingScheduler) start(ctx context.Context, stopper *stop.Sto
 	})
 
 	ticker := time.NewTicker(TelemetryHotRangesStatsInterval.Get(&s.st.SV))
-	defer ticker.Stop()
 
 	for {
 		select {
@@ -93,43 +92,60 @@ func (s *hotRangesLoggingScheduler) start(ctx context.Context, stopper *stop.Sto
 		case <-ctx.Done():
 			return
 		case <-ticker.C:
-			if !TelemetryHotRangesStatsEnabled.Get(&s.st.SV) {
-				continue
-			}
-			resp, err := s.sServer.HotRangesV2(ctx,
-				&serverpb.HotRangesRequest{NodeID: "local", PageSize: ReportTopHottestRanges})
-			if err != nil {
-				log.Warningf(ctx, "failed to get hot ranges: %s", err)
-				continue
-			}
-			var events []logpb.EventPayload
-			ts := timeutil.Now().UnixNano()
-
-			for _, r := range resp.Ranges {
-				hrEvent := &eventpb.HotRangesStats{
-					RangeID:             int64(r.RangeID),
-					Qps:                 r.QPS,
-					Databases:           r.Databases,
-					Tables:              r.Tables,
-					Indexes:             r.Indexes,
-					SchemaName:          r.SchemaName,
-					CPUTimePerSecond:    r.CPUTimePerSecond,
-					ReadBytesPerSecond:  r.ReadBytesPerSecond,
-					WriteBytesPerSecond: r.WriteBytesPerSecond,
-					ReadsPerSecond:      r.ReadsPerSecond,
-					WritesPerSecond:     r.WritesPerSecond,
-					LeaseholderNodeID:   int32(r.LeaseholderNodeID),
-					CommonEventDetails: logpb.CommonEventDetails{
-						Timestamp: ts,
-					},
-				}
-				events = append(events, hrEvent)
-			}
-			logutil.LogEventsWithDelay(ctx, events, stopper, TelemetryHotRangesStatsLoggingDelay.Get(&s.st.SV))
-
+			s.maybeLogHotRanges(ctx, stopper)
 		case <-intervalChangedChan:
 			ticker.Reset(TelemetryHotRangesStatsInterval.Get(&s.st.SV))
 		}
 	}
 	})
 }
+
+// maybeLogHotRanges is a small helper function which couples the
+// functionality of checking whether to log and logging.
+func (s *hotRangesLoggingScheduler) maybeLogHotRanges(ctx context.Context, stopper *stop.Stopper) {
+	if s.shouldLog() {
+		s.logHotRanges(ctx, stopper)
+	}
+}
+
+// shouldLog checks the below conditions to see whether it should emit logs.
+// - Is the cluster setting server.telemetry.hot_ranges_stats.enabled true?
+func (s *hotRangesLoggingScheduler) shouldLog() bool {
+	return TelemetryHotRangesStatsEnabled.Get(&s.st.SV)
+}
+
+// logHotRanges collects the hot ranges from this node's status server and
+// sends them to the TELEMETRY log channel.
+func (s *hotRangesLoggingScheduler) logHotRanges(ctx context.Context, stopper *stop.Stopper) {
+	resp, err := s.sServer.HotRangesV2(ctx,
+		&serverpb.HotRangesRequest{NodeID: "local", PageSize: ReportTopHottestRanges})
+	if err != nil {
+		log.Warningf(ctx, "failed to get hot ranges: %s", err)
+		return
+	}
+
+	var events []logpb.EventPayload
+	ts := timeutil.Now().UnixNano()
+
+	for _, r := range resp.Ranges {
+		hrEvent := &eventpb.HotRangesStats{
+			RangeID:             int64(r.RangeID),
+			Qps:                 r.QPS,
+			Databases:           r.Databases,
+			Tables:              r.Tables,
+			Indexes:             r.Indexes,
+			SchemaName:          r.SchemaName,
+			CPUTimePerSecond:    r.CPUTimePerSecond,
+			ReadBytesPerSecond:  r.ReadBytesPerSecond,
+			WriteBytesPerSecond: r.WriteBytesPerSecond,
+			ReadsPerSecond:      r.ReadsPerSecond,
+			WritesPerSecond:     r.WritesPerSecond,
+			LeaseholderNodeID:   int32(r.LeaseholderNodeID),
+			CommonEventDetails: logpb.CommonEventDetails{
+				Timestamp: ts,
+			},
+		}
+		events = append(events, hrEvent)
+	}
+	logutil.LogEventsWithDelay(ctx, events, stopper, TelemetryHotRangesStatsLoggingDelay.Get(&s.st.SV))
+}
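The surrounding loop in start() is a standard Go ticker pattern: one channel drives the periodic work, another resets the ticker when the interval setting changes. Here is a self-contained sketch of that shape; run, intervalChanged, and tick are illustrative names, not the real scheduler API, and the real loop delegates the per-tick work to maybeLogHotRanges.

package main

import (
	"context"
	"fmt"
	"time"
)

// run mirrors the shape of the scheduler's start loop: a ticker drives
// the periodic work, and a second channel resets the ticker when the
// interval setting changes.
func run(ctx context.Context, intervalChanged <-chan time.Duration, tick func()) {
	ticker := time.NewTicker(10 * time.Millisecond)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			tick()
		case d := <-intervalChanged:
			ticker.Reset(d)
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
	defer cancel()
	changed := make(chan time.Duration, 1)
	changed <- 25 * time.Millisecond // simulate a cluster setting update
	n := 0
	run(ctx, changed, func() { n++ })
	fmt.Println("ticks:", n)
}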

pkg/server/structlogging/hot_ranges_log_test.go (+45 -79)

@@ -8,27 +8,24 @@ package structlogging_test
 import (
 	"context"
 	"encoding/json"
+	"errors"
 	"regexp"
+	"slices"
 	"testing"
 	"time"
 
 	"github.com/cockroachdb/cockroach/pkg/base"
-	"github.com/cockroachdb/cockroach/pkg/keys"
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/plan"
 	"github.com/cockroachdb/cockroach/pkg/server/structlogging"
-	"github.com/cockroachdb/cockroach/pkg/sql/catalog/desctestutils"
 	"github.com/cockroachdb/cockroach/pkg/testutils"
 	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
 	"github.com/cockroachdb/cockroach/pkg/testutils/skip"
-	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
 	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
 	"github.com/cockroachdb/cockroach/pkg/util/log"
 	"github.com/cockroachdb/cockroach/pkg/util/log/eventpb"
 	"github.com/cockroachdb/cockroach/pkg/util/log/logpb"
 	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
-	"github.com/cockroachdb/errors"
-	"github.com/stretchr/testify/assert"
 )
 
 type hotRangesLogSpy struct {
@@ -74,21 +71,12 @@ func (spy *hotRangesLogSpy) Reset() {
 	spy.mu.logs = nil
 }
 
-// TestHotRangesStatsTenants tests that hot ranges stats are logged per node.
-// The test will ensure each node contains 5 distinct range replicas for hot
-// ranges logging. Each node should thus log 5 distinct range ids.
-func TestHotRangesStats(t *testing.T) {
-	defer leaktest.AfterTest(t)()
+func setupHotRangesLogTest(
+	t *testing.T, ctx context.Context,
+) (serverutils.ApplicationLayerInterface, *hotRangesLogSpy, func()) {
 	sc := log.ScopeWithoutShowLogs(t)
-	defer sc.Close(t)
-
-	skip.UnderRace(t)
-
-	ctx := context.Background()
-	spy := hotRangesLogSpy{t: t}
-	defer log.InterceptWith(ctx, &spy)()
-
-	tc := serverutils.StartCluster(t, 3, base.TestClusterArgs{
+	spy := &hotRangesLogSpy{t: t}
+	tc := serverutils.StartCluster(t, 1, base.TestClusterArgs{
 		ReplicationMode: base.ReplicationManual,
 		ServerArgs: base.TestServerArgs{
 			DefaultTestTenant: base.TestControlsTenantsExplicitly,
@@ -101,72 +89,50 @@ func TestHotRangesStats(t *testing.T) {
 			},
 		},
 	})
-	defer tc.Stopper().Stop(ctx)
-
-	db := tc.ServerConn(0)
-	sqlutils.CreateTable(
-		t, db, "foo",
-		"k INT PRIMARY KEY, v INT",
-		300,
-		sqlutils.ToRowFn(sqlutils.RowIdxFn, sqlutils.RowModuloFn(2)),
-	)
-
-	// Ensure both of node 1 and 2 have 5 distinct replicas from the table.
-	tableDesc := desctestutils.TestingGetPublicTableDescriptor(
-		tc.Server(0).DB(), keys.SystemSQLCodec, "test", "foo")
-	tc.SplitTable(t, tableDesc, []serverutils.SplitPoint{
-		{TargetNodeIdx: 1, Vals: []interface{}{100}},
-		{TargetNodeIdx: 1, Vals: []interface{}{120}},
-		{TargetNodeIdx: 1, Vals: []interface{}{140}},
-		{TargetNodeIdx: 1, Vals: []interface{}{160}},
-		{TargetNodeIdx: 1, Vals: []interface{}{180}},
-		{TargetNodeIdx: 2, Vals: []interface{}{200}},
-		{TargetNodeIdx: 2, Vals: []interface{}{220}},
-		{TargetNodeIdx: 2, Vals: []interface{}{240}},
-		{TargetNodeIdx: 2, Vals: []interface{}{260}},
-		{TargetNodeIdx: 2, Vals: []interface{}{280}},
-	})
 
-	// query table
-	for i := 0; i < 300; i++ {
-		db := tc.ServerConn(0)
-		sqlutils.MakeSQLRunner(db).Query(t, `SELECT * FROM test.foo`)
+	leakChecker := leaktest.AfterTest(t)
+	logInterceptor := log.InterceptWith(ctx, spy)
+	stopper := tc.Stopper()
+	teardown := func() {
+		stopper.Stop(ctx)
+		sc.Close(t)
+		logInterceptor()
+		leakChecker()
 	}
 
-	// Skip node 1 since it will contain many more replicas.
-	// We only need to check nodes 2 and 3 to see that the nodes are logging their local hot ranges.
-	rangeIDs := make(map[int64]struct{})
-	for _, i := range []int{1, 2} {
-		spy.Reset()
-		ts := tc.ApplicationLayer(i)
-		structlogging.TelemetryHotRangesStatsEnabled.Override(ctx, &ts.ClusterSettings().SV, true)
-		structlogging.TelemetryHotRangesStatsInterval.Override(ctx, &ts.ClusterSettings().SV, time.Second)
-		structlogging.TelemetryHotRangesStatsLoggingDelay.Override(ctx, &ts.ClusterSettings().SV, 0*time.Millisecond)
-
-		testutils.SucceedsSoon(t, func() error {
-			logs := spy.Logs()
-			if len(logs) < 5 {
-				return errors.New("waiting for hot ranges to be logged")
-			}
+	ts := tc.ApplicationLayer(0)
+	return ts, spy, teardown
+}
 
-			return nil
-		})
-		structlogging.TelemetryHotRangesStatsInterval.Override(ctx, &ts.ClusterSettings().SV, 1*time.Hour)
+// TestHotRangeLogger tests that hot ranges stats are logged. It polls
+// until the logger emits a range which always exists in the system:
+// the range for the system sqlliveness table.
+func TestHotRangeLogger(t *testing.T) {
+	skip.UnderRace(t)
+	ctx := context.Background()
+	ts, spy, teardown := setupHotRangesLogTest(t, ctx)
+	defer teardown()
 
-		// Get first 5 logs since the logging loop may have fired multiple times.
-		// We should have gotten 5 distinct range ids, one for each split point above.
-		logs := spy.Logs()[:5]
+	structlogging.TelemetryHotRangesStatsEnabled.Override(ctx, &ts.ClusterSettings().SV, true)
+	structlogging.TelemetryHotRangesStatsInterval.Override(ctx, &ts.ClusterSettings().SV, time.Millisecond)
+	structlogging.TelemetryHotRangesStatsLoggingDelay.Override(ctx, &ts.ClusterSettings().SV, 0*time.Millisecond)
+
+	testutils.SucceedsSoon(t, func() error {
+		logs := spy.Logs()
+
+		// Depend on a range which we don't expect to go anywhere.
 		for _, l := range logs {
-			assert.Equal(t, l.Databases, []string{"‹test›"})
-			assert.Equal(t, l.Tables, []string{"‹foo›"})
-			assert.Equal(t, l.Indexes, []string{"‹foo_pkey›"})
-			_, ok := rangeIDs[l.RangeID]
-			if ok {
-				t.Fatalf(`Logged ranges should be unique per node for this test.
-found range on node %d and node %d: %s %s %s %s %d`, i, l.LeaseholderNodeID, l.Databases, l.SchemaName, l.Tables, l.Indexes, l.RangeID)
+			if !slices.Equal(l.Databases, []string{"‹system›"}) {
+				continue
+			}
+			if !slices.Equal(l.Tables, []string{"‹sqlliveness›"}) {
+				continue
 			}
-			rangeIDs[l.RangeID] = struct{}{}
+			if !slices.Equal(l.Indexes, []string{"‹primary›"}) {
+				continue
+			}
+			return nil
 		}
-
-	}
+		return errors.New("waited too long for the synthetic data")
+	})
 }
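The rewritten test relies on testutils.SucceedsSoon to poll the log spy until the expected range appears. Below is a rough, self-contained stand-in for that polling pattern, assuming a fixed sleep where the real helper uses backoff; succeedsSoon and the call counter are illustrative, not the CockroachDB helper.

package main

import (
	"errors"
	"fmt"
	"time"
)

// succeedsSoon retries fn until it returns nil or the deadline passes.
// A tiny stand-in for testutils.SucceedsSoon, which also integrates
// with *testing.T and backs off between attempts.
func succeedsSoon(d time.Duration, fn func() error) error {
	deadline := time.Now().Add(d)
	for {
		err := fn()
		if err == nil {
			return nil
		}
		if time.Now().After(deadline) {
			return fmt.Errorf("condition never met: %w", err)
		}
		time.Sleep(5 * time.Millisecond)
	}
}

func main() {
	// Simulate a condition that only holds after a few polls, the way
	// the hot ranges log only shows up once the ticker has fired.
	calls := 0
	err := succeedsSoon(time.Second, func() error {
		calls++
		if calls < 3 {
			return errors.New("waiting for hot ranges to be logged")
		}
		return nil
	})
	fmt.Println("result:", err, "after", calls, "polls")
}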
