Skip to content

Commit 6ea0b13

Browse files
craig[bot] and angles-n-daemons committed Mar 13, 2025
Merge #142236
142236: split: add key itself to popular key surface r=angles-n-daemons a=angles-n-daemons

split: add key itself to popular key surface

Historically, load-based splitters have only ever exposed the frequency with which a popular key is accessed, and not the key itself. This change updates this so that the key is included in the function surface. This may be used later, as part of the replica's split statistics.

Fixes: #138758
Epic: CRDB-43150
Release note: None

Co-authored-by: Brian Dillmann <[email protected]>
2 parents 4cf192a + cf55f5d commit 6ea0b13

File tree

5 files changed

+55
-35
lines changed

5 files changed

+55
-35
lines changed
 

‎pkg/kv/kvserver/split/decider.go

+9-4
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ const minSplitSuggestionInterval = time.Minute
2323
const minNoSplitKeyLoggingMetricsInterval = time.Minute
2424
const minPerSecondSampleDuration = time.Second
2525

26+
type PopularKey struct {
27+
Key roachpb.Key
28+
Frequency float64
29+
}
30+
2631
type LoadBasedSplitter interface {
2732
redact.SafeFormatter
2833
// Record informs the LoadBasedSplitter about where the span lies with regard
@@ -43,9 +48,9 @@ type LoadBasedSplitter interface {
4348
// empty string.
4449
NoSplitKeyCauseLogMsg() redact.RedactableString
4550

46-
// PopularKeyFrequency returns the percentage that the most popular key
47-
// appears in the sampled candidate split keys.
48-
PopularKeyFrequency() float64
51+
// PopularKey returns the most popular key in the sample dataset in addition
52+
// to its frequency.
53+
PopularKey() PopularKey
4954

5055
// String formats the state of the load based splitter.
5156
String() string
@@ -262,7 +267,7 @@ func (d *Decider) recordLocked(
262267
if now.Sub(d.mu.lastNoSplitKeyLoggingMetrics) > minNoSplitKeyLoggingMetricsInterval {
263268
d.mu.lastNoSplitKeyLoggingMetrics = now
264269
if causeMsg := d.mu.splitFinder.NoSplitKeyCauseLogMsg(); causeMsg != "" {
265-
popularKeyFrequency := d.mu.splitFinder.PopularKeyFrequency()
270+
popularKeyFrequency := d.mu.splitFinder.PopularKey().Frequency
266271
log.KvDistribution.Infof(ctx, "%s, most popular key occurs in %d%% of samples",
267272
causeMsg, int(popularKeyFrequency*100))
268273
log.KvDistribution.VInfof(ctx, 3, "splitter_state=%v", (*lockedDecider)(d))

‎pkg/kv/kvserver/split/unweighted_finder.go

+9-4
Original file line numberDiff line numberDiff line change
@@ -201,26 +201,31 @@ func (f *UnweightedFinder) NoSplitKeyCauseLogMsg() redact.RedactableString {
201201
imbalanceAndTooManyContained)
202202
}
203203

204-
// PopularKeyFrequency implements the LoadBasedSplitter interface.
205-
func (f *UnweightedFinder) PopularKeyFrequency() float64 {
204+
// PopularKey implements the LoadBasedSplitter interface.
205+
func (f *UnweightedFinder) PopularKey() PopularKey {
206206
slices.SortFunc(f.samples[:], func(a, b sample) int {
207207
return bytes.Compare(a.key, b.key)
208208
})
209209

210210
currentKeyCount := 1
211-
popularKeyCount := 1
211+
popularKeyCount := 0
212+
var key roachpb.Key
212213
for i := 1; i < len(f.samples); i++ {
213214
if bytes.Equal(f.samples[i].key, f.samples[i-1].key) {
214215
currentKeyCount++
215216
} else {
216217
currentKeyCount = 1
217218
}
218219
if popularKeyCount < currentKeyCount {
220+
key = f.samples[i].key
219221
popularKeyCount = currentKeyCount
220222
}
221223
}
222224

223-
return float64(popularKeyCount) / float64(splitKeySampleSize)
225+
return PopularKey{
226+
Key: key,
227+
Frequency: float64(popularKeyCount) / float64(splitKeySampleSize),
228+
}
224229
}
225230

226231
// SafeFormat implements the redact.SafeFormatter interface.

‎pkg/kv/kvserver/split/unweighted_finder_test.go

+18-15
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package split
88
import (
99
"bytes"
1010
"context"
11+
"fmt"
1112
"math/rand"
1213
"reflect"
1314
"testing"
@@ -381,23 +382,25 @@ func TestFinderPopularKeyFrequency(t *testing.T) {
381382
}
382383
}
383384

384-
testCases := []struct {
385+
randSource := rand.New(rand.NewSource(2022))
386+
for i, test := range []struct {
385387
samples [splitKeySampleSize]sample
388+
expectedPopularKey roachpb.Key
386389
expectedPopularKeyFrequency float64
387390
}{
388-
{uniqueKeySample, 0.05},
389-
{twentyPercentPopularKeySample, 0.2},
390-
{twentyFivePercentPopularKeySample, 0.25},
391-
{fiftyPercentPopularKeySample, 0.5},
392-
{fiftyFivePercentPopularKeySample, 0.55},
393-
{sameKeySample, 1},
394-
}
395-
396-
randSource := rand.New(rand.NewSource(2022))
397-
for i, test := range testCases {
398-
finder := NewUnweightedFinder(timeutil.Now(), randSource)
399-
finder.samples = test.samples
400-
popularKeyFrequency := finder.PopularKeyFrequency()
401-
assert.Equal(t, test.expectedPopularKeyFrequency, popularKeyFrequency, "unexpected popular key frequency in test %d", i)
391+
{uniqueKeySample, keys.SystemSQLCodec.TablePrefix(1), 0.05},
392+
{twentyPercentPopularKeySample, keys.SystemSQLCodec.TablePrefix(6), 0.2},
393+
{twentyFivePercentPopularKeySample, keys.SystemSQLCodec.TablePrefix(2), 0.25},
394+
{fiftyPercentPopularKeySample, keys.SystemSQLCodec.TablePrefix(0), 0.5},
395+
{fiftyFivePercentPopularKeySample, keys.SystemSQLCodec.TablePrefix(0), 0.55},
396+
{sameKeySample, keys.SystemSQLCodec.TablePrefix(0), 1},
397+
} {
398+
t.Run(fmt.Sprintf("popular key test %d", i), func(t *testing.T) {
399+
finder := NewUnweightedFinder(timeutil.Now(), randSource)
400+
finder.samples = test.samples
401+
popularKey := finder.PopularKey()
402+
assert.Equal(t, test.expectedPopularKey, popularKey.Key, "unexpected popular key in test %d", i)
403+
assert.Equal(t, test.expectedPopularKeyFrequency, popularKey.Frequency, "unexpected popular key frequency in test %d", i)
404+
})
402405
}
403406
}

‎pkg/kv/kvserver/split/weighted_finder.go

+8-3
Original file line numberDiff line numberDiff line change
@@ -252,8 +252,8 @@ func (f *WeightedFinder) NoSplitKeyCauseLogMsg() redact.RedactableString {
252252
insufficientCounters, imbalance)
253253
}
254254

255-
// PopularKeyFrequency implements the LoadBasedSplitter interface.
256-
func (f *WeightedFinder) PopularKeyFrequency() float64 {
255+
// PopularKey implements the LoadBasedSplitter interface.
256+
func (f *WeightedFinder) PopularKey() PopularKey {
257257
// Sort the sample slice to determine the frequency that a popular key
258258
// appears. We could copy the slice, however it would require an allocation.
259259
// The probability a sample is replaced doesn't change as it is independent
@@ -263,6 +263,7 @@ func (f *WeightedFinder) PopularKeyFrequency() float64 {
263263
})
264264

265265
weight := f.samples[0].weight
266+
key := f.samples[0].key
266267
currentKeyWeight := weight
267268
popularKeyWeight := weight
268269
totalWeight := weight
@@ -274,12 +275,16 @@ func (f *WeightedFinder) PopularKeyFrequency() float64 {
274275
currentKeyWeight = weight
275276
}
276277
if popularKeyWeight < currentKeyWeight {
278+
key = f.samples[i].key
277279
popularKeyWeight = currentKeyWeight
278280
}
279281
totalWeight += weight
280282
}
281283

282-
return popularKeyWeight / totalWeight
284+
return PopularKey{
285+
Key: key,
286+
Frequency: popularKeyWeight / totalWeight,
287+
}
283288
}
284289

285290
// SafeFormat implements the redact.SafeFormatter interface.

‎pkg/kv/kvserver/split/weighted_finder_test.go

+11-9
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ func TestWeightedFinderNoSplitKeyCause(t *testing.T) {
335335
assert.Equal(t, 13, imbalance, "unexpected imbalance counters")
336336
}
337337

338-
func TestWeightedFinderPopularKeyFrequency(t *testing.T) {
338+
func TestWeightedFinderPopularKey(t *testing.T) {
339339
uniqueKeyUnweightedSample := [splitKeySampleSize]weightedSample{}
340340
for i, idx := range rand.Perm(splitKeySampleSize) {
341341
uniqueKeyUnweightedSample[idx] = weightedSample{
@@ -387,22 +387,24 @@ func TestWeightedFinderPopularKeyFrequency(t *testing.T) {
387387
const eps = 1e-3
388388
testCases := []struct {
389389
samples [splitKeySampleSize]weightedSample
390+
expectedPopularKey roachpb.Key
390391
expectedPopularKeyFrequency float64
391392
}{
392-
{uniqueKeyUnweightedSample, 1.0 / 20.0},
393-
{uniqueKeyWeightedSample, 20.0 / 210.0}, // 20/(1+2+...+20)
394-
{duplicateKeyUnweightedSample, 5.0 / 20.0},
395-
{duplicateKeyWeightedSample, 84.0 / 210.0}, // (9+10+...+15)/(1+2+...+20)
396-
{sameKeySample, 1},
393+
{uniqueKeyUnweightedSample, keys.SystemSQLCodec.TablePrefix(uint32(0)), 1.0 / 20.0},
394+
{uniqueKeyWeightedSample, keys.SystemSQLCodec.TablePrefix(uint32(19)), 20.0 / 210.0}, // 20/(1+2+...+20)
395+
{duplicateKeyUnweightedSample, keys.SystemSQLCodec.TablePrefix(uint32(2)), 5.0 / 20.0},
396+
{duplicateKeyWeightedSample, keys.SystemSQLCodec.TablePrefix(uint32(2)), 84.0 / 210.0}, // (9+10+...+15)/(1+2+...+20)
397+
{sameKeySample, keys.SystemSQLCodec.TablePrefix(uint32(0)), 1},
397398
}
398399

399400
randSource := rand.New(rand.NewSource(2022))
400401
for i, test := range testCases {
401402
weightedFinder := NewWeightedFinder(timeutil.Now(), randSource)
402403
weightedFinder.samples = test.samples
403-
popularKeyFrequency := weightedFinder.PopularKeyFrequency()
404-
assert.True(t, math.Abs(test.expectedPopularKeyFrequency-popularKeyFrequency) < eps,
404+
popularKey := weightedFinder.PopularKey()
405+
assert.Equal(t, test.expectedPopularKey, popularKey.Key)
406+
assert.True(t, math.Abs(test.expectedPopularKeyFrequency-popularKey.Frequency) < eps,
405407
"%d: expected popular key frequency %f, got %f",
406-
i, test.expectedPopularKeyFrequency, popularKeyFrequency)
408+
i, test.expectedPopularKeyFrequency, popularKey.Frequency)
407409
}
408410
}

0 commit comments

Comments
 (0)
Please sign in to comment.