Skip to content

Commit 4893f34

Browse files
committed
x/text/internal/colltab: Sort numbers with leading zeros
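Makes numeric sorting deterministic: numbers that differ only in their leading zeros no longer compare as equal; the one with fewer leading zeros sorts first (e.g. "1" < "01" < "001"). This is particularly useful for approximating how an OS file manager sorts files.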
1 parent b5012d1 commit 4893f34

File tree

3 files changed

+58
-24
lines changed

3 files changed

+58
-24
lines changed

Diff for: collate/collate_test.go

+7-7
Original file line numberDiff line numberDiff line change
@@ -474,15 +474,15 @@ func TestNumeric(t *testing.T) {
474474
{"A-1", "A-2", -1},
475475
{"A-2", "A-12", -1},
476476
{"A-12", "A-2", 1},
477-
{"A-0001", "A-1", 0},
477+
{"A-0001", "A-1", 1},
478478
{"0000-", "1-", -1},
479-
{"00001", "1", 0},
479+
{"00001", "1", 1},
480480
{"00", "00", 0},
481-
{"0", "00", 0},
482-
{"00", "0", 0},
483-
{"01", "001", 0},
484-
{"01", "1", 0},
485-
{"1", "01", 0},
481+
{"0", "00", -1},
482+
{"00", "0", 1},
483+
{"01", "001", -1},
484+
{"01", "1", 1},
485+
{"1", "01", -1},
486486
{"9-A", "0-A", 1},
487487
{"99-A", "0-A", 1},
488488
{"9-A", "1-A", 1},

Diff for: internal/colltab/numeric.go

+17
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,20 @@ func (nw *numericWeighter) AppendNext(buf []Elem, s []byte) (ce []Elem, n int) {
8080
}
8181
// ce might have been grown already, so take it instead of buf.
8282
nc.init(ce, len(buf), isZero)
83+
old_index := len(nc.elems)
8384
for n < len(s) {
8485
ce, sz := nw.Weighter.AppendNext(nc.elems, s[n:])
8586
nc.b = s
8687
n += sz
8788
if !nc.update(ce) {
8889
break
8990
}
91+
old_index = len(nc.elems)
9092
}
93+
nc.elems = append(nc.elems, 0)
94+
copy(nc.elems[old_index+1:], nc.elems[old_index:])
95+
nc.elems[old_index], _ = MakeElem(nc.zero+1, defaultSecondary, defaultTertiary, 0)
96+
9197
return nc.result(), n
9298
}
9399

@@ -105,14 +111,20 @@ func (nw *numericWeighter) AppendNextString(buf []Elem, s string) (ce []Elem, n
105111
return ce, n
106112
}
107113
nc.init(ce, len(buf), isZero)
114+
old_index := len(nc.elems)
108115
for n < len(s) {
109116
ce, sz := nw.Weighter.AppendNextString(nc.elems, s[n:])
110117
nc.s = s
111118
n += sz
112119
if !nc.update(ce) {
113120
break
114121
}
122+
old_index = len(nc.elems)
115123
}
124+
nc.elems = append(nc.elems, 0)
125+
copy(nc.elems[old_index+1:], nc.elems[old_index:])
126+
nc.elems[old_index], _ = MakeElem(nc.zero+1, defaultSecondary, defaultTertiary, 0)
127+
116128
return nc.result(), n
117129
}
118130

@@ -122,6 +134,7 @@ type numberConverter struct {
122134
elems []Elem
123135
nDigits int
124136
lenIndex int
137+
zero int
125138

126139
s string // set if the input was of type string
127140
b []byte // set if the input was of type []byte
@@ -133,6 +146,7 @@ func (nc *numberConverter) init(elems []Elem, oldLen int, isZero bool) {
133146
// Insert a marker indicating the start of a number and a placeholder
134147
// for the number of digits.
135148
if isZero {
149+
nc.zero++
136150
elems = append(elems[:oldLen], nc.w.numberStart, 0)
137151
} else {
138152
elems = append(elems, 0, 0)
@@ -217,6 +231,9 @@ const maxDigits = 1<<maxPrimaryBits - 1
217231
func (nc *numberConverter) update(elems []Elem) bool {
218232
isZero, ok := nc.checkNextDigit(elems)
219233
if nc.nDigits == 0 && isZero {
234+
if nc.zero+1 < maxDigits {
235+
nc.zero++
236+
}
220237
return true
221238
}
222239
nc.elems = elems

Diff for: internal/colltab/numeric_test.go

+34-17
Original file line numberDiff line numberDiff line change
@@ -78,36 +78,37 @@ func TestNumericAppendNext(t *testing.T) {
7878
{"a", p(5)},
7979
{"klm", p(99)},
8080
{"aa", p(5, 5)},
81-
{"1", p(120, 2, 101)},
82-
{"0", p(120, 1)},
83-
{"00", p(120, 1)},
84-
{"01", p(120, 2, 101)},
85-
{"0001", p(120, 2, 101)},
86-
{"02", p(120, 2, 102)},
87-
{"10", p(120, 3, 101, 100)},
88-
{"99", p(120, 3, 119, 119)},
89-
{"9999", p(120, 5, 119, 119, 119, 119)},
90-
{"1a", p(120, 2, 101, 5)},
91-
{"0b", p(120, 1, 6)},
92-
{"01c", p(120, 2, 101, 8, 2)},
93-
{"10x", p(120, 3, 101, 100, 200)},
94-
{"99y", p(120, 3, 119, 119, 201)},
95-
{"9999nop", p(120, 5, 119, 119, 119, 119, 121)},
81+
{"1", p(120, 2, 101, 1)},
82+
{"0", p(120, 1, 2)},
83+
{"00", p(120, 1, 3)},
84+
{"01", p(120, 2, 101, 2)},
85+
{"0001", p(120, 2, 101, 4)},
86+
{"02", p(120, 2, 102, 2)},
87+
{"10", p(120, 3, 101, 100, 1)},
88+
{"99", p(120, 3, 119, 119, 1)},
89+
{"9999", p(120, 5, 119, 119, 119, 119, 1)},
90+
{"1a", p(120, 2, 101, 1, 5)},
91+
{"0b", p(120, 1, 2, 6)},
92+
{"01c", p(120, 2, 101, 2, 8, 2)},
93+
{"10x", p(120, 3, 101, 100, 1, 200)},
94+
{"99y", p(120, 3, 119, 119, 1, 201)},
95+
{"9999nop", p(120, 5, 119, 119, 119, 119, 1, 121)},
9696

9797
// Allow follow-up collation elements if they have a zero non-primary.
98-
{"١٢٩", []Elem{e(120), e(4), e(101), tPlus3, e(102), tPlus3, e(119), tPlus3}},
98+
{"١٢٩", []Elem{e(120), e(4), e(101), tPlus3, e(102), tPlus3, e(119), tPlus3, e(1)}},
9999
{
100100
"129",
101101
[]Elem{
102102
e(120), e(4),
103103
e(101, digSec, digTert+1),
104104
e(102, digSec, digTert+3),
105105
e(119, digSec, digTert+1),
106+
e(1),
106107
},
107108
},
108109

109110
// Ensure AppendNext* adds to the given buffer.
110-
{"a10", p(5, 120, 3, 101, 100)},
111+
{"a10", p(5, 120, 3, 101, 100, 1)},
111112
} {
112113
nw := NewNumericWeighter(numWeighter)
113114

@@ -148,6 +149,22 @@ func TestNumericOverflow(t *testing.T) {
148149
}
149150
}
150151

152+
func TestNumericZeroOverflow(t *testing.T) {
153+
manyDigits := strings.Repeat("0", maxDigits+1) + "a"
154+
155+
nw := NewNumericWeighter(numWeighter)
156+
157+
got, n := nw.AppendNextString(nil, manyDigits)
158+
159+
if n != maxDigits+2 { // Zeros after maxDigits-1 are ignored but are still consumed so that a number with leading zeros is ordered after a number with less leading zeros
160+
t.Errorf("n: got %d; want %d", n, maxDigits+2)
161+
}
162+
163+
if got[2].Primary() != maxDigits {
164+
t.Errorf("primary(e[2]): got %d; want %d", got[1].Primary(), maxDigits)
165+
}
166+
}
167+
151168
func TestNumericWeighterAlloc(t *testing.T) {
152169
buf := make([]Elem, 100)
153170
w := NewNumericWeighter(numWeighter)

0 commit comments

Comments
 (0)