
Commit 17c1bc6

Eliminate lexer goroutines (miekg#792)
* Eliminate zlexer goroutine

  This replaces the zlexer goroutine and channels with a zlexer struct
  that maintains state and provides a channel-like API.

* Eliminate klexer goroutine

  This replaces the klexer goroutine and channels with a klexer struct
  that maintains state and provides a channel-like API.

* Merge scan into zlexer and klexer

  This does result in tokenText existing twice, but it's pretty simple
  and small so it's not that bad.

* Avoid using text/scanner.Position to track position

* Track escape within zlexer.Next

* Avoid zl.commt check on space and tab in zlexer

* Track stri within zlexer.Next

* Track comi within zlexer.Next

  There is one special case at the start of a comment that needs to be
  handled, otherwise this is as simple as stri was.

* Use a single token buffer in zlexer

  This is safe as there is never both a non-empty string buffer and a
  non-empty comment buffer.

* Don't hardcode length of zl.tok in zlexer

* Eliminate lex.length field

  This is always set to len(l.token) and is only queried in a few
  places. It was added in 47cc5b0 without any obvious need.

* Add whitespace to klexer.Next

* Track lex within klexer.Next

* Use a strings.Builder in klexer.Next

* Simplify : case in klexer.Next

* Add whitespace to zlexer.Next

* Change for loop style in zlexer.Next and klexer.Next

* Surface read errors in zlexer

* Surface read errors from klexer

* Remove debug line from parseKey

* Rename tokenText to readByte

* Make readByte return ok bool

  Also change the for loop style to match the Next for loops.

* Make readByte errors sticky

  klexer.Next calls readByte separately from within the loop. Without
  readByte being sticky, an error that occurs during that readByte call
  may be lost.

* Panic in testRR if the error is non-nil

* Add whitespace and unify field setting in zlexer.Next

* Remove eof fields from zlexer and klexer

  With readByte having sticky errors, this is no longer needed.
  zl.eof = true was also in the wrong place and could mask an
  unbalanced brace error.

* Merge zl.tok blocks in zlexer.Next

* Split the tok buffer into separate string and comment buffers

  The invariant of stri > 0 && comi > 0 never being true was broken
  when x == '\n' && !zl.quote && zl.commt && zl.brace != 0 (the "If not
  in a brace this ends the comment AND the RR" block). Split the buffer
  back out into two separate buffers to avoid clobbering.

* Replace token slices with arrays in zlexer

* Add a NewRR benchmark

* Move token buffers into zlexer.Next

  These don't need to be retained across Next calls and can be stack
  allocated inside Next. This drastically reduces memory consumption as
  they accounted for nearly half of all the memory used.

  name      old alloc/op   new alloc/op   delta
  NewRR-12    9.72kB ± 0%    4.98kB ± 0%  -48.72%  (p=0.000 n=10+10)

* Add a ReadRR benchmark

  Unlike NewRR, this will use an io.Reader that does not implement any
  methods aside from Read. In particular it does not implement
  io.ByteReader.

* Avoid using a bufio.Reader for io.ByteReader readers

  At the same time, use a smaller buffer size of 1KiB rather than the
  bufio.NewReader default of 4KiB.

  name       old time/op    new time/op    delta
  NewRR-12     11.0µs ± 3%     9.5µs ± 2%  -13.77%  (p=0.000 n=9+10)
  ReadRR-12    11.2µs ±16%     9.8µs ± 1%  -13.03%  (p=0.000 n=10+10)

  name       old alloc/op   new alloc/op   delta
  NewRR-12     4.98kB ± 0%    0.81kB ± 0%  -83.79%  (p=0.000 n=10+10)
  ReadRR-12    4.87kB ± 0%    1.82kB ± 0%  -62.73%  (p=0.000 n=10+10)

  name       old allocs/op  new allocs/op  delta
  NewRR-12       19.0 ± 0%      17.0 ± 0%  -10.53%  (p=0.000 n=10+10)
  ReadRR-12      19.0 ± 0%      19.0 ± 0%     ~     (all equal)

* Surface any remaining comment from zlexer.Next

* Improve comment handling in zlexer.Next

  This both fixes a regression where comments could be lost under
  certain circumstances and now emits comments that occur within
  braces.

* Remove outdated comment from zlexer.Next and klexer.Next

* Delay converting LF to space in braced comment

* Fixup TestParseZoneComments

* Remove tokenUpper field from lex

  Not computing this for every token, and instead only when needed, is
  a substantial performance improvement.

  name       old time/op    new time/op    delta
  NewRR-12     9.56µs ± 0%    6.30µs ± 1%  -34.08%  (p=0.000 n=9+10)
  ReadRR-12    9.93µs ± 1%    6.67µs ± 1%  -32.77%  (p=0.000 n=10+10)

  name       old alloc/op   new alloc/op   delta
  NewRR-12       824B ± 0%      808B ± 0%   -1.94%  (p=0.000 n=10+10)
  ReadRR-12    1.83kB ± 0%    1.82kB ± 0%   -0.87%  (p=0.000 n=10+10)

  name       old allocs/op  new allocs/op  delta
  NewRR-12       17.0 ± 0%      17.0 ± 0%     ~     (all equal)
  ReadRR-12      19.0 ± 0%      19.0 ± 0%     ~     (all equal)

* Update ParseZone documentation to match comment changes

  The zlexer code was changed to return comments more often, so update
  the ParseZone documentation to match.
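The central pattern in this change, replacing a goroutine that pushes tokens down a channel with a struct whose Next method is pulled on demand, is easiest to see in isolation. Below is a minimal sketch of that conversion; the token and lexer names are invented for illustration and are far simpler than the package's real lex, zlexer, and klexer types:

package main

import "fmt"

// Before: a goroutine pushed tokens into a channel. The caller could
// not stop it cleanly (hence the drain-the-channel dance in the old
// parseKey), and every token paid for channel synchronization:
//
//	func lexer(input string, c chan token) {
//		defer close(c)
//		for _, r := range input {
//			c <- token{r}
//		}
//	}
//
// After: the lexer is a plain struct and the caller pulls tokens.

type token struct{ r rune }

type lexer struct {
	input []rune
	pos   int
}

// Next returns the next token and false once the input is exhausted,
// mirroring the channel receive idiom `tok, ok := <-c`.
func (l *lexer) Next() (token, bool) {
	if l.pos >= len(l.input) {
		return token{}, false
	}
	t := token{l.input[l.pos]}
	l.pos++
	return t, true
}

func main() {
	l := &lexer{input: []rune("abc")}
	for t, ok := l.Next(); ok; t, ok = l.Next() {
		fmt.Printf("%c ", t.r)
	}
}

Because the (token, bool) return mirrors the v, ok := <-c receive idiom, callers such as parseKey and generate in the diffs below could be converted almost mechanically.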
1 parent 4a9ca7e commit 17c1bc6

10 files changed (+975 -671 lines)

dnssec_keyscan.go (+113 -50)

@@ -1,6 +1,7 @@
 package dns
 
 import (
+	"bufio"
 	"crypto"
 	"crypto/dsa"
 	"crypto/ecdsa"
@@ -194,23 +195,12 @@ func readPrivateKeyED25519(m map[string]string) (ed25519.PrivateKey, error) {
 // parseKey reads a private key from r. It returns a map[string]string,
 // with the key-value pairs, or an error when the file is not correct.
 func parseKey(r io.Reader, file string) (map[string]string, error) {
-	s, cancel := scanInit(r)
 	m := make(map[string]string)
-	c := make(chan lex)
-	k := ""
-	defer func() {
-		cancel()
-		// zlexer can send up to two tokens, the next one and possibly 1 remainders.
-		// Do a non-blocking read.
-		_, ok := <-c
-		_, ok = <-c
-		if !ok {
-			// too bad
-		}
-	}()
-	// Start the lexer
-	go klexer(s, c)
-	for l := range c {
+	var k string
+
+	c := newKLexer(r)
+
+	for l, ok := c.Next(); ok; l, ok = c.Next() {
 		// It should alternate
 		switch l.value {
 		case zKey:
@@ -219,66 +209,139 @@ func parseKey(r io.Reader, file string) (map[string]string, error) {
 			if k == "" {
 				return nil, &ParseError{file, "no private key seen", l}
 			}
-			//println("Setting", strings.ToLower(k), "to", l.token, "b")
+
 			m[strings.ToLower(k)] = l.token
 			k = ""
 		}
 	}
+
+	// Surface any read errors from r.
+	if err := c.Err(); err != nil {
+		return nil, &ParseError{file: file, err: err.Error()}
+	}
+
 	return m, nil
 }
 
-// klexer scans the sourcefile and returns tokens on the channel c.
-func klexer(s *scan, c chan lex) {
-	var l lex
-	str := "" // Hold the current read text
-	commt := false
-	key := true
-	x, err := s.tokenText()
-	defer close(c)
-	for err == nil {
-		l.column = s.position.Column
-		l.line = s.position.Line
+type klexer struct {
+	br io.ByteReader
+
+	readErr error
+
+	line   int
+	column int
+
+	key bool
+
+	eol bool // end-of-line
+}
+
+func newKLexer(r io.Reader) *klexer {
+	br, ok := r.(io.ByteReader)
+	if !ok {
+		br = bufio.NewReaderSize(r, 1024)
+	}
+
+	return &klexer{
+		br: br,
+
+		line: 1,
+
+		key: true,
+	}
+}
+
+func (kl *klexer) Err() error {
+	if kl.readErr == io.EOF {
+		return nil
+	}
+
+	return kl.readErr
+}
+
+// readByte returns the next byte from the input
+func (kl *klexer) readByte() (byte, bool) {
+	if kl.readErr != nil {
+		return 0, false
+	}
+
+	c, err := kl.br.ReadByte()
+	if err != nil {
+		kl.readErr = err
+		return 0, false
+	}
+
+	// delay the newline handling until the next token is delivered,
+	// fixes off-by-one errors when reporting a parse error.
+	if kl.eol {
+		kl.line++
+		kl.column = 0
+		kl.eol = false
+	}
+
+	if c == '\n' {
+		kl.eol = true
+	} else {
+		kl.column++
+	}
+
+	return c, true
+}
+
+func (kl *klexer) Next() (lex, bool) {
+	var (
+		l lex
+
+		str strings.Builder
+
+		commt bool
+	)
+
+	for x, ok := kl.readByte(); ok; x, ok = kl.readByte() {
+		l.line, l.column = kl.line, kl.column
+
 		switch x {
 		case ':':
-			if commt {
+			if commt || !kl.key {
 				break
 			}
-			l.token = str
-			if key {
-				l.value = zKey
-				c <- l
-				// Next token is a space, eat it
-				s.tokenText()
-				key = false
-				str = ""
-			} else {
-				l.value = zValue
-			}
+
+			kl.key = false
+
+			// Next token is a space, eat it
+			kl.readByte()
+
+			l.value = zKey
+			l.token = str.String()
+			return l, true
 		case ';':
 			commt = true
 		case '\n':
 			if commt {
 				// Reset a comment
 				commt = false
 			}
+
+			kl.key = true
+
 			l.value = zValue
-			l.token = str
-			c <- l
-			str = ""
-			commt = false
-			key = true
+			l.token = str.String()
+			return l, true
 		default:
 			if commt {
 				break
 			}
-			str += string(x)
+
+			str.WriteByte(x)
 		}
-		x, err = s.tokenText()
 	}
-	if len(str) > 0 {
+
+	if str.Len() > 0 {
 		// Send remainder
-		l.token = str
 		l.value = zValue
-		c <- l
+		l.token = str.String()
+		return l, true
 	}
+
+	return lex{value: zEOF}, false
 }
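One detail of the new klexer worth highlighting is that readByte latches its first error. klexer.Next also calls readByte out of band, to eat the space after ':', and without stickiness an error from that call could be silently lost. Here is a self-contained sketch of the same pattern; the stickyReader name is invented for illustration:

package main

import (
	"fmt"
	"io"
	"strings"
)

// stickyReader wraps an io.ByteReader and latches the first error.
// Reads may happen anywhere (inside or outside a loop); a final Err
// check still observes the failure.
type stickyReader struct {
	br  io.ByteReader
	err error
}

func (s *stickyReader) readByte() (byte, bool) {
	if s.err != nil {
		return 0, false // sticky: never read past a failure
	}
	c, err := s.br.ReadByte()
	if err != nil {
		s.err = err
		return 0, false
	}
	return c, true
}

// Err reports any error other than io.EOF, matching klexer.Err.
func (s *stickyReader) Err() error {
	if s.err == io.EOF {
		return nil
	}
	return s.err
}

func main() {
	s := &stickyReader{br: strings.NewReader("ab")}
	for c, ok := s.readByte(); ok; c, ok = s.readByte() {
		fmt.Printf("%c", c)
	}
	fmt.Println("\nerr:", s.Err()) // nil: EOF is not surfaced
}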

dnssec_test.go (+10 -0)

@@ -846,3 +846,13 @@ func TestRsaExponentUnpack(t *testing.T) {
 		t.Fatalf("cannot verify RRSIG with keytag [%d]. Cause [%s]", ksk.KeyTag(), e.Error())
 	}
 }
+
+func TestParseKeyReadError(t *testing.T) {
+	m, err := parseKey(errReader{}, "")
+	if err == nil || !strings.Contains(err.Error(), errTestReadError.Error()) {
+		t.Errorf("expected error to contain %q, but got %v", errTestReadError, err)
+	}
+	if m != nil {
+		t.Errorf("expected a nil map, but got %v", m)
+	}
+}
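errReader and errTestReadError are not defined in this file; they are presumably added in another test file of this change. A minimal sketch of what such helpers could look like:

package dns

import "errors"

// Hypothetical definitions; the actual helpers added elsewhere in
// this change may differ.
var errTestReadError = errors.New("test error")

// errReader is an io.Reader whose Read always fails, used to
// exercise the read-error paths surfaced by klexer.Err.
type errReader struct{}

func (errReader) Read(p []byte) (int, error) {
	return 0, errTestReadError
}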

generate.go (+4 -4)

@@ -20,7 +20,7 @@ import (
 // of $ after that are interpreted.
 // Any error are returned as a string value, the empty string signals
 // "no error".
-func generate(l lex, c chan lex, t chan *Token, o string) string {
+func generate(l lex, c *zlexer, t chan *Token, o string) string {
 	step := 1
 	if i := strings.IndexAny(l.token, "/"); i != -1 {
 		if i+1 == len(l.token) {
@@ -52,11 +52,11 @@ func generate(l lex, c *zlexer, t chan *Token, o string) string {
 		return "bad range in $GENERATE range"
 	}
 
-	<-c // _BLANK
+	c.Next() // _BLANK
 	// Create a complete new string, which we then parse again.
 	s := ""
 BuildRR:
-	l = <-c
+	l, _ = c.Next()
 	if l.value != zNewline && l.value != zEOF {
 		s += l.token
 		goto BuildRR
@@ -107,7 +107,7 @@ BuildRR:
 	mod, offset, err = modToPrintf(s[j+2 : j+2+sep])
 	if err != nil {
 		return err.Error()
-	} else if start + offset < 0 || end + offset > 1<<31-1 {
+	} else if start+offset < 0 || end+offset > 1<<31-1 {
 		return "bad offset in $GENERATE"
 	}
 	j += 2 + sep // Jump to it
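Note that generate can safely discard the ok result from c.Next(): when the lexer is exhausted it returns a lex whose value is zEOF (see the tail of klexer.Next above; zlexer presumably follows the same convention), so the existing l.value != zEOF check still terminates the BuildRR loop without consulting the bool.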
