-
Notifications
You must be signed in to change notification settings - Fork 0
/
tokenizer.go
62 lines (53 loc) · 1.28 KB
/
tokenizer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
package main
import (
"bufio"
"strings"
"unicode"
"unicode/utf8"
)
// isParan reports whether r is an opening or closing parenthesis.
func isParan(r rune) bool {
	switch r {
	case '(', ')':
		return true
	default:
		return false
	}
}
// ScanTokens is a split function for a bufio.Scanner that breaks input into
// Lisp-style tokens. A token is either a single parenthesis or a maximal run
// of bytes containing neither white space (as defined by unicode.IsSpace) nor
// a parenthesis. Leading white space is skipped and never appears in a token.
//
// The white space that terminates a word is consumed together with the word,
// whereas a parenthesis that terminates a word is left in the buffer so that
// the next call returns it as its own token. At EOF a trailing unterminated
// word is returned as the final token. The returned err is always nil.
func ScanTokens(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Skip leading white space.
	start := 0
	for start < len(data) {
		r, width := utf8.DecodeRune(data[start:])
		if !unicode.IsSpace(r) {
			break
		}
		start += width
	}

	// Scan forward from the first non-space rune until the token ends.
	for i := start; i < len(data); {
		r, width := utf8.DecodeRune(data[i:])
		switch {
		case isParan(r):
			if i == start {
				// A parenthesis in first position is a token by itself.
				return i + width, data[i : i+width], nil
			}
			// The parenthesis ends the current word; do not advance past it
			// so it stays in the buffer and becomes the next token.
			return i, data[start:i], nil
		case unicode.IsSpace(r):
			// White space ends the current word and is consumed with it.
			return i + width, data[start:i], nil
		}
		i += width
	}

	// If we're at EOF, we have a final, non-empty, non-terminated word. Return it.
	if atEOF && len(data) > start {
		return len(data), data[start:], nil
	}

	// Request more data. Advance past the white space already skipped so that
	// a long run of white space does not pile up in the Scanner's buffer (and
	// eventually exceed its token size limit) or get rescanned on every call.
	return start, nil, nil
}
// NewLispScanner returns a bufio.Scanner that reads from str and uses
// ScanTokens as its split function.
func NewLispScanner(str string) *bufio.Scanner {
	s := bufio.NewScanner(strings.NewReader(str))
	s.Split(ScanTokens)
	return s
}