* symbol scanner: ignore symbols inside for loops
* lexer/scanner: pass invalid input through token stream w/o error
* lex invalid input gracefully
Showing 7 changed files with 285 additions and 34 deletions.
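The second and third bullets describe the same idea from both sides: rather than aborting the lex on unrecognized input, the lexer forwards it through the token stream (the symbol scanner sees such tokens as tokInvalid) and lets consumers decide what to do. A minimal self-contained sketch of that pattern, with simplified stand-in types since the lexer's internals are not part of the excerpts below:

	package main

	import "fmt"

	type tokenType int

	const (
		tokNumber tokenType = iota
		tokInvalid
		tokEOF
	)

	type token struct {
		typ tokenType
		val string
	}

	// lex never fails: unrecognized runes become tokInvalid tokens
	// instead of an error, so invalid input flows through the stream.
	func lex(input string) []token {
		var out []token
		for _, r := range input {
			switch {
			case r >= '0' && r <= '9':
				out = append(out, token{tokNumber, string(r)})
			case r == ' ' || r == '\n':
				// whitespace is skipped
			default:
				out = append(out, token{tokInvalid, string(r)})
			}
		}
		return append(out, token{typ: tokEOF})
	}

	func main() {
		fmt.Println(lex("1 ~ 2")) // '~' passes through as a tokInvalid token
	}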
@@ -0,0 +1,166 @@
package gmars

import (
	"fmt"
	"strings"
)

// symbolScanner accepts a tokenReader and scans for any equ
// symbols it contains. Symbols defined inside for loops are
// ignored, allowing the same code to run both before and
// after for loops have been expanded.
type symbolScanner struct {
	lex tokenReader

	nextToken token
	atEOF     bool
	valBuf    []token
	labelBuf  []string
	forLevel  int
	err       error

	symbols map[string][]token
}

type scanStateFn func(p *symbolScanner) scanStateFn

func newSymbolScanner(lex tokenReader) *symbolScanner {
	pre := &symbolScanner{
		lex:     lex,
		symbols: make(map[string][]token),
	}

	pre.next()

	return pre
}

// next loads the next token from the lexer into p.nextToken
// and returns the token that was previously buffered.
func (p *symbolScanner) next() token {
	if p.atEOF {
		return token{typ: tokEOF}
	}
	tok, err := p.lex.NextToken()
	if err != nil {
		p.atEOF = true
		return token{tokError, fmt.Sprintf("%s\n", err)}
	}
	if tok.typ == tokEOF || tok.typ == tokError {
		p.atEOF = true
	}
	retTok := p.nextToken
	p.nextToken = tok
	return retTok
}

// ScanInput runs the state machine until it halts and returns
// the collected symbol table.
func (p *symbolScanner) ScanInput() (map[string][]token, error) {
	for state := scanLine; state != nil; {
		state = state(p)
	}
	if p.err != nil {
		return nil, p.err
	}
	return p.symbols, nil
}

// consume advances to the next token, halting at EOF and
// otherwise continuing with nextState.
func (p *symbolScanner) consume(nextState scanStateFn) scanStateFn {
	p.next()
	if p.nextToken.typ == tokEOF {
		return nil
	}
	return nextState
}

// scanLine runs at the start of each line.
// on text: scanLabels
// on anything else: scanConsumeLine
func scanLine(p *symbolScanner) scanStateFn {
	switch p.nextToken.typ {
	case tokText:
		p.labelBuf = make([]string, 0)
		return scanLabels
	default:
		return scanConsumeLine
	}
}

// scanLabels collects labels until an op is reached.
// text equ: scanEquValue
// text op: scanConsumeLine
// text default: scanLabels
// anything else: scanConsumeLine
func scanLabels(p *symbolScanner) scanStateFn {
	switch p.nextToken.typ {
	case tokText:
		if p.nextToken.IsPseudoOp() {
			opLower := strings.ToLower(p.nextToken.val)
			switch opLower {
			case "equ":
				if p.forLevel == 0 {
					p.valBuf = make([]token, 0)
					return p.consume(scanEquValue)
				}
			case "for":
				p.forLevel++
				return scanConsumeLine
			case "rof":
				if p.forLevel > 0 {
					p.forLevel--
				}
				return scanConsumeLine
			case "end":
				if p.forLevel > 1 {
					return scanConsumeLine
				} else {
					return nil
				}
			default:
				return scanConsumeLine
			}
		} else if p.nextToken.IsOp() {
			return scanConsumeLine
		} else if p.nextToken.typ == tokInvalid {
			return nil
		}
		p.labelBuf = append(p.labelBuf, p.nextToken.val)
		return p.consume(scanLabels)
	case tokComment:
		fallthrough
	case tokNewline:
		return p.consume(scanLabels)
	case tokEOF:
		return nil
	default:
		return scanConsumeLine
	}
}

// scanConsumeLine discards tokens through the end of the line.
func scanConsumeLine(p *symbolScanner) scanStateFn {
	switch p.nextToken.typ {
	case tokNewline:
		return p.consume(scanLine)
	case tokError:
		return nil
	case tokEOF:
		return nil
	default:
		return p.consume(scanConsumeLine)
	}
}

// scanEquValue buffers the value tokens of an equ definition and
// records them for each pending label, erroring on redefinition.
func scanEquValue(p *symbolScanner) scanStateFn {
	for p.nextToken.typ != tokNewline && p.nextToken.typ != tokEOF && p.nextToken.typ != tokError {
		p.valBuf = append(p.valBuf, p.nextToken)
		p.next()
	}
	for _, label := range p.labelBuf {
		_, ok := p.symbols[label]
		if ok {
			p.err = fmt.Errorf("symbol '%s' redefined", label)
			return nil
		}
		p.symbols[label] = p.valBuf
	}
	p.valBuf = make([]token, 0)
	p.labelBuf = make([]string, 0)
	return p.consume(scanLine)
}
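Taken together, a typical in-package use of the scanner looks like the sketch below. LexInput and newBufTokenReader are the helpers exercised by the tests further down; the input string is illustrative:

	input := "step equ 4\nadd 0, step\n"
	tokens, err := LexInput(strings.NewReader(input))
	if err != nil {
		// handle lexer error
	}
	scanner := newSymbolScanner(newBufTokenReader(tokens))
	symbols, err := scanner.ScanInput()
	if err != nil {
		// e.g. a "symbol ... redefined" error from scanEquValue
	}
	// symbols["step"] now holds the value tokens {tokNumber, "4"}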
@@ -0,0 +1,63 @@
package gmars

import (
	"strings"
	"testing"

	"github.com/stretchr/testify/require"
)

type symbolScannerTestCase struct {
	input  string
	output map[string][]token
}

func runSymbolScannerTests(t *testing.T, cases []symbolScannerTestCase) {
	for _, test := range cases {
		tokens, err := LexInput(strings.NewReader(test.input))
		require.NoError(t, err)
		require.NotNil(t, tokens)

		scanner := newSymbolScanner(newBufTokenReader(tokens))
		symbols, err := scanner.ScanInput()
		require.NoError(t, err)
		require.NotNil(t, symbols)

		require.Equal(t, test.output, symbols)
	}
}

func TestSymbolScanner(t *testing.T) {
	tests := []symbolScannerTestCase{
		{
			input: "test equ 2\ndat 0, test\n",
			output: map[string][]token{
				"test": {{tokNumber, "2"}},
			},
		},
		{
			input:  "dat 0, 0",
			output: map[string][]token{},
		},
		{
			input: "test\ntest2\nequ 2",
			output: map[string][]token{
				"test":  {{tokNumber, "2"}},
				"test2": {{tokNumber, "2"}},
			},
		},
		{
			// ignore symbols inside for loops because they could be redefined;
			// we just re-scan after expanding for loops
			input: "test equ 2\nfor 0\nq equ 1\nrof\nfor 1\nq equ 2\nrof\n",
			output: map[string][]token{
				"test": {{tokNumber, "2"}},
			},
		},
		{
			input:  "for 1\nend\nrof\n ~",
			output: map[string][]token{},
		},
	}
	runSymbolScannerTests(t, tests)
}
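The last case above covers the new graceful handling of invalid input ('~'); the redefinition error from scanEquValue can be exercised the same way, as in this small sketch:

	tokens, _ := LexInput(strings.NewReader("a equ 1\na equ 2\n"))
	_, err := newSymbolScanner(newBufTokenReader(tokens)).ScanInput()
	// err is non-nil: symbol 'a' redefined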
@@ -0,0 +1,34 @@
package gmars

import "fmt"

// bufTokenReader implements the same interface as the streaming lexer,
// letting us cache and reuse the token stream instead of making
// multiple passes with the lexer.
type bufTokenReader struct {
	tokens []token
	i      int
}

func newBufTokenReader(tokens []token) *bufTokenReader {
	return &bufTokenReader{tokens: tokens}
}

func (r *bufTokenReader) NextToken() (token, error) {
	if r.i >= len(r.tokens) {
		return token{}, fmt.Errorf("no more tokens")
	}
	next := r.tokens[r.i]
	r.i++
	return next, nil
}

// Tokens returns a copy of the remaining, unread tokens.
func (r *bufTokenReader) Tokens() ([]token, error) {
	if r.i >= len(r.tokens) {
		return nil, fmt.Errorf("no more tokens")
	}
	subslice := r.tokens[r.i:]
	ret := make([]token, len(subslice))
	copy(ret, subslice) // copy(dst, src): fill the returned slice from the cache
	return ret, nil
}