implement symbol scanner (#87)
* symbol scanner: ignore symbols inside for loops

* lexer/scanner: pass invalid input through token stream w/o error

* lex invalid input gracefully
bobertlo authored Nov 23, 2024
1 parent 949368f commit 6f9d992
Showing 7 changed files with 285 additions and 34 deletions.
13 changes: 12 additions & 1 deletion compile.go
@@ -446,7 +446,18 @@ func (c *compiler) compile() (WarriorData, error) {

func CompileWarrior(r io.Reader, config SimulatorConfig) (WarriorData, error) {
lexer := newLexer(r)
parser := newParser(lexer)
tokens, err := lexer.Tokens()
if err != nil {
return WarriorData{}, err
}

// scanner := newSymbolScanner(newBufTokenReader(tokens))
// _, err = scanner.ScanInput()
// if err != nil {
// return WarriorData{}, fmt.Errorf("symbol scanner: %s", err)
// }

parser := newParser(newBufTokenReader(tokens))
sourceLines, metadata, err := parser.parse()
if err != nil {
return WarriorData{}, err
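Once the symbol scanner pass above is uncommented, CompileWarrior lexes the input once, caches the tokens, and hands a fresh bufTokenReader to each consumer. The sketch below only illustrates that intended two-pass flow and is not code from this commit; the helper name scanAndParse is invented.

package gmars

import (
	"fmt"
	"io"
)

// scanAndParse is a hypothetical helper showing the intended flow:
// lex once, then reuse the cached tokens for both the symbol scanner
// and the parser.
func scanAndParse(r io.Reader) (map[string][]token, error) {
	lexer := newLexer(r)
	tokens, err := lexer.Tokens()
	if err != nil {
		return nil, err
	}

	// pass 1: collect equ symbol definitions
	scanner := newSymbolScanner(newBufTokenReader(tokens))
	symbols, err := scanner.ScanInput()
	if err != nil {
		return nil, fmt.Errorf("symbol scanner: %s", err)
	}

	// pass 2: parse from a fresh reader over the same cached tokens
	parser := newParser(newBufTokenReader(tokens))
	if _, _, err := parser.parse(); err != nil {
		return nil, err
	}

	return symbols, nil
}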
39 changes: 7 additions & 32 deletions lex.go
@@ -22,37 +22,6 @@ type lexer struct {
tokens chan token
}

// butTokenReader implements the same interface as a streaming parser to let
// us cache and reuse the token stream instead of making multiple passes with
// the lexer
type bufTokenReader struct {
tokens []token
i int
}

func newBufTokenReader(tokens []token) *bufTokenReader {
return &bufTokenReader{tokens: tokens}
}

func (r *bufTokenReader) NextToken() (token, error) {
if r.i >= len(r.tokens) {
return token{}, fmt.Errorf("no more tokens")
}
next := r.tokens[r.i]
r.i++
return next, nil
}

func (r *bufTokenReader) Tokens() ([]token, error) {
if r.i >= len(r.tokens) {
return nil, fmt.Errorf("no more tokens")
}
subslice := r.tokens[r.i:]
ret := make([]token, len(subslice))
copy(subslice, ret)
return ret, nil
}

type lexStateFn func(l *lexer) lexStateFn

func newLexer(r io.Reader) *lexer {
@@ -207,7 +176,13 @@ func lexInput(l *lexer) lexStateFn {
case '\x1a':
return l.consume(lexInput)
default:
l.tokens <- token{tokError, fmt.Sprintf("unexpected character: '%s'", string(l.nextRune))}
// pass the invalid rune through the stream. in a properly
// formatted file, any invalid input comes after an 'end'
// pseudo-op, so the parser stops before it reaches this
// token; otherwise the parser reports it as an error
l.tokens <- token{tokInvalid, string(l.nextRune)}
l.tokens <- token{typ: tokEOF}
return nil
}

return nil
3 changes: 2 additions & 1 deletion lex_test.go
@@ -114,7 +114,8 @@ func TestLexer(t *testing.T) {
{
input: "~",
expected: []token{
{tokError, "unexpected character: '~'"},
{tokInvalid, "~"},
{tokEOF, ""},
},
},
{
166 changes: 166 additions & 0 deletions symbol_scanner.go
@@ -0,0 +1,166 @@
package gmars

import (
"fmt"
"strings"
)

// symbolScanner accepts a tokenReader and scans for any equ
// symbol definitions it contains. Symbols defined inside for
// loops are ignored, allowing the same scan to run both before
// and after for loops have been expanded.
type symbolScanner struct {
lex tokenReader

nextToken token
atEOF bool
valBuf []token
labelBuf []string
forLevel int
err error

symbols map[string][]token
}

type scanStateFn func(p *symbolScanner) scanStateFn

func newSymbolScanner(lex tokenReader) *symbolScanner {
pre := &symbolScanner{
lex: lex,
symbols: make(map[string][]token),
}

pre.next()

return pre
}

func (p *symbolScanner) next() token {
if p.atEOF {
return token{typ: tokEOF}
}
tok, err := p.lex.NextToken()
if err != nil {
p.atEOF = true
return token{tokError, fmt.Sprintf("%s\n", err)}
}
if tok.typ == tokEOF || tok.typ == tokError {
p.atEOF = true
}
retTok := p.nextToken
p.nextToken = tok
return retTok
}

// ScanInput runs the scanner and returns the collected equ symbols
func (p *symbolScanner) ScanInput() (map[string][]token, error) {
for state := scanLine; state != nil; {
state = state(p)
}
if p.err != nil {
return nil, p.err
}
return p.symbols, nil
}

func (p *symbolScanner) consume(nextState scanStateFn) scanStateFn {
p.next()
if p.nextToken.typ == tokEOF {
return nil
}
return nextState
}

// run at start of each line
// on text: scanLabels
// on other: scanConsumeLine
func scanLine(p *symbolScanner) scanStateFn {
switch p.nextToken.typ {
case tokText:
p.labelBuf = make([]string, 0)
return scanLabels
default:
return scanConsumeLine
}
}

// text equ: scanEquValue
// text op: scanConsumeLine
// other text: buffer label, continue scanLabels
// comment/newline: scanLabels
// anything else: scanConsumeLine
func scanLabels(p *symbolScanner) scanStateFn {
switch p.nextToken.typ {
case tokText:
if p.nextToken.IsPseudoOp() {
opLower := strings.ToLower(p.nextToken.val)
switch opLower {
case "equ":
if p.forLevel == 0 {
p.valBuf = make([]token, 0)
return p.consume(scanEquValue)
}
case "for":
p.forLevel++
return scanConsumeLine
case "rof":
if p.forLevel > 0 {
p.forLevel--
}
return scanConsumeLine
case "end":
if p.forLevel > 1 {
return scanConsumeLine
} else {
return nil
}
default:
return scanConsumeLine
}
} else if p.nextToken.IsOp() {
return scanConsumeLine
} else if p.nextToken.typ == tokInvalid {
return nil
}
p.labelBuf = append(p.labelBuf, p.nextToken.val)
return p.consume(scanLabels)
case tokComment:
fallthrough
case tokNewline:
return p.consume(scanLabels)
case tokEOF:
return nil
default:
return scanConsumeLine
}
}

func scanConsumeLine(p *symbolScanner) scanStateFn {
switch p.nextToken.typ {
case tokNewline:
return p.consume(scanLine)
case tokError:
return nil
case tokEOF:
return nil
default:
return p.consume(scanConsumeLine)
}
}

func scanEquValue(p *symbolScanner) scanStateFn {
for p.nextToken.typ != tokNewline && p.nextToken.typ != tokEOF && p.nextToken.typ != tokError {
p.valBuf = append(p.valBuf, p.nextToken)
p.next()
}
for _, label := range p.labelBuf {
_, ok := p.symbols[label]
if ok {
p.err = fmt.Errorf("symbol '%s' redefined", label)
return nil
}
p.symbols[label] = p.valBuf
}
p.valBuf = make([]token, 0)
p.labelBuf = make([]string, 0)
return p.consume(scanLine)
}
63 changes: 63 additions & 0 deletions symbol_scanner_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package gmars

import (
"strings"
"testing"

"github.com/stretchr/testify/require"
)

type symbolScannerTestCase struct {
input string
output map[string][]token
}

func runSymbolScannerTests(t *testing.T, cases []symbolScannerTestCase) {
for _, test := range cases {
tokens, err := LexInput(strings.NewReader(test.input))
require.NoError(t, err)
require.NotNil(t, tokens)

scanner := newSymbolScanner(newBufTokenReader(tokens))
symbols, err := scanner.ScanInput()
require.NoError(t, err)
require.NotNil(t, symbols)

require.Equal(t, test.output, symbols)
}
}

func TestSymbolScanner(t *testing.T) {
tests := []symbolScannerTestCase{
{
input: "test equ 2\ndat 0, test\n",
output: map[string][]token{
"test": {{tokNumber, "2"}},
},
},
{
input: "dat 0, 0",
output: map[string][]token{},
},
{
input: "test\ntest2\nequ 2",
output: map[string][]token{
"test": {{tokNumber, "2"}},
"test2": {{tokNumber, "2"}},
},
},
{
// ignore symbols inside for loops because they could be redefined;
// we just re-scan after expanding the for loops
input: "test equ 2\nfor 0\nq equ 1\nrof\nfor 1\nq equ 2\nrof\n",
output: map[string][]token{
"test": {{tokNumber, "2"}},
},
},
{
input: "for 1\nend\nrof\n ~",
output: map[string][]token{},
},
}
runSymbolScannerTests(t, tests)
}
1 change: 1 addition & 0 deletions token.go
@@ -15,6 +15,7 @@ const (
tokParenR
tokComment // includes semi-colon, no newline char
tokNewline
tokInvalid // pass invalid runes through individually
tokEOF
)

34 changes: 34 additions & 0 deletions tokenbuf.go
@@ -0,0 +1,34 @@
package gmars

import "fmt"

// bufTokenReader implements the same tokenReader interface as the
// streaming lexer so we can cache and reuse the token stream instead
// of making multiple passes with the lexer
type bufTokenReader struct {
tokens []token
i int
}

func newBufTokenReader(tokens []token) *bufTokenReader {
return &bufTokenReader{tokens: tokens}
}

func (r *bufTokenReader) NextToken() (token, error) {
if r.i >= len(r.tokens) {
return token{}, fmt.Errorf("no more tokens")
}
next := r.tokens[r.i]
r.i++
return next, nil
}

func (r *bufTokenReader) Tokens() ([]token, error) {
if r.i >= len(r.tokens) {
return nil, fmt.Errorf("no more tokens")
}
subslice := r.tokens[r.i:]
ret := make([]token, len(subslice))
copy(ret, subslice)
return ret, nil
}
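Each bufTokenReader keeps its own position over the shared slice, so several consumers can read the same cached stream independently. A minimal sketch of that property follows; the function name exampleReuse is invented and not part of this commit.

// exampleReuse lives in package gmars and only illustrates that two
// readers over the same cached tokens do not interfere with each other.
func exampleReuse(tokens []token) {
	first := newBufTokenReader(tokens)
	second := newBufTokenReader(tokens)

	// drain the first reader completely
	for {
		tok, err := first.NextToken()
		if err != nil || tok.typ == tokEOF {
			break
		}
	}

	// the second reader is unaffected and still returns the full stream
	remaining, _ := second.Tokens()
	_ = remaining // len(remaining) == len(tokens)
}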
