From c1a26629c417cfc2e65f63d7513fbfc76a2b1dee Mon Sep 17 00:00:00 2001 From: Robert Lowry Date: Wed, 20 Nov 2024 21:54:24 -0600 Subject: [PATCH] Add bufTokenReader, various 94 parser fixes (#84) * lex: add bufTokenReader to recycle lexed token buffer * fix index panic in 94 loading empty strategy lines * cmd/compile_test: add exception for nop * fix (some) end label line values * add compile_test to .gitignore --- .gitignore | 1 + asm.go | 2 ++ cmd/compile_test/main.go | 8 ++++++++ compile.go | 4 ++-- lex.go | 43 ++++++++++++++++++++++++++++++++++++++++ lex_test.go | 13 ++++++++++++ load_test.go | 14 +++++++++++++ parser.go | 8 +++++--- 8 files changed, 88 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 30b0338..4859eab 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /vmars /gmars +/compile_test diff --git a/asm.go b/asm.go index 345da89..83c6c0b 100644 --- a/asm.go +++ b/asm.go @@ -65,6 +65,8 @@ func (o OpCode) String() string { return "DJN" case SPL: return "SPL" + case NOP: + return "NOP" default: return "???" 
} diff --git a/cmd/compile_test/main.go b/cmd/compile_test/main.go index ed6af60..1491390 100644 --- a/cmd/compile_test/main.go +++ b/cmd/compile_test/main.go @@ -108,6 +108,14 @@ func main() { instructionsMatch := true for i, inst := range in.Code { if expected.Code[i] != inst { + if inst.Op == gmars.NOP { + ex := expected.Code[i] + if inst.OpMode == gmars.B && ex.OpMode == gmars.F { + if inst.Op == ex.Op && inst.AMode == ex.AMode && inst.A == ex.A && inst.BMode == ex.BMode && inst.B == ex.B { + continue + } + } + } fmt.Printf("%s: instruction mismatch: '%s' != '%s'\n", inPath, inst, expected.Code[i]) instructionsMatch = false } diff --git a/compile.go b/compile.go index 9cd03eb..2242098 100644 --- a/compile.go +++ b/compile.go @@ -78,7 +78,7 @@ func (c *compiler) loadSymbols() { func (c *compiler) reloadReferences() error { c.labels = make(map[string]int) - curPseudoLine := 1 + var curPseudoLine int for _, line := range c.lines { if line.typ == lineInstruction { for _, label := range line.labels { @@ -87,7 +87,7 @@ func (c *compiler) reloadReferences() error { return fmt.Errorf("line %d: label '%s' redefined", line.line, label) } c.labels[label] = line.codeLine - curPseudoLine++ + curPseudoLine = line.codeLine + 1 } } else if line.typ == linePseudoOp { for _, label := range line.labels { diff --git a/lex.go b/lex.go index b86dd62..760f297 100644 --- a/lex.go +++ b/lex.go @@ -7,6 +7,13 @@ import ( "unicode" ) +// tokenReader defines an interface shared between the stream based lexer +// and a bufTokenReader to cache tokens in memory. 
+type tokenReader interface { + NextToken() (token, error) + Tokens() ([]token, error) +} + type lexer struct { reader *bufio.Reader nextRune rune @@ -15,6 +22,37 @@ type lexer struct { tokens chan token } +// bufTokenReader implements the same interface as a streaming parser to let +// us cache and reuse the token stream instead of making multiple passes with +// the lexer +type bufTokenReader struct { + tokens []token + i int +} + +func newBufTokenReader(tokens []token) *bufTokenReader { + return &bufTokenReader{tokens: tokens} +} + +func (r *bufTokenReader) NextToken() (token, error) { + if r.i >= len(r.tokens) { + return token{}, fmt.Errorf("no more tokens") + } + next := r.tokens[r.i] + r.i++ + return next, nil +} + +func (r *bufTokenReader) Tokens() ([]token, error) { + if r.i >= len(r.tokens) { + return nil, fmt.Errorf("no more tokens") + } + subslice := r.tokens[r.i:] + ret := make([]token, len(subslice)) + copy(ret, subslice) + return ret, nil +} + type lexStateFn func(l *lexer) lexStateFn func newLexer(r io.Reader) *lexer { @@ -221,3 +259,8 @@ func lexComment(l *lexer) lexStateFn { l.tokens <- token{typ: tokComment, val: string(commentBuf)} return lexInput } + +func LexInput(r io.Reader) ([]token, error) { + lexer := newLexer(r) + return lexer.Tokens() +} diff --git a/lex_test.go b/lex_test.go index 23621e8..1b10660 100644 --- a/lex_test.go +++ b/lex_test.go @@ -145,3 +145,16 @@ func TestLexEnd(t *testing.T) { assert.True(t, eof) assert.Equal(t, r, '\x00') } + +func TestBufTokenReader(t *testing.T) { + in := strings.NewReader("dat 0, 0\n") + lexer := newLexer(in) + tokens, err := lexer.Tokens() + require.NoError(t, err) + + bReader := newBufTokenReader(tokens) + bTokens, err := bReader.Tokens() + require.NoError(t, err) + + require.Equal(t, tokens, bTokens) +} diff --git a/load_test.go b/load_test.go index ebc03f1..ed84ef6 100644 --- a/load_test.go +++ b/load_test.go @@ -81,6 +81,20 @@ func TestValidInput(t *testing.T) { } } +func TestValidInput94(t
*testing.T) { + // random inputs that are valid but not worth validating output + cases := []string{ + "ADD.BA $ 1, $ 1\n", + } + + config := ConfigNOP94 + for i, testCase := range cases { + reader := strings.NewReader(testCase) + _, err := ParseLoadFile(reader, config) + assert.NoError(t, err, "test: %d: '%s'", i, testCase) + } +} + func TestInvalidInput(t *testing.T) { // random inputs that will throw an error cases := []string{ diff --git a/parser.go b/parser.go index a060b23..734cd6b 100644 --- a/parser.go +++ b/parser.go @@ -50,7 +50,7 @@ func (line sourceLine) subSymbol(label string, value []token) sourceLine { } type parser struct { - lex *lexer + lex tokenReader // state for the running parser nextToken token @@ -71,7 +71,7 @@ type parser struct { references map[string]int } -func newParser(lex *lexer) *parser { +func newParser(lex tokenReader) *parser { p := &parser{ lex: lex, symbols: make(map[string]int), @@ -199,7 +199,9 @@ func parseLine(p *parser) parseStateFn { } else if strings.HasPrefix(p.nextToken.val, ";author") { p.metadata.Author = strings.TrimSpace(p.nextToken.val[7:]) } else if strings.HasPrefix(p.nextToken.val, ";strategy") { - p.metadata.Strategy += p.nextToken.val[10:] + "\n" + if len(p.nextToken.val) > 10 { + p.metadata.Strategy += p.nextToken.val[10:] + "\n" + } } p.currentLine.typ = lineComment return parseComment