Skip to content

Commit 73edbc5

Browse files
feat!: use lexical analyzer instead of regex
* remove IsHeaderErr, IsNoBlankLineErr * add more test cases, update benchmark * update README, LICENSE, comments
1 parent 3d97670 commit 73edbc5

11 files changed

+636
-214
lines changed

Diff for: LICENSE.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
The MIT License (MIT)
22

33
Copyright (c) 2021 Muthu Krishnan
4-
Copyright (c) 2021 Matthew Bamber
4+
Copyright (c) 2021 Matthew Bamber, Rene Zbinden, Brandon Buck
55

66
Permission is hereby granted, free of charge, to any person obtaining a copy
77
of this software and associated documentation files (the "Software"), to deal

Diff for: README.md

+10-7
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
A Go parser for [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) messages
44

5-
[![PkgGoDev](https://pkg.go.dev/badge/github.com/conventionalcommit/parser)](https://pkg.go.dev/github.com/conventionalcommit/parser)
5+
[![PkgGoDev](https://pkg.go.dev/badge/github.com/conventionalcommit/parser)](https://pkg.go.dev/github.com/conventionalcommit/parser)![GitHub go.mod Go version](https://img.shields.io/github/go-mod/go-version/conventionalcommit/parser)
66

77
### Usage
88

@@ -42,16 +42,19 @@ commitMsg = &parser.Commit{
4242
*/
4343
```
4444

45-
### Fork
46-
47-
This parser is a fork of [cov-commit-parser](https://github.com/mbamber/cov-commit-parser) by [Matthew Bamber](https://github.com/mbamber/)
48-
4945
### TODO
5046

51-
- [ ] Avoid regex
47+
- [ ] More Test Cases
5248
- [ ] Benchmark
5349

50+
### Attribution
51+
52+
This parser is inspired by and forked from
53+
54+
- [cc](https://github.com/zbindenren/cc) by [Rene Zbinden](https://github.com/zbindenren)
55+
- [go-lexer](https://github.com/bbuck/go-lexer) by [Brandon Buck](https://github.com/bbuck)
56+
- [cov-commit-parser](https://github.com/mbamber/cov-commit-parser) by [Matthew Bamber](https://github.com/mbamber)
57+
5458
### License
5559

5660
[MIT License](https://github.com/conventionalcommit/parser/tree/master/LICENSE.md)
57-

Diff for: commit.go

+2
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,12 @@ func newNote(token, value string) Note {
7474
}
7575
}
7676

77+
// Token returns the token of the footer note, i.e. the value held in the note's token field.
7778
func (n *Note) Token() string {
7879
return n.token
7980
}
8081

82+
// Value returns the value of the footer note, i.e. the value held in the note's value field.
8183
func (n *Note) Value() string {
8284
return n.value
8385
}

Diff for: errors.go

-18
This file was deleted.

Diff for: lexer.go

+190
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
package parser
2+
3+
import (
4+
"strings"
5+
"unicode/utf8"
6+
)
7+
8+
const (
	// eof is the sentinel rune returned by Next once the source is exhausted.
	eof rune = -1
	// tokenChBufSize is the buffer size of the channel tokens are emitted on.
	tokenChBufSize = 10
	// runeStackBufSize is the initial capacity of the rewind stack.
	runeStackBufSize = 64
)

// stateFunc is a single lexing step. It operates on the lexer and returns the
// next state to run, or nil to stop the lexer.
type stateFunc func(*lexer) stateFunc

// tokenType identifies the kind of an emitted token.
type tokenType int

// token is a section of the source produced by Emit. Start and End are the
// byte offsets of Value within the source (End is exclusive).
type token struct {
	Type       tokenType
	Value      string
	Start, End int
}

// lexer walks over source rune by rune and emits tokens on tokenCh.
type lexer struct {
	source               string
	startPos, currentPos int    // byte offsets delimiting the section being analyzed
	runeStack            []rune // runes read since the last Emit/Ignore, used by Rewind

	startState stateFunc
	tokenCh    chan token

	err          error
	errorHandler func(err error)
}

// newLexer creates and returns a lexer ready to analyze the given source,
// beginning in the start state. errHand may be nil, in which case Error panics.
func newLexer(src string, start stateFunc, errHand func(err error)) *lexer {
	return &lexer{
		source:       src,
		startState:   start,
		startPos:     0,
		currentPos:   0,
		errorHandler: errHand,
		// Length 0 with pre-sized capacity: the stack must start out empty,
		// otherwise Rewind would pop runes that were never read.
		runeStack: make([]rune, 0, runeStackBufSize),
	}
}

// Start creates the token channel and runs the state machine in a new
// goroutine. Tokens are then retrieved with NextToken.
func (l *lexer) Start() {
	l.tokenCh = make(chan token, tokenChBufSize)

	go l.start()
}

// start runs state functions until one returns nil, then closes the token
// channel so that NextToken reports completion.
func (l *lexer) start() {
	for state := l.startState; state != nil; {
		state = state(l)
	}
	close(l.tokenCh)
}

// NextToken returns the next token from the lexer and a flag that reports
// whether the lexer is done. While tokens are available it returns
// (token, false); once the token channel is closed and drained it returns
// (nil, true).
func (l *lexer) NextToken() (*token, bool) {
	tok, ok := <-l.tokenCh
	if !ok {
		return nil, true
	}
	return &tok, false
}

// Error reports e. If no errorHandler was given it panics with e; otherwise it
// stores e (retrievable through Err) and then calls the errorHandler with it.
//
// NOTE(review): the stored error is not synchronized; if state functions call
// Error from the goroutine started by Start, confirm callers only read Err
// after NextToken has reported completion.
func (l *lexer) Error(e error) {
	if l.errorHandler == nil {
		panic(e)
	}

	l.err = e
	l.errorHandler(e)
}

// Current returns the section of the source being analyzed at this moment,
// i.e. everything read since the last Emit or Ignore.
func (l *lexer) Current() string {
	return l.source[l.startPos:l.currentPos]
}

// Get returns the section of the source between the byte offsets startPos
// (inclusive) and endPos (exclusive).
func (l *lexer) Get(startPos, endPos int) string {
	return l.source[startPos:endPos]
}

// Err returns the last error passed to Error, or nil if there was none.
func (l *lexer) Err() error {
	return l.err
}

// Emit receives a token type and pushes a new token holding the currently
// analyzed value into the token channel. The current position then becomes the
// start of the next token and the rewind stack is cleared.
func (l *lexer) Emit(t tokenType) {
	tok := token{
		Type:  t,
		Value: l.Current(),
		Start: l.startPos,
		End:   l.currentPos,
	}
	l.tokenCh <- tok
	l.startPos = l.currentPos
	l.clearRune()
}

// Ignore clears the rewind stack and then sets the current beginning position
// to the current position in the source, which effectively ignores the section
// of the source being analyzed.
func (l *lexer) Ignore() {
	l.clearRune()
	l.startPos = l.currentPos
}

// Peek performs a Next operation immediately followed by a Rewind, returning
// the peeked rune without consuming it.
func (l *lexer) Peek() rune {
	r := l.Next()
	l.Rewind()

	return r
}

// Rewind takes the last rune read (if any) and moves the position back over
// it. Rewind can be called several times in a row, but it never moves past the
// last point a token was emitted or ignored: with nothing left to rewind it is
// a no-op.
func (l *lexer) Rewind() {
	if r := l.popRune(); r <= eof {
		// Either the stack was empty or the last read hit the end of the
		// source; in both cases the position did not advance.
		return
	}

	// Measure the width from the bytes actually consumed rather than from the
	// rune value: an invalid byte is read as utf8.RuneError but only consumes
	// one byte, whereas utf8.RuneLen(utf8.RuneError) is 3.
	_, size := utf8.DecodeLastRuneInString(l.source[l.startPos:l.currentPos])
	l.currentPos -= size
}

// Next pulls the next rune from the source and returns it, moving the position
// forward. At the end of the source it returns eof and does not move.
func (l *lexer) Next() rune {
	rest := l.source[l.currentPos:]
	if rest == "" {
		l.pushRune(eof)
		return eof
	}

	r, size := utf8.DecodeRuneInString(rest)
	l.currentPos += size
	l.pushRune(r)

	return r
}

// Take receives a string containing all acceptable characters and continues
// over each consecutive character in the source until one that is not in the
// given string is encountered. This should be used to quickly pull token parts.
func (l *lexer) Take(chars string) {
	for strings.ContainsRune(chars, l.Next()) {
	}
	l.Rewind() // last next wasn't a match
}

// TakeNext is similar to Take but consumes at most one rune, and only if the
// next rune equals ch.
func (l *lexer) TakeNext(ch rune) {
	if l.Next() != ch {
		l.Rewind() // last next wasn't a match
	}
}

// pushRune records r on the rewind stack.
func (l *lexer) pushRune(r rune) {
	l.runeStack = append(l.runeStack, r)
}

// popRune removes and returns the most recently pushed rune. It returns eof
// when the stack is empty so that Rewind becomes a no-op instead of panicking.
func (l *lexer) popRune() rune {
	last := len(l.runeStack) - 1
	if last < 0 {
		return eof
	}

	r := l.runeStack[last]
	l.runeStack = l.runeStack[:last]
	return r
}

// clearRune empties the rewind stack while keeping its backing storage.
func (l *lexer) clearRune() {
	l.runeStack = l.runeStack[:0]
}

0 commit comments

Comments
 (0)