Skip to content
This repository was archived by the owner on Jul 3, 2022. It is now read-only.

Commit f55c50b

Browse files
committed
Add identifiers, Completed chapter 4, the Scanner
1 parent fb29075 commit f55c50b

File tree

3 files changed

+93
-7
lines changed

3 files changed

+93
-7
lines changed

CHANGELOG.md

+4-6
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
11+
## [0.0.1] - 2020-08-01
12+
1013
### Added
1114

1215
- Completing chapter 4
13-
- Completed including chapter 4.6.2
1416
- Created CHANGELOG.md, README.md, STATUS.md
1517
- Created first structure of the project, including tox, flake8, mypy, black and
1618
other utilities
1719
- Implemented `run`, `run_file` and `run_prompt` methods
1820
- Added `Token`, `Scanner`, `TokenType`
19-
- Support strings and numbers
20-
21-
## [0.0.1] - 2020-00-00
22-
23-
Todo, this is just a placeholder
21+
- Support `strings`, `numbers` and `identifiers`
2422

2523
[Unreleased]: https://github.com/RoelAdriaans/yaplox/compare/v0.0.1...HEAD
2624
[0.0.1]: https://github.com/RoelAdriaans/yaplox/releases/tag/v0.0.1

src/yaplox/scanner.py

+33-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,24 @@ class Scanner:
99
start: int = 0
1010
current: int = 0
1111
line: int = 1
12+
keywords = {
13+
"and": TokenType.AND,
14+
"class": TokenType.CLASS,
15+
"else": TokenType.ELSE,
16+
"false": TokenType.FALSE,
17+
"for": TokenType.FOR,
18+
"fun": TokenType.FUN,
19+
"if": TokenType.IF,
20+
"nil": TokenType.NIL,
21+
"or": TokenType.OR,
22+
"print": TokenType.PRINT,
23+
"return": TokenType.RETURN,
24+
"super": TokenType.SUPER,
25+
"this": TokenType.THIS,
26+
"true": TokenType.TRUE,
27+
"var": TokenType.VAR,
28+
"while": TokenType.WHILE,
29+
}
1230

1331
def __init__(self, source: str, on_error=None):
1432
"""
@@ -76,6 +94,16 @@ def _number(self):
7694
number_value = self.source[self.start : self.current]
7795
self._add_token(TokenType.NUMBER, float(number_value))
7896

97+
def _identifier(self):
98+
while self._peek().isalnum() or self._peek() == "_":
99+
self._advance()
100+
101+
# See if the identifier is a reserved word
102+
text = self.source[self.start : self.current]
103+
token_type = self.keywords.get(text, TokenType.IDENTIFIER)
104+
105+
self._add_token(token_type=token_type)
106+
79107
def _scan_token(self):
80108
""" Scan tokens"""
81109
c = self._advance()
@@ -126,8 +154,12 @@ def _scan_token(self):
126154
if c.isdigit():
127155
# A digit encountered, consume the number
128156
self._number()
129-
# If we have an on_error callback, run this, otherwise raise the error again
157+
elif c.isalpha() or c == "_":
158+
# A letter encountered
159+
self._identifier()
130160
elif self.on_error:
161+
# If we have an on_error callback, run this, otherwise raise the
162+
# error again
131163
self.on_error(self.line, f"Unexpected character: {c}")
132164
else:
133165
raise

tests/test_scanner.py

+56
Original file line numberDiff line numberDiff line change
@@ -176,3 +176,59 @@ def test_scanner_with_number(self, mocker):
176176
assert tokens[5].literal == 13.0
177177

178178
assert not on_error_mock.called
179+
180+
def test_scanner_identifier(self, mocker):
181+
source = "appelflap or nil if while _foo_bar_1_2"
182+
183+
on_error_mock = mocker.MagicMock()
184+
scanner = Scanner(source, on_error=on_error_mock)
185+
186+
tokens = scanner.scan_tokens()
187+
188+
assert tokens[0].token_type == TokenType.IDENTIFIER
189+
assert tokens[0].lexeme == "appelflap"
190+
191+
assert tokens[1].token_type == TokenType.OR
192+
assert tokens[2].token_type == TokenType.NIL
193+
assert tokens[3].token_type == TokenType.IF
194+
assert tokens[4].token_type == TokenType.WHILE
195+
196+
assert tokens[5].token_type == TokenType.IDENTIFIER
197+
assert tokens[5].lexeme == "_foo_bar_1_2"
198+
199+
assert not on_error_mock.called
200+
201+
def test_scanner_invalid_identifier(self, mocker):
202+
# The bit of source code below is completely wrong, and identifiers and
203+
# numbers in here will result in valid tokens, but not the tokens you
204+
# would expect. This is not a problem of the scanner, it just does as it's
205+
# told.
206+
source = "123foo_bar bar-stool spam_egg_1.3_chickens"
207+
208+
on_error_mock = mocker.MagicMock()
209+
scanner = Scanner(source, on_error=on_error_mock)
210+
211+
tokens = scanner.scan_tokens()
212+
213+
assert tokens[0].literal == 123.0
214+
215+
assert tokens[1].lexeme == "foo_bar"
216+
assert tokens[1].token_type == TokenType.IDENTIFIER
217+
218+
assert tokens[2].lexeme == "bar"
219+
assert tokens[2].token_type == TokenType.IDENTIFIER
220+
221+
assert tokens[3].token_type == TokenType.MINUS
222+
223+
assert tokens[4].lexeme == "stool"
224+
assert tokens[5].lexeme == "spam_egg_1"
225+
assert tokens[6].token_type == TokenType.DOT
226+
227+
# This token did not consume the 1 before, since that was still part of the
228+
# valid identifier. The dot broke the identifier, and then a number started
229+
assert tokens[7].token_type == TokenType.NUMBER
230+
assert tokens[7].literal == 3.0
231+
232+
assert tokens[8].lexeme == "_chickens"
233+
234+
assert not on_error_mock.called

0 commit comments

Comments
 (0)