From 79db90f5055e00934c1a10c290cf899d22c7ef31 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Thu, 22 Jul 2021 22:09:00 +0300 Subject: [PATCH 01/29] add units test for the .set directive --- tests/assemble.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/tests/assemble.py b/tests/assemble.py index 3875ee0..33b41fe 100644 --- a/tests/assemble.py +++ b/tests/assemble.py @@ -3,6 +3,7 @@ from esp32_ulp.nocomment import remove_comments src = """\ + .set const, 123 start: wait 42 ld r0, r1, 0 @@ -14,14 +15,14 @@ def test_parse_line(): a = Assembler() - lines = src.splitlines() - # note: line number = index + 1 - assert a.parse_line(lines[0]) == None - assert a.parse_line(lines[1]) == ('start', 'wait', ('42', )) - assert a.parse_line(lines[2]) == (None, 'ld', ('r0', 'r1', '0', )) - assert a.parse_line(lines[3]) == (None, 'st', ('r0', 'r1', '0', )) - assert a.parse_line(lines[4]) == (None, 'halt', ()) - assert a.parse_line(lines[5]) == ('end', None, ()) + lines = iter(src.splitlines()) + assert a.parse_line(next(lines)) == (None, '.set', ('const', '123', )) + assert a.parse_line(next(lines)) == None + assert a.parse_line(next(lines)) == ('start', 'wait', ('42', )) + assert a.parse_line(next(lines)) == (None, 'ld', ('r0', 'r1', '0', )) + assert a.parse_line(next(lines)) == (None, 'st', ('r0', 'r1', '0', )) + assert a.parse_line(next(lines)) == (None, 'halt', ()) + assert a.parse_line(next(lines)) == ('end', None, ()) def test_parse(): @@ -34,8 +35,10 @@ def test_parse(): def test_assemble(): a = Assembler() a.assemble(src) + assert a.symbols.has_sym('const') assert a.symbols.has_sym('start') assert a.symbols.has_sym('end') + assert a.symbols.get_sym('const') == (ABS, None, 123) assert a.symbols.get_sym('start') == (REL, TEXT, 0) assert a.symbols.get_sym('end') == (REL, TEXT, 4) assert len(b''.join(a.sections[TEXT])) == 16 # 4 instructions * 4B @@ -50,6 +53,7 @@ def test_symbols(): ('abs_t4', ABS, TEXT, 4), ('rel_d4', REL, 
DATA, 4), ('abs_d4', ABS, DATA, 4), + ('const', ABS, None, 123), ]: st.set_sym(*entry) # PASS 1 ======================================================== @@ -62,11 +66,13 @@ def test_symbols(): assert st.resolve_absolute('abs_d4') == 4 assert st.resolve_absolute('rel_t4') == 4 assert st.resolve_absolute('rel_d4') == 4 + assert st.resolve_absolute('const') == 123 st.set_from(TEXT, 8) assert st.resolve_relative('abs_t4') == -4 assert st.resolve_relative('abs_d4') == -4 assert st.resolve_relative('rel_t4') == -4 assert st.resolve_relative('rel_d4') == -4 + assert st.resolve_absolute('const') == 123 # PASS 2 ======================================================== st.set_bases({TEXT: 100, DATA: 200}) st.set_pass(2) @@ -84,11 +90,13 @@ def test_symbols(): assert st.resolve_absolute('abs_d4') == 4 assert st.resolve_absolute('rel_t4') == 100 + 4 assert st.resolve_absolute('rel_d4') == 200 + 4 + assert st.resolve_absolute('const') == 123 st.set_from(TEXT, 8) assert st.resolve_relative('abs_t4') == 4 - 108 assert st.resolve_relative('abs_d4') == 4 - 108 assert st.resolve_relative('rel_t4') == 104 - 108 assert st.resolve_relative('rel_d4') == 204 - 108 + assert st.resolve_absolute('const') == 123 test_parse_line() From 84d734ddd22f89d44932ad05153b9ff5ac3d38d5 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Thu, 22 Jul 2021 22:12:44 +0300 Subject: [PATCH 02/29] add support for left aligned assembler directives (e.g. .set) Much open-source code out there has .global, .set, etc directives starting in the first column of a line. This change allows assembling such code. Incidentally this also fixes a bug, where directives without parameters, such as .text, .data, etc were silently accepted when left-aligned but in those cases treated as labels instead of section headers. 
--- esp32_ulp/assemble.py | 2 +- tests/assemble.py | 6 ++++++ tests/compat/symbols.S | 2 ++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py index d0b1ff2..e775329 100644 --- a/esp32_ulp/assemble.py +++ b/esp32_ulp/assemble.py @@ -118,7 +118,7 @@ def parse_line(self, line): """ if not line: return - has_label = line[0] not in '\t ' + has_label = line[0] not in '\t .' if has_label: label_line = line.split(None, 1) if len(label_line) == 2: diff --git a/tests/assemble.py b/tests/assemble.py index 33b41fe..4a64b1c 100644 --- a/tests/assemble.py +++ b/tests/assemble.py @@ -4,12 +4,14 @@ src = """\ .set const, 123 +.set const_left, 976 start: wait 42 ld r0, r1, 0 st r0, r1,0 halt end: +.data """ @@ -17,12 +19,14 @@ def test_parse_line(): a = Assembler() lines = iter(src.splitlines()) assert a.parse_line(next(lines)) == (None, '.set', ('const', '123', )) + assert a.parse_line(next(lines)) == (None, '.set', ('const_left', '976', )) assert a.parse_line(next(lines)) == None assert a.parse_line(next(lines)) == ('start', 'wait', ('42', )) assert a.parse_line(next(lines)) == (None, 'ld', ('r0', 'r1', '0', )) assert a.parse_line(next(lines)) == (None, 'st', ('r0', 'r1', '0', )) assert a.parse_line(next(lines)) == (None, 'halt', ()) assert a.parse_line(next(lines)) == ('end', None, ()) + assert a.parse_line(next(lines)) == (None, '.data', ()) # test left-aligned directive is not treated as label def test_parse(): @@ -36,9 +40,11 @@ def test_assemble(): a = Assembler() a.assemble(src) assert a.symbols.has_sym('const') + assert a.symbols.has_sym('const_left') assert a.symbols.has_sym('start') assert a.symbols.has_sym('end') assert a.symbols.get_sym('const') == (ABS, None, 123) + assert a.symbols.get_sym('const_left') == (ABS, None, 976) assert a.symbols.get_sym('start') == (REL, TEXT, 0) assert a.symbols.get_sym('end') == (REL, TEXT, 4) assert len(b''.join(a.sections[TEXT])) == 16 # 4 instructions * 4B diff --git 
a/tests/compat/symbols.S b/tests/compat/symbols.S index bf59c3b..359fa15 100644 --- a/tests/compat/symbols.S +++ b/tests/compat/symbols.S @@ -1,10 +1,12 @@ .text .set constant42, 42 +.set notindented, 1 start: move r0, data0 move r1, data1 move r2, constant42 + move r3, notindented # count from 0 .. 42 in stage register stage_rst From ec81ecc040691076824fca2cd897b70e4d202215 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Thu, 22 Jul 2021 22:41:58 +0300 Subject: [PATCH 03/29] fix a crash bug where BSS size calculation was attempted on the value of a data item (bytes) instead of the size of that data item (int) The size of the bss section was increased with the value of the defined symbol rather than the size of that value (number of bytes). This change fixes that. --- esp32_ulp/assemble.py | 4 ++-- tests/assemble.py | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py index e775329..764ae29 100644 --- a/esp32_ulp/assemble.py +++ b/esp32_ulp/assemble.py @@ -150,8 +150,8 @@ def append_section(self, value, expected_section=None): if expected_section is not None and s is not expected_section: raise TypeError('only allowed in %s section' % expected_section) if s is BSS: - # just increase BSS size by value - self.offsets[s] += value + # just increase BSS size by length of value + self.offsets[s] += len(value) else: self.sections[s].append(value) self.offsets[s] += len(value) diff --git a/tests/assemble.py b/tests/assemble.py index 4a64b1c..f23a6b6 100644 --- a/tests/assemble.py +++ b/tests/assemble.py @@ -14,6 +14,13 @@ .data """ +src_bss = """\ + .bss + +label: + .long 0 +""" + def test_parse_line(): a = Assembler() @@ -52,6 +59,18 @@ def test_assemble(): assert a.offsets[BSS] == 0 +def test_assemble_bss(): + a = Assembler() + try: + a.assemble(src_bss) + except TypeError: + raised = True + else: + raised = False + assert not raised + assert a.offsets[BSS] == 4 # 1 word * 4B + + def 
test_symbols(): st = SymbolTable({}, {}) for entry in [ @@ -108,4 +127,5 @@ def test_symbols(): test_parse_line() test_parse() test_assemble() +test_assemble_bss() test_symbols() From c184924f9645e2e73b466139b194ee435bfa50ab Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Thu, 29 Jul 2021 21:32:19 +0300 Subject: [PATCH 04/29] raise error when attempting to store values in .bss section A simple safety-net matching the behaviour of binutils-esp32ulp --- esp32_ulp/assemble.py | 2 ++ tests/assemble.py | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py index 764ae29..912fa7d 100644 --- a/esp32_ulp/assemble.py +++ b/esp32_ulp/assemble.py @@ -150,6 +150,8 @@ def append_section(self, value, expected_section=None): if expected_section is not None and s is not expected_section: raise TypeError('only allowed in %s section' % expected_section) if s is BSS: + if int.from_bytes(value, 'little') != 0: + raise ValueError('attempt to store non-zero value in section .bss') # just increase BSS size by length of value self.offsets[s] += len(value) else: diff --git a/tests/assemble.py b/tests/assemble.py index f23a6b6..edc321e 100644 --- a/tests/assemble.py +++ b/tests/assemble.py @@ -71,6 +71,25 @@ def test_assemble_bss(): assert a.offsets[BSS] == 4 # 1 word * 4B +def test_assemble_bss_with_value(): + lines = """\ +.bss + .long 3 #non-zero value not allowed in bss section +""" + + a = Assembler() + try: + a.assemble(lines) + except ValueError as e: + if str(e) != "attempt to store non-zero value in section .bss": + raise # re-raise failures we didn't expect + raised = True + else: + raised = False + + assert raised + + def test_symbols(): st = SymbolTable({}, {}) for entry in [ @@ -128,4 +147,5 @@ def test_symbols(): test_parse() test_assemble() test_assemble_bss() +test_assemble_bss_with_value() test_symbols() From 25d34b0d517dd172e179a1d755cb2a92769e90fa Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Thu, 
22 Jul 2021 23:13:36 +0300 Subject: [PATCH 05/29] fix reference to non-existing variable --- esp32_ulp/assemble.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py index 912fa7d..3f73a9d 100644 --- a/esp32_ulp/assemble.py +++ b/esp32_ulp/assemble.py @@ -32,7 +32,7 @@ def get_from(self): def set_sym(self, symbol, stype, section, value): entry = (stype, section, value) if symbol in self._symbols and entry != self._symbols[symbol]: - raise Exception('redefining symbol %s with different value %r -> %r.' % (label, self._symbols[symbol], entry)) + raise Exception('redefining symbol %s with different value %r -> %r.' % (symbol, self._symbols[symbol], entry)) self._symbols[symbol] = entry def has_sym(self, symbol): From 76a81aca0353561213dacedbba8c769dcd11f093 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Fri, 23 Jul 2021 01:10:03 +0300 Subject: [PATCH 06/29] fix typo in comment of instruction definition --- esp32_ulp/opcodes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esp32_ulp/opcodes.py b/esp32_ulp/opcodes.py index 4e2ca04..3018b30 100644 --- a/esp32_ulp/opcodes.py +++ b/esp32_ulp/opcodes.py @@ -112,7 +112,7 @@ def make_ins(layout): unused : 8 # Unused low : 5 # Low bit high : 5 # High bit - opcode : 4 # Opcode (OPCODE_WR_REG) + opcode : 4 # Opcode (OPCODE_RD_REG) """) From 56f4530ce1042a5d48397832e6b475ef2209d589 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Thu, 22 Jul 2021 23:07:06 +0300 Subject: [PATCH 07/29] add support for the .global directive. only symbols flagged as global will be exported This change is mostly to support code that uses the .global directive without having to modify it first (such as commenting out those lines). 
--- esp32_ulp/assemble.py | 18 ++++++++++++++---- tests/assemble.py | 31 ++++++++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py index 3f73a9d..25c7f23 100644 --- a/esp32_ulp/assemble.py +++ b/esp32_ulp/assemble.py @@ -12,9 +12,10 @@ class SymbolTable: - def __init__(self, symbols, bases): + def __init__(self, symbols, bases, globals): self._symbols = symbols self._bases = bases + self._globals = globals self._pass = None def set_pass(self, _pass): @@ -53,7 +54,9 @@ def dump(self): print(symbol, entry) def export(self): - addrs_syms = [(self.resolve_absolute(entry), symbol) for symbol, entry in self._symbols.items()] + addrs_syms = [(self.resolve_absolute(entry), symbol) + for symbol, entry in self._symbols.items() + if symbol in self._globals] return sorted(addrs_syms) def to_abs_addr(self, section, offset): @@ -93,11 +96,15 @@ def resolve_relative(self, symbol): from_addr = self.to_abs_addr(self._from_section, self._from_offset) return sym_addr - from_addr + def set_global(self, symbol): + self._globals[symbol] = True + pass + class Assembler: - def __init__(self, symbols=None, bases=None): - self.symbols = SymbolTable(symbols or {}, bases or {}) + def __init__(self, symbols=None, bases=None, globls=None): + self.symbols = SymbolTable(symbols or {}, bases or {}, globls or {}) opcodes.symbols = self.symbols # XXX dirty hack def init(self, a_pass): @@ -236,6 +243,9 @@ def d_set(self, symbol, expr): value = int(expr) # TODO: support more than just integers self.symbols.set_sym(symbol, ABS, None, value) + def d_global(self, symbol): + self.symbols.set_global(symbol) + def append_data(self, wordlen, args): data = [int(arg).to_bytes(wordlen, 'little') for arg in args] self.append_section(b''.join(data)) diff --git a/tests/assemble.py b/tests/assemble.py index edc321e..2cde82f 100644 --- a/tests/assemble.py +++ b/tests/assemble.py @@ -22,6 +22,23 @@ """ +src_global = """\ + + .global 
counter +counter: + .long 0 + +internal: + .long 0 + + .text + .global entry +entry: + wait 42 + halt +""" + + def test_parse_line(): a = Assembler() lines = iter(src.splitlines()) @@ -90,8 +107,19 @@ def test_assemble_bss_with_value(): assert raised +def test_assemble_global(): + a = Assembler() + a.assemble(src_global) + assert a.symbols.has_sym('counter') + assert a.symbols.has_sym('internal') + assert a.symbols.has_sym('entry') + + exported_symbols = a.symbols.export() + assert exported_symbols == [(0, 'counter'), (2, 'entry')] # internal not exported + + def test_symbols(): - st = SymbolTable({}, {}) + st = SymbolTable({}, {}, {}) for entry in [ ('rel_t4', REL, TEXT, 4), ('abs_t4', ABS, TEXT, 4), @@ -148,4 +176,5 @@ def test_symbols(): test_assemble() test_assemble_bss() test_assemble_bss_with_value() +test_assemble_global() test_symbols() From 9907b107c94f0d831e0483617cd6b7bad8d7d50d Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Thu, 22 Jul 2021 23:23:12 +0300 Subject: [PATCH 08/29] let SymbolTable.export() optionally export non-global symbols too This is then the same behaviour as before the .global directive was supported. It might be useful for debugging purposes or potentially some backward compatibility issues (e.g. 
scripts that depend on the symbol printout after assembling) --- esp32_ulp/assemble.py | 4 ++-- tests/assemble.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py index 25c7f23..c847432 100644 --- a/esp32_ulp/assemble.py +++ b/esp32_ulp/assemble.py @@ -53,10 +53,10 @@ def dump(self): for symbol, entry in self._symbols.items(): print(symbol, entry) - def export(self): + def export(self, incl_non_globals=False): addrs_syms = [(self.resolve_absolute(entry), symbol) for symbol, entry in self._symbols.items() - if symbol in self._globals] + if incl_non_globals or symbol in self._globals] return sorted(addrs_syms) def to_abs_addr(self, section, offset): diff --git a/tests/assemble.py b/tests/assemble.py index 2cde82f..3839390 100644 --- a/tests/assemble.py +++ b/tests/assemble.py @@ -117,6 +117,9 @@ def test_assemble_global(): exported_symbols = a.symbols.export() assert exported_symbols == [(0, 'counter'), (2, 'entry')] # internal not exported + exported_symbols = a.symbols.export(True) # include non-global symbols + assert exported_symbols == [(0, 'counter'), (1, 'internal'), (2, 'entry')] + def test_symbols(): st = SymbolTable({}, {}, {}) From 27ab85027052efca27e52290598fd053840a5d96 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Thu, 22 Jul 2021 23:32:45 +0300 Subject: [PATCH 09/29] support ULP opcodes in upper case Some open-source out there uses upper case for ULP opcodes. 
This change allows using such code unmodified instead of crashing with "Unknown opcode or directive" --- esp32_ulp/assemble.py | 4 ++-- tests/assemble.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py index c847432..ef21079 100644 --- a/esp32_ulp/assemble.py +++ b/esp32_ulp/assemble.py @@ -275,12 +275,12 @@ def assembler_pass(self, lines): continue else: # machine instruction - func = getattr(opcodes, 'i_' + opcode, None) + func = getattr(opcodes, 'i_' + opcode.lower(), None) if func is not None: instruction = func(*args) self.append_section(instruction.to_bytes(4, 'little'), TEXT) continue - raise Exception('Unknown opcode or directive: %s' % opcode) + raise ValueError('Unknown opcode or directive: %s' % opcode) self.finalize_sections() def assemble(self, text): diff --git a/tests/assemble.py b/tests/assemble.py index 3839390..cc59377 100644 --- a/tests/assemble.py +++ b/tests/assemble.py @@ -121,6 +121,20 @@ def test_assemble_global(): assert exported_symbols == [(0, 'counter'), (1, 'internal'), (2, 'entry')] +def test_assemble_uppercase_opcode(): + a = Assembler() + try: + a.assemble(" WAIT 42") + except ValueError as e: + if str(e) != "Unknown opcode or directive: WAIT": + # re-raise failures we didn't expect + raise + raised = True + else: + raised = False + assert not raised + + def test_symbols(): st = SymbolTable({}, {}, {}) for entry in [ @@ -180,4 +194,5 @@ def test_symbols(): test_assemble_bss() test_assemble_bss_with_value() test_assemble_global() +test_assemble_uppercase_opcode() test_symbols() From 54b117e79595ecf8fa376ed4a75149ace5f0bb4d Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Thu, 22 Jul 2021 23:47:06 +0300 Subject: [PATCH 10/29] add a compatibility test for the recent fixes and improvements Just to double-check and ensure that we're still matching the binary output of binutils-esp32ulp --- tests/compat/fixes.S | 20 ++++++++++++++++++++ 1 file changed, 
20 insertions(+) create mode 100644 tests/compat/fixes.S diff --git a/tests/compat/fixes.S b/tests/compat/fixes.S new file mode 100644 index 0000000..0c84f1b --- /dev/null +++ b/tests/compat/fixes.S @@ -0,0 +1,20 @@ +# This file tests various fixes to the assembler, +# to ensure the binary output matches that of binutils. +# a) support for left-aligned directives (e.g. .set without preceding whitespace) +# b) a crash-fix related to data items in the .bss section +# c) support for marking labels as global +# d) support for upper case ULP opcode names +# +.set gpio, 2 + +.bss + +counter: +.long 0 + + .text + .global entry +entry: + MOVE R1, gpio + WAIT 42 + halt From feb42dc9eb315c3d521027141beabf0ab43acaf1 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Fri, 23 Jul 2021 00:08:42 +0300 Subject: [PATCH 11/29] add support for evaluating expressions This change allows immediate values to be calculated from an expression, such as 1+1, even including symbols such as "100 << const" (where const was defined with the .set directive). Expressions are also supported in the .set directives. Expressions are evaluated using the built-in eval(). To prevent misuse or malicious code execution, expressions are validated. At the point when eval is called, all symbols should have already been resolved to their values. That means we only need to allow for numeric characters along with arithmetic and bitwise operators, round brackets and whitespace. The character 'x' and the characters 'abcdef' are also accepted to allow for hex numbers such as 0x123abc. These are only allowed however in sequences starting with 0x. If any other character is encountered the expression is deemed invalid and an exception is raised. 
--- esp32_ulp/assemble.py | 2 +- esp32_ulp/opcodes.py | 23 ++++++++++++++-- esp32_ulp/util.py | 58 +++++++++++++++++++++++++++++++++++++++ tests/00_unit_tests.sh | 2 +- tests/assemble.py | 23 ++++++++++++++++ tests/opcodes.py | 54 +++++++++++++++++++++++++++++++++++- tests/util.py | 62 ++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 219 insertions(+), 5 deletions(-) create mode 100644 tests/util.py diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py index ef21079..2fdd154 100644 --- a/esp32_ulp/assemble.py +++ b/esp32_ulp/assemble.py @@ -240,7 +240,7 @@ def d_align(self, align=4, fill=None): self.fill(self.section, amount, fill) def d_set(self, symbol, expr): - value = int(expr) # TODO: support more than just integers + value = int(opcodes.eval_arg(expr)) # TODO: support more than just integers self.symbols.set_sym(symbol, ABS, None, value) def d_global(self, symbol): diff --git a/esp32_ulp/opcodes.py b/esp32_ulp/opcodes.py index 3018b30..59006f2 100644 --- a/esp32_ulp/opcodes.py +++ b/esp32_ulp/opcodes.py @@ -6,6 +6,7 @@ from uctypes import struct, addressof, LITTLE_ENDIAN, UINT32, BFUINT32, BF_POS, BF_LEN from .soc import * +from .util import split_tokens, validate_expression # XXX dirty hack: use a global for the symbol table symbols = None @@ -267,6 +268,20 @@ def make_ins(layout): ARG = namedtuple('ARG', ('type', 'value', 'raw')) +def eval_arg(arg): + parts = [] + for token in split_tokens(arg): + if symbols.has_sym(token): + _, _, sym_value = symbols.get_sym(token) + parts.append(str(sym_value)) + else: + parts.append(token) + parts = "".join(parts) + if not validate_expression(parts): + raise ValueError('Unsupported expression: %s' % parts) + return eval(parts) + + def arg_qualify(arg): """ look at arg and qualify its type: @@ -289,8 +304,12 @@ def arg_qualify(arg): return ARG(IMM, int(arg), arg) except ValueError: pass - entry = symbols.get_sym(arg) - return ARG(SYM, entry, arg) + try: + entry = symbols.get_sym(arg) + return ARG(SYM, 
entry, arg) + except KeyError: + pass + return ARG(IMM, int(eval_arg(arg)), arg) def get_reg(arg): diff --git a/esp32_ulp/util.py b/esp32_ulp/util.py index c184414..8d2832f 100644 --- a/esp32_ulp/util.py +++ b/esp32_ulp/util.py @@ -2,6 +2,8 @@ import gc +NORMAL, WHITESPACE = 0, 1 + def garbage_collect(msg, verbose=DEBUG): free_before = gc.mem_free() @@ -9,3 +11,59 @@ def garbage_collect(msg, verbose=DEBUG): free_after = gc.mem_free() if verbose: print("%s: %d --gc--> %d bytes free" % (msg, free_before, free_after)) + + +def split_tokens(line): + buf = "" + tokens = [] + state = NORMAL + for c in line: + if ('a' <= c <= 'z') or ('A' <= c <= 'Z') or ('0' <= c <= '9') or c == '_': + if state != NORMAL: + if len(buf) > 0: + tokens.append(buf) + buf = "" + state = NORMAL + buf += c + elif c == ' ' or c == '\t': + if state != WHITESPACE: + if len(buf) > 0: + tokens.append(buf) + buf = "" + state = WHITESPACE + buf += c + else: + if len(buf) > 0: + tokens.append(buf) + buf = "" + tokens.append(c) + + if len(buf) > 0: + tokens.append(buf) + + return tokens + + +def validate_expression(param): + for token in split_tokens(param): + state = 0 + for c in token: + if c not in ' \t+-*/%()<>&|~x0123456789abcdef': + return False + + # the following allows hex digits a-f after 0x but not otherwise + if state == 0: + if c in 'abcdef': + return False + if c == '0': + state = 1 + continue + + if state == 1: + state = 2 if c == 'x' else 0 + continue + + if state == 2: + if c not in '0123456789abcdef': + state = 0 + return True diff --git a/tests/00_unit_tests.sh b/tests/00_unit_tests.sh index 07d221f..c7e2f89 100755 --- a/tests/00_unit_tests.sh +++ b/tests/00_unit_tests.sh @@ -4,7 +4,7 @@ set -e -for file in opcodes assemble link ; do +for file in opcodes assemble link util; do echo testing $file... 
micropython $file.py done diff --git a/tests/assemble.py b/tests/assemble.py index cc59377..ac2d423 100644 --- a/tests/assemble.py +++ b/tests/assemble.py @@ -135,6 +135,28 @@ def test_assemble_uppercase_opcode(): assert not raised +def test_assemble_evalulate_expressions(): + src_w_expr = """\ + .set shft, 2 + .set loops, (1 << shft) + +entry: + move r0, 1+1 + move r1, loops + move r2, (shft + 10) * 2 + move r3, entry << 2 +""" + a = Assembler() + a.assemble(src_w_expr) + + assert a.symbols.has_sym('shft') + assert a.symbols.has_sym('loops') + assert a.symbols.has_sym('entry') + assert a.symbols.get_sym('shft') == (ABS, None, 2) + assert a.symbols.get_sym('loops') == (ABS, None, 4) + assert a.symbols.get_sym('entry') == (REL, TEXT, 0) + + def test_symbols(): st = SymbolTable({}, {}, {}) for entry in [ @@ -195,4 +217,5 @@ def test_symbols(): test_assemble_bss_with_value() test_assemble_global() test_assemble_uppercase_opcode() +test_assemble_evalulate_expressions() test_symbols() diff --git a/tests/opcodes.py b/tests/opcodes.py index 54bb673..f14829a 100644 --- a/tests/opcodes.py +++ b/tests/opcodes.py @@ -1,6 +1,8 @@ from uctypes import UINT32, BFUINT32, BF_POS, BF_LEN from esp32_ulp.opcodes import make_ins, make_ins_struct_def -from esp32_ulp.opcodes import get_reg, get_imm, get_cond, arg_qualify, ARG, REG, IMM, COND +from esp32_ulp.opcodes import get_reg, get_imm, get_cond, arg_qualify, eval_arg, ARG, REG, IMM, SYM, COND +from esp32_ulp.assemble import SymbolTable, ABS, REL, TEXT +import esp32_ulp.opcodes as opcodes OPCODE_DELAY = 4 LAYOUT_DELAY = """ @@ -43,6 +45,19 @@ def test_arg_qualify(): assert arg_qualify('Eq') == ARG(COND, 'eq', 'Eq') assert arg_qualify('EQ') == ARG(COND, 'eq', 'EQ') + # for the next tests, ensure the opcodes module has a SymbolTable + opcodes.symbols = SymbolTable({}, {}, {}) + opcodes.symbols.set_sym('const', ABS, None, 42) # constant as defined by .set + opcodes.symbols.set_sym('entry', REL, TEXT, 4) # label pointing to code + + 
assert arg_qualify('1+1') == ARG(IMM, 2, '1+1') + assert arg_qualify('const >> 1') == ARG(IMM, 21, 'const >> 1') + assert arg_qualify('entry') == ARG(SYM, (REL, TEXT, 4), 'entry') # symbols should not (yet) be evaluated + assert arg_qualify('entry + const') == ARG(IMM, 46, 'entry + const') + + # clean up + opcodes.symbols = None + def test_get_reg(): assert get_reg('r0') == 0 @@ -57,9 +72,46 @@ def test_get_cond(): assert get_cond('Eq') == 'eq' +def test_eval_arg(): + opcodes.symbols = SymbolTable({}, {}, {}) + opcodes.symbols.set_sym('const', ABS, None, 42) # constant + opcodes.symbols.set_sym('raise', ABS, None, 99) # constant using a python keyword as name (is allowed) + + assert eval_arg('1+1') == 2 + assert eval_arg('1+const') == 43 + assert eval_arg('raise*2/3') == 66 + assert eval_arg('raise-const') == 57 + assert eval_arg('(raise-const)*2') == 114 + assert eval_arg('const % 5') == 2 + assert eval_arg('const + 0x19af') == 0x19af + 42 + assert eval_arg('const & ~2') == 40 + assert eval_arg('const << 3') == 336 + assert eval_arg('const >> 1') == 21 + assert eval_arg('(const|4)&0xf') == 0xe + + assert_raises(ValueError, eval_arg, 'evil()') + assert_raises(ValueError, eval_arg, 'def cafe()') + assert_raises(ValueError, eval_arg, '1 ^ 2') + assert_raises(ValueError, eval_arg, '!100') + + # clean up + opcodes.symbols = None + + +def assert_raises(exception, func, *args): + try: + func(*args) + except exception: + raised = True + else: + raised = False + assert raised + + test_make_ins_struct_def() test_make_ins() test_arg_qualify() test_get_reg() test_get_imm() test_get_cond() +test_eval_arg() \ No newline at end of file diff --git a/tests/util.py b/tests/util.py new file mode 100644 index 0000000..18ab54e --- /dev/null +++ b/tests/util.py @@ -0,0 +1,62 @@ +from esp32_ulp.util import split_tokens, validate_expression + +tests = [] + + +def test(param): + """ + the @test decorator + """ + tests.append(param) + + +@test +def test_split_tokens(): + assert 
split_tokens("") == [] + assert split_tokens("t") == ['t'] + assert split_tokens("test") == ['test'] + assert split_tokens("t t") == ['t', ' ', 't'] + assert split_tokens("t,t") == ['t', ',', 't'] + assert split_tokens("test(arg)") == ['test', '(', 'arg', ')'] + assert split_tokens("test(arg,arg2)") == ['test', '(', 'arg', ',', 'arg2', ')'] + assert split_tokens("test(arg,arg2)") == ['test', '(', 'arg', ',', 'arg2', ')'] + assert split_tokens(" test( arg, arg2)") == [' ', 'test', '(', ' ', 'arg', ',', ' ', 'arg2', ')'] + assert split_tokens(" test( arg ) ") == [' ', 'test', '(', ' ', 'arg', ' ', ')', ' '] + assert split_tokens("\t test \t ") == ['\t ', 'test', " \t "] + assert split_tokens("test\nrow2") == ['test', "\n", "row2"] + + # split_token does not support comments. should generally only be used after comments are already stripped + assert split_tokens("test(arg /*comment*/)") == ['test', '(', 'arg', ' ', '/', '*', 'comment', '*', '/', ')'] + assert split_tokens("#test") == ['#', 'test'] + + +@test +def test_validate_expression(): + assert validate_expression('') is True + assert validate_expression('1') is True + assert validate_expression('1+1') is True + assert validate_expression('(1+1)') is True + assert validate_expression('(1+1)*2') is True + assert validate_expression('(1 + 1)') is True + assert validate_expression('10 % 2') is True + assert validate_expression('0x100 << 2') is True + assert validate_expression('0x100 & ~2') is True + assert validate_expression('0xabcdef') is True + assert validate_expression('0x123def') is True + assert validate_expression('2*3+4/5&6|7') is True + assert validate_expression('(((((1+1) * 2') is True # valid characters, even if expression is not valid + + assert validate_expression(':') is False + assert validate_expression('_') is False + assert validate_expression('=') is False + assert validate_expression('.') is False + assert validate_expression('!') is False + assert validate_expression('123 ^ 4') is False # 
operator not supported for now + assert validate_expression('evil()') is False + assert validate_expression('def cafe()') is False # valid hex digits, but potentially dangerous code + + +if __name__ == '__main__': + # run all methods marked with @test + for t in tests: + t() From 87507c9f48c1e9450adf52f2a37b1ccaa0653038 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Fri, 23 Jul 2021 16:35:52 +0300 Subject: [PATCH 12/29] add a compatibility test for evaluating expressions This is a common example, where the ADC is read multiple times (oversampled) and the oversampling factor is calculated by a shift-left, which makes calculating the average of samples as easy as shifting their sum right by the same amount of bits. Using .set directives with expressions makes the oversampling factor easily configurable. --- tests/compat/expr.S | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 tests/compat/expr.S diff --git a/tests/compat/expr.S b/tests/compat/expr.S new file mode 100644 index 0000000..48f7304 --- /dev/null +++ b/tests/compat/expr.S @@ -0,0 +1,44 @@ + .set adc_channel, 6 + + .set adc_oversampling_factor_log, 2 + .set adc_oversampling_factor, (1 << adc_oversampling_factor_log) + +.data + +result: + .long 0 + + .text + .global entry +entry: + move r0, 0 + stage_rst + +measure: + adc r1, 0, adc_channel + 1 + add r0, r0, r1 + + stage_inc 1 + jumps measure, adc_oversampling_factor, lt + + rsh r0, r0, adc_oversampling_factor_log + + move r3, result + st r0, r3, 0 + + #test that expressions evaluate correctly for all supported operators + move r3, 1+2 + move r3, 3-5 + move r3, -5 + move r3, 2*3 + move r3, 4/2 + move r3, 4 % 3 + move r3, 0xff << 2 + move r3, 0xff >> 1 + move r3, (0xabcdef | 0xff) & 0xff + move r3, 0x1234 & ~2 + move r3, 42|4&0xf # 46 (4&0xf is evaluated first) + move r3, (42|4)&0xf # 14 (42|4 is evaluated first) + +exit: + halt From 99352a3b1b2a79f04266e64443680f9d282cf3c4 Mon Sep 17 00:00:00 2001 From: 
Wilko Nienhaus Date: Thu, 29 Jul 2021 22:32:17 +0300 Subject: [PATCH 13/29] docs: add that expressions are now supported --- README.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.rst b/README.rst index 3952878..56395d1 100644 --- a/README.rst +++ b/README.rst @@ -17,6 +17,10 @@ Status The most commonly used simple stuff should work. +Expressions in assembly source code are supported and get evaluated during +assembling. Only expressions evaluating to a single integer are supported. +Constants defined with ``.set`` are supported in expressions. + We have some unit tests and also compatibility tests that compare the output whether it is identical with binutils-esp32ulp output. From d76fd2696eb72987834ffd549924604cd7f3a295 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Fri, 23 Jul 2021 16:59:42 +0300 Subject: [PATCH 14/29] add preprocessor that can replace simple #define values in code The preprocessor strips all comments and lines containing a define statement but keeps the empty lines in the output (to preserve line numbering). The output is then passed directly into the assembler. The preprocessor does not support "function style" #define macros (e.g. ADD(a,b) a+b) but this is not needed for expanding the constants used by WRITE_RTC_REG(), et al. 
--- esp32_ulp/__main__.py | 2 + esp32_ulp/preprocess.py | 57 ++++++++++ tests/00_unit_tests.sh | 2 +- tests/01_compat_tests.sh | 4 +- tests/compat/preprocess_simple.S | 7 ++ tests/preprocess.py | 175 +++++++++++++++++++++++++++++++ 6 files changed, 245 insertions(+), 2 deletions(-) create mode 100644 esp32_ulp/preprocess.py create mode 100644 tests/compat/preprocess_simple.S create mode 100644 tests/preprocess.py diff --git a/esp32_ulp/__main__.py b/esp32_ulp/__main__.py index 584a3dd..b24578a 100644 --- a/esp32_ulp/__main__.py +++ b/esp32_ulp/__main__.py @@ -2,6 +2,7 @@ from .util import garbage_collect +from .preprocess import preprocess from .assemble import Assembler from .link import make_binary garbage_collect('after import') @@ -23,6 +24,7 @@ def main(fn): with open(fn) as f: src = f.read() + src = preprocess(src) binary = src_to_binary(src) if fn.endswith('.s') or fn.endswith('.S'): diff --git a/esp32_ulp/preprocess.py b/esp32_ulp/preprocess.py new file mode 100644 index 0000000..12c13b1 --- /dev/null +++ b/esp32_ulp/preprocess.py @@ -0,0 +1,57 @@ +from . 
import nocomment +from .util import split_tokens + + +class Preprocessor: + def __init__(self): + self._defines = {} + + def parse_defines(self, content): + result = {} + for line in content.splitlines(): + line = line.strip() + if not line.startswith("#define"): + # skip lines not containing #define + continue + line = line[8:].strip() # remove #define + parts = line.split(None, 1) + if len(parts) != 2: + # skip defines without value + continue + identifier, value = parts + tmp = identifier.split('(', 1) + if len(tmp) == 2: + # skip parameterised defines (macros) + continue + value = "".join(nocomment.remove_comments(value)).strip() + result[identifier] = value + self._defines = result + return result + + def expand_defines(self, line): + found = True + while found: # do as many passed as needed, until nothing was replaced anymore + found = False + tokens = split_tokens(line) + line = "" + for t in tokens: + lu = self._defines.get(t, t) + if lu != t: + found = True + line += lu + + return line + + def preprocess(self, content): + self.parse_defines(content) + lines = nocomment.remove_comments(content) + result = [] + for line in lines: + line = self.expand_defines(line) + result.append(line) + result = "\n".join(result) + return result + + +def preprocess(content): + return Preprocessor().preprocess(content) diff --git a/tests/00_unit_tests.sh b/tests/00_unit_tests.sh index c7e2f89..efd5b64 100755 --- a/tests/00_unit_tests.sh +++ b/tests/00_unit_tests.sh @@ -4,7 +4,7 @@ set -e -for file in opcodes assemble link util; do +for file in opcodes assemble link util preprocess; do echo testing $file... 
micropython $file.py done diff --git a/tests/01_compat_tests.sh b/tests/01_compat_tests.sh index c565aa1..68f8bdc 100755 --- a/tests/01_compat_tests.sh +++ b/tests/01_compat_tests.sh @@ -13,12 +13,14 @@ for src_file in $(ls -1 compat/*.S); do log_file="${src_name}.log" micropython -m esp32_ulp $src_file 1>$log_file # generates $ulp_file + pre_file="${src_name}.pre" obj_file="${src_name}.o" elf_file="${src_name}.elf" bin_file="${src_name}.bin" echo -e "\tBuilding using binutils" - esp32ulp-elf-as -o $obj_file $src_file + gcc -E -o ${pre_file} $src_file + esp32ulp-elf-as -o $obj_file ${pre_file} esp32ulp-elf-ld -T esp32.ulp.ld -o $elf_file $obj_file esp32ulp-elf-objcopy -O binary $elf_file $bin_file diff --git a/tests/compat/preprocess_simple.S b/tests/compat/preprocess_simple.S new file mode 100644 index 0000000..b6a61e8 --- /dev/null +++ b/tests/compat/preprocess_simple.S @@ -0,0 +1,7 @@ +#define GPIO 2 +#define BASE 0x100 +#define ADDR (BASE + GPIO) + +entry: + move r0, GPIO + move r1, ADDR diff --git a/tests/preprocess.py b/tests/preprocess.py new file mode 100644 index 0000000..bfca066 --- /dev/null +++ b/tests/preprocess.py @@ -0,0 +1,175 @@ +from esp32_ulp.preprocess import Preprocessor + +tests = [] + + +def test(param): + tests.append(param) + + +@test +def test_replace_defines_should_return_empty_line_given_empty_string(): + p = Preprocessor() + + assert p.preprocess("") == "" + + +@test +def replace_defines_should_return_remove_comments(): + p = Preprocessor() + + line = "// some comment" + expected = "" + assert p.preprocess(line) == expected + + +@test +def test_parse_defines(): + p = Preprocessor() + + assert p.parse_defines("") == {} + assert p.parse_defines("// comment") == {} + assert p.parse_defines(" // comment") == {} + assert p.parse_defines(" /* comment */") == {} + assert p.parse_defines(" /* comment */ #define A 42") == {} # #define must be the first thing on a line + assert p.parse_defines("#define a 1") == {"a": "1"} + assert 
p.parse_defines(" #define a 1") == {"a": "1"} + assert p.parse_defines("#define a 1 2") == {"a": "1 2"} + assert p.parse_defines("#define f(a,b) 1") == {} # macros not supported + assert p.parse_defines("#define f(a, b) 1") == {} # macros not supported + assert p.parse_defines("#define f (a,b) 1") == {"f": "(a,b) 1"} # f is not a macro + assert p.parse_defines("#define f (a, b) 1") == {"f": "(a, b) 1"} # f is not a macro + assert p.parse_defines("#define RTC_ADDR 0x12345 // start of range") == {"RTC_ADDR": "0x12345"} + + +@test +def test_parse_defines_handles_multiple_input_lines(): + p = Preprocessor() + + multi_line_1 = """\ +#define ID_WITH_UNDERSCORE something +#define ID2 somethingelse +""" + assert p.parse_defines(multi_line_1) == {"ID_WITH_UNDERSCORE": "something", "ID2": "somethingelse"} + + +@test +def test_parse_defines_does_not_understand_comments_by_current_design(): + # comments are not understood. lines are expected to already have comments removed! + p = Preprocessor() + + multi_line_2 = """\ +#define ID_WITH_UNDERSCORE something +/* +#define ID2 somethingelse +*/ +""" + assert "ID2" in p.parse_defines(multi_line_2) + + +@test +def test_parse_defines_does_not_understand_line_continuations_with_backslash_by_current_design(): + p = Preprocessor() + + multi_line_3 = r""" + #define ID_WITH_UNDERSCORE something \ + line2 + """ + + assert p.parse_defines(multi_line_3) == {"ID_WITH_UNDERSCORE": "something \\"} + + +@test +def preprocess_should_remove_comments_and_defines_but_keep_the_lines_as_empty_lines(): + p = Preprocessor() + + lines = """\ + // copyright + #define A 1 + + move r1, r2""" + + assert p.preprocess(lines) == "\n\n\n\tmove r1, r2" + + +@test +def preprocess_should_replace_words_defined(): + p = Preprocessor() + + lines = """\ + #define DR_REG_RTCIO_BASE 0x3ff48400 + + move r1, DR_REG_RTCIO_BASE""" + + assert "move r1, 0x3ff48400" in p.preprocess(lines) + + +@test +def preprocess_should_replace_words_defined_multiple_times(): + p = 
Preprocessor() + + lines = """\ + #define DR_REG_RTCIO_BASE 0x3ff48400 + + move r1, DR_REG_RTCIO_BASE #once + move r2, DR_REG_RTCIO_BASE #second time""" + + assert "move r1, 0x3ff48400" in p.preprocess(lines) + assert "move r2, 0x3ff48400" in p.preprocess(lines) + + +@test +def preprocess_should_replace_all_defined_words(): + p = Preprocessor() + + lines = """\ + #define DR_REG_RTCIO_BASE 0x3ff48400 + #define SOME_OFFSET 4 + + move r1, DR_REG_RTCIO_BASE + add r2, r1, SOME_OFFSET""" + + assert "move r1, 0x3ff48400" in p.preprocess(lines) + assert "add r2, r1, 4" in p.preprocess(lines) + + +@test +def preprocess_should_not_replace_substrings_within_identifiers(): + p = Preprocessor() + + # ie. if AAA is defined don't touch PREFIX_AAA_SUFFIX + lines = """\ + #define RTCIO 4 + move r1, DR_REG_RTCIO_BASE""" + + assert "DR_REG_4_BASE" not in p.preprocess(lines) + + # ie. if A and AA are defined, don't replace AA as two A's but with AA + lines = """\ + #define A 4 + #define AA 8 + move r1, A + move r2, AA""" + + assert "move r1, 4" in p.preprocess(lines) + assert "move r2, 8" in p.preprocess(lines) + + +@test +def preprocess_should_replace_defines_used_in_defines(): + p = Preprocessor() + + lines = """\ + #define BITS (BASE << 4) + #define BASE 0x1234 + + move r1, BITS + move r2, BASE""" + + assert "move r1, (0x1234 << 4)" in p.preprocess(lines) + + +if __name__ == '__main__': + # run all methods marked with @test + for t in tests: + t() From 4dded94fa2ca0fc663cbc04561c2c92247c42834 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Sat, 7 Aug 2021 17:35:07 +0300 Subject: [PATCH 15/29] allow assembler to skip comment removal to avoid removing comments twice Since the preprocessor was introduced, which already removes comments, the assembler does not need to remove comments anymore in the usual case. The assembler still retains the ability to remove comments (enabled by default) in case it is used without the preprocessor. 
The `remove_comments` argument to the `assemble()` method can be used to control whether comments will be removed during assembly or not. --- esp32_ulp/__main__.py | 2 +- esp32_ulp/assemble.py | 6 +++--- tests/assemble.py | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/esp32_ulp/__main__.py b/esp32_ulp/__main__.py index b24578a..d9555fd 100644 --- a/esp32_ulp/__main__.py +++ b/esp32_ulp/__main__.py @@ -10,7 +10,7 @@ def src_to_binary(src): assembler = Assembler() - assembler.assemble(src) + assembler.assemble(src, remove_comments=False) # comments already removed by preprocessor garbage_collect('before symbols export') addrs_syms = assembler.symbols.export() for addr, sym in addrs_syms: diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py index 2fdd154..297ebb8 100644 --- a/esp32_ulp/assemble.py +++ b/esp32_ulp/assemble.py @@ -3,7 +3,7 @@ """ from . import opcodes -from .nocomment import remove_comments +from .nocomment import remove_comments as do_remove_comments from .util import garbage_collect TEXT, DATA, BSS = 'text', 'data', 'bss' @@ -283,8 +283,8 @@ def assembler_pass(self, lines): raise ValueError('Unknown opcode or directive: %s' % opcode) self.finalize_sections() - def assemble(self, text): - lines = remove_comments(text) + def assemble(self, text, remove_comments=True): + lines = do_remove_comments(text) if remove_comments else text.splitlines() self.init(1) # pass 1 is only to get the symbol table right self.assembler_pass(lines) self.symbols.set_bases(self.compute_bases()) diff --git a/tests/assemble.py b/tests/assemble.py index ac2d423..496d4a9 100644 --- a/tests/assemble.py +++ b/tests/assemble.py @@ -157,6 +157,24 @@ def test_assemble_evalulate_expressions(): assert a.symbols.get_sym('entry') == (REL, TEXT, 0) +def test_assemble_optional_comment_removal(): + line = " move r1, 123 # comment" + + a = Assembler() + + # first assemble as normal (comments will be removed by default) + a.assemble(line) + + # 
now assemble with comment removal disabled + try: + a.assemble(line, remove_comments=False) + except ValueError as e: + raised = True + else: + raised = False + assert raised + + def test_symbols(): st = SymbolTable({}, {}, {}) for entry in [ @@ -218,4 +236,5 @@ def test_symbols(): test_assemble_global() test_assemble_uppercase_opcode() test_assemble_evalulate_expressions() +test_assemble_optional_comment_removal() test_symbols() From 219f939a4242040a7209e3cb019f8076af15f3a8 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Sun, 25 Jul 2021 18:15:57 +0300 Subject: [PATCH 16/29] fix evaluation of expressions during first assembler pass During the first assembler pass, the SymbolTable does not yet have all symbols. During a symbol lookup the SymbolTable has so far returned a fake symbol for non-existing symbols, to make the assembler happy (values are not really being used during the first pass, so it's ok). However now that expressions are supported, when the symbol lookup encountered expressions during pass 1, it assumed those expressions were "not-yet-existing-symbols", which is of course incorrect as they will eventually be evaluated to integer values. Some opcodes were unhappy with receiving an expression during pass 1 (e.g. the reg_wr opcode, which expects a sane address as a first argument). This commit simply skips creating instructions during the first pass, because all instructions are 32-bit (4 bytes) long anyway, so the content doesn't matter during that first assembler pass, which only measures section sizes. 
--- esp32_ulp/assemble.py | 2 +- tests/assemble.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py index 297ebb8..12fae70 100644 --- a/esp32_ulp/assemble.py +++ b/esp32_ulp/assemble.py @@ -277,7 +277,7 @@ def assembler_pass(self, lines): # machine instruction func = getattr(opcodes, 'i_' + opcode.lower(), None) if func is not None: - instruction = func(*args) + instruction = 0 if self.a_pass == 1 else func(*args) self.append_section(instruction.to_bytes(4, 'little'), TEXT) continue raise ValueError('Unknown opcode or directive: %s' % opcode) diff --git a/tests/assemble.py b/tests/assemble.py index 496d4a9..f1a5b45 100644 --- a/tests/assemble.py +++ b/tests/assemble.py @@ -175,6 +175,19 @@ def test_assemble_optional_comment_removal(): assert raised +def test_assemble_test_regressions_from_evaluation(): + line = " reg_wr (0x3ff48400 + 0x10), 1, 1, 1" + + a = Assembler() + raised = False + try: + a.assemble(line) + except ValueError as e: + if str(e) == 'invalid register base': # ensure we trapped the expected Exception + raised = True + assert not raised + + def test_symbols(): st = SymbolTable({}, {}, {}) for entry in [ @@ -237,4 +250,5 @@ def test_symbols(): test_assemble_uppercase_opcode() test_assemble_evalulate_expressions() test_assemble_optional_comment_removal() +test_assemble_test_regressions_from_evaluation() test_symbols() From 5c3eeb85529dd4d5dd271f6b8afeda924264cd64 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Mon, 26 Jul 2021 06:46:47 +0300 Subject: [PATCH 17/29] remove no-longer-needed pass dependent code from SymbolTable SymbolTable used the pass number to handle some special cases in pass 1 of assembling, by returning dummy values. Since the first pass no longer creates actual instructions, the SymbolTable no longer needs to do this and no longer needs to be aware of the pass the assembler is in. 
--- esp32_ulp/assemble.py | 21 ++------------------- tests/assemble.py | 27 +++++++++++++++------------ 2 files changed, 17 insertions(+), 31 deletions(-) diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py index 12fae70..9180d8a 100644 --- a/esp32_ulp/assemble.py +++ b/esp32_ulp/assemble.py @@ -16,10 +16,6 @@ def __init__(self, symbols, bases, globals): self._symbols = symbols self._bases = bases self._globals = globals - self._pass = None - - def set_pass(self, _pass): - self._pass = _pass def set_bases(self, bases): self._bases = bases @@ -40,13 +36,7 @@ def has_sym(self, symbol): return symbol in self._symbols def get_sym(self, symbol): - try: - entry = self._symbols[symbol] - except KeyError: - if self._pass == 1: - entry = (REL, TEXT, 0) # for a dummy, this is good enough - else: - raise + entry = self._symbols[symbol] return entry def dump(self): @@ -60,13 +50,7 @@ def export(self, incl_non_globals=False): return sorted(addrs_syms) def to_abs_addr(self, section, offset): - try: - base = self._bases[section] - except KeyError: - if self._pass == 1: - base = 0 # for a dummy this is good enough - else: - raise + base = self._bases[section] return base + offset def resolve_absolute(self, symbol): @@ -109,7 +93,6 @@ def __init__(self, symbols=None, bases=None, globls=None): def init(self, a_pass): self.a_pass = a_pass - self.symbols.set_pass(a_pass) self.sections = dict(text=[], data=[]) self.offsets = dict(text=0, data=0, bss=0) self.section = TEXT diff --git a/tests/assemble.py b/tests/assemble.py index f1a5b45..e607ba2 100644 --- a/tests/assemble.py +++ b/tests/assemble.py @@ -199,25 +199,28 @@ def test_symbols(): ]: st.set_sym(*entry) # PASS 1 ======================================================== - st.set_pass(1) assert st.has_sym('abs_t4') assert st.get_sym('abs_t4') == (ABS, TEXT, 4) assert not st.has_sym('notexist') - assert st.get_sym('notexist') == (REL, TEXT, 0) # pass1 -> dummy + try: + st.get_sym('notexist') # pass1 -> raises + except 
KeyError: + raised = True + else: + raised = False + assert raised assert st.resolve_absolute('abs_t4') == 4 - assert st.resolve_absolute('abs_d4') == 4 - assert st.resolve_absolute('rel_t4') == 4 - assert st.resolve_absolute('rel_d4') == 4 - assert st.resolve_absolute('const') == 123 - st.set_from(TEXT, 8) - assert st.resolve_relative('abs_t4') == -4 - assert st.resolve_relative('abs_d4') == -4 - assert st.resolve_relative('rel_t4') == -4 - assert st.resolve_relative('rel_d4') == -4 + try: + # relative symbols cannot be resolved, because in pass 1 section bases are not yet defined + st.resolve_absolute('rel_t4') + except KeyError: + raised = True + else: + raised = False + assert raised assert st.resolve_absolute('const') == 123 # PASS 2 ======================================================== st.set_bases({TEXT: 100, DATA: 200}) - st.set_pass(2) assert st.has_sym('abs_t4') assert st.get_sym('abs_t4') == (ABS, TEXT, 4) assert not st.has_sym('notexist') From 3e8c0d515392d53f7ff6a0d328d9cf9cade4192d Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Mon, 26 Jul 2021 06:53:02 +0300 Subject: [PATCH 18/29] add support for macros such as WRITE_RTC_REG This is a simplified implementation, rather than adding "proper" support for macros because we don't need more. The macros WRITE_RTC_REG, READ_RTC_REG, WRITE_RTC_FIELD and READ_RTC_FIELD are simply expanded in a predefined way. If they are also defined as macros in the source code, those macros in the source will be ignored. 
--- esp32_ulp/preprocess.py | 50 +++++++++++++++++++++++++++++++++++++++++ tests/preprocess.py | 12 ++++++++++ 2 files changed, 62 insertions(+) diff --git a/esp32_ulp/preprocess.py b/esp32_ulp/preprocess.py index 12c13b1..1eae375 100644 --- a/esp32_ulp/preprocess.py +++ b/esp32_ulp/preprocess.py @@ -2,6 +2,34 @@ from .util import split_tokens +class RTC_Macros: + @staticmethod + def READ_RTC_REG(rtc_reg, low_bit, bit_width): + return '\treg_rd ' + ', '.join(( + rtc_reg, + '%s + %s - 1' % (low_bit, bit_width), + low_bit + )) + + @staticmethod + def WRITE_RTC_REG(rtc_reg, low_bit, bit_width, value): + args = ( + rtc_reg, + '%s + %s - 1' % (low_bit, bit_width), + low_bit, + value + ) + return '\treg_wr ' + ', '.join(args) + + @staticmethod + def READ_RTC_FIELD(rtc_reg, low_bit): + return RTC_Macros.READ_RTC_REG(rtc_reg, low_bit, 1) + + @staticmethod + def WRITE_RTC_FIELD(rtc_reg, low_bit, value): + return RTC_Macros.WRITE_RTC_REG(rtc_reg, low_bit, 1, value + ' & 1') + + class Preprocessor: def __init__(self): self._defines = {} @@ -42,12 +70,34 @@ def expand_defines(self, line): return line + def expand_rtc_macros(self, line): + clean_line = line.strip() + if not clean_line: + return line + + macro = clean_line.split('(', 1) + if len(macro) != 2: + return line + + macro_name, macro_args = macro + + macro_fn = getattr(RTC_Macros, macro_name, None) + if macro_fn is None: + return line + + macro_args, _ = macro_args.rsplit(')', 1) # trim away right bracket. 
safe as comments already stripped + macro_args = macro_args.split(',') # not safe when args contain ',' but we should not have those + macro_args = [x.strip() for x in macro_args] + + return macro_fn(*macro_args) + def preprocess(self, content): self.parse_defines(content) lines = nocomment.remove_comments(content) result = [] for line in lines: line = self.expand_defines(line) + line = self.expand_rtc_macros(line) result.append(line) result = "\n".join(result) return result diff --git a/tests/preprocess.py b/tests/preprocess.py index bfca066..a31fe1b 100644 --- a/tests/preprocess.py +++ b/tests/preprocess.py @@ -169,6 +169,18 @@ def preprocess_should_replace_defines_used_in_defines(): assert "move r1, (0x1234 << 4)" in p.preprocess(lines) +@test +def test_expand_rtc_macros(): + p = Preprocessor() + + assert p.expand_rtc_macros("") == "" + assert p.expand_rtc_macros("abc") == "abc" + assert p.expand_rtc_macros("WRITE_RTC_REG(1, 2, 3, 4)") == "\treg_wr 1, 2 + 3 - 1, 2, 4" + assert p.expand_rtc_macros("READ_RTC_REG(1, 2, 3)") == "\treg_rd 1, 2 + 3 - 1, 2" + assert p.expand_rtc_macros("WRITE_RTC_FIELD(1, 2, 3)") == "\treg_wr 1, 2 + 1 - 1, 2, 3 & 1" + assert p.expand_rtc_macros("READ_RTC_FIELD(1, 2)") == "\treg_rd 1, 2 + 1 - 1, 2" + + if __name__ == '__main__': # run all methods marked with @test for t in tests: From ac1de99fe6f29735c689a4c52665cb2257323705 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Mon, 26 Jul 2021 08:13:43 +0300 Subject: [PATCH 19/29] add simple include file processing This will not work on the ESP32 for large files, due to limited memory. But this is only the first step. Next we'll add a database for storing defines from include files. 
--- esp32_ulp/preprocess.py | 50 +++++++++++++++++++++++++---------------- tests/fixtures/incl.h | 5 +++++ tests/fixtures/incl2.h | 2 ++ tests/preprocess.py | 49 +++++++++++++++++++++++++++++----------- 4 files changed, 74 insertions(+), 32 deletions(-) create mode 100644 tests/fixtures/incl.h create mode 100644 tests/fixtures/incl2.h diff --git a/esp32_ulp/preprocess.py b/esp32_ulp/preprocess.py index 1eae375..51ec8ef 100644 --- a/esp32_ulp/preprocess.py +++ b/esp32_ulp/preprocess.py @@ -32,29 +32,31 @@ def WRITE_RTC_FIELD(rtc_reg, low_bit, value): class Preprocessor: def __init__(self): + self._defines_db = None self._defines = {} + def parse_define_line(self, line): + line = line.strip() + if not line.startswith("#define"): + # skip lines not containing #define + return {} + line = line[8:].strip() # remove #define + parts = line.split(None, 1) + if len(parts) != 2: + # skip defines without value + return {} + identifier, value = parts + tmp = identifier.split('(', 1) + if len(tmp) == 2: + # skip parameterised defines (macros) + return {} + value = "".join(nocomment.remove_comments(value)).strip() + return {identifier: value} + def parse_defines(self, content): - result = {} for line in content.splitlines(): - line = line.strip() - if not line.startswith("#define"): - # skip lines not containing #define - continue - line = line[8:].strip() # remove #define - parts = line.split(None, 1) - if len(parts) != 2: - # skip defines without value - continue - identifier, value = parts - tmp = identifier.split('(', 1) - if len(tmp) == 2: - # skip parameterised defines (macros) - continue - value = "".join(nocomment.remove_comments(value)).strip() - result[identifier] = value - self._defines = result - return result + self._defines.update(self.parse_define_line(line)) + return self._defines def expand_defines(self, line): found = True @@ -70,6 +72,16 @@ def expand_defines(self, line): return line + def process_include_file(self, filename): + defines = self._defines + + 
with open(filename, 'r') as f: + for line in f: + result = self.parse_defines(line) + defines.update(result) + + return defines + def expand_rtc_macros(self, line): clean_line = line.strip() if not clean_line: diff --git a/tests/fixtures/incl.h b/tests/fixtures/incl.h new file mode 100644 index 0000000..5c8415e --- /dev/null +++ b/tests/fixtures/incl.h @@ -0,0 +1,5 @@ +#define CONST1 42 +#define MACRO(x,y) x+y +#define MULTI_LINE abc \ + xyz +#define CONST2 99 \ No newline at end of file diff --git a/tests/fixtures/incl2.h b/tests/fixtures/incl2.h new file mode 100644 index 0000000..09775d1 --- /dev/null +++ b/tests/fixtures/incl2.h @@ -0,0 +1,2 @@ +#define CONST2 123 +#define CONST3 777 \ No newline at end of file diff --git a/tests/preprocess.py b/tests/preprocess.py index a31fe1b..f9fe936 100644 --- a/tests/preprocess.py +++ b/tests/preprocess.py @@ -27,19 +27,19 @@ def replace_defines_should_return_remove_comments(): def test_parse_defines(): p = Preprocessor() - assert p.parse_defines("") == {} - assert p.parse_defines("// comment") == {} - assert p.parse_defines(" // comment") == {} - assert p.parse_defines(" /* comment */") == {} - assert p.parse_defines(" /* comment */ #define A 42") == {} # #define must be the first thing on a line - assert p.parse_defines("#define a 1") == {"a": "1"} - assert p.parse_defines(" #define a 1") == {"a": "1"} - assert p.parse_defines("#define a 1 2") == {"a": "1 2"} - assert p.parse_defines("#define f(a,b) 1") == {} # macros not supported - assert p.parse_defines("#define f(a, b) 1") == {} # macros not supported - assert p.parse_defines("#define f (a,b) 1") == {"f": "(a,b) 1"} # f is not a macro - assert p.parse_defines("#define f (a, b) 1") == {"f": "(a, b) 1"} # f is not a macro - assert p.parse_defines("#define RTC_ADDR 0x12345 // start of range") == {"RTC_ADDR": "0x12345"} + assert p.parse_define_line("") == {} + assert p.parse_define_line("// comment") == {} + assert p.parse_define_line(" // comment") == {} + assert 
p.parse_define_line(" /* comment */") == {} + assert p.parse_define_line(" /* comment */ #define A 42") == {} # #define must be the first thing on a line + assert p.parse_define_line("#define a 1") == {"a": "1"} + assert p.parse_define_line(" #define a 1") == {"a": "1"} + assert p.parse_define_line("#define a 1 2") == {"a": "1 2"} + assert p.parse_define_line("#define f(a,b) 1") == {} # macros not supported + assert p.parse_define_line("#define f(a, b) 1") == {} # macros not supported + assert p.parse_define_line("#define f (a,b) 1") == {"f": "(a,b) 1"} # f is not a macro + assert p.parse_define_line("#define f (a, b) 1") == {"f": "(a, b) 1"} # f is not a macro + assert p.parse_define_line("#define RTC_ADDR 0x12345 // start of range") == {"RTC_ADDR": "0x12345"} @test @@ -181,6 +181,29 @@ def test_expand_rtc_macros(): assert p.expand_rtc_macros("READ_RTC_FIELD(1, 2)") == "\treg_rd 1, 2 + 1 - 1, 2" +@test +def test_process_include_file(): + p = Preprocessor() + + defines = p.process_include_file('fixtures/incl.h') + assert defines['CONST1'] == '42' + assert defines['CONST2'] == '99' + assert defines.get('MULTI_LINE', None) == 'abc \\' # correct. 
line continuations not supported + assert 'MACRO' not in defines + + +@test +def test_process_include_file_with_multiple_files(): + p = Preprocessor() + + defines = p.process_include_file('fixtures/incl.h') + defines = p.process_include_file('fixtures/incl2.h') + + assert defines['CONST1'] == '42', "constant from incl.h" + assert defines['CONST2'] == '123', "constant overridden by incl2.h" + assert defines['CONST3'] == '777', "constant from incl2.h" + + if __name__ == '__main__': # run all methods marked with @test for t in tests: From 8d88fd1dd82b57746497687831db494060792f4a Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Tue, 27 Jul 2021 18:02:01 +0300 Subject: [PATCH 20/29] add support for using a btree database (DefinesDB) to store defines for preprocessing The btree module, which ships with MicroPython, can efficiently manage a large number of key-value pairs with minimal memory. It automatically initialises to appropriate memory and cache limits, based on the device it's running on, but if needed those parameters can be tuned too, e.g. to restrict memory usage further. The database is optional and must be supplied to the Preprocessor via the use_db() method. It's safe however to always supply it, because a non-existing database will behave like an empty database. Care is taken not to unnecessarily create an empty db, when only reading from it and not to unnecessarily check the file-system whether the database exists. Inside the Preprocessor the database is opened and closed with a context manager. This ensures the database will be closed properly again. While DefinesDB opens the underlying database automatically, it cannot automatically close the database again (using a destructor __del__ does not work, and MicroPython does not have the "atexit" exit handler on the esp32). By using a context manager, the code becomes cleaner, while still ensuring the database is closed at the end. 
--- esp32_ulp/definesdb.py | 75 +++++++++++++++++++++++++++++++++ esp32_ulp/preprocess.py | 61 ++++++++++++++++++++------- esp32_ulp/util.py | 10 +++++ tests/00_unit_tests.sh | 2 +- tests/definesdb.py | 60 ++++++++++++++++++++++++++ tests/preprocess.py | 93 +++++++++++++++++++++++++++++++++++++++++ tests/util.py | 16 ++++++- 7 files changed, 299 insertions(+), 18 deletions(-) create mode 100644 esp32_ulp/definesdb.py create mode 100644 tests/definesdb.py diff --git a/esp32_ulp/definesdb.py b/esp32_ulp/definesdb.py new file mode 100644 index 0000000..ce1d232 --- /dev/null +++ b/esp32_ulp/definesdb.py @@ -0,0 +1,75 @@ +import os +import btree +from .util import file_exists + +DBNAME = 'defines.db' + + +class DefinesDB: + def __init__(self): + self._file = None + self._db = None + self._db_exists = None + + def clear(self): + self.close() + try: + os.remove(DBNAME) + self._db_exists = False + except OSError: + pass + + def open(self): + if self._db: + return + try: + self._file = open(DBNAME, 'r+b') + except OSError: + self._file = open(DBNAME, 'w+b') + self._db = btree.open(self._file) + self._db_exists = True + + def close(self): + if not self._db: + return + self._db.close() + self._db = None + self._file.close() + self._file = None + + def db_exists(self): + if self._db_exists is None: + self._db_exists = file_exists(DBNAME) + return self._db_exists + + def update(self, dictionary): + for k, v in dictionary.items(): + self.__setitem__(k, v) + + def get(self, key, default): + try: + result = self.__getitem__(key) + except KeyError: + result = default + return result + + def keys(self): + if not self.db_exists(): + return [] + + self.open() + return [k.decode() for k in self._db.keys()] + + def __getitem__(self, key): + if not self.db_exists(): + raise KeyError + + self.open() + return self._db[key.encode()].decode() + + def __setitem__(self, key, value): + self.open() + self._db[key.encode()] = str(value).encode() + + def __iter__(self): + return iter(self.keys()) 
diff --git a/esp32_ulp/preprocess.py b/esp32_ulp/preprocess.py index 51ec8ef..c0be8eb 100644 --- a/esp32_ulp/preprocess.py +++ b/esp32_ulp/preprocess.py @@ -1,5 +1,6 @@ from . import nocomment from .util import split_tokens +from .definesdb import DefinesDB class RTC_Macros: @@ -56,6 +57,7 @@ def parse_define_line(self, line): def parse_defines(self, content): for line in content.splitlines(): self._defines.update(self.parse_define_line(line)) + return self._defines def expand_defines(self, line): @@ -66,6 +68,8 @@ def expand_defines(self, line): line = "" for t in tokens: lu = self._defines.get(t, t) + if lu == t and self._defines_db: + lu = self._defines_db.get(t, t) if lu != t: found = True line += lu @@ -73,14 +77,13 @@ def expand_defines(self, line): return line def process_include_file(self, filename): - defines = self._defines - - with open(filename, 'r') as f: - for line in f: - result = self.parse_defines(line) - defines.update(result) + with self.open_db() as db: + with open(filename, 'r') as f: + for line in f: + result = self.parse_define_line(line) + db.update(result) - return defines + return db def expand_rtc_macros(self, line): clean_line = line.strip() @@ -103,17 +106,43 @@ def expand_rtc_macros(self, line): return macro_fn(*macro_args) + def use_db(self, defines_db): + self._defines_db = defines_db + + def open_db(self): + class ctx: + def __init__(self, db): + self._db = db + + def __enter__(self): + # not opening DefinesDB - it opens itself when needed + return self._db + + def __exit__(self, type, value, traceback): + if isinstance(self._db, DefinesDB): + self._db.close() + + if self._defines_db: + return ctx(self._defines_db) + + return ctx(self._defines) + def preprocess(self, content): self.parse_defines(content) - lines = nocomment.remove_comments(content) - result = [] - for line in lines: - line = self.expand_defines(line) - line = self.expand_rtc_macros(line) - result.append(line) - result = "\n".join(result) + + with self.open_db(): + 
lines = nocomment.remove_comments(content) + result = [] + for line in lines: + line = self.expand_defines(line) + line = self.expand_rtc_macros(line) + result.append(line) + result = "\n".join(result) + return result -def preprocess(content): - return Preprocessor().preprocess(content) +def preprocess(content, use_defines_db=True): + preprocessor = Preprocessor() + preprocessor.use_db(DefinesDB()) + return preprocessor.preprocess(content) diff --git a/esp32_ulp/util.py b/esp32_ulp/util.py index 8d2832f..0dacf72 100644 --- a/esp32_ulp/util.py +++ b/esp32_ulp/util.py @@ -1,6 +1,7 @@ DEBUG = False import gc +import os NORMAL, WHITESPACE = 0, 1 @@ -67,3 +68,12 @@ def validate_expression(param): if c not in '0123456789abcdef': state = 0 return True + + +def file_exists(filename): + try: + os.stat(filename) + return True + except OSError: + pass + return False diff --git a/tests/00_unit_tests.sh b/tests/00_unit_tests.sh index efd5b64..ee1a239 100755 --- a/tests/00_unit_tests.sh +++ b/tests/00_unit_tests.sh @@ -4,7 +4,7 @@ set -e -for file in opcodes assemble link util preprocess; do +for file in opcodes assemble link util preprocess definesdb; do echo testing $file... 
micropython $file.py done diff --git a/tests/definesdb.py b/tests/definesdb.py new file mode 100644 index 0000000..5e2100c --- /dev/null +++ b/tests/definesdb.py @@ -0,0 +1,60 @@ +import os + +from esp32_ulp.definesdb import DefinesDB, DBNAME +from esp32_ulp.util import file_exists + +tests = [] + + +def test(param): + tests.append(param) + + +@test +def test_definesdb_clear_removes_all_keys(): + db = DefinesDB() + db.open() + db.update({'KEY1': 'VALUE1'}) + + db.clear() + + assert 'KEY1' not in db + + db.close() + + +@test +def test_definesdb_persists_data_across_instantiations(): + db = DefinesDB() + db.open() + db.clear() + + db.update({'KEY1': 'VALUE1'}) + + assert 'KEY1' in db + + db.close() + del db + db = DefinesDB() + db.open() + + assert db.get('KEY1', None) == 'VALUE1' + + db.close() + + +@test +def test_definesdb_should_not_create_a_db_file_when_only_reading(): + db = DefinesDB() + + db.clear() + assert not file_exists(DBNAME) + + assert db.get('some-key', None) is None + assert not file_exists(DBNAME) + + +if __name__ == '__main__': + # run all methods marked with @test + for t in tests: + t() diff --git a/tests/preprocess.py b/tests/preprocess.py index f9fe936..e275707 100644 --- a/tests/preprocess.py +++ b/tests/preprocess.py @@ -1,4 +1,8 @@ +import os + from esp32_ulp.preprocess import Preprocessor +from esp32_ulp.definesdb import DefinesDB, DBNAME +from esp32_ulp.util import file_exists tests = [] @@ -186,6 +190,7 @@ def test_process_include_file(): p = Preprocessor() defines = p.process_include_file('fixtures/incl.h') + assert defines['CONST1'] == '42' assert defines['CONST2'] == '99' assert defines.get('MULTI_LINE', None) == 'abc \\' # correct. 
line continuations not supported @@ -204,6 +209,94 @@ def test_process_include_file_with_multiple_files(): assert defines['CONST3'] == '777', "constant from incl2.h" +@test +def test_process_include_file_using_database(): + db = DefinesDB() + db.clear() + + p = Preprocessor() + p.use_db(db) + + p.process_include_file('fixtures/incl.h') + p.process_include_file('fixtures/incl2.h') + + assert db['CONST1'] == '42', "constant from incl.h" + assert db['CONST2'] == '123', "constant overridden by incl2.h" + assert db['CONST3'] == '777', "constant from incl2.h" + + db.close() + + +@test +def test_process_include_file_should_not_load_database_keys_into_instance_defines_dictionary(): + db = DefinesDB() + db.clear() + + p = Preprocessor() + p.use_db(db) + + p.process_include_file('fixtures/incl.h') + + # a bit hackish to reference instance-internal state + # but it's important to verify this, as we otherwise run out of memory on device + assert 'CONST2' not in p._defines + + + +@test +def test_preprocess_should_use_definesdb_when_provided(): + p = Preprocessor() + + content = """\ +#define LOCALCONST 42 + +entry: + move r1, LOCALCONST + move r2, DBKEY +""" + + # first try without db + result = p.preprocess(content) + + assert "move r1, 42" in result + assert "move r2, DBKEY" in result + assert "move r2, 99" not in result + + # now try with db + db = DefinesDB() + db.clear() + db.update({'DBKEY': '99'}) + p.use_db(db) + + result = p.preprocess(content) + + assert "move r1, 42" in result + assert "move r2, 99" in result + assert "move r2, DBKEY" not in result + + +@test +def test_preprocess_should_ensure_no_definesdb_is_created_when_only_reading_from_it(): + content = """\ + #define CONST 42 + move r1, CONST""" + + # remove any existing db + db = DefinesDB() + db.clear() + assert not file_exists(DBNAME) + + # now preprocess using db + p = Preprocessor() + p.use_db(db) + + result = p.preprocess(content) + + assert "move r1, 42" in result + + assert not file_exists(DBNAME) + + if 
__name__ == '__main__': # run all methods marked with @test for t in tests: diff --git a/tests/util.py b/tests/util.py index 18ab54e..009f3f1 100644 --- a/tests/util.py +++ b/tests/util.py @@ -1,4 +1,5 @@ -from esp32_ulp.util import split_tokens, validate_expression +import os +from esp32_ulp.util import split_tokens, validate_expression, file_exists tests = [] @@ -56,6 +57,19 @@ def test_validate_expression(): assert validate_expression('def cafe()') is False # valid hex digits, but potentially dangerous code +@test +def test_file_exists(): + testfile = '.testfile' + with open(testfile, 'w') as f: + f.write('contents') + + assert file_exists(testfile) + + os.remove(testfile) + + assert not file_exists(testfile) + + if __name__ == '__main__': # run all methods marked with @test for t in tests: From 46f1442b25353c854ad244669d378335f794c2c8 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Tue, 27 Jul 2021 23:05:48 +0300 Subject: [PATCH 21/29] add special handling for the BIT macro used in the esp-idf framework The functions the preprocessor supports (WRITE_RTC_*/READ_RTC_*) do not need the value returned by the BIT macro. Instead, they use the bit number specified to the BIT macro, i.e. for BIT(x) they need x. So this change handles BIT by simply replacing it with an empty string, and BIT(x) results in (x) in the preprocessor output. --- esp32_ulp/preprocess.py | 9 +++++++++ tests/preprocess.py | 15 +++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/esp32_ulp/preprocess.py b/esp32_ulp/preprocess.py index c0be8eb..a890005 100644 --- a/esp32_ulp/preprocess.py +++ b/esp32_ulp/preprocess.py @@ -70,6 +70,15 @@ def expand_defines(self, line): lu = self._defines.get(t, t) if lu == t and self._defines_db: lu = self._defines_db.get(t, t) + if lu == t and t == 'BIT': + # Special hack: BIT(..) translates to a 32-bit mask where only the specified bit is set. + # But the reg_wr and reg_rd opcodes expect actual bit numbers for argument 2 and 3. 
+ # While the real READ_RTC_*/WRITE_RTC_* macros take in the output of BIT(x), they + # ultimately convert these back (via helper macros) to the bit number (x). And since this + # preprocessor does not (aim to) implement "proper" macro-processing, we can simply + # short-circuit this round-trip via macros and replace "BIT" with nothing so that + # "BIT(x)" gets mapped to "(x)". + continue if lu != t: found = True line += lu diff --git a/tests/preprocess.py b/tests/preprocess.py index e275707..30f4e49 100644 --- a/tests/preprocess.py +++ b/tests/preprocess.py @@ -185,6 +185,21 @@ def test_expand_rtc_macros(): assert p.expand_rtc_macros("READ_RTC_FIELD(1, 2)") == "\treg_rd 1, 2 + 1 - 1, 2" +@test +def preprocess_should_replace_BIT_with_empty_string_unless_defined(): + # by default replace BIT with empty string (see description for why in the code) + src = " move r1, 0x123 << BIT(24)" + assert "move r1, 0x123 << (24)" in Preprocessor().preprocess(src) + + # but if BIT is defined, use that + src = """\ + #define BIT 12 + + move r1, BIT""" + + assert "move r1, 12" in Preprocessor().preprocess(src) + + @test def test_process_include_file(): p = Preprocessor() From 2f6ee78d156d7ee15b44119db2a738125989107c Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Wed, 28 Jul 2021 07:29:04 +0300 Subject: [PATCH 22/29] add include processor tool for populating a defines.db from include files --- esp32_ulp/parse_to_db.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 esp32_ulp/parse_to_db.py diff --git a/esp32_ulp/parse_to_db.py b/esp32_ulp/parse_to_db.py new file mode 100644 index 0000000..ac61f98 --- /dev/null +++ b/esp32_ulp/parse_to_db.py @@ -0,0 +1,23 @@ +import sys + +from .preprocess import Preprocessor +from .definesdb import DefinesDB + + +def parse(files): + db = DefinesDB() + + p = Preprocessor() + p.use_db(db) + + for f in files: + print('Processing file:', f) + + p.process_include_file(f) + + print('Done.') + + +if __name__ == 
'__main__': + parse(sys.argv[1:]) + From 69ae94696bc9b4334a7934fdb8bd5d744df8a769 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Wed, 28 Jul 2021 07:26:18 +0300 Subject: [PATCH 23/29] add compatibility tests using good example code off the net The test script will fetch the ESP-IDF framework to have all necessary include files, and will then fetch two sources of example code (ulptool and binutil-esp32_ulp's own test examples). The examples are fetched rather than duplicated into this repo, to avoid potential licensing and attribution issues. --- .github/workflows/run_tests.yaml | 10 ++- tests/02_compat_rtc_tests.sh | 118 +++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+), 1 deletion(-) create mode 100755 tests/02_compat_rtc_tests.sh diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml index 166a2e5..e9fdb6d 100644 --- a/.github/workflows/run_tests.yaml +++ b/.github/workflows/run_tests.yaml @@ -70,5 +70,13 @@ jobs: export PATH=$PATH:${{ steps.build_micropython.outputs.bin_dir }} export PATH=$PATH:${{ steps.build_binutils.outputs.bin_dir }} cd tests - ln -s ../binutils-esp32ulp # already cloned earlier. reuse. ./01_compat_tests.sh + + - name: Run compat tests with RTC macros + id: compat_rtc_tests + run: | + export PATH=$PATH:${{ steps.build_micropython.outputs.bin_dir }} + export PATH=$PATH:${{ steps.build_binutils.outputs.bin_dir }} + cd tests + ln -s ../binutils-esp32ulp # already cloned earlier. reuse. 
+ ./02_compat_rtc_tests.sh diff --git a/tests/02_compat_rtc_tests.sh b/tests/02_compat_rtc_tests.sh new file mode 100755 index 0000000..0f64864 --- /dev/null +++ b/tests/02_compat_rtc_tests.sh @@ -0,0 +1,118 @@ +#!/bin/bash + +# export PYTHONPATH=.:$PYTHONPATH + +set -e + +make_log_dir() { + mkdir -p log +} + +fetch_esp_idf() { + [ -d esp-idf ] && return + + echo "Fetching esp-idf" + log_file=log/fetch-esp-idf.log + git clone --depth 1 \ + https://github.com/espressif/esp-idf.git 1>$log_file 2>&1 +} + +fetch_ulptool_examples() { + [ -d ulptool ] && return + + echo "Fetching ulptool examples" + log_file=log/fetch-ulptool.log + git clone --depth 1 \ + https://github.com/duff2013/ulptool 1>$log_file 2>&1 +} + +fetch_binutils_esp32ulp_examples() { + [ -d binutils-esp32ulp ] && return + + echo "Fetching binutils-esp32ulp examples" + log_file=log/fetch-binutils.log + git clone --depth 1 \ + https://github.com/espressif/binutils-esp32ulp.git 1>$log_file 2>&1 +} + +build_defines_db() { + local defines_db=defines.db + + if [ "$1" = "-r" ] && [ -s "${defines_db}" ]; then + # reuse existing defines.db + return + fi + + echo "Building defines DB from include files" + log_file=log/build_defines_db.log + rm -f "${defines_db}" + micropython -m esp32_ulp.parse_to_db \ + esp-idf/components/soc/esp32/include/soc/*.h \ + esp-idf/components/esp_common/include/*.h 1>$log_file +} + +make_log_dir +fetch_esp_idf +fetch_ulptool_examples +fetch_binutils_esp32ulp_examples +build_defines_db $1 + +for src_file in ulptool/src/ulp_examples/*/*.s binutils-esp32ulp/gas/testsuite/gas/esp32ulp/esp32/*.s; do + + src_name="${src_file%.s}" + + echo "Testing $src_file" + + test_name="${src_name##*/}" + + # for now, skip files that contain known bugs in esp32_ulp (essentially a todo list of what to fix) + for I in rtcio esp32ulp_all esp32ulp_globals esp32ulp_jumpr esp32ulp_ranges test_reg; do + if [ "${test_name}" = "$I" ]; then + # these are old bugs, and not related to the RTC macro handling 
functionality + # they will still be great to fix over time + echo -e "\tSkipping... known bugs in esp32_ulp" + continue 2 + fi + done + + # for now, skip files that contain unsupported things (macros) + for I in i2c i2c_dev stack i2c_wr test1 test_jumpr test_macro; do + if [ "${test_name}" = "$I" ]; then + echo -e "\tSkipping... not yet supported" + continue 2 + fi + done + + echo -e "\tBuilding using py-esp32-ulp" + ulp_file="${src_name}.ulp" + log_file="${src_name}.log" + micropython -m esp32_ulp $src_file 1>$log_file # generates $ulp_file + + pre_file="${src_name}.pre" + obj_file="${src_name}.o" + elf_file="${src_name}.elf" + bin_file="${src_name}.bin" + + echo -e "\tBuilding using binutils" + gcc -I esp-idf/components/soc/esp32/include -I esp-idf/components/esp_common/include \ + -x assembler-with-cpp \ + -E -o ${pre_file} $src_file + esp32ulp-elf-as -o $obj_file ${pre_file} + esp32ulp-elf-ld -T esp32.ulp.ld -o $elf_file $obj_file + esp32ulp-elf-objcopy -O binary $elf_file $bin_file + + if ! diff $ulp_file $bin_file 1>/dev/null; then + echo -e "\tBuild outputs differ!" + echo "" + echo "Compatibility test failed for $src_file" + echo "py-esp32-ulp log:" + cat $log_file + echo "py-esp32-ulp output:" + xxd $ulp_file + echo "binutils output:" + xxd $bin_file + exit 1 + else + echo -e "\tBuild outputs match" + fi +done From 4f90f762d2dbcea09fc8d3ecd0fda293bf74a935 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Thu, 29 Jul 2021 21:59:48 +0300 Subject: [PATCH 24/29] add documentation for the preprocessor This defines what the preprocessor aims to do, why and what its intentional limitations are. 
Examples on how to use it and how to use the "Defines DB" are also provided --- README.rst | 5 ++ docs/preprocess.rst | 138 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 docs/preprocess.rst diff --git a/README.rst b/README.rst index 56395d1..2afa421 100644 --- a/README.rst +++ b/README.rst @@ -24,6 +24,11 @@ Constants defined with ``.set`` are supported in expressions. We have some unit tests and also compatibility tests that compare the output whether it is identical with binutils-esp32ulp output. +There is a simple preprocessor that understands just enough to allow assembling +ULP source files containing convenience macros such as WRITE_RTC_REG. The +preprocessor and how to use it are documented here: +`Preprocessor support </docs/preprocess.rst>`_. + There might be some stuff missing, some bugs and other symptoms of alpha software. Also, error and exception handling is rather rough yet. diff --git a/docs/preprocess.rst b/docs/preprocess.rst new file mode 100644 index 0000000..0716e69 --- /dev/null +++ b/docs/preprocess.rst @@ -0,0 +1,138 @@ +Preprocessor +--------------------- + +py-esp32-ulp contains a small preprocessor, which aims to fulfill one goal: +facilitate assembling of ULP code from Espressif and other open-source +projects to loadable/executable machine code without modification. + +Such code uses convenience macros (``READ_RTC_*`` and ``WRITE_RTC_*``) +provided by the ESP-IDF framework, along with constants defined in the +framework's include files (such as ``RTC_GPIO_IN_REG``), to make reading +and writing from/to peripheral registers much easier. + +In order to do this the preprocessor has two capabilities: + +1. Parse and replace identifiers defined with ``#define`` +2. Recognise the ``WRITE_RTC_*`` and ``READ_RTC_*`` macros and expand + them in a way that mirrors what the real ESP-IDF macros do. + + +Usage +------------------------ + +Normally the assembler is called as follows + +..
code-block:: python + + src = "..full assembler file contents" + assembler = Assembler() + assembler.assemble(src) + ... + +With the preprocessor, simply pass the source code via the preprocessor first: + +.. code-block:: python + + from preprocess import preprocess + + src = "..full assembler file contents" + src = preprocess(src) + assembler = Assembler() + assembler.assemble(src) + ... + + +Using a "Defines Database" +-------------------------- + +Because the py-esp32-ulp assembler was built for running on the ESP32 +microcontroller with limited RAM, the preprocessor aims to work there too. + +To handle a large number of defined constants (such as the ``RTC_*`` constants from +the ESP-IDF) the preprocessor can use a database (based on Berkeley DB) stored on the +device's filesystem for looking up defines. + +The database needs to be populated before preprocessing. (Usually, when only using +constants from the ESP-IDF, this is a one-time step, because the include files +don't change.) The database can be reused for all subsequent preprocessor runs. + +(The database can also be generated on a PC and then deployed to the ESP32, to +save processing effort on the device. In that case the include files themselves +are not needed on the device either.) + +1. Build the defines database + + The ``esp32_ulp.parse_to_db`` tool can be used to generate the defines + database from include files. The resulting file will be called + ``defines.db``. + + (The following assumes running on a PC. To do this on device, refer to the + `esp32_ulp/parse_to_db.py <../esp32_ulp/parse_to_db.py>`_ file.) + + ..
code-block:: bash + + # general command + micropython -m esp32_ulp.parse_to_db path/to/include.h + + # loading specific ESP-IDF include files + micropython -m esp32_ulp.parse_to_db esp-idf/components/soc/esp32/include/soc/soc_ulp.h + + # loading multiple files at once + micropython -m esp32_ulp.parse_to_db esp-idf/components/soc/esp32/include/soc/*.h + + # if file system space is not a concern, the following can be convenient + # by including all relevant include files from the ESP-IDF framework. + # This results in an approximately 2MB large database. + micropython -m esp32_ulp.parse_to_db \ + esp-idf/components/soc/esp32/include/soc/*.h \ + esp-idf/components/esp_common/include/*.h + + # most ULP code uses only 5 include files. Parsing only those into the + # database should thus allow assembling virtually all ULP code one would + # find or want to write. + # This results in an approximately 250kB large database. + micropython -m esp32_ulp.parse_to_db \ + esp-idf/components/soc/esp32/include/soc/{soc,soc_ulp,rtc_cntl_reg,rtc_io_reg,sens_reg}.h + +2. Using the defines database during preprocessing + + The preprocessor will automatically use a defines database, when using the + ``preprocess.preprocess`` convenience function, even when the database does + not exist (an absent database is treated like an empty database, and care + is taken not to create an empty database file, cluttering up the filesystem, + when not needed). + + If you do not want the preprocessor to use a DefinesDB, pass ``False`` to + the ``use_defines_db`` argument of the ``preprocess`` convenience function, + or instantiate the ``Preprocessor`` class directly, without passing it a + DefinesDB instance via ``use_db``. + +Design choices +-------------- + +The preprocessor does not support: + +1. Function style macros such as :code:`#define f(a,b) (a+b)` + + This is not important, because there are only few RTC macros that need + to be supported and they are simply implemented as Python functions.
+ + Since the preprocessor will understand ``#define`` directives directly in the + assembler source file, include mechanisms are not needed in some cases + (simply copying the needed ``#define`` statements from include files into the + assembler source will work). + +2. ``#include`` directives + + The preprocessor does not currently follow ``#include`` directives. To + limit space requirements (both in memory and on the filesystem), the + preprocessor relies on a database of defines (key/value pairs). This + database should be populated before using the preprocessor, by using the + ``esp32_ulp.parse_to_db`` tool (see section above), which parses include + files for identifiers defined therein. + +3. Preserving comments + + The assumption is that the output will almost always go into the + assembler directly, so preserving comments is not very useful and + would add a lot of complexity. From d44384f1790d173fa2a1ff390da0948196f37d26 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Wed, 28 Jul 2021 07:52:02 +0300 Subject: [PATCH 25/29] fix use of treg field in i_move instruction to match binutils-esp32 output in all cases This fix makes compat tests pass for: https://github.com/duff2013/ulptool/blob/master/src/ulp_examples/ulp_rtc_gpio/rtcio.s --- esp32_ulp/opcodes.py | 2 +- tests/02_compat_rtc_tests.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/esp32_ulp/opcodes.py b/esp32_ulp/opcodes.py index 59006f2..10b5bd5 100644 --- a/esp32_ulp/opcodes.py +++ b/esp32_ulp/opcodes.py @@ -482,7 +482,7 @@ def i_move(reg_dest, reg_imm_src): if src.type == REG: _alu_reg.dreg = dest _alu_reg.sreg = src.value - _alu_reg.treg = 1 # XXX undocumented, this is the value binutils-esp32 uses + _alu_reg.treg = src.value # XXX undocumented, this is the value binutils-esp32 uses _alu_reg.unused = 0 _alu_reg.sel = ALU_SEL_MOV _alu_reg.sub_opcode = SUB_OPCODE_ALU_REG diff --git a/tests/02_compat_rtc_tests.sh b/tests/02_compat_rtc_tests.sh index 0f64864..2904ee6 100755 
--- a/tests/02_compat_rtc_tests.sh +++ b/tests/02_compat_rtc_tests.sh @@ -66,7 +66,7 @@ for src_file in ulptool/src/ulp_examples/*/*.s binutils-esp32ulp/gas/testsuite/g test_name="${src_name##*/}" # for now, skip files that contain known bugs in esp32_ulp (essentially a todo list of what to fix) - for I in rtcio esp32ulp_all esp32ulp_globals esp32ulp_jumpr esp32ulp_ranges test_reg; do + for I in esp32ulp_all esp32ulp_globals esp32ulp_jumpr esp32ulp_ranges test_reg; do if [ "${test_name}" = "$I" ]; then # these are old bugs, and not related to the RTC macro handling functionality # they will still be great to fix over time From 254adf983fcd7bfb7c673b91f3e7fbab2c97ae0d Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Wed, 28 Jul 2021 09:18:50 +0300 Subject: [PATCH 26/29] allow specifying the address for reg_rd and reg_wr in 32-bit words This change allows specifying the address in 32-bit words (i.e. the address as seen from the ULP), in addition to the existing mode of specifying a register's full address on the DPORT bus. If an address is between 0 and DR_REG_MAX_DIRECT (0x3ff), treat it as a word offset (ULP address), otherwise treat it as a full address on the DPORT bus as before. --- esp32_ulp/opcodes.py | 16 ++++++++++++---- tests/compat/fixes.S | 5 +++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/esp32_ulp/opcodes.py b/esp32_ulp/opcodes.py index 10b5bd5..8a5b6d7 100644 --- a/esp32_ulp/opcodes.py +++ b/esp32_ulp/opcodes.py @@ -16,6 +16,7 @@ OPCODE_WR_REG = 1 OPCODE_RD_REG = 2 +DR_REG_MAX_DIRECT = 0x3ff RD_REG_PERIPH_RTC_CNTL = 0 RD_REG_PERIPH_RTC_IO = 1 RD_REG_PERIPH_SENS = 2 @@ -353,8 +354,9 @@ def get_cond(arg): def _soc_reg_to_ulp_periph_sel(reg): # Map SoC peripheral register to periph_sel field of RD_REG and WR_REG instructions. 
- ret = 3 - if reg < DR_REG_RTCCNTL_BASE: + if reg < DR_REG_MAX_DIRECT: + ret = RD_REG_PERIPH_RTC_CNTL + elif reg < DR_REG_RTCCNTL_BASE: raise ValueError("invalid register base") elif reg < DR_REG_RTCIO_BASE: ret = RD_REG_PERIPH_RTC_CNTL @@ -371,7 +373,10 @@ def _soc_reg_to_ulp_periph_sel(reg): def i_reg_wr(reg, high_bit, low_bit, val): reg = get_imm(reg) - _wr_reg.addr = (reg & 0xff) >> 2 + if reg < DR_REG_MAX_DIRECT: # see https://github.com/espressif/binutils-esp32ulp/blob/master/gas/config/tc-esp32ulp_esp32.c + _wr_reg.addr = reg + else: + _wr_reg.addr = (reg & 0xff) >> 2 _wr_reg.periph_sel = _soc_reg_to_ulp_periph_sel(reg) _wr_reg.data = get_imm(val) _wr_reg.low = get_imm(low_bit) @@ -382,7 +387,10 @@ def i_reg_wr(reg, high_bit, low_bit, val): def i_reg_rd(reg, high_bit, low_bit): reg = get_imm(reg) - _rd_reg.addr = (reg & 0xff) >> 2 + if reg < DR_REG_MAX_DIRECT: # see https://github.com/espressif/binutils-esp32ulp/blob/master/gas/config/tc-esp32ulp_esp32.c + _rd_reg.addr = reg + else: + _rd_reg.addr = (reg & 0xff) >> 2 _rd_reg.periph_sel = _soc_reg_to_ulp_periph_sel(reg) _rd_reg.unused = 0 _rd_reg.low = get_imm(low_bit) diff --git a/tests/compat/fixes.S b/tests/compat/fixes.S index 0c84f1b..022951a 100644 --- a/tests/compat/fixes.S +++ b/tests/compat/fixes.S @@ -17,4 +17,9 @@ counter: entry: MOVE R1, gpio WAIT 42 + + # reg_rd/reg_wr with "short" and "long" address notation + reg_rd 12, 7, 0 + reg_rd 0x3ff48000, 7, 0 + halt From c3bd1010746324b9409e81360f75715c10d15d37 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Thu, 29 Jul 2021 21:31:00 +0300 Subject: [PATCH 27/29] support .int data type .long and .int are the same as per GNU assembler manual: https://sourceware.org/binutils/docs/as/Long.html binutils-esp32ulp also treats them the same (compat test included to verify this) --- esp32_ulp/assemble.py | 5 +++++ tests/compat/fixes.S | 3 +++ 2 files changed, 8 insertions(+) diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py index 9180d8a..7a92a8e 
100644 --- a/esp32_ulp/assemble.py +++ b/esp32_ulp/assemble.py @@ -240,6 +240,11 @@ def d_word(self, *args): self.append_data(2, args) def d_long(self, *args): + self.d_int(*args) + + def d_int(self, *args): + # .long and .int are identical as per GNU assembler documentation + # https://sourceware.org/binutils/docs/as/Long.html self.append_data(4, args) def assembler_pass(self, lines): diff --git a/tests/compat/fixes.S b/tests/compat/fixes.S index 022951a..9e4d0ef 100644 --- a/tests/compat/fixes.S +++ b/tests/compat/fixes.S @@ -12,6 +12,9 @@ counter: .long 0 +.data +var2: .int 1111 + .text .global entry entry: From 2a0a39a810c70218a02c7ec9e0b33945ba064e23 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Mon, 9 Aug 2021 19:45:36 +0300 Subject: [PATCH 28/29] refactor: small improvements based on PR comments. --- esp32_ulp/__main__.py | 2 +- esp32_ulp/assemble.py | 12 +++++++++--- esp32_ulp/definesdb.py | 7 +++++-- esp32_ulp/opcodes.py | 6 +++--- esp32_ulp/preprocess.py | 5 ++--- esp32_ulp/util.py | 4 ++-- tests/compat/expr.S | 12 ++++++++---- tests/fixtures/incl.h | 2 +- tests/fixtures/incl2.h | 2 +- tests/preprocess.py | 20 ++++++++++++++++++++ 10 files changed, 52 insertions(+), 20 deletions(-) diff --git a/esp32_ulp/__main__.py b/esp32_ulp/__main__.py index d9555fd..209656f 100644 --- a/esp32_ulp/__main__.py +++ b/esp32_ulp/__main__.py @@ -10,6 +10,7 @@ def src_to_binary(src): assembler = Assembler() + src = preprocess(src) assembler.assemble(src, remove_comments=False) # comments already removed by preprocessor garbage_collect('before symbols export') addrs_syms = assembler.symbols.export() @@ -24,7 +25,6 @@ def main(fn): with open(fn) as f: src = f.read() - src = preprocess(src) binary = src_to_binary(src) if fn.endswith('.s') or fn.endswith('.S'): diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py index 7a92a8e..e348363 100644 --- a/esp32_ulp/assemble.py +++ b/esp32_ulp/assemble.py @@ -87,8 +87,8 @@ def set_global(self, symbol): class Assembler: - 
def __init__(self, symbols=None, bases=None, globls=None): - self.symbols = SymbolTable(symbols or {}, bases or {}, globls or {}) + def __init__(self, symbols=None, bases=None, globals=None): + self.symbols = SymbolTable(symbols or {}, bases or {}, globals or {}) opcodes.symbols = self.symbols # XXX dirty hack def init(self, a_pass): @@ -223,7 +223,7 @@ def d_align(self, align=4, fill=None): self.fill(self.section, amount, fill) def d_set(self, symbol, expr): - value = int(opcodes.eval_arg(expr)) # TODO: support more than just integers + value = int(opcodes.eval_arg(expr)) self.symbols.set_sym(symbol, ABS, None, value) def d_global(self, symbol): @@ -265,6 +265,12 @@ def assembler_pass(self, lines): # machine instruction func = getattr(opcodes, 'i_' + opcode.lower(), None) if func is not None: + # during the first pass, symbols are not all known yet. + # so some expressions may not evaluate to something (yet). + # instruction building requires sane arguments however. + # since all instructions are 4 bytes long, we simply skip + # building instructions during pass 1, and append an "empty + # instruction" to the section to get the right section size. 
instruction = 0 if self.a_pass == 1 else func(*args) self.append_section(instruction.to_bytes(4, 'little'), TEXT) continue diff --git a/esp32_ulp/definesdb.py b/esp32_ulp/definesdb.py index ce1d232..4a05459 100644 --- a/esp32_ulp/definesdb.py +++ b/esp32_ulp/definesdb.py @@ -19,8 +19,11 @@ def clear(self): except OSError: pass + def is_open(self): + return self._db is not None + def open(self): - if self._db: + if self.is_open(): return try: self._file = open(DBNAME, 'r+b') @@ -30,7 +33,7 @@ def open(self): self._db_exists = True def close(self): - if not self._db: + if not self.is_open(): return self._db.close() self._db = None diff --git a/esp32_ulp/opcodes.py b/esp32_ulp/opcodes.py index 8a5b6d7..103b1f7 100644 --- a/esp32_ulp/opcodes.py +++ b/esp32_ulp/opcodes.py @@ -307,10 +307,10 @@ def arg_qualify(arg): pass try: entry = symbols.get_sym(arg) - return ARG(SYM, entry, arg) except KeyError: - pass - return ARG(IMM, int(eval_arg(arg)), arg) + return ARG(IMM, int(eval_arg(arg)), arg) + else: + return ARG(SYM, entry, arg) def get_reg(arg): diff --git a/esp32_ulp/preprocess.py b/esp32_ulp/preprocess.py index a890005..03a9317 100644 --- a/esp32_ulp/preprocess.py +++ b/esp32_ulp/preprocess.py @@ -14,13 +14,12 @@ def READ_RTC_REG(rtc_reg, low_bit, bit_width): @staticmethod def WRITE_RTC_REG(rtc_reg, low_bit, bit_width, value): - args = ( + return '\treg_wr ' + ', '.join(( rtc_reg, '%s + %s - 1' % (low_bit, bit_width), low_bit, value - ) - return '\treg_wr ' + ', '.join(args) + )) @staticmethod def READ_RTC_FIELD(rtc_reg, low_bit): diff --git a/esp32_ulp/util.py b/esp32_ulp/util.py index 0dacf72..d79c538 100644 --- a/esp32_ulp/util.py +++ b/esp32_ulp/util.py @@ -19,14 +19,14 @@ def split_tokens(line): tokens = [] state = NORMAL for c in line: - if ('a' <= c <= 'z') or ('A' <= c <= 'Z') or ('0' <= c <= '9') or c == '_': + if c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_": if state != NORMAL: if len(buf) > 0: tokens.append(buf) buf = "" state = 
NORMAL buf += c - elif c == ' ' or c == '\t': + elif c in " \t": if state != WHITESPACE: if len(buf) > 0: tokens.append(buf) diff --git a/tests/compat/expr.S b/tests/compat/expr.S index 48f7304..3650623 100644 --- a/tests/compat/expr.S +++ b/tests/compat/expr.S @@ -1,3 +1,4 @@ +# common example of real world code using expressions .set adc_channel, 6 .set adc_oversampling_factor_log, 2 @@ -26,7 +27,13 @@ measure: move r3, result st r0, r3, 0 - #test that expressions evaluate correctly for all supported operators +exit: + halt + + +# --- +# test that expressions evaluate correctly for all supported operators +# (these statements do not mean anything other than testing the operations) move r3, 1+2 move r3, 3-5 move r3, -5 @@ -39,6 +46,3 @@ measure: move r3, 0x1234 & ~2 move r3, 42|4&0xf # 46 (4&0xf is evaluated first) move r3, (42|4)&0xf # 14 (42|4 is evaluated first) - -exit: - halt diff --git a/tests/fixtures/incl.h b/tests/fixtures/incl.h index 5c8415e..712aa7c 100644 --- a/tests/fixtures/incl.h +++ b/tests/fixtures/incl.h @@ -2,4 +2,4 @@ #define MACRO(x,y) x+y #define MULTI_LINE abc \ xyz -#define CONST2 99 \ No newline at end of file +#define CONST2 99 diff --git a/tests/fixtures/incl2.h b/tests/fixtures/incl2.h index 09775d1..d19aeba 100644 --- a/tests/fixtures/incl2.h +++ b/tests/fixtures/incl2.h @@ -1,2 +1,2 @@ #define CONST2 123 -#define CONST3 777 \ No newline at end of file +#define CONST3 777 diff --git a/tests/preprocess.py b/tests/preprocess.py index 30f4e49..5a3825d 100644 --- a/tests/preprocess.py +++ b/tests/preprocess.py @@ -312,6 +312,26 @@ def test_preprocess_should_ensure_no_definesdb_is_created_when_only_reading_from assert not file_exists(DBNAME) +@test +def test_preprocess_should_ensure_the_definesdb_is_properly_closed_after_use(): + content = """\ + #define CONST 42 + move r1, CONST""" + + # remove any existing db + db = DefinesDB() + db.open() + assert db.is_open() + + # now preprocess using db + p = Preprocessor() + p.use_db(db) + + 
p.preprocess(content) + + assert not db.is_open() + + if __name__ == '__main__': # run all methods marked with @test for t in tests: From 47d5e8a9e9e309cd8e50eeb9f8d8f36a7f67055a Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Mon, 9 Aug 2021 20:22:22 +0300 Subject: [PATCH 29/29] Updated LICENSE file and added AUTHORS file --- AUTHORS | 8 ++++++++ LICENSE | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 AUTHORS diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..571f8ee --- /dev/null +++ b/AUTHORS @@ -0,0 +1,8 @@ +E-mail addresses listed here are not intended for support. + +py-esp32-ulp authors +-------------------- +py-esp32-ulp is written and maintained by Thomas Waldmann and various contributors: + +- Thomas Waldmann +- Wilko Nienhaus diff --git a/LICENSE b/LICENSE index 6fc734f..46bf124 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2018 Thomas Waldmann +Copyright 2018-2021 by the py-esp32-ulp authors, see AUTHORS file Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal