3
3
"""
4
4
5
5
from . import opcodes
6
- from .nocomment import remove_comments
6
+ from .nocomment import remove_comments as do_remove_comments
7
7
from .util import garbage_collect
8
8
9
9
TEXT , DATA , BSS = 'text' , 'data' , 'bss'
12
12
13
13
14
14
class SymbolTable :
15
- def __init__ (self , symbols , bases ):
15
+ def __init__ (self , symbols , bases , globals ):
16
16
self ._symbols = symbols
17
17
self ._bases = bases
18
- self ._pass = None
19
-
20
- def set_pass (self , _pass ):
21
- self ._pass = _pass
18
+ self ._globals = globals
22
19
23
20
def set_bases (self , bases ):
24
21
self ._bases = bases
@@ -32,38 +29,28 @@ def get_from(self):
32
29
def set_sym (self , symbol , stype , section , value ):
33
30
entry = (stype , section , value )
34
31
if symbol in self ._symbols and entry != self ._symbols [symbol ]:
35
- raise Exception ('redefining symbol %s with different value %r -> %r.' % (label , self ._symbols [symbol ], entry ))
32
+ raise Exception ('redefining symbol %s with different value %r -> %r.' % (symbol , self ._symbols [symbol ], entry ))
36
33
self ._symbols [symbol ] = entry
37
34
38
35
def has_sym (self , symbol ):
39
36
return symbol in self ._symbols
40
37
41
38
def get_sym (self , symbol ):
42
- try :
43
- entry = self ._symbols [symbol ]
44
- except KeyError :
45
- if self ._pass == 1 :
46
- entry = (REL , TEXT , 0 ) # for a dummy, this is good enough
47
- else :
48
- raise
39
+ entry = self ._symbols [symbol ]
49
40
return entry
50
41
51
42
def dump (self ):
52
43
for symbol , entry in self ._symbols .items ():
53
44
print (symbol , entry )
54
45
55
- def export (self ):
56
- addrs_syms = [(self .resolve_absolute (entry ), symbol ) for symbol , entry in self ._symbols .items ()]
46
+ def export (self , incl_non_globals = False ):
47
+ addrs_syms = [(self .resolve_absolute (entry ), symbol )
48
+ for symbol , entry in self ._symbols .items ()
49
+ if incl_non_globals or symbol in self ._globals ]
57
50
return sorted (addrs_syms )
58
51
59
52
def to_abs_addr (self , section , offset ):
60
- try :
61
- base = self ._bases [section ]
62
- except KeyError :
63
- if self ._pass == 1 :
64
- base = 0 # for a dummy this is good enough
65
- else :
66
- raise
53
+ base = self ._bases [section ]
67
54
return base + offset
68
55
69
56
def resolve_absolute (self , symbol ):
@@ -93,16 +80,19 @@ def resolve_relative(self, symbol):
93
80
from_addr = self .to_abs_addr (self ._from_section , self ._from_offset )
94
81
return sym_addr - from_addr
95
82
83
+ def set_global (self , symbol ):
84
+ self ._globals [symbol ] = True
85
+ pass
86
+
96
87
97
88
class Assembler :
98
89
99
- def __init__ (self , symbols = None , bases = None ):
100
- self .symbols = SymbolTable (symbols or {}, bases or {})
90
+ def __init__ (self , symbols = None , bases = None , globals = None ):
91
+ self .symbols = SymbolTable (symbols or {}, bases or {}, globals or {} )
101
92
opcodes .symbols = self .symbols # XXX dirty hack
102
93
103
94
def init (self , a_pass ):
104
95
self .a_pass = a_pass
105
- self .symbols .set_pass (a_pass )
106
96
self .sections = dict (text = [], data = [])
107
97
self .offsets = dict (text = 0 , data = 0 , bss = 0 )
108
98
self .section = TEXT
@@ -118,7 +108,7 @@ def parse_line(self, line):
118
108
"""
119
109
if not line :
120
110
return
121
- has_label = line [0 ] not in '\t '
111
+ has_label = line [0 ] not in '\t . '
122
112
if has_label :
123
113
label_line = line .split (None , 1 )
124
114
if len (label_line ) == 2 :
@@ -150,8 +140,10 @@ def append_section(self, value, expected_section=None):
150
140
if expected_section is not None and s is not expected_section :
151
141
raise TypeError ('only allowed in %s section' % expected_section )
152
142
if s is BSS :
153
- # just increase BSS size by value
154
- self .offsets [s ] += value
143
+ if int .from_bytes (value , 'little' ) != 0 :
144
+ raise ValueError ('attempt to store non-zero value in section .bss' )
145
+ # just increase BSS size by length of value
146
+ self .offsets [s ] += len (value )
155
147
else :
156
148
self .sections [s ].append (value )
157
149
self .offsets [s ] += len (value )
@@ -231,9 +223,12 @@ def d_align(self, align=4, fill=None):
231
223
self .fill (self .section , amount , fill )
232
224
233
225
def d_set (self , symbol , expr ):
234
- value = int (expr ) # TODO: support more than just integers
226
+ value = int (opcodes . eval_arg ( expr ))
235
227
self .symbols .set_sym (symbol , ABS , None , value )
236
228
229
+ def d_global (self , symbol ):
230
+ self .symbols .set_global (symbol )
231
+
237
232
def append_data (self , wordlen , args ):
238
233
data = [int (arg ).to_bytes (wordlen , 'little' ) for arg in args ]
239
234
self .append_section (b'' .join (data ))
@@ -245,6 +240,11 @@ def d_word(self, *args):
245
240
self .append_data (2 , args )
246
241
247
242
def d_long (self , * args ):
243
+ self .d_int (* args )
244
+
245
+ def d_int (self , * args ):
246
+ # .long and .int are identical as per GNU assembler documentation
247
+ # https://sourceware.org/binutils/docs/as/Long.html
248
248
self .append_data (4 , args )
249
249
250
250
def assembler_pass (self , lines ):
@@ -263,16 +263,22 @@ def assembler_pass(self, lines):
263
263
continue
264
264
else :
265
265
# machine instruction
266
- func = getattr (opcodes , 'i_' + opcode , None )
266
+ func = getattr (opcodes , 'i_' + opcode . lower () , None )
267
267
if func is not None :
268
- instruction = func (* args )
268
+ # during the first pass, symbols are not all known yet.
269
+ # so some expressions may not evaluate to something (yet).
270
+ # instruction building requires sane arguments however.
271
+ # since all instructions are 4 bytes long, we simply skip
272
+ # building instructions during pass 1, and append an "empty
273
+ # instruction" to the section to get the right section size.
274
+ instruction = 0 if self .a_pass == 1 else func (* args )
269
275
self .append_section (instruction .to_bytes (4 , 'little' ), TEXT )
270
276
continue
271
- raise Exception ('Unknown opcode or directive: %s' % opcode )
277
+ raise ValueError ('Unknown opcode or directive: %s' % opcode )
272
278
self .finalize_sections ()
273
279
274
- def assemble (self , text ):
275
- lines = remove_comments (text )
280
+ def assemble (self , text , remove_comments = True ):
281
+ lines = do_remove_comments (text ) if remove_comments else text . splitlines ( )
276
282
self .init (1 ) # pass 1 is only to get the symbol table right
277
283
self .assembler_pass (lines )
278
284
self .symbols .set_bases (self .compute_bases ())
0 commit comments