Skip to content

Commit d38263b

Browse files
authored
separately track unsafe getters (#29)
1 parent f50b9ef commit d38263b

File tree

9 files changed

+138
-38
lines changed

9 files changed

+138
-38
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ lib/lexer.wasm: include-wasm/cjs-module-lexer.h src/lexer.c
55
@mkdir -p lib
66
../wasi-sdk-11.0/bin/clang src/lexer.c -I include-wasm --sysroot=../wasi-sdk-11.0/share/wasi-sysroot -o lib/lexer.wasm -nostartfiles \
77
-Wl,-z,stack-size=13312,--no-entry,--compress-relocations,--strip-all,--export=__heap_base,\
8-
--export=parseCJS,--export=sa,--export=e,--export=re,--export=es,--export=ee,--export=rre,--export=ree,--export=res,--export=ree \
8+
--export=parseCJS,--export=sa,--export=e,--export=re,--export=es,--export=ee,--export=rre,--export=ree,--export=res,--export=ru,--export=us,--export=ue \
99
-Wno-logical-op-parentheses -Wno-parentheses \
1010
-Oz
1111

README.md

+46-22
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,9 @@ EXPORTS_SPREAD: `...` (IDENTIFIER | REQUIRE)
8686
8787
EXPORTS_MEMBER: EXPORTS_DOT_ASSIGN | EXPORTS_LITERAL_COMPUTED_ASSIGN
8888
89-
EXPORTS_DEFINE: `Object` `.` `defineProperty `(` IDENTIFIER_STRING `, {`
89+
EXPORTS_DEFINE: `Object` `.` `defineProperty` `(` EXPORTS_IDENTIFIER `,` IDENTIFIER_STRING
90+
91+
EXPORTS_DEFINE_VALUE: EXPORTS_DEFINE `, {`
9092
(`enumerable: true,`)?
9193
(
9294
`value:` |
@@ -121,7 +123,9 @@ EXPORT_STAR_LIB: `Object.keys(` IDENTIFIER$1 `).forEach(function (` IDENTIFIER$2
121123

122124
Spacing between tokens is taken to be any ECMA-262 whitespace, ECMA-262 block comment or ECMA-262 line comment.
123125

124-
* The returned export names are taken to be the combination of the `IDENTIFIER` and `IDENTIFIER_STRING` slots for all `EXPORTS_MEMBER`, `EXPORTS_LITERAL` and `EXPORTS_DEFINE` matches.
126+
* The returned export names are taken to be the combination of:
127+
1. All `IDENTIFIER` and `IDENTIFIER_STRING` slots for `EXPORTS_MEMBER` and `EXPORTS_LITERAL` matches.
128+
2. The first `IDENTIFIER_STRING` slot for all `EXPORTS_DEFINE_VALUE` matches, excluding any string that also appears in an `EXPORTS_DEFINE` match which is not itself an `EXPORTS_DEFINE_VALUE` match.
125129
* The reexport specifiers are taken to be the combination of:
126130
1. The `REQUIRE` matches of the last matched of either `MODULE_EXPORTS_ASSIGN` or `EXPORTS_LITERAL`.
127131
2. All _top-level_ `EXPORT_STAR` `REQUIRE` matches and `EXPORTS_ASSIGN` matches whose `IDENTIFIER` also matches the first `IDENTIFIER` in `EXPORT_STAR_LIB`.
@@ -162,6 +166,8 @@ It will in turn underclassify in cases where the identifiers are renamed:
162166
})(exports);
163167
```
164168

169+
#### Getter Exports Parsing
170+
165171
`Object.defineProperty` is detected for specifically value and getter forms returning an identifier or member expression:
166172

167173
```js
@@ -188,6 +194,24 @@ Object.defineProperty(exports, 'd', { value: 'd' });
188194
Object.defineProperty(exports, '__esModule', { value: true });
189195
```
190196

197+
To avoid matching getters that have side effects, any export name with a getter that does not match the forms above will
198+
opt out of getter matching entirely:
199+
200+
```js
201+
// DETECTS: NO EXPORTS
202+
Object.defineProperty(exports, 'a', {
203+
value: 'no problem'
204+
});
205+
206+
if (false) {
207+
Object.defineProperty(module.exports, 'a', {
208+
get () {
209+
return dynamic();
210+
}
211+
})
212+
}
213+
```
214+
191215
Alternative object definition structures or getter function bodies are not detected:
192216

193217
```js
@@ -337,63 +361,63 @@ JS Build:
337361

338362
```
339363
Module load time
340-
> 5ms
364+
> 4ms
341365
Cold Run, All Samples
342366
test/samples/*.js (3635 KiB)
343-
> 323ms
367+
> 299ms
344368
345369
Warm Runs (average of 25 runs)
346370
test/samples/angular.js (1410 KiB)
347-
> 14.84ms
371+
> 13.96ms
348372
test/samples/angular.min.js (303 KiB)
349-
> 4.8ms
373+
> 4.72ms
350374
test/samples/d3.js (553 KiB)
351-
> 7.84ms
375+
> 6.76ms
352376
test/samples/d3.min.js (250 KiB)
353377
> 4ms
354378
test/samples/magic-string.js (34 KiB)
355-
> 0.72ms
379+
> 0.64ms
356380
test/samples/magic-string.min.js (20 KiB)
357-
> 0.4ms
381+
> 0ms
358382
test/samples/rollup.js (698 KiB)
359-
> 9.32ms
383+
> 8.48ms
360384
test/samples/rollup.min.js (367 KiB)
361-
> 6.52ms
385+
> 5.36ms
362386
363387
Warm Runs, All Samples (average of 25 runs)
364388
test/samples/*.js (3635 KiB)
365-
> 44ms
389+
> 40.28ms
366390
```
367391

368392
Wasm Build:
369393
```
370394
Module load time
371-
> 11ms
395+
> 10ms
372396
Cold Run, All Samples
373397
test/samples/*.js (3635 KiB)
374-
> 42ms
398+
> 43ms
375399
376400
Warm Runs (average of 25 runs)
377401
test/samples/angular.js (1410 KiB)
378-
> 9.92ms
402+
> 9.32ms
379403
test/samples/angular.min.js (303 KiB)
380-
> 3.2ms
404+
> 3.16ms
381405
test/samples/d3.js (553 KiB)
382-
> 5.2ms
406+
> 5ms
383407
test/samples/d3.min.js (250 KiB)
384-
> 2.52ms
408+
> 2.32ms
385409
test/samples/magic-string.js (34 KiB)
386410
> 0.16ms
387411
test/samples/magic-string.min.js (20 KiB)
388-
> 0.04ms
412+
> 0ms
389413
test/samples/rollup.js (698 KiB)
390-
> 6.44ms
414+
> 6.28ms
391415
test/samples/rollup.min.js (367 KiB)
392-
> 3.96ms
416+
> 3.6ms
393417
394418
Warm Runs, All Samples (average of 25 runs)
395419
test/samples/*.js (3635 KiB)
396-
> 30.48ms
420+
> 27.76ms
397421
```
398422

399423
### Wasm Build Steps

bench/index.mjs

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ Promise.resolve().then(async () => {
3131
console.log('Module load time');
3232
{
3333
const start = process.hrtime.bigint();
34-
var { default: parse } = await import('../lexer.js');
34+
var { parse } = await import('../lexer.js');
3535
console.log(`> ${c.bold.green(Math.round(Number(process.hrtime.bigint() - start) / 1e6) + 'ms')}`);
3636
}
3737

include-wasm/cjs-module-lexer.h

+38-1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ Slice* export_write_head = NULL;
3030
Slice* first_reexport = NULL;
3131
Slice* reexport_read_head = NULL;
3232
Slice* reexport_write_head = NULL;
33+
Slice* first_unsafe_getter = NULL;
34+
Slice* unsafe_getter_read_head = NULL;
35+
Slice* unsafe_getter_write_head = NULL;
3336
void* analysis_base;
3437
void* analysis_head;
3538

@@ -48,6 +51,9 @@ const uint16_t* sa (uint32_t utf16Len) {
4851
first_reexport = NULL;
4952
reexport_write_head = NULL;
5053
reexport_read_head = NULL;
54+
first_unsafe_getter = NULL;
55+
unsafe_getter_write_head = NULL;
56+
unsafe_getter_read_head = NULL;
5157
return source;
5258
}
5359

@@ -72,6 +78,14 @@ uint32_t res () {
7278
uint32_t ree () {
7379
return reexport_read_head->end - source;
7480
}
81+
// getUnsafeGetterStart
82+
uint32_t us () {
83+
return unsafe_getter_read_head->start - source;
84+
}
85+
// getUnsafeGetterEnd
86+
uint32_t ue () {
87+
return unsafe_getter_read_head->end - source;
88+
}
7589
// readExport
7690
bool re () {
7791
if (export_read_head == NULL)
@@ -92,6 +106,16 @@ bool rre () {
92106
return false;
93107
return true;
94108
}
109+
// readUnsafeGetter
110+
bool ru () {
111+
if (unsafe_getter_read_head == NULL)
112+
unsafe_getter_read_head = first_unsafe_getter;
113+
else
114+
unsafe_getter_read_head = unsafe_getter_read_head->next;
115+
if (unsafe_getter_read_head == NULL)
116+
return false;
117+
return true;
118+
}
95119

96120
bool parse (uint32_t point);
97121

@@ -119,14 +143,27 @@ void _addReexport (const uint16_t* start, const uint16_t* end) {
119143
reexport->end = end;
120144
reexport->next = NULL;
121145
}
146+
void _addUnsafeGetter (const uint16_t* start, const uint16_t* end) {
147+
Slice* unsafe_getter = (Slice*)(analysis_head);
148+
analysis_head = analysis_head + sizeof(Slice);
149+
if (unsafe_getter_write_head == NULL)
150+
first_unsafe_getter = unsafe_getter;
151+
else
152+
unsafe_getter_write_head->next = unsafe_getter;
153+
unsafe_getter_write_head = unsafe_getter;
154+
unsafe_getter->start = start;
155+
unsafe_getter->end = end;
156+
unsafe_getter->next = NULL;
157+
}
122158
void _clearReexports () {
123159
reexport_write_head = NULL;
124160
first_reexport = NULL;
125161
}
126162
void (*addExport)(const uint16_t*, const uint16_t*) = &_addExport;
127163
void (*addReexport)(const uint16_t*, const uint16_t*) = &_addReexport;
164+
void (*addUnsafeGetter)(const uint16_t*, const uint16_t*) = &_addUnsafeGetter;
128165
void (*clearReexports)() = &_clearReexports;
129-
bool parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t* start, const uint16_t* end), void (*addReexport)(const uint16_t* start, const uint16_t* end), void (*clearReexports)());
166+
bool parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t* start, const uint16_t* end), void (*addReexport)(const uint16_t* start, const uint16_t* end), void (*addUnsafeGetter)(const uint16_t*, const uint16_t*), void (*clearReexports)());
130167

131168
enum RequireType {
132169
Import,

include/cjs-module-lexer.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ typedef struct StarExportBinding StarExportBinding;
2727

2828
void bail (uint32_t err);
2929

30-
bool parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t*, const uint16_t*), void (*addReexport)(const uint16_t*, const uint16_t*), void (*clearReexports)());
30+
bool parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t*, const uint16_t*), void (*addReexport)(const uint16_t*, const uint16_t*), void (*addUnsafeGetter)(const uint16_t*, const uint16_t*), void (*clearReexports)());
3131

3232
enum RequireType {
3333
Import,

lexer.js

+10-4
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ let openTokenDepth,
1111
starExportMap,
1212
lastStarExportSpecifier,
1313
_exports,
14+
unsafeGetters,
1415
reexports;
1516

1617
function resetState () {
@@ -27,6 +28,7 @@ function resetState () {
2728
lastStarExportSpecifier = null;
2829

2930
_exports = new Set();
31+
unsafeGetters = new Set();
3032
reexports = new Set();
3133
}
3234

@@ -47,7 +49,7 @@ function parseCJS (source, name = '@') {
4749
e.loc = pos;
4850
throw e;
4951
}
50-
const result = { exports: [..._exports], reexports: [...reexports] };
52+
const result = { exports: [..._exports].filter(expt => !unsafeGetters.has(expt)), reexports: [...reexports] };
5153
resetState();
5254
return result;
5355
}
@@ -260,6 +262,7 @@ function tryParseObjectDefineOrKeys (keys) {
260262
pos++;
261263
ch = commentWhitespace();
262264
if (ch === 100/*d*/ && source.startsWith('efineProperty', pos + 1)) {
265+
let expt;
263266
while (true) {
264267
pos += 14;
265268
revertPos = pos - 1;
@@ -276,7 +279,7 @@ function tryParseObjectDefineOrKeys (keys) {
276279
let quot = ch;
277280
const exportPos = ++pos;
278281
if (!identifier() || source.charCodeAt(pos) !== quot) break;
279-
const expt = source.slice(exportPos, pos);
282+
expt = source.slice(exportPos, pos);
280283
pos++;
281284
ch = commentWhitespace();
282285
if (ch !== 44/*,*/) break;
@@ -304,9 +307,9 @@ function tryParseObjectDefineOrKeys (keys) {
304307
pos += 5;
305308
ch = commentWhitespace();
306309
if (ch !== 58/*:*/) break;
307-
pos++;
308310
addExport(expt);
309-
break;
311+
pos = revertPos;
312+
return;
310313
}
311314
else if (ch === 103/*g*/) {
312315
if (!source.startsWith('et', pos + 1)) break;
@@ -372,6 +375,9 @@ function tryParseObjectDefineOrKeys (keys) {
372375
}
373376
break;
374377
}
378+
if (expt) {
379+
unsafeGetters.add(expt);
380+
}
375381
}
376382
else if (keys && ch === 107/*k*/ && source.startsWith('eys', pos + 1)) {
377383
while (true) {

src/lexer.c

+13-5
Original file line numberDiff line numberDiff line change
@@ -41,16 +41,19 @@ const StarExportBinding* STAR_EXPORT_STACK_END = &starExportStack_[MAX_STAR_EXPO
4141

4242
void (*addExport)(const uint16_t*, const uint16_t*);
4343
void (*addReexport)(const uint16_t*, const uint16_t*);
44+
void (*addUnsafeGetter)(const uint16_t*, const uint16_t*);
4445
void (*clearReexports)();
4546

4647
// Note: parsing is based on the _assumption_ that the source is already valid
47-
bool parseCJS (uint16_t* _source, uint32_t _sourceLen, void (*_addExport)(const uint16_t*, const uint16_t*), void (*_addReexport)(const uint16_t*, const uint16_t*), void (*_clearReexports)()) {
48+
bool parseCJS (uint16_t* _source, uint32_t _sourceLen, void (*_addExport)(const uint16_t*, const uint16_t*), void (*_addReexport)(const uint16_t*, const uint16_t*), void (*_addUnsafeGetter)(const uint16_t*, const uint16_t*), void (*_clearReexports)()) {
4849
source = _source;
4950
sourceLen = _sourceLen;
5051
if (_addExport)
5152
addExport = _addExport;
5253
if (_addReexport)
5354
addReexport = _addReexport;
55+
if (_addUnsafeGetter)
56+
addUnsafeGetter = _addUnsafeGetter;
5457

5558
templateStackDepth = 0;
5659
openTokenDepth = 0;
@@ -272,6 +275,8 @@ void tryParseObjectDefineOrKeys (bool keys) {
272275
pos++;
273276
ch = commentWhitespace();
274277
if (ch == 'd' && str_eq13(pos + 1, 'e', 'f', 'i', 'n', 'e', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'y')) {
278+
uint16_t* exportStart = 0;
279+
uint16_t* exportEnd = 0;
275280
while (true) {
276281
pos += 14;
277282
revertPos = pos - 1;
@@ -286,9 +291,9 @@ void tryParseObjectDefineOrKeys (bool keys) {
286291
ch = commentWhitespace();
287292
if (ch != '\'' && ch != '"') break;
288293
uint16_t quot = ch;
289-
uint16_t* exportStart = ++pos;
294+
exportStart = ++pos;
290295
if (!identifier(*pos) || *pos != quot) break;
291-
uint16_t* exportEnd = pos;
296+
exportEnd = pos;
292297
pos++;
293298
ch = commentWhitespace();
294299
if (ch != ',') break;
@@ -316,9 +321,9 @@ void tryParseObjectDefineOrKeys (bool keys) {
316321
pos += 5;
317322
ch = commentWhitespace();
318323
if (ch != ':') break;
319-
pos++;
320324
addExport(exportStart, exportEnd);
321-
break;
325+
pos = revertPos;
326+
return;
322327
}
323328
else if (ch == 'g') {
324329
if (!str_eq2(pos + 1, 'e', 't')) break;
@@ -388,6 +393,9 @@ void tryParseObjectDefineOrKeys (bool keys) {
388393
}
389394
break;
390395
}
396+
if (exportEnd > 0) {
397+
addUnsafeGetter(exportStart, exportEnd);
398+
}
391399
}
392400
else if (keys && ch == 'k' && str_eq3(pos + 1, 'e', 'y', 's')) {
393401
while (true) {

src/lexer.js

+6-3
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,18 @@ export function parse (source, name = '@') {
1818
const addr = wasm.sa(len);
1919
(isLE ? copyLE : copyBE)(source, new Uint16Array(wasm.memory.buffer, addr, len));
2020

21-
if (!wasm.parseCJS(addr, source.length, 0, 0))
21+
if (!wasm.parseCJS(addr, source.length, 0, 0, 0))
2222
throw Object.assign(new Error(`Parse error ${name}${wasm.e()}:${source.slice(0, wasm.e()).split('\n').length}:${wasm.e() - source.lastIndexOf('\n', wasm.e() - 1)}`), { idx: wasm.e() });
2323

24-
let exports = new Set(), reexports = new Set();
24+
let exports = new Set(), reexports = new Set(), unsafeGetters = new Set();
25+
2526
while (wasm.rre())
2627
reexports.add(source.slice(wasm.res(), wasm.ree()));
28+
while (wasm.ru())
29+
unsafeGetters.add(source.slice(wasm.us(), wasm.ue()));
2730
while (wasm.re()) {
2831
let exptStr = source.slice(wasm.es(), wasm.ee());
29-
if (!strictReserved.has(exptStr))
32+
if (!strictReserved.has(exptStr) && !unsafeGetters.has(exptStr))
3033
exports.add(exptStr);
3134
}
3235

0 commit comments

Comments
 (0)