Skip to content

Commit 132a5c1

Browse files
GeoffreyBoothmarco-ippolito
authored andcommittedFeb 11, 2025
module: eliminate performance cost of detection for cjs entry
PR-URL: #52093 Backport-PR-URL: #56927 Reviewed-By: Matteo Collina <[email protected]> Reviewed-By: Yagiz Nizipli <[email protected]> Reviewed-By: Joyee Cheung <[email protected]> Reviewed-By: Jacob Smith <[email protected]> Reviewed-By: Richard Lau <[email protected]> Refs: #52697
1 parent 697a392 commit 132a5c1

File tree

6 files changed

+192
-97
lines changed

6 files changed

+192
-97
lines changed
 

‎benchmark/misc/startup-core.js

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ const bench = common.createBenchmark(main, {
99
script: [
1010
'benchmark/fixtures/require-builtins',
1111
'test/fixtures/semicolon',
12+
'test/fixtures/snapshot/typescript',
1213
],
1314
mode: ['process', 'worker'],
1415
n: [30],

‎lib/internal/modules/cjs/loader.js

+10-2
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ module.exports = {
106106
kModuleExportNames,
107107
kModuleCircularVisited,
108108
initializeCJS,
109+
entryPointSource: undefined, // Set below.
109110
Module,
110111
wrapSafe,
111112
kIsMainSymbol,
@@ -1392,8 +1393,15 @@ function wrapSafe(filename, content, cjsModuleInstance, codeCache) {
13921393
return result;
13931394
} catch (err) {
13941395
if (process.mainModule === cjsModuleInstance) {
1395-
const { enrichCJSError } = require('internal/modules/esm/translators');
1396-
enrichCJSError(err, content, filename);
1396+
if (getOptionValue('--experimental-detect-module')) {
1397+
// For the main entry point, cache the source to potentially retry as ESM.
1398+
module.exports.entryPointSource = content;
1399+
} else {
1400+
// We only enrich the error (print a warning) if we're sure we're going to for-sure throw it; so if we're
1401+
// retrying as ESM, wait until we know whether we're going to retry before calling `enrichCJSError`.
1402+
const { enrichCJSError } = require('internal/modules/esm/translators');
1403+
enrichCJSError(err, content, filename);
1404+
}
13971405
}
13981406
throw err;
13991407
}

‎lib/internal/modules/helpers.js

+35
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,15 @@ const {
1919
} = require('internal/errors').codes;
2020
const { BuiltinModule } = require('internal/bootstrap/realm');
2121

22+
const {
23+
shouldRetryAsESM: contextifyShouldRetryAsESM,
24+
constants: {
25+
syntaxDetectionErrors: {
26+
esmSyntaxErrorMessages,
27+
throwsOnlyInCommonJSErrorMessages,
28+
},
29+
},
30+
} = internalBinding('contextify');
2231
const { validateString } = require('internal/validators');
2332
const fs = require('fs'); // Import all of `fs` so that it can be monkey-patched.
2433
const internalFS = require('internal/fs/utils');
@@ -329,6 +338,31 @@ function urlToFilename(url) {
329338
return url;
330339
}
331340

341+
let esmSyntaxErrorMessagesSet; // Declared lazily in shouldRetryAsESM
342+
let throwsOnlyInCommonJSErrorMessagesSet; // Declared lazily in shouldRetryAsESM
343+
/**
344+
* After an attempt to parse a module as CommonJS throws an error, should we try again as ESM?
345+
* We only want to try again as ESM if the error is due to syntax that is only valid in ESM; and if the CommonJS parse
346+
* throws on an error that would not have been a syntax error in ESM (like via top-level `await` or a lexical
347+
* redeclaration of one of the CommonJS variables) then we need to parse again to see if it would have thrown in ESM.
348+
* @param {string} errorMessage The string message thrown by V8 when attempting to parse as CommonJS
349+
* @param {string} source Module contents
350+
*/
351+
function shouldRetryAsESM(errorMessage, source) {
352+
esmSyntaxErrorMessagesSet ??= new SafeSet(esmSyntaxErrorMessages);
353+
if (esmSyntaxErrorMessagesSet.has(errorMessage)) {
354+
return true;
355+
}
356+
357+
throwsOnlyInCommonJSErrorMessagesSet ??= new SafeSet(throwsOnlyInCommonJSErrorMessages);
358+
if (throwsOnlyInCommonJSErrorMessagesSet.has(errorMessage)) {
359+
return /** @type {boolean} */(contextifyShouldRetryAsESM(source));
360+
}
361+
362+
return false;
363+
}
364+
365+
332366
// Whether we have started executing any user-provided CJS code.
333367
// This is set right before we call the wrapped CJS code (not after,
334368
// in case we are half-way in the execution when internals check this).
@@ -362,6 +396,7 @@ module.exports = {
362396
loadBuiltinModule,
363397
makeRequireFunction,
364398
normalizeReferrerURL,
399+
shouldRetryAsESM,
365400
stripBOM,
366401
toRealPath,
367402
hasStartedUserCJSExecution() {

‎lib/internal/modules/run_main.js

+32-10
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ const {
44
StringPrototypeEndsWith,
55
} = primordials;
66

7-
const { containsModuleSyntax } = internalBinding('contextify');
87
const { getOptionValue } = require('internal/options');
98
const path = require('path');
109
const { pathToFileURL } = require('internal/url');
@@ -85,10 +84,6 @@ function shouldUseESMLoader(mainPath) {
8584
case 'commonjs':
8685
return false;
8786
default: { // No package.json or no `type` field.
88-
if (getOptionValue('--experimental-detect-module')) {
89-
// If the first argument of `containsModuleSyntax` is undefined, it will read `mainPath` from the file system.
90-
return containsModuleSyntax(undefined, mainPath);
91-
}
9287
return false;
9388
}
9489
}
@@ -153,12 +148,43 @@ function runEntryPointWithESMLoader(callback) {
153148
* by `require('module')`) even when the entry point is ESM.
154149
* This monkey-patchable code is bypassed under `--experimental-default-type=module`.
155150
* Because of backwards compatibility, this function is exposed publicly via `import { runMain } from 'node:module'`.
151+
* When `--experimental-detect-module` is passed, this function will attempt to run ambiguous (no explicit extension, no
152+
* `package.json` type field) entry points as CommonJS first; under certain conditions, it will retry running as ESM.
156153
* @param {string} main - First positional CLI argument, such as `'entry.js'` from `node entry.js`
157154
*/
158155
function executeUserEntryPoint(main = process.argv[1]) {
159156
const resolvedMain = resolveMainPath(main);
160157
const useESMLoader = shouldUseESMLoader(resolvedMain);
161-
if (useESMLoader) {
158+
159+
// Unless we know we should use the ESM loader to handle the entry point per the checks in `shouldUseESMLoader`, first
160+
// try to run the entry point via the CommonJS loader; and if that fails under certain conditions, retry as ESM.
161+
let retryAsESM = false;
162+
if (!useESMLoader) {
163+
const cjsLoader = require('internal/modules/cjs/loader');
164+
const { Module } = cjsLoader;
165+
if (getOptionValue('--experimental-detect-module')) {
166+
try {
167+
// Module._load is the monkey-patchable CJS module loader.
168+
Module._load(main, null, true);
169+
} catch (error) {
170+
const source = cjsLoader.entryPointSource;
171+
const { shouldRetryAsESM } = require('internal/modules/helpers');
172+
retryAsESM = shouldRetryAsESM(error.message, source);
173+
// In case the entry point is a large file, such as a bundle,
174+
// ensure no further references can prevent it being garbage-collected.
175+
cjsLoader.entryPointSource = undefined;
176+
if (!retryAsESM) {
177+
const { enrichCJSError } = require('internal/modules/esm/translators');
178+
enrichCJSError(error, source, resolvedMain);
179+
throw error;
180+
}
181+
}
182+
} else { // `--experimental-detect-module` is not passed
183+
Module._load(main, null, true);
184+
}
185+
}
186+
187+
if (useESMLoader || retryAsESM) {
162188
const mainPath = resolvedMain || main;
163189
const mainURL = pathToFileURL(mainPath).href;
164190

@@ -167,10 +193,6 @@ function executeUserEntryPoint(main = process.argv[1]) {
167193
// even after the event loop stops running.
168194
return cascadedLoader.import(mainURL, undefined, { __proto__: null }, true);
169195
});
170-
} else {
171-
// Module._load is the monkey-patchable CJS module loader.
172-
const { Module } = require('internal/modules/cjs/loader');
173-
Module._load(main, null, true);
174196
}
175197
}
176198

‎src/node_contextify.cc

+111-85
Original file line numberDiff line numberDiff line change
@@ -364,13 +364,15 @@ void ContextifyContext::CreatePerIsolateProperties(
364364
SetMethod(isolate, target, "makeContext", MakeContext);
365365
SetMethod(isolate, target, "compileFunction", CompileFunction);
366366
SetMethod(isolate, target, "containsModuleSyntax", ContainsModuleSyntax);
367+
SetMethod(isolate, target, "shouldRetryAsESM", ShouldRetryAsESM);
367368
}
368369

369370
void ContextifyContext::RegisterExternalReferences(
370371
ExternalReferenceRegistry* registry) {
371372
registry->Register(MakeContext);
372373
registry->Register(CompileFunction);
373374
registry->Register(ContainsModuleSyntax);
375+
registry->Register(ShouldRetryAsESM);
374376
registry->Register(PropertyGetterCallback);
375377
registry->Register(PropertySetterCallback);
376378
registry->Register(PropertyDescriptorCallback);
@@ -1447,7 +1449,7 @@ Local<Object> ContextifyContext::CompileFunctionAndCacheResult(
14471449
// While top-level `await` is not permitted in CommonJS, it returns the same
14481450
// error message as when `await` is used in a sync function, so we don't use it
14491451
// as a disambiguation.
1450-
constexpr std::array<std::string_view, 3> esm_syntax_error_messages = {
1452+
static std::vector<std::string_view> esm_syntax_error_messages = {
14511453
"Cannot use import statement outside a module", // `import` statements
14521454
"Unexpected token 'export'", // `export` statements
14531455
"Cannot use 'import.meta' outside a module"}; // `import.meta` references
@@ -1462,7 +1464,7 @@ constexpr std::array<std::string_view, 3> esm_syntax_error_messages = {
14621464
// - Top-level `await`: if the user writes `await` at the top level of a
14631465
// CommonJS module, it will throw a syntax error; but the same code is valid
14641466
// in ESM.
1465-
constexpr std::array<std::string_view, 6> throws_only_in_cjs_error_messages = {
1467+
static std::vector<std::string_view> throws_only_in_cjs_error_messages = {
14661468
"Identifier 'module' has already been declared",
14671469
"Identifier 'exports' has already been declared",
14681470
"Identifier 'require' has already been declared",
@@ -1482,35 +1484,17 @@ void ContextifyContext::ContainsModuleSyntax(
14821484
env, "containsModuleSyntax needs at least 1 argument");
14831485
}
14841486

1487+
// Argument 1: source code
1488+
CHECK(args[0]->IsString());
1489+
auto code = args[0].As<String>();
1490+
14851491
// Argument 2: filename; if undefined, use empty string
14861492
Local<String> filename = String::Empty(isolate);
14871493
if (!args[1]->IsUndefined()) {
14881494
CHECK(args[1]->IsString());
14891495
filename = args[1].As<String>();
14901496
}
14911497

1492-
// Argument 1: source code; if undefined, read from filename in argument 2
1493-
Local<String> code;
1494-
if (args[0]->IsUndefined()) {
1495-
CHECK(!filename.IsEmpty());
1496-
const char* filename_str = Utf8Value(isolate, filename).out();
1497-
std::string contents;
1498-
int result = ReadFileSync(&contents, filename_str);
1499-
if (result != 0) {
1500-
isolate->ThrowException(
1501-
ERR_MODULE_NOT_FOUND(isolate, "Cannot read file %s", filename_str));
1502-
return;
1503-
}
1504-
code = String::NewFromUtf8(isolate,
1505-
contents.c_str(),
1506-
v8::NewStringType::kNormal,
1507-
contents.length())
1508-
.ToLocalChecked();
1509-
} else {
1510-
CHECK(args[0]->IsString());
1511-
code = args[0].As<String>();
1512-
}
1513-
15141498
// TODO(geoffreybooth): Centralize this rather than matching the logic in
15151499
// cjs/loader.js and translators.js
15161500
Local<String> script_id = String::Concat(
@@ -1540,73 +1524,95 @@ void ContextifyContext::ContainsModuleSyntax(
15401524

15411525
bool should_retry_as_esm = false;
15421526
if (try_catch.HasCaught() && !try_catch.HasTerminated()) {
1543-
Utf8Value message_value(env->isolate(), try_catch.Message()->Get());
1544-
auto message = message_value.ToStringView();
1527+
should_retry_as_esm =
1528+
ContextifyContext::ShouldRetryAsESMInternal(env, code);
1529+
}
1530+
args.GetReturnValue().Set(should_retry_as_esm);
1531+
}
15451532

1546-
for (const auto& error_message : esm_syntax_error_messages) {
1547-
if (message.find(error_message) != std::string_view::npos) {
1548-
should_retry_as_esm = true;
1549-
break;
1550-
}
1551-
}
1533+
void ContextifyContext::ShouldRetryAsESM(
1534+
const FunctionCallbackInfo<Value>& args) {
1535+
Environment* env = Environment::GetCurrent(args);
1536+
1537+
CHECK_EQ(args.Length(), 1); // code
1538+
1539+
// Argument 1: source code
1540+
Local<String> code;
1541+
CHECK(args[0]->IsString());
1542+
code = args[0].As<String>();
15521543

1553-
if (!should_retry_as_esm) {
1554-
for (const auto& error_message : throws_only_in_cjs_error_messages) {
1555-
if (message.find(error_message) != std::string_view::npos) {
1556-
// Try parsing again where the CommonJS wrapper is replaced by an
1557-
// async function wrapper. If the new parse succeeds, then the error
1558-
// was caused by either a top-level declaration of one of the CommonJS
1559-
// module variables, or a top-level `await`.
1560-
TryCatchScope second_parse_try_catch(env);
1561-
code =
1562-
String::Concat(isolate,
1563-
String::NewFromUtf8(isolate, "(async function() {")
1564-
.ToLocalChecked(),
1565-
code);
1566-
code = String::Concat(
1567-
isolate,
1568-
code,
1569-
String::NewFromUtf8(isolate, "})();").ToLocalChecked());
1570-
ScriptCompiler::Source wrapped_source = GetCommonJSSourceInstance(
1571-
isolate, code, filename, 0, 0, host_defined_options, nullptr);
1572-
std::ignore = ScriptCompiler::CompileFunction(
1573-
context,
1574-
&wrapped_source,
1575-
params.size(),
1576-
params.data(),
1577-
0,
1578-
nullptr,
1579-
options,
1580-
v8::ScriptCompiler::NoCacheReason::kNoCacheNoReason);
1581-
if (!second_parse_try_catch.HasTerminated()) {
1582-
if (second_parse_try_catch.HasCaught()) {
1583-
// If on the second parse an error is thrown by ESM syntax, then
1584-
// what happened was that the user had top-level `await` or a
1585-
// top-level declaration of one of the CommonJS module variables
1586-
// above the first `import` or `export`.
1587-
Utf8Value second_message_value(
1588-
env->isolate(), second_parse_try_catch.Message()->Get());
1589-
auto second_message = second_message_value.ToStringView();
1590-
for (const auto& error_message : esm_syntax_error_messages) {
1591-
if (second_message.find(error_message) !=
1592-
std::string_view::npos) {
1593-
should_retry_as_esm = true;
1594-
break;
1595-
}
1596-
}
1597-
} else {
1598-
// No errors thrown in the second parse, so most likely the error
1599-
// was caused by a top-level `await` or a top-level declaration of
1600-
// one of the CommonJS module variables.
1601-
should_retry_as_esm = true;
1602-
}
1603-
}
1604-
break;
1544+
bool should_retry_as_esm =
1545+
ContextifyContext::ShouldRetryAsESMInternal(env, code);
1546+
1547+
args.GetReturnValue().Set(should_retry_as_esm);
1548+
}
1549+
1550+
bool ContextifyContext::ShouldRetryAsESMInternal(Environment* env,
1551+
Local<String> code) {
1552+
Isolate* isolate = env->isolate();
1553+
1554+
Local<String> script_id =
1555+
FIXED_ONE_BYTE_STRING(isolate, "[retry_as_esm_check]");
1556+
Local<Symbol> id_symbol = Symbol::New(isolate, script_id);
1557+
1558+
Local<PrimitiveArray> host_defined_options =
1559+
GetHostDefinedOptions(isolate, id_symbol);
1560+
ScriptCompiler::Source source =
1561+
GetCommonJSSourceInstance(isolate,
1562+
code,
1563+
script_id, // filename
1564+
0, // line offset
1565+
0, // column offset
1566+
host_defined_options,
1567+
nullptr); // cached_data
1568+
1569+
TryCatchScope try_catch(env);
1570+
ShouldNotAbortOnUncaughtScope no_abort_scope(env);
1571+
1572+
// Try parsing where instead of the CommonJS wrapper we use an async function
1573+
// wrapper. If the parse succeeds, then any CommonJS parse error for this
1574+
// module was caused by either a top-level declaration of one of the CommonJS
1575+
// module variables, or a top-level `await`.
1576+
code = String::Concat(
1577+
isolate, FIXED_ONE_BYTE_STRING(isolate, "(async function() {"), code);
1578+
code = String::Concat(isolate, code, FIXED_ONE_BYTE_STRING(isolate, "})();"));
1579+
1580+
ScriptCompiler::Source wrapped_source = GetCommonJSSourceInstance(
1581+
isolate, code, script_id, 0, 0, host_defined_options, nullptr);
1582+
1583+
Local<Context> context = env->context();
1584+
std::vector<Local<String>> params = GetCJSParameters(env->isolate_data());
1585+
USE(ScriptCompiler::CompileFunction(
1586+
context,
1587+
&wrapped_source,
1588+
params.size(),
1589+
params.data(),
1590+
0,
1591+
nullptr,
1592+
ScriptCompiler::kNoCompileOptions,
1593+
v8::ScriptCompiler::NoCacheReason::kNoCacheNoReason));
1594+
1595+
if (!try_catch.HasTerminated()) {
1596+
if (try_catch.HasCaught()) {
1597+
// If on the second parse an error is thrown by ESM syntax, then
1598+
// what happened was that the user had top-level `await` or a
1599+
// top-level declaration of one of the CommonJS module variables
1600+
// above the first `import` or `export`.
1601+
Utf8Value message_value(env->isolate(), try_catch.Message()->Get());
1602+
auto message_view = message_value.ToStringView();
1603+
for (const auto& error_message : esm_syntax_error_messages) {
1604+
if (message_view.find(error_message) != std::string_view::npos) {
1605+
return true;
16051606
}
16061607
}
1608+
} else {
1609+
// No errors thrown in the second parse, so most likely the error
1610+
// was caused by a top-level `await` or a top-level declaration of
1611+
// one of the CommonJS module variables.
1612+
return true;
16071613
}
16081614
}
1609-
args.GetReturnValue().Set(should_retry_as_esm);
1615+
return false;
16101616
}
16111617

16121618
static void CompileFunctionForCJSLoader(
@@ -1767,6 +1773,7 @@ static void CreatePerContextProperties(Local<Object> target,
17671773
Local<Object> constants = Object::New(env->isolate());
17681774
Local<Object> measure_memory = Object::New(env->isolate());
17691775
Local<Object> memory_execution = Object::New(env->isolate());
1776+
Local<Object> syntax_detection_errors = Object::New(env->isolate());
17701777

17711778
{
17721779
Local<Object> memory_mode = Object::New(env->isolate());
@@ -1787,6 +1794,25 @@ static void CreatePerContextProperties(Local<Object> target,
17871794

17881795
READONLY_PROPERTY(constants, "measureMemory", measure_memory);
17891796

1797+
{
1798+
Local<Value> esm_syntax_error_messages_array =
1799+
ToV8Value(context, esm_syntax_error_messages).ToLocalChecked();
1800+
READONLY_PROPERTY(syntax_detection_errors,
1801+
"esmSyntaxErrorMessages",
1802+
esm_syntax_error_messages_array);
1803+
}
1804+
1805+
{
1806+
Local<Value> throws_only_in_cjs_error_messages_array =
1807+
ToV8Value(context, throws_only_in_cjs_error_messages).ToLocalChecked();
1808+
READONLY_PROPERTY(syntax_detection_errors,
1809+
"throwsOnlyInCommonJSErrorMessages",
1810+
throws_only_in_cjs_error_messages_array);
1811+
}
1812+
1813+
READONLY_PROPERTY(
1814+
constants, "syntaxDetectionErrors", syntax_detection_errors);
1815+
17901816
target->Set(context, env->constants_string(), constants).Check();
17911817
}
17921818

‎src/node_contextify.h

+3
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,9 @@ class ContextifyContext : public BaseObject {
110110
const v8::ScriptCompiler::Source& source);
111111
static void ContainsModuleSyntax(
112112
const v8::FunctionCallbackInfo<v8::Value>& args);
113+
static void ShouldRetryAsESM(const v8::FunctionCallbackInfo<v8::Value>& args);
114+
static bool ShouldRetryAsESMInternal(Environment* env,
115+
v8::Local<v8::String> code);
113116
static void WeakCallback(
114117
const v8::WeakCallbackInfo<ContextifyContext>& data);
115118
static void PropertyGetterCallback(

0 commit comments

Comments
 (0)
Please sign in to comment.