Skip to content

Commit 1df54d4

Browse files
committed
polish: apply scf() to the class set operand
1 parent 376a0c0 commit 1df54d4

File tree

3 files changed

+53
-19
lines changed

3 files changed

+53
-19
lines changed

rewrite-pattern.js

+34-14
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ const unicodeMatchProperty = require('unicode-match-property-ecmascript');
77
const unicodeMatchPropertyValue = require('unicode-match-property-value-ecmascript');
88
const iuMappings = require('./data/iu-mappings.js');
99
const iBMPMappings = require('./data/i-bmp-mappings.js');
10+
const iuFoldings = require('./data/iu-foldings.js');
1011
const ESCAPE_SETS = require('./data/character-class-escape-sets.js');
1112
const { UNICODE_SET, UNICODE_IV_SET } = require('./data/all-characters.js');
1213

@@ -251,6 +252,21 @@ const getCaseEquivalents = (codePoint, flags) => {
251252
return result.length == 0 ? false : result;
252253
};
253254

255+
/**
 * Returns the simple case folding of a code point when case-insensitive
 * folding is requested; otherwise returns the code point unchanged.
 *
 * @see https://tc39.es/ecma262/#sec-maybesimplecasefolding
 * @param {number} codePoint - The code point to fold.
 * @param {boolean} isUnicodeCaseIgnore - When falsy, no folding is applied.
 * @returns {number} The folded code point, or `codePoint` itself when no
 *   folding applies.
 */
const maybeSimpleCaseFolding = (codePoint, isUnicodeCaseIgnore) => {
	if (!isUnicodeCaseIgnore) {
		return codePoint;
	}
	// Fast path for ASCII characters: only `A`-`Z` change (to `a`-`z`), so
	// the `iuFoldings` lookup is skipped for this range.
	if (codePoint <= 0x7F) {
		if (codePoint >= 0x41 && codePoint <= 0x5A) {
			return codePoint + 0x20;
		}
		return codePoint;
	}
	// Fall back to the input when `iuFoldings` has no entry for it.
	return iuFoldings.get(codePoint) || codePoint;
};
269+
254270
const buildHandler = (action) => {
255271
switch (action) {
256272
case 'union':
@@ -396,24 +412,32 @@ const concatCaseEquivalents = (codePoint, caseEqFlags) => {
396412
return [codePoint];
397413
};
398414

399-
const computeClassStrings = (classStrings, regenerateOptions, caseEqFlags) => {
415+
const computeClassStrings = (classStrings, regenerateOptions, caseEqFlags, isUnicodeSetIgnoreCaseMode) => {
400416
let data = getCharacterClassEmptyData();
401417

402418
for (const string of classStrings.strings) {
403419
if (string.characters.length === 1) {
404-
concatCaseEquivalents(string.characters[0].codePoint, caseEqFlags).forEach((cp) => {
420+
const codePoint = maybeSimpleCaseFolding(string.characters[0].codePoint, isUnicodeSetIgnoreCaseMode)
421+
concatCaseEquivalents(codePoint, caseEqFlags).forEach((cp) => {
405422
data.singleChars.add(cp);
406423
});
407424
} else {
408-
let stringifiedString;
425+
let stringifiedString = '';
409426
if (caseEqFlags) {
410-
stringifiedString = '';
411427
for (const ch of string.characters) {
412-
const set = regenerate(concatCaseEquivalents(ch.codePoint, caseEqFlags));
428+
const codePoint = maybeSimpleCaseFolding(ch.codePoint, isUnicodeSetIgnoreCaseMode)
429+
const set = regenerate(concatCaseEquivalents(codePoint, caseEqFlags));
413430
stringifiedString += set.toString(regenerateOptions);
414431
}
415432
} else {
416-
stringifiedString = string.characters.map(ch => generate(ch)).join('')
433+
for (const ch of string.characters) {
434+
const codePoint = maybeSimpleCaseFolding(ch.codePoint, isUnicodeSetIgnoreCaseMode)
435+
if (codePoint !== ch.codePoint) {
436+
stringifiedString += regenerate(codePoint).toString(regenerateOptions);
437+
} else {
438+
stringifiedString += generate(ch);
439+
}
440+
}
417441
}
418442

419443
data.longStrings.add(stringifiedString);
@@ -431,6 +455,7 @@ const computeCharacterClass = (characterClassItem, regenerateOptions) => {
431455
let handleNegative;
432456

433457
let caseEqFlags = configGetCaseEqFlags();
458+
const isUnicodeSetIgnoreCaseMode = config.flags.unicodeSets && config.isIgnoreCaseMode;
434459

435460
switch (characterClassItem.kind) {
436461
case 'union':
@@ -440,17 +465,11 @@ const computeCharacterClass = (characterClassItem, regenerateOptions) => {
440465
case 'intersection':
441466
handlePositive = buildHandler('intersection');
442467
handleNegative = buildHandler('subtraction');
443-
if (config.isIgnoreCaseMode) {
444-
caseEqFlags |= CASE_EQ_FLAG_BMP | CASE_EQ_FLAG_UNICODE;
445-
}
446468
if (config.transform.unicodeSetsFlag) data.transformed = true;
447469
break;
448470
case 'subtraction':
449471
handlePositive = buildHandler('subtraction');
450472
handleNegative = buildHandler('intersection');
451-
if (config.isIgnoreCaseMode) {
452-
caseEqFlags |= CASE_EQ_FLAG_BMP | CASE_EQ_FLAG_UNICODE;
453-
}
454473
if (config.transform.unicodeSetsFlag) data.transformed = true;
455474
break;
456475
// The `default` clause is only here as a safeguard; it should never be
@@ -463,7 +482,8 @@ const computeCharacterClass = (characterClassItem, regenerateOptions) => {
463482
for (const item of characterClassItem.body) {
464483
switch (item.type) {
465484
case 'value':
466-
const list = concatCaseEquivalents(item.codePoint, caseEqFlags);
485+
const codePoint = maybeSimpleCaseFolding(item.codePoint, isUnicodeSetIgnoreCaseMode);
486+
const list = concatCaseEquivalents(codePoint, caseEqFlags);
467487
handlePositive.regSet(data, regenerate(list));
468488
if (list.length > 1) {
469489
data.transformed = true;
@@ -504,7 +524,7 @@ const computeCharacterClass = (characterClassItem, regenerateOptions) => {
504524
data.transformed = true;
505525
break;
506526
case 'classStrings':
507-
handlePositive.nested(data, computeClassStrings(item, regenerateOptions, caseEqFlags));
527+
handlePositive.nested(data, computeClassStrings(item, regenerateOptions, caseEqFlags, isUnicodeSetIgnoreCaseMode));
508528
data.transformed = true;
509529
break;
510530
// The `default` clause is only here as a safeguard; it should never be

tests/fixtures/modifiers.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ const modifiersFixtures = [
6161
'pattern': '(?i:[\\q{ab|cd|abc}--\\q{abc}--\\q{cd}])',
6262
'flags': 'v',
6363
'options': { unicodeSetsFlag: 'transform', modifiers: false },
64-
'expected': '(?i:(?:[Aa][Bb]))',
64+
'expected': '(?i:(?:ab))',
6565
'expectedFlags': 'u',
6666
}, {
6767
'pattern': '(?i:[\\q{ab|cd|abc}--\\q{abc}--\\q{cd}])',

tests/fixtures/unicode-set.js

+18-4
Original file line numberDiff line numberDiff line change
@@ -173,13 +173,15 @@ const unicodeSetFixtures = [
173173
{
174174
pattern: '[\\q{sA}asb]',
175175
flags: 'iv',
176-
expected: '(?:sA|[abs])'
176+
expected: '(?:sa|[abs])',
177+
expectedFlags: 'iu'
177178
},
178179
{
179180
pattern: '[\\q{sA}asb]',
180181
flags: 'iv',
181182
options: TRANSFORM_U,
182-
expected: '(?:[s\\u017F]A|[abs\\u017F])'
183+
expected: '(?:[s\\u017F]a|[abs\\u017F])',
184+
expectedFlags: 'i'
183185
},
184186
{
185187
pattern: '[[ab\\q{cd}]--a]',
@@ -406,13 +408,13 @@ const unicodeSetFixtures = [
406408
{
407409
pattern: '[K&&k]',
408410
flags: 'iv',
409-
expected: '[Kk\\u212A]',
411+
expected: 'k',
410412
expectedFlags: 'iu'
411413
},
412414
{
413415
pattern: '[K&&\\u212A]',
414416
flags: 'iv',
415-
expected: '[Kk\\u212A]',
417+
expected: 'k',
416418
expectedFlags: 'iu'
417419
},
418420
{
@@ -450,6 +452,18 @@ const unicodeSetFixtures = [
450452
flags: 'iv',
451453
expected: '[]',
452454
expectedFlags: 'iu'
455+
},
456+
{
457+
pattern: '[\\q{KK}&&\\q{kk}]',
458+
flags: 'iv',
459+
expected: '(?:kk)',
460+
expectedFlags: 'iu',
461+
},
462+
{
463+
pattern: '[\\q{KK}--\\q{k\\u212A}]',
464+
flags: 'iv',
465+
expected: '[]',
466+
expectedFlags: 'iu'
453467
}
454468
];
455469

0 commit comments

Comments
 (0)