@@ -7,6 +7,7 @@ const unicodeMatchProperty = require('unicode-match-property-ecmascript');
7
7
const unicodeMatchPropertyValue = require ( 'unicode-match-property-value-ecmascript' ) ;
8
8
const iuMappings = require ( './data/iu-mappings.js' ) ;
9
9
const iBMPMappings = require ( './data/i-bmp-mappings.js' ) ;
10
+ const iuFoldings = require ( './data/iu-foldings.js' ) ;
10
11
const ESCAPE_SETS = require ( './data/character-class-escape-sets.js' ) ;
11
12
const { UNICODE_SET , UNICODE_IV_SET } = require ( './data/all-characters.js' ) ;
12
13
@@ -251,6 +252,21 @@ const getCaseEquivalents = (codePoint, flags) => {
251
252
return result . length == 0 ? false : result ;
252
253
} ;
253
254
255
// https://tc39.es/ecma262/#sec-maybesimplecasefolding
/**
 * Returns the simple case folding of a code point when case folding is
 * requested, and the code point itself otherwise.
 *
 * @param {number} codePoint - The code point to fold.
 * @param {boolean} isUnicodeCaseIgnore - Whether folding should be applied.
 *   When falsy, `codePoint` is returned untouched.
 * @returns {number} The folded code point, or `codePoint` when no folding
 *   applies to it.
 */
const maybeSimpleCaseFolding = (codePoint, isUnicodeCaseIgnore) => {
	if (!isUnicodeCaseIgnore) {
		return codePoint;
	}
	// Fast path for ASCII characters: only `A`-`Z` (0x41-0x5A) change, each
	// moving 0x20 up to its lowercase counterpart, so no table lookup is
	// needed for this range.
	if (codePoint <= 0x7F) {
		const isUpperCaseLetter = codePoint >= 0x41 && codePoint <= 0x5A;
		return isUpperCaseLetter ? codePoint + 0x20 : codePoint;
	}
	// `iuFoldings` is loaded from `./data/iu-foldings.js`; presumably a `Map`
	// from code point to folded code point (only `.get` is relied on here).
	// Compare against `undefined` rather than relying on truthiness, so that
	// "no entry" is the only case that falls back to the input.
	const folded = iuFoldings.get(codePoint);
	return folded === undefined ? codePoint : folded;
};
269
+
254
270
const buildHandler = ( action ) => {
255
271
switch ( action ) {
256
272
case 'union' :
@@ -396,24 +412,32 @@ const concatCaseEquivalents = (codePoint, caseEqFlags) => {
396
412
return [ codePoint ] ;
397
413
} ;
398
414
399
- const computeClassStrings = ( classStrings , regenerateOptions , caseEqFlags ) => {
415
+ const computeClassStrings = ( classStrings , regenerateOptions , caseEqFlags , isUnicodeSetIgnoreCaseMode ) => {
400
416
let data = getCharacterClassEmptyData ( ) ;
401
417
402
418
for ( const string of classStrings . strings ) {
403
419
if ( string . characters . length === 1 ) {
404
- concatCaseEquivalents ( string . characters [ 0 ] . codePoint , caseEqFlags ) . forEach ( ( cp ) => {
420
+ const codePoint = maybeSimpleCaseFolding ( string . characters [ 0 ] . codePoint , isUnicodeSetIgnoreCaseMode )
421
+ concatCaseEquivalents ( codePoint , caseEqFlags ) . forEach ( ( cp ) => {
405
422
data . singleChars . add ( cp ) ;
406
423
} ) ;
407
424
} else {
408
- let stringifiedString ;
425
+ let stringifiedString = '' ;
409
426
if ( caseEqFlags ) {
410
- stringifiedString = '' ;
411
427
for ( const ch of string . characters ) {
412
- const set = regenerate ( concatCaseEquivalents ( ch . codePoint , caseEqFlags ) ) ;
428
+ const codePoint = maybeSimpleCaseFolding ( ch . codePoint , isUnicodeSetIgnoreCaseMode )
429
+ const set = regenerate ( concatCaseEquivalents ( codePoint , caseEqFlags ) ) ;
413
430
stringifiedString += set . toString ( regenerateOptions ) ;
414
431
}
415
432
} else {
416
- stringifiedString = string . characters . map ( ch => generate ( ch ) ) . join ( '' )
433
+ for ( const ch of string . characters ) {
434
+ const codePoint = maybeSimpleCaseFolding ( ch . codePoint , isUnicodeSetIgnoreCaseMode )
435
+ if ( codePoint !== ch . codePoint ) {
436
+ stringifiedString += regenerate ( codePoint ) . toString ( regenerateOptions ) ;
437
+ } else {
438
+ stringifiedString += generate ( ch ) ;
439
+ }
440
+ }
417
441
}
418
442
419
443
data . longStrings . add ( stringifiedString ) ;
@@ -431,6 +455,7 @@ const computeCharacterClass = (characterClassItem, regenerateOptions) => {
431
455
let handleNegative ;
432
456
433
457
let caseEqFlags = configGetCaseEqFlags ( ) ;
458
+ const isUnicodeSetIgnoreCaseMode = config . flags . unicodeSets && config . isIgnoreCaseMode ;
434
459
435
460
switch ( characterClassItem . kind ) {
436
461
case 'union' :
@@ -440,17 +465,11 @@ const computeCharacterClass = (characterClassItem, regenerateOptions) => {
440
465
case 'intersection' :
441
466
handlePositive = buildHandler ( 'intersection' ) ;
442
467
handleNegative = buildHandler ( 'subtraction' ) ;
443
- if ( config . isIgnoreCaseMode ) {
444
- caseEqFlags |= CASE_EQ_FLAG_BMP | CASE_EQ_FLAG_UNICODE ;
445
- }
446
468
if ( config . transform . unicodeSetsFlag ) data . transformed = true ;
447
469
break ;
448
470
case 'subtraction' :
449
471
handlePositive = buildHandler ( 'subtraction' ) ;
450
472
handleNegative = buildHandler ( 'intersection' ) ;
451
- if ( config . isIgnoreCaseMode ) {
452
- caseEqFlags |= CASE_EQ_FLAG_BMP | CASE_EQ_FLAG_UNICODE ;
453
- }
454
473
if ( config . transform . unicodeSetsFlag ) data . transformed = true ;
455
474
break ;
456
475
// The `default` clause is only here as a safeguard; it should never be
@@ -463,7 +482,8 @@ const computeCharacterClass = (characterClassItem, regenerateOptions) => {
463
482
for ( const item of characterClassItem . body ) {
464
483
switch ( item . type ) {
465
484
case 'value' :
466
- const list = concatCaseEquivalents ( item . codePoint , caseEqFlags ) ;
485
+ const codePoint = maybeSimpleCaseFolding ( item . codePoint , isUnicodeSetIgnoreCaseMode ) ;
486
+ const list = concatCaseEquivalents ( codePoint , caseEqFlags ) ;
467
487
handlePositive . regSet ( data , regenerate ( list ) ) ;
468
488
if ( list . length > 1 ) {
469
489
data . transformed = true ;
@@ -504,7 +524,7 @@ const computeCharacterClass = (characterClassItem, regenerateOptions) => {
504
524
data . transformed = true ;
505
525
break ;
506
526
case 'classStrings' :
507
- handlePositive . nested ( data , computeClassStrings ( item , regenerateOptions , caseEqFlags ) ) ;
527
+ handlePositive . nested ( data , computeClassStrings ( item , regenerateOptions , caseEqFlags , isUnicodeSetIgnoreCaseMode ) ) ;
508
528
data . transformed = true ;
509
529
break ;
510
530
// The `default` clause is only here as a safeguard; it should never be
0 commit comments