Skip to content

Commit b7a56bd

Browse files
authored
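Add Unicode properties for ecmaVersion 13 (ES2022)
FIX: Add support in regular expressions for the Unicode property values available at ecmaVersion 13 (ES2022), i.e. the Unicode 14 scripts Cypro_Minoan, Old_Uyghur, Tangsa, Toto and Vithkuqi.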
1 parent c528c79 commit b7a56bd

File tree

3 files changed

+13
-29
lines changed

3 files changed

+13
-29
lines changed

acorn/src/regexp.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ export class RegExpValidationState {
99
constructor(parser) {
1010
this.parser = parser
1111
this.validFlags = `gim${parser.options.ecmaVersion >= 6 ? "uy" : ""}${parser.options.ecmaVersion >= 9 ? "s" : ""}${parser.options.ecmaVersion >= 13 ? "d" : ""}`
12-
this.unicodeProperties = UNICODE_PROPERTY_VALUES[parser.options.ecmaVersion >= 12 ? 12 : parser.options.ecmaVersion]
12+
this.unicodeProperties = UNICODE_PROPERTY_VALUES[parser.options.ecmaVersion >= 13 ? 13 : parser.options.ecmaVersion]
1313
this.source = ""
1414
this.flags = ""
1515
this.start = 0

acorn/src/unicode-property-data.js

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,31 +9,35 @@ const ecma9BinaryProperties = "ASCII ASCII_Hex_Digit AHex Alphabetic Alpha Any A
99
const ecma10BinaryProperties = ecma9BinaryProperties + " Extended_Pictographic"
1010
const ecma11BinaryProperties = ecma10BinaryProperties
1111
const ecma12BinaryProperties = ecma11BinaryProperties + " EBase EComp EMod EPres ExtPict"
12+
const ecma13BinaryProperties = ecma12BinaryProperties
1213
const unicodeBinaryProperties = {
1314
9: ecma9BinaryProperties,
1415
10: ecma10BinaryProperties,
1516
11: ecma11BinaryProperties,
16-
12: ecma12BinaryProperties
17+
12: ecma12BinaryProperties,
18+
13: ecma13BinaryProperties
1719
}
1820

1921
// #table-unicode-general-category-values
2022
const unicodeGeneralCategoryValues = "Cased_Letter LC Close_Punctuation Pe Connector_Punctuation Pc Control Cc cntrl Currency_Symbol Sc Dash_Punctuation Pd Decimal_Number Nd digit Enclosing_Mark Me Final_Punctuation Pf Format Cf Initial_Punctuation Pi Letter L Letter_Number Nl Line_Separator Zl Lowercase_Letter Ll Mark M Combining_Mark Math_Symbol Sm Modifier_Letter Lm Modifier_Symbol Sk Nonspacing_Mark Mn Number N Open_Punctuation Ps Other C Other_Letter Lo Other_Number No Other_Punctuation Po Other_Symbol So Paragraph_Separator Zp Private_Use Co Punctuation P punct Separator Z Space_Separator Zs Spacing_Mark Mc Surrogate Cs Symbol S Titlecase_Letter Lt Unassigned Cn Uppercase_Letter Lu"
2123

2224
// #table-unicode-script-values
23-
const ecma9ScriptValues = "Adlam Adlm Ahom Ahom Anatolian_Hieroglyphs Hluw Arabic Arab Armenian Armn Avestan Avst Balinese Bali Bamum Bamu Bassa_Vah Bass Batak Batk Bengali Beng Bhaiksuki Bhks Bopomofo Bopo Brahmi Brah Braille Brai Buginese Bugi Buhid Buhd Canadian_Aboriginal Cans Carian Cari Caucasian_Albanian Aghb Chakma Cakm Cham Cham Cherokee Cher Common Zyyy Coptic Copt Qaac Cuneiform Xsux Cypriot Cprt Cyrillic Cyrl Deseret Dsrt Devanagari Deva Duployan Dupl Egyptian_Hieroglyphs Egyp Elbasan Elba Ethiopic Ethi Georgian Geor Glagolitic Glag Gothic Goth Grantha Gran Greek Grek Gujarati Gujr Gurmukhi Guru Han Hani Hangul Hang Hanunoo Hano Hatran Hatr Hebrew Hebr Hiragana Hira Imperial_Aramaic Armi Inherited Zinh Qaai Inscriptional_Pahlavi Phli Inscriptional_Parthian Prti Javanese Java Kaithi Kthi Kannada Knda Katakana Kana Kayah_Li Kali Kharoshthi Khar Khmer Khmr Khojki Khoj Khudawadi Sind Lao Laoo Latin Latn Lepcha Lepc Limbu Limb Linear_A Lina Linear_B Linb Lisu Lisu Lycian Lyci Lydian Lydi Mahajani Mahj Malayalam Mlym Mandaic Mand Manichaean Mani Marchen Marc Masaram_Gondi Gonm Meetei_Mayek Mtei Mende_Kikakui Mend Meroitic_Cursive Merc Meroitic_Hieroglyphs Mero Miao Plrd Modi Modi Mongolian Mong Mro Mroo Multani Mult Myanmar Mymr Nabataean Nbat New_Tai_Lue Talu Newa Newa Nko Nkoo Nushu Nshu Ogham Ogam Ol_Chiki Olck Old_Hungarian Hung Old_Italic Ital Old_North_Arabian Narb Old_Permic Perm Old_Persian Xpeo Old_South_Arabian Sarb Old_Turkic Orkh Oriya Orya Osage Osge Osmanya Osma Pahawh_Hmong Hmng Palmyrene Palm Pau_Cin_Hau Pauc Phags_Pa Phag Phoenician Phnx Psalter_Pahlavi Phlp Rejang Rjng Runic Runr Samaritan Samr Saurashtra Saur Sharada Shrd Shavian Shaw Siddham Sidd SignWriting Sgnw Sinhala Sinh Sora_Sompeng Sora Soyombo Soyo Sundanese Sund Syloti_Nagri Sylo Syriac Syrc Tagalog Tglg Tagbanwa Tagb Tai_Le Tale Tai_Tham Lana Tai_Viet Tavt Takri Takr Tamil Taml Tangut Tang Telugu Telu Thaana Thaa Thai Thai Tibetan Tibt Tifinagh Tfng Tirhuta Tirh Ugaritic Ugar Vai 
Vaii Warang_Citi Wara Yi Yiii Zanabazar_Square Zanb"
25+
const ecma9ScriptValues = "Adlam Adlm Ahom Anatolian_Hieroglyphs Hluw Arabic Arab Armenian Armn Avestan Avst Balinese Bali Bamum Bamu Bassa_Vah Bass Batak Batk Bengali Beng Bhaiksuki Bhks Bopomofo Bopo Brahmi Brah Braille Brai Buginese Bugi Buhid Buhd Canadian_Aboriginal Cans Carian Cari Caucasian_Albanian Aghb Chakma Cakm Cham Cham Cherokee Cher Common Zyyy Coptic Copt Qaac Cuneiform Xsux Cypriot Cprt Cyrillic Cyrl Deseret Dsrt Devanagari Deva Duployan Dupl Egyptian_Hieroglyphs Egyp Elbasan Elba Ethiopic Ethi Georgian Geor Glagolitic Glag Gothic Goth Grantha Gran Greek Grek Gujarati Gujr Gurmukhi Guru Han Hani Hangul Hang Hanunoo Hano Hatran Hatr Hebrew Hebr Hiragana Hira Imperial_Aramaic Armi Inherited Zinh Qaai Inscriptional_Pahlavi Phli Inscriptional_Parthian Prti Javanese Java Kaithi Kthi Kannada Knda Katakana Kana Kayah_Li Kali Kharoshthi Khar Khmer Khmr Khojki Khoj Khudawadi Sind Lao Laoo Latin Latn Lepcha Lepc Limbu Limb Linear_A Lina Linear_B Linb Lisu Lisu Lycian Lyci Lydian Lydi Mahajani Mahj Malayalam Mlym Mandaic Mand Manichaean Mani Marchen Marc Masaram_Gondi Gonm Meetei_Mayek Mtei Mende_Kikakui Mend Meroitic_Cursive Merc Meroitic_Hieroglyphs Mero Miao Plrd Modi Mongolian Mong Mro Mroo Multani Mult Myanmar Mymr Nabataean Nbat New_Tai_Lue Talu Newa Newa Nko Nkoo Nushu Nshu Ogham Ogam Ol_Chiki Olck Old_Hungarian Hung Old_Italic Ital Old_North_Arabian Narb Old_Permic Perm Old_Persian Xpeo Old_South_Arabian Sarb Old_Turkic Orkh Oriya Orya Osage Osge Osmanya Osma Pahawh_Hmong Hmng Palmyrene Palm Pau_Cin_Hau Pauc Phags_Pa Phag Phoenician Phnx Psalter_Pahlavi Phlp Rejang Rjng Runic Runr Samaritan Samr Saurashtra Saur Sharada Shrd Shavian Shaw Siddham Sidd SignWriting Sgnw Sinhala Sinh Sora_Sompeng Sora Soyombo Soyo Sundanese Sund Syloti_Nagri Sylo Syriac Syrc Tagalog Tglg Tagbanwa Tagb Tai_Le Tale Tai_Tham Lana Tai_Viet Tavt Takri Takr Tamil Taml Tangut Tang Telugu Telu Thaana Thaa Thai Thai Tibetan Tibt Tifinagh Tfng Tirhuta Tirh Ugaritic Ugar Vai Vaii 
Warang_Citi Wara Yi Yiii Zanabazar_Square Zanb"
2426
const ecma10ScriptValues = ecma9ScriptValues + " Dogra Dogr Gunjala_Gondi Gong Hanifi_Rohingya Rohg Makasar Maka Medefaidrin Medf Old_Sogdian Sogo Sogdian Sogd"
2527
const ecma11ScriptValues = ecma10ScriptValues + " Elymaic Elym Nandinagari Nand Nyiakeng_Puachue_Hmong Hmnp Wancho Wcho"
2628
const ecma12ScriptValues = ecma11ScriptValues + " Chorasmian Chrs Diak Dives_Akuru Khitan_Small_Script Kits Yezi Yezidi"
29+
const ecma13ScriptValues = ecma12ScriptValues + " Cypro_Minoan Cpmn Old_Uyghur Ougr Tangsa Tnsa Toto Vithkuqi Vith"
2730
const unicodeScriptValues = {
2831
9: ecma9ScriptValues,
2932
10: ecma10ScriptValues,
3033
11: ecma11ScriptValues,
31-
12: ecma12ScriptValues
34+
12: ecma12ScriptValues,
35+
13: ecma13ScriptValues
3236
}
3337

3438
const data = {}
3539
function buildUnicodeData(ecmaVersion) {
36-
let d = data[ecmaVersion] = {
40+
const d = data[ecmaVersion] = {
3741
binary: wordsRegexp(unicodeBinaryProperties[ecmaVersion] + " " + unicodeGeneralCategoryValues),
3842
nonBinary: {
3943
General_Category: wordsRegexp(unicodeGeneralCategoryValues),
@@ -46,9 +50,9 @@ function buildUnicodeData(ecmaVersion) {
4650
d.nonBinary.sc = d.nonBinary.Script
4751
d.nonBinary.scx = d.nonBinary.Script_Extensions
4852
}
49-
buildUnicodeData(9)
50-
buildUnicodeData(10)
51-
buildUnicodeData(11)
52-
buildUnicodeData(12)
53+
54+
for (const ecmaVersion of [9, 10, 11, 12, 13]) {
55+
buildUnicodeData(ecmaVersion)
56+
}
5357

5458
export default data

bin/test262.whitelist

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +0,0 @@
1-
built-ins/RegExp/property-escapes/generated/Script_-_Cypro_Minoan.js (default)
2-
built-ins/RegExp/property-escapes/generated/Script_-_Cypro_Minoan.js (strict mode)
3-
built-ins/RegExp/property-escapes/generated/Script_-_Old_Uyghur.js (default)
4-
built-ins/RegExp/property-escapes/generated/Script_-_Old_Uyghur.js (strict mode)
5-
built-ins/RegExp/property-escapes/generated/Script_-_Tangsa.js (default)
6-
built-ins/RegExp/property-escapes/generated/Script_-_Tangsa.js (strict mode)
7-
built-ins/RegExp/property-escapes/generated/Script_-_Toto.js (default)
8-
built-ins/RegExp/property-escapes/generated/Script_-_Toto.js (strict mode)
9-
built-ins/RegExp/property-escapes/generated/Script_-_Vithkuqi.js (default)
10-
built-ins/RegExp/property-escapes/generated/Script_-_Vithkuqi.js (strict mode)
11-
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Cypro_Minoan.js (default)
12-
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Cypro_Minoan.js (strict mode)
13-
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Old_Uyghur.js (default)
14-
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Old_Uyghur.js (strict mode)
15-
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Tangsa.js (default)
16-
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Tangsa.js (strict mode)
17-
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Toto.js (default)
18-
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Toto.js (strict mode)
19-
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Vithkuqi.js (default)
20-
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Vithkuqi.js (strict mode)

0 commit comments

Comments
 (0)