diff --git a/core/string/char_range.inc b/core/string/char_range.inc index 2b081b96dec5..efae7578028f 100644 --- a/core/string/char_range.inc +++ b/core/string/char_range.inc @@ -33,14 +33,17 @@ #include "core/typedefs.h" +// Unicode Derived Core Properties +// Source: https://www.unicode.org/Public/16.0.0/ucd/DerivedCoreProperties.txt + struct CharRange { char32_t start; char32_t end; }; -inline constexpr CharRange xid_start[] = { +constexpr inline CharRange xid_start[] = { { 0x41, 0x5a }, - { 0x5f, 0x5f }, + { 0x5f, 0x5f }, // Underscore technically isn't in XID_Start, but for our purposes it's included. { 0x61, 0x7a }, { 0xaa, 0xaa }, { 0xb5, 0xb5 }, @@ -54,7 +57,7 @@ inline constexpr CharRange xid_start[] = { { 0x2ee, 0x2ee }, { 0x370, 0x374 }, { 0x376, 0x377 }, - { 0x37a, 0x37d }, + { 0x37b, 0x37d }, { 0x37f, 0x37f }, { 0x386, 0x386 }, { 0x388, 0x38a }, @@ -182,7 +185,7 @@ inline constexpr CharRange xid_start[] = { { 0xdbd, 0xdbd }, { 0xdc0, 0xdc6 }, { 0xe01, 0xe30 }, - { 0xe32, 0xe33 }, + { 0xe32, 0xe32 }, { 0xe40, 0xe46 }, { 0xe81, 0xe82 }, { 0xe84, 0xe84 }, @@ -190,7 +193,7 @@ inline constexpr CharRange xid_start[] = { { 0xe8c, 0xea3 }, { 0xea5, 0xea5 }, { 0xea7, 0xeb0 }, - { 0xeb2, 0xeb3 }, + { 0xeb2, 0xeb2 }, { 0xebd, 0xebd }, { 0xec0, 0xec4 }, { 0xec6, 0xec6 }, @@ -245,8 +248,7 @@ inline constexpr CharRange xid_start[] = { { 0x17d7, 0x17d7 }, { 0x17dc, 0x17dc }, { 0x1820, 0x1878 }, - { 0x1880, 0x1884 }, - { 0x1887, 0x18a8 }, + { 0x1880, 0x18a8 }, { 0x18aa, 0x18aa }, { 0x18b0, 0x18f5 }, { 0x1900, 0x191e }, @@ -265,7 +267,7 @@ inline constexpr CharRange xid_start[] = { { 0x1c00, 0x1c23 }, { 0x1c4d, 0x1c4f }, { 0x1c5a, 0x1c7d }, - { 0x1c80, 0x1c88 }, + { 0x1c80, 0x1c8a }, { 0x1c90, 0x1cba }, { 0x1cbd, 0x1cbf }, { 0x1ce9, 0x1cec }, @@ -330,7 +332,7 @@ inline constexpr CharRange xid_start[] = { { 0x3031, 0x3035 }, { 0x3038, 0x303c }, { 0x3041, 0x3096 }, - { 0x309b, 0x309f }, + { 0x309d, 0x309f }, { 0x30a1, 0x30fa }, { 0x30fc, 0x30ff }, { 0x3105, 0x312f }, @@ -348,10 +350,10 @@ inline constexpr CharRange xid_start[] = { { 0xa6a0, 0xa6ef }, { 0xa717, 0xa71f }, { 0xa722, 0xa788 }, - { 0xa78b, 0xa7ca }, + { 0xa78b, 0xa7cd }, { 0xa7d0, 0xa7d1 }, { 0xa7d3, 0xa7d3 }, - { 0xa7d5, 0xa7d9 }, + { 0xa7d5, 0xa7dc }, { 0xa7f2, 0xa801 }, { 0xa803, 0xa805 }, { 0xa807, 0xa80a }, @@ -406,15 +408,22 @@ inline constexpr CharRange xid_start[] = { { 0xfb40, 0xfb41 }, { 0xfb43, 0xfb44 }, { 0xfb46, 0xfbb1 }, - { 0xfbd3, 0xfd3d }, + { 0xfbd3, 0xfc5d }, + { 0xfc64, 0xfd3d }, { 0xfd50, 0xfd8f }, { 0xfd92, 0xfdc7 }, - { 0xfdf0, 0xfdfb }, - { 0xfe70, 0xfe74 }, - { 0xfe76, 0xfefc }, + { 0xfdf0, 0xfdf9 }, + { 0xfe71, 0xfe71 }, + { 0xfe73, 0xfe73 }, + { 0xfe77, 0xfe77 }, + { 0xfe79, 0xfe79 }, + { 0xfe7b, 0xfe7b }, + { 0xfe7d, 0xfe7d }, + { 0xfe7f, 0xfefc }, { 0xff21, 0xff3a }, { 0xff41, 0xff5a }, - { 0xff66, 0xffbe }, + { 0xff66, 0xff9d }, + { 0xffa0, 0xffbe }, { 0xffc2, 0xffc7 }, { 0xffca, 0xffcf }, { 0xffd2, 0xffd7 }, @@ -449,6 +458,7 @@ inline constexpr CharRange xid_start[] = { { 0x105a3, 0x105b1 }, { 0x105b3, 0x105b9 }, { 0x105bb, 0x105bc }, + { 0x105c0, 0x105f3 }, { 0x10600, 0x10736 }, { 0x10740, 0x10755 }, { 0x10760, 0x10767 }, @@ -485,8 +495,11 @@ inline constexpr CharRange xid_start[] = { { 0x10c80, 0x10cb2 }, { 0x10cc0, 0x10cf2 }, { 0x10d00, 0x10d23 }, + { 0x10d4a, 0x10d65 }, + { 0x10d6f, 0x10d85 }, { 0x10e80, 0x10ea9 }, { 0x10eb0, 0x10eb1 }, + { 0x10ec2, 0x10ec4 }, { 0x10f00, 0x10f1c }, { 0x10f27, 0x10f27 }, { 0x10f30, 0x10f45 }, @@ -509,6 +522,7 @@ inline constexpr CharRange xid_start[] = { { 0x111dc, 0x111dc }, { 0x11200, 0x11211 }, { 0x11213, 0x1122b }, + { 0x1123f, 0x11240 }, { 0x11280, 0x11286 }, { 0x11288, 0x11288 }, { 0x1128a, 0x1128d }, @@ -524,6 +538,13 @@ inline constexpr CharRange xid_start[] = { { 0x1133d, 0x1133d }, { 0x11350, 0x11350 }, { 0x1135d, 0x11361 }, + { 0x11380, 0x11389 }, + { 0x1138b, 0x1138b }, + { 0x1138e, 0x1138e }, + { 0x11390, 0x113b5 }, + { 0x113b7, 0x113b7 }, + { 0x113d1, 0x113d1 }, + { 0x113d3, 0x113d3 }, { 0x11400, 0x11434 }, { 0x11447, 0x1144a }, { 0x1145f, 0x11461 }, @@ -558,6 +579,7 @@ inline constexpr CharRange xid_start[] = { { 0x11a5c, 0x11a89 }, { 0x11a9d, 0x11a9d }, { 0x11ab0, 0x11af8 }, + { 0x11bc0, 0x11be0 }, { 0x11c00, 0x11c08 }, { 0x11c0a, 0x11c2e }, { 0x11c40, 0x11c40 }, @@ -571,13 +593,19 @@ inline constexpr CharRange xid_start[] = { { 0x11d6a, 0x11d89 }, { 0x11d98, 0x11d98 }, { 0x11ee0, 0x11ef2 }, + { 0x11f02, 0x11f02 }, + { 0x11f04, 0x11f10 }, + { 0x11f12, 0x11f33 }, { 0x11fb0, 0x11fb0 }, { 0x12000, 0x12399 }, { 0x12400, 0x1246e }, { 0x12480, 0x12543 }, { 0x12f90, 0x12ff0 }, - { 0x13000, 0x1342e }, + { 0x13000, 0x1342f }, + { 0x13441, 0x13446 }, + { 0x13460, 0x143fa }, { 0x14400, 0x14646 }, + { 0x16100, 0x1611d }, { 0x16800, 0x16a38 }, { 0x16a40, 0x16a5e }, { 0x16a70, 0x16abe }, @@ -586,6 +614,7 @@ inline constexpr CharRange xid_start[] = { { 0x16b40, 0x16b43 }, { 0x16b63, 0x16b77 }, { 0x16b7d, 0x16b8f }, + { 0x16d40, 0x16d6c }, { 0x16e40, 0x16e7f }, { 0x16f00, 0x16f4a }, { 0x16f50, 0x16f50 }, @@ -594,12 +623,14 @@ inline constexpr CharRange xid_start[] = { { 0x16fe3, 0x16fe3 }, { 0x17000, 0x187f7 }, { 0x18800, 0x18cd5 }, - { 0x18d00, 0x18d08 }, + { 0x18cff, 0x18d08 }, { 0x1aff0, 0x1aff3 }, { 0x1aff5, 0x1affb }, { 0x1affd, 0x1affe }, { 0x1b000, 0x1b122 }, + { 0x1b132, 0x1b132 }, { 0x1b150, 0x1b152 }, + { 0x1b155, 0x1b155 }, { 0x1b164, 0x1b167 }, { 0x1b170, 0x1b2fb }, { 0x1bc00, 0x1bc6a }, @@ -637,11 +668,16 @@ inline constexpr CharRange xid_start[] = { { 0x1d7aa, 0x1d7c2 }, { 0x1d7c4, 0x1d7cb }, { 0x1df00, 0x1df1e }, + { 0x1df25, 0x1df2a }, + { 0x1e030, 0x1e06d }, { 0x1e100, 0x1e12c }, { 0x1e137, 0x1e13d }, { 0x1e14e, 0x1e14e }, { 0x1e290, 0x1e2ad }, { 0x1e2c0, 0x1e2eb }, + { 0x1e4d0, 0x1e4eb }, + { 0x1e5d0, 0x1e5ed }, + { 0x1e5f0, 0x1e5f0 }, { 0x1e7e0, 0x1e7e6 }, { 0x1e7e8, 0x1e7eb }, { 0x1e7ed, 0x1e7ee }, @@ -683,15 +719,17 @@ inline constexpr CharRange xid_start[] = { { 0x1eea5, 0x1eea9 }, { 0x1eeab, 0x1eebb }, { 0x20000, 0x2a6df }, - { 0x2a700, 0x2b738 }, + { 0x2a700, 0x2b739 }, { 0x2b740, 0x2b81d }, { 0x2b820, 0x2cea1 }, { 0x2ceb0, 0x2ebe0 }, + { 0x2ebf0, 0x2ee5d }, { 0x2f800, 0x2fa1d }, { 0x30000, 0x3134a }, + { 0x31350, 0x323af }, }; -inline constexpr CharRange xid_continue[] = { +constexpr inline CharRange xid_continue[] = { { 0x30, 0x39 }, { 0x41, 0x5a }, { 0x5f, 0x5f }, @@ -709,7 +747,7 @@ inline constexpr CharRange xid_continue[] = { { 0x2ee, 0x2ee }, { 0x300, 0x374 }, { 0x376, 0x377 }, - { 0x37a, 0x37d }, + { 0x37b, 0x37d }, { 0x37f, 0x37f }, { 0x386, 0x38a }, { 0x38c, 0x38c }, @@ -745,7 +783,7 @@ inline constexpr CharRange xid_continue[] = { { 0x860, 0x86a }, { 0x870, 0x887 }, { 0x889, 0x88e }, - { 0x898, 0x8e1 }, + { 0x897, 0x8e1 }, { 0x8e3, 0x963 }, { 0x966, 0x96f }, { 0x971, 0x983 }, @@ -850,7 +888,7 @@ inline constexpr CharRange xid_continue[] = { { 0xcdd, 0xcde }, { 0xce0, 0xce3 }, { 0xce6, 0xcef }, - { 0xcf1, 0xcf2 }, + { 0xcf1, 0xcf3 }, { 0xd00, 0xd0c }, { 0xd0e, 0xd10 }, { 0xd12, 0xd44 }, @@ -883,7 +921,7 @@ inline constexpr CharRange xid_continue[] = { { 0xea7, 0xebd }, { 0xec0, 0xec4 }, { 0xec6, 0xec6 }, - { 0xec8, 0xecd }, + { 0xec8, 0xece }, { 0xed0, 0xed9 }, { 0xedc, 0xedf }, { 0xf00, 0xf00 }, @@ -921,8 +959,7 @@ inline constexpr CharRange xid_continue[] = { { 0x1312, 0x1315 }, { 0x1318, 0x135a }, { 0x135d, 0x135f }, - { 0x1369, 0x1369 }, - { 0x1371, 0x1371 }, + { 0x1369, 0x1371 }, { 0x1380, 0x138f }, { 0x13a0, 0x13f5 }, { 0x13f8, 0x13fd }, @@ -969,7 +1006,7 @@ inline constexpr CharRange xid_continue[] = { { 0x1c00, 0x1c37 }, { 0x1c40, 0x1c49 }, { 0x1c4d, 0x1c7d }, - { 0x1c80, 0x1c88 }, + { 0x1c80, 0x1c8a }, { 0x1c90, 0x1cba }, { 0x1cbd, 0x1cbf }, { 0x1cd0, 0x1cd2 }, @@ -993,6 +1030,7 @@ inline constexpr CharRange xid_continue[] = { { 0x1fe0, 0x1fec }, { 0x1ff2, 0x1ff4 }, { 0x1ff6, 0x1ffc }, + { 0x200c, 0x200d }, { 0x203f, 0x2040 }, { 0x2054, 0x2054 }, { 0x2071, 0x2071 }, @@ -1036,9 +1074,9 @@ inline constexpr CharRange xid_continue[] = { { 0x3031, 0x3035 }, { 0x3038, 0x303c }, { 0x3041, 0x3096 }, - { 0x3099, 0x309f }, - { 0x30a1, 0x30fa }, - { 0x30fc, 0x30ff }, + { 0x3099, 0x309a }, + { 0x309d, 0x309f }, + { 0x30a1, 0x30ff }, { 0x3105, 0x312f }, { 0x3131, 0x318e }, { 0x31a0, 0x31bf }, @@ -1053,10 +1091,10 @@ inline constexpr CharRange xid_continue[] = { { 0xa67f, 0xa6f1 }, { 0xa717, 0xa71f }, { 0xa722, 0xa788 }, - { 0xa78b, 0xa7ca }, + { 0xa78b, 0xa7cd }, { 0xa7d0, 0xa7d1 }, { 0xa7d3, 0xa7d3 }, - { 0xa7d5, 0xa7d9 }, + { 0xa7d5, 0xa7dc }, { 0xa7f2, 0xa827 }, { 0xa82c, 0xa82c }, { 0xa840, 0xa873 }, @@ -1102,21 +1140,27 @@ inline constexpr CharRange xid_continue[] = { { 0xfb40, 0xfb41 }, { 0xfb43, 0xfb44 }, { 0xfb46, 0xfbb1 }, - { 0xfbd3, 0xfd3d }, + { 0xfbd3, 0xfc5d }, + { 0xfc64, 0xfd3d }, { 0xfd50, 0xfd8f }, { 0xfd92, 0xfdc7 }, - { 0xfdf0, 0xfdfb }, + { 0xfdf0, 0xfdf9 }, { 0xfe00, 0xfe0f }, { 0xfe20, 0xfe2f }, { 0xfe33, 0xfe34 }, { 0xfe4d, 0xfe4f }, - { 0xfe70, 0xfe74 }, - { 0xfe76, 0xfefc }, + { 0xfe71, 0xfe71 }, + { 0xfe73, 0xfe73 }, + { 0xfe77, 0xfe77 }, + { 0xfe79, 0xfe79 }, + { 0xfe7b, 0xfe7b }, + { 0xfe7d, 0xfe7d }, + { 0xfe7f, 0xfefc }, { 0xff10, 0xff19 }, { 0xff21, 0xff3a }, { 0xff3f, 0xff3f }, { 0xff41, 0xff5a }, - { 0xff66, 0xffbe }, + { 0xff65, 0xffbe }, { 0xffc2, 0xffc7 }, { 0xffca, 0xffcf }, { 0xffd2, 0xffd7 }, @@ -1154,6 +1198,7 @@ inline constexpr CharRange xid_continue[] = { { 0x105a3, 0x105b1 }, { 0x105b3, 0x105b9 }, { 0x105bb, 0x105bc }, + { 0x105c0, 0x105f3 }, { 0x10600, 0x10736 }, { 0x10740, 0x10755 }, { 0x10760, 0x10767 }, @@ -1194,10 +1239,14 @@ inline constexpr CharRange xid_continue[] = { { 0x10cc0, 0x10cf2 }, { 0x10d00, 0x10d27 }, { 0x10d30, 0x10d39 }, + { 0x10d40, 0x10d65 }, + { 0x10d69, 0x10d6d }, + { 0x10d6f, 0x10d85 }, { 0x10e80, 0x10ea9 }, { 0x10eab, 0x10eac }, { 0x10eb0, 0x10eb1 }, - { 0x10f00, 0x10f1c }, + { 0x10ec2, 0x10ec4 }, + { 0x10efc, 0x10f1c }, { 0x10f27, 0x10f27 }, { 0x10f30, 0x10f50 }, { 0x10f70, 0x10f85 }, @@ -1220,7 +1269,7 @@ inline constexpr CharRange xid_continue[] = { { 0x111dc, 0x111dc }, { 0x11200, 0x11211 }, { 0x11213, 0x11237 }, - { 0x1123e, 0x1123e }, + { 0x1123e, 0x11241 }, { 0x11280, 0x11286 }, { 0x11288, 0x11288 }, { 0x1128a, 0x1128d }, @@ -1243,6 +1292,16 @@ inline constexpr CharRange xid_continue[] = { { 0x1135d, 0x11363 }, { 0x11366, 0x1136c }, { 0x11370, 0x11374 }, + { 0x11380, 0x11389 }, + { 0x1138b, 0x1138b }, + { 0x1138e, 0x1138e }, + { 0x11390, 0x113b5 }, + { 0x113b7, 0x113c0 }, + { 0x113c2, 0x113c2 }, + { 0x113c5, 0x113c5 }, + { 0x113c7, 0x113ca }, + { 0x113cc, 0x113d3 }, + { 0x113e1, 0x113e2 }, { 0x11400, 0x1144a }, { 0x11450, 0x11459 }, { 0x1145e, 0x11461 }, @@ -1257,6 +1316,7 @@ inline constexpr CharRange xid_continue[] = { { 0x11650, 0x11659 }, { 0x11680, 0x116b8 }, { 0x116c0, 0x116c9 }, + { 0x116d0, 0x116e3 }, { 0x11700, 0x1171a }, { 0x1171d, 0x1172b }, { 0x11730, 0x11739 }, @@ -1280,6 +1340,8 @@ inline constexpr CharRange xid_continue[] = { { 0x11a50, 0x11a99 }, { 0x11a9d, 0x11a9d }, { 0x11ab0, 0x11af8 }, + { 0x11bc0, 0x11be0 }, + { 0x11bf0, 0x11bf9 }, { 0x11c00, 0x11c08 }, { 0x11c0a, 0x11c36 }, { 0x11c38, 0x11c40 }, @@ -1301,13 +1363,20 @@ inline constexpr CharRange xid_continue[] = { { 0x11d93, 0x11d98 }, { 0x11da0, 0x11da9 }, { 0x11ee0, 0x11ef6 }, + { 0x11f00, 0x11f10 }, + { 0x11f12, 0x11f3a }, + { 0x11f3e, 0x11f42 }, + { 0x11f50, 0x11f5a }, { 0x11fb0, 0x11fb0 }, { 0x12000, 0x12399 }, { 0x12400, 0x1246e }, { 0x12480, 0x12543 }, { 0x12f90, 0x12ff0 }, - { 0x13000, 0x1342e }, + { 0x13000, 0x1342f }, + { 0x13440, 0x13455 }, + { 0x13460, 0x143fa }, { 0x14400, 0x14646 }, + { 0x16100, 0x16139 }, { 0x16800, 0x16a38 }, { 0x16a40, 0x16a5e }, { 0x16a60, 0x16a69 }, @@ -1320,6 +1389,8 @@ inline constexpr CharRange xid_continue[] = { { 0x16b50, 0x16b59 }, { 0x16b63, 0x16b77 }, { 0x16b7d, 0x16b8f }, + { 0x16d40, 0x16d6c }, + { 0x16d70, 0x16d79 }, { 0x16e40, 0x16e7f }, { 0x16f00, 0x16f4a }, { 0x16f4f, 0x16f87 }, @@ -1329,12 +1400,14 @@ inline constexpr CharRange xid_continue[] = { { 0x16ff0, 0x16ff1 }, { 0x17000, 0x187f7 }, { 0x18800, 0x18cd5 }, - { 0x18d00, 0x18d08 }, + { 0x18cff, 0x18d08 }, { 0x1aff0, 0x1aff3 }, { 0x1aff5, 0x1affb }, { 0x1affd, 0x1affe }, { 0x1b000, 0x1b122 }, + { 0x1b132, 0x1b132 }, { 0x1b150, 0x1b152 }, + { 0x1b155, 0x1b155 }, { 0x1b164, 0x1b167 }, { 0x1b170, 0x1b2fb }, { 0x1bc00, 0x1bc6a }, @@ -1342,6 +1415,7 @@ inline constexpr CharRange xid_continue[] = { { 0x1bc80, 0x1bc88 }, { 0x1bc90, 0x1bc99 }, { 0x1bc9d, 0x1bc9e }, + { 0x1ccf0, 0x1ccf9 }, { 0x1cf00, 0x1cf2d }, { 0x1cf30, 0x1cf46 }, { 0x1d165, 0x1d169 }, @@ -1388,17 +1462,22 @@ inline constexpr CharRange xid_continue[] = { { 0x1da9b, 0x1da9f }, { 0x1daa1, 0x1daaf }, { 0x1df00, 0x1df1e }, + { 0x1df25, 0x1df2a }, { 0x1e000, 0x1e006 }, { 0x1e008, 0x1e018 }, { 0x1e01b, 0x1e021 }, { 0x1e023, 0x1e024 }, { 0x1e026, 0x1e02a }, + { 0x1e030, 0x1e06d }, + { 0x1e08f, 0x1e08f }, { 0x1e100, 0x1e12c }, { 0x1e130, 0x1e13d }, { 0x1e140, 0x1e149 }, { 0x1e14e, 0x1e14e }, { 0x1e290, 0x1e2ae }, { 0x1e2c0, 0x1e2f9 }, + { 0x1e4d0, 0x1e4f9 }, + { 0x1e5d0, 0x1e5fa }, { 0x1e7e0, 0x1e7e6 }, { 0x1e7e8, 0x1e7eb }, { 0x1e7ed, 0x1e7ee }, @@ -1442,16 +1521,18 @@ inline constexpr CharRange xid_continue[] = { { 0x1eeab, 0x1eebb }, { 0x1fbf0, 0x1fbf9 }, { 0x20000, 0x2a6df }, - { 0x2a700, 0x2b738 }, + { 0x2a700, 0x2b739 }, { 0x2b740, 0x2b81d }, { 0x2b820, 0x2cea1 }, { 0x2ceb0, 0x2ebe0 }, + { 0x2ebf0, 0x2ee5d }, { 0x2f800, 0x2fa1d }, { 0x30000, 0x3134a }, + { 0x31350, 0x323af }, { 0xe0100, 0xe01ef }, }; -inline constexpr CharRange uppercase_letter[] = { +constexpr inline CharRange uppercase_letter[] = { { 0x41, 0x5a }, { 0xc0, 0xd6 }, { 0xd8, 0xde }, @@ -1728,6 +1809,7 @@ inline constexpr CharRange uppercase_letter[] = { { 0x10c7, 0x10c7 }, { 0x10cd, 0x10cd }, { 0x13a0, 0x13f5 }, + { 0x1c89, 0x1c89 }, { 0x1c90, 0x1cba }, { 0x1cbd, 0x1cbf }, { 0x1e00, 0x1e00 }, @@ -1882,7 +1964,9 @@ inline constexpr CharRange uppercase_letter[] = { { 0x2130, 0x2133 }, { 0x213e, 0x213f }, { 0x2145, 0x2145 }, + { 0x2160, 0x216f }, { 0x2183, 0x2183 }, + { 0x24b6, 0x24cf }, { 0x2c00, 0x2c2f }, { 0x2c60, 0x2c60 }, { 0x2c62, 0x2c64 }, @@ -2052,9 +2136,12 @@ inline constexpr CharRange uppercase_letter[] = { { 0xa7c2, 0xa7c2 }, { 0xa7c4, 0xa7c7 }, { 0xa7c9, 0xa7c9 }, + { 0xa7cb, 0xa7cc }, { 0xa7d0, 0xa7d0 }, { 0xa7d6, 0xa7d6 }, { 0xa7d8, 0xa7d8 }, + { 0xa7da, 0xa7da }, + { 0xa7dc, 0xa7dc }, { 0xa7f5, 0xa7f5 }, { 0xff21, 0xff3a }, { 0x10400, 0x10427 }, @@ -2064,6 +2151,7 @@ inline constexpr CharRange uppercase_letter[] = { { 0x1058c, 0x10592 }, { 0x10594, 0x10595 }, { 0x10c80, 0x10cb2 }, + { 0x10d50, 0x10d65 }, { 0x118a0, 0x118bf }, { 0x16e40, 0x16e5f }, { 0x1d400, 0x1d419 }, @@ -2098,11 +2186,16 @@ inline constexpr CharRange uppercase_letter[] = { { 0x1d790, 0x1d7a8 }, { 0x1d7ca, 0x1d7ca }, { 0x1e900, 0x1e921 }, + { 0x1f130, 0x1f149 }, + { 0x1f150, 0x1f169 }, + { 0x1f170, 0x1f189 }, }; -inline constexpr CharRange lowercase_letter[] = { +constexpr inline CharRange lowercase_letter[] = { { 0x61, 0x7a }, + { 0xaa, 0xaa }, { 0xb5, 0xb5 }, + { 0xba, 0xba }, { 0xdf, 0xf6 }, { 0xf8, 0xff }, { 0x101, 0x101 }, @@ -2246,11 +2339,14 @@ inline constexpr CharRange lowercase_letter[] = { { 0x24b, 0x24b }, { 0x24d, 0x24d }, { 0x24f, 0x293 }, - { 0x295, 0x2af }, + { 0x295, 0x2b8 }, + { 0x2c0, 0x2c1 }, + { 0x2e0, 0x2e4 }, + { 0x345, 0x345 }, { 0x371, 0x371 }, { 0x373, 0x373 }, { 0x377, 0x377 }, - { 0x37b, 0x37d }, + { 0x37a, 0x37d }, { 0x390, 0x390 }, { 0x3ac, 0x3ce }, { 0x3d0, 0x3d1 }, @@ -2372,12 +2468,11 @@ inline constexpr CharRange lowercase_letter[] = { { 0x52f, 0x52f }, { 0x560, 0x588 }, { 0x10d0, 0x10fa }, - { 0x10fd, 0x10ff }, + { 0x10fc, 0x10ff }, { 0x13f8, 0x13fd }, { 0x1c80, 0x1c88 }, - { 0x1d00, 0x1d2b }, - { 0x1d6b, 0x1d77 }, - { 0x1d79, 0x1d9a }, + { 0x1c8a, 0x1c8a }, + { 0x1d00, 0x1dbf }, { 0x1e01, 0x1e01 }, { 0x1e03, 0x1e03 }, { 0x1e05, 0x1e05 }, @@ -2522,6 +2617,9 @@ inline constexpr CharRange lowercase_letter[] = { { 0x1fe0, 0x1fe7 }, { 0x1ff2, 0x1ff4 }, { 0x1ff6, 0x1ff7 }, + { 0x2071, 0x2071 }, + { 0x207f, 0x207f }, + { 0x2090, 0x209c }, { 0x210a, 0x210a }, { 0x210e, 0x210f }, { 0x2113, 0x2113 }, @@ -2531,7 +2629,9 @@ inline constexpr CharRange lowercase_letter[] = { { 0x213c, 0x213d }, { 0x2146, 0x2149 }, { 0x214e, 0x214e }, + { 0x2170, 0x217f }, { 0x2184, 0x2184 }, + { 0x24d0, 0x24e9 }, { 0x2c30, 0x2c5f }, { 0x2c61, 0x2c61 }, { 0x2c65, 0x2c66 }, @@ -2540,7 +2640,7 @@ inline constexpr CharRange lowercase_letter[] = { { 0x2c6c, 0x2c6c }, { 0x2c71, 0x2c71 }, { 0x2c73, 0x2c74 }, - { 0x2c76, 0x2c7b }, + { 0x2c76, 0x2c7d }, { 0x2c81, 0x2c81 }, { 0x2c83, 0x2c83 }, { 0x2c85, 0x2c85 }, @@ -2633,7 +2733,7 @@ inline constexpr CharRange lowercase_letter[] = { { 0xa695, 0xa695 }, { 0xa697, 0xa697 }, { 0xa699, 0xa699 }, - { 0xa69b, 0xa69b }, + { 0xa69b, 0xa69d }, { 0xa723, 0xa723 }, { 0xa725, 0xa725 }, { 0xa727, 0xa727 }, @@ -2671,8 +2771,7 @@ inline constexpr CharRange lowercase_letter[] = { { 0xa769, 0xa769 }, { 0xa76b, 0xa76b }, { 0xa76d, 0xa76d }, - { 0xa76f, 0xa76f }, - { 0xa771, 0xa778 }, + { 0xa76f, 0xa778 }, { 0xa77a, 0xa77a }, { 0xa77c, 0xa77c }, { 0xa77f, 0xa77f }, @@ -2705,15 +2804,18 @@ inline constexpr CharRange lowercase_letter[] = { { 0xa7c3, 0xa7c3 }, { 0xa7c8, 0xa7c8 }, { 0xa7ca, 0xa7ca }, + { 0xa7cd, 0xa7cd }, { 0xa7d1, 0xa7d1 }, { 0xa7d3, 0xa7d3 }, { 0xa7d5, 0xa7d5 }, { 0xa7d7, 0xa7d7 }, { 0xa7d9, 0xa7d9 }, + { 0xa7db, 0xa7db }, + { 0xa7f2, 0xa7f4 }, { 0xa7f6, 0xa7f6 }, - { 0xa7fa, 0xa7fa }, + { 0xa7f8, 0xa7fa }, { 0xab30, 0xab5a }, - { 0xab60, 0xab68 }, + { 0xab5c, 0xab69 }, { 0xab70, 0xabbf }, { 0xfb00, 0xfb06 }, { 0xfb13, 0xfb17 }, @@ -2724,7 +2826,12 @@ inline constexpr CharRange lowercase_letter[] = { { 0x105a3, 0x105b1 }, { 0x105b3, 0x105b9 }, { 0x105bb, 0x105bc }, + { 0x10780, 0x10780 }, + { 0x10783, 0x10785 }, + { 0x10787, 0x107b0 }, + { 0x107b2, 0x107ba }, { 0x10cc0, 0x10cf2 }, + { 0x10d70, 0x10d85 }, { 0x118c0, 0x118df }, { 0x16e60, 0x16e7f }, { 0x1d41a, 0x1d433 }, @@ -2758,10 +2865,11 @@ inline constexpr CharRange lowercase_letter[] = { { 0x1df00, 0x1df09 }, { 0x1df0b, 0x1df1e }, { 0x1df25, 0x1df2a }, + { 0x1e030, 0x1e06d }, { 0x1e922, 0x1e943 }, }; -inline constexpr CharRange unicode_letter[] = { +constexpr inline CharRange unicode_letter[] = { { 0x41, 0x5a }, { 0x61, 0x7a }, { 0xaa, 0xaa }, @@ -2774,7 +2882,8 @@ inline constexpr CharRange unicode_letter[] = { { 0x2e0, 0x2e4 }, { 0x2ec, 0x2ec }, { 0x2ee, 0x2ee }, - { 0x370, 0x374 }, + { 0x345, 0x345 }, + { 0x363, 0x374 }, { 0x376, 0x377 }, { 0x37a, 0x37d }, { 0x37f, 0x37f }, @@ -2788,49 +2897,58 @@ inline constexpr CharRange unicode_letter[] = { { 0x531, 0x556 }, { 0x559, 0x559 }, { 0x560, 0x588 }, + { 0x5b0, 0x5bd }, + { 0x5bf, 0x5bf }, + { 0x5c1, 0x5c2 }, + { 0x5c4, 0x5c5 }, + { 0x5c7, 0x5c7 }, { 0x5d0, 0x5ea }, { 0x5ef, 0x5f2 }, - { 0x620, 0x64a }, - { 0x66e, 0x66f }, - { 0x671, 0x6d3 }, - { 0x6d5, 0x6d5 }, - { 0x6e5, 0x6e6 }, - { 0x6ee, 0x6ef }, + { 0x610, 0x61a }, + { 0x620, 0x657 }, + { 0x659, 0x65f }, + { 0x66e, 0x6d3 }, + { 0x6d5, 0x6dc }, + { 0x6e1, 0x6e8 }, + { 0x6ed, 0x6ef }, { 0x6fa, 0x6fc }, { 0x6ff, 0x6ff }, - { 0x710, 0x710 }, - { 0x712, 0x72f }, - { 0x74d, 0x7a5 }, - { 0x7b1, 0x7b1 }, + { 0x710, 0x73f }, + { 0x74d, 0x7b1 }, { 0x7ca, 0x7ea }, { 0x7f4, 0x7f5 }, { 0x7fa, 0x7fa }, - { 0x800, 0x815 }, - { 0x81a, 0x81a }, - { 0x824, 0x824 }, - { 0x828, 0x828 }, + { 0x800, 0x817 }, + { 0x81a, 0x82c }, { 0x840, 0x858 }, { 0x860, 0x86a }, { 0x870, 0x887 }, { 0x889, 0x88e }, + { 0x897, 0x897 }, { 0x8a0, 0x8c9 }, - { 0x904, 0x939 }, - { 0x93d, 0x93d }, - { 0x950, 0x950 }, - { 0x958, 0x961 }, - { 0x971, 0x980 }, + { 0x8d4, 0x8df }, + { 0x8e3, 0x8e9 }, + { 0x8f0, 0x93b }, + { 0x93d, 0x94c }, + { 0x94e, 0x950 }, + { 0x955, 0x963 }, + { 0x971, 0x983 }, { 0x985, 0x98c }, { 0x98f, 0x990 }, { 0x993, 0x9a8 }, { 0x9aa, 0x9b0 }, { 0x9b2, 0x9b2 }, { 0x9b6, 0x9b9 }, - { 0x9bd, 0x9bd }, + { 0x9bd, 0x9c4 }, + { 0x9c7, 0x9c8 }, + { 0x9cb, 0x9cc }, { 0x9ce, 0x9ce }, + { 0x9d7, 0x9d7 }, { 0x9dc, 0x9dd }, - { 0x9df, 0x9e1 }, + { 0x9df, 0x9e3 }, { 0x9f0, 0x9f1 }, { 0x9fc, 0x9fc }, + { 0xa01, 0xa03 }, { 0xa05, 0xa0a }, { 0xa0f, 0xa10 }, { 0xa13, 0xa28 }, @@ -2838,30 +2956,41 @@ inline constexpr CharRange unicode_letter[] = { { 0xa32, 0xa33 }, { 0xa35, 0xa36 }, { 0xa38, 0xa39 }, + { 0xa3e, 0xa42 }, + { 0xa47, 0xa48 }, + { 0xa4b, 0xa4c }, + { 0xa51, 0xa51 }, { 0xa59, 0xa5c }, { 0xa5e, 0xa5e }, - { 0xa72, 0xa74 }, + { 0xa70, 0xa75 }, + { 0xa81, 0xa83 }, { 0xa85, 0xa8d }, { 0xa8f, 0xa91 }, { 0xa93, 0xaa8 }, { 0xaaa, 0xab0 }, { 0xab2, 0xab3 }, { 0xab5, 0xab9 }, - { 0xabd, 0xabd }, + { 0xabd, 0xac5 }, + { 0xac7, 0xac9 }, + { 0xacb, 0xacc }, { 0xad0, 0xad0 }, - { 0xae0, 0xae1 }, - { 0xaf9, 0xaf9 }, + { 0xae0, 0xae3 }, + { 0xaf9, 0xafc }, + { 0xb01, 0xb03 }, { 0xb05, 0xb0c }, { 0xb0f, 0xb10 }, { 0xb13, 0xb28 }, { 0xb2a, 0xb30 }, { 0xb32, 0xb33 }, { 0xb35, 0xb39 }, - { 0xb3d, 0xb3d }, + { 0xb3d, 0xb44 }, + { 0xb47, 0xb48 }, + { 0xb4b, 0xb4c }, + { 0xb56, 0xb57 }, { 0xb5c, 0xb5d }, - { 0xb5f, 0xb61 }, + { 0xb5f, 0xb63 }, { 0xb71, 0xb71 }, - { 0xb83, 0xb83 }, + { 0xb82, 0xb83 }, { 0xb85, 0xb8a }, { 0xb8e, 0xb90 }, { 0xb92, 0xb95 }, @@ -2871,65 +3000,80 @@ inline constexpr CharRange unicode_letter[] = { { 0xba3, 0xba4 }, { 0xba8, 0xbaa }, { 0xbae, 0xbb9 }, + { 0xbbe, 0xbc2 }, + { 0xbc6, 0xbc8 }, + { 0xbca, 0xbcc }, { 0xbd0, 0xbd0 }, - { 0xc05, 0xc0c }, + { 0xbd7, 0xbd7 }, + { 0xc00, 0xc0c }, { 0xc0e, 0xc10 }, { 0xc12, 0xc28 }, { 0xc2a, 0xc39 }, - { 0xc3d, 0xc3d }, + { 0xc3d, 0xc44 }, + { 0xc46, 0xc48 }, + { 0xc4a, 0xc4c }, + { 0xc55, 0xc56 }, { 0xc58, 0xc5a }, { 0xc5d, 0xc5d }, - { 0xc60, 0xc61 }, - { 0xc80, 0xc80 }, + { 0xc60, 0xc63 }, + { 0xc80, 0xc83 }, { 0xc85, 0xc8c }, { 0xc8e, 0xc90 }, { 0xc92, 0xca8 }, { 0xcaa, 0xcb3 }, { 0xcb5, 0xcb9 }, - { 0xcbd, 0xcbd }, + { 0xcbd, 0xcc4 }, + { 0xcc6, 0xcc8 }, + { 0xcca, 0xccc }, + { 0xcd5, 0xcd6 }, { 0xcdd, 0xcde }, - { 0xce0, 0xce1 }, - { 0xcf1, 0xcf2 }, - { 0xd04, 0xd0c }, + { 0xce0, 0xce3 }, + { 0xcf1, 0xcf3 }, + { 0xd00, 0xd0c }, { 0xd0e, 0xd10 }, { 0xd12, 0xd3a }, - { 0xd3d, 0xd3d }, + { 0xd3d, 0xd44 }, + { 0xd46, 0xd48 }, + { 0xd4a, 0xd4c }, { 0xd4e, 0xd4e }, - { 0xd54, 0xd56 }, - { 0xd5f, 0xd61 }, + { 0xd54, 0xd57 }, + { 0xd5f, 0xd63 }, { 0xd7a, 0xd7f }, + { 0xd81, 0xd83 }, { 0xd85, 0xd96 }, { 0xd9a, 0xdb1 }, { 0xdb3, 0xdbb }, { 0xdbd, 0xdbd }, { 0xdc0, 0xdc6 }, - { 0xe01, 0xe30 }, - { 0xe32, 0xe33 }, + { 0xdcf, 0xdd4 }, + { 0xdd6, 0xdd6 }, + { 0xdd8, 0xddf }, + { 0xdf2, 0xdf3 }, + { 0xe01, 0xe3a }, { 0xe40, 0xe46 }, + { 0xe4d, 0xe4d }, { 0xe81, 0xe82 }, { 0xe84, 0xe84 }, { 0xe86, 0xe8a }, { 0xe8c, 0xea3 }, { 0xea5, 0xea5 }, - { 0xea7, 0xeb0 }, - { 0xeb2, 0xeb3 }, - { 0xebd, 0xebd }, + { 0xea7, 0xeb9 }, + { 0xebb, 0xebd }, { 0xec0, 0xec4 }, { 0xec6, 0xec6 }, + { 0xecd, 0xecd }, { 0xedc, 0xedf }, { 0xf00, 0xf00 }, { 0xf40, 0xf47 }, { 0xf49, 0xf6c }, - { 0xf88, 0xf8c }, - { 0x1000, 0x102a }, - { 0x103f, 0x103f }, - { 0x1050, 0x1055 }, - { 0x105a, 0x105d }, - { 0x1061, 0x1061 }, - { 0x1065, 0x1066 }, - { 0x106e, 0x1070 }, - { 0x1075, 0x1081 }, - { 0x108e, 0x108e }, + { 0xf71, 0xf83 }, + { 0xf88, 0xf97 }, + { 0xf99, 0xfbc }, + { 0x1000, 0x1036 }, + { 0x1038, 0x1038 }, + { 0x103b, 0x103f }, + { 0x1050, 0x108f }, + { 0x109a, 0x109d }, { 0x10a0, 0x10c5 }, { 0x10c7, 0x10c7 }, { 0x10cd, 0x10cd }, @@ -2957,37 +3101,44 @@ inline constexpr CharRange unicode_letter[] = { { 0x166f, 0x167f }, { 0x1681, 0x169a }, { 0x16a0, 0x16ea }, - { 0x16f1, 0x16f8 }, - { 0x1700, 0x1711 }, - { 0x171f, 0x1731 }, - { 0x1740, 0x1751 }, + { 0x16ee, 0x16f8 }, + { 0x1700, 0x1713 }, + { 0x171f, 0x1733 }, + { 0x1740, 0x1753 }, { 0x1760, 0x176c }, { 0x176e, 0x1770 }, + { 0x1772, 0x1773 }, { 0x1780, 0x17b3 }, + { 0x17b6, 0x17c8 }, { 0x17d7, 0x17d7 }, { 0x17dc, 0x17dc }, { 0x1820, 0x1878 }, - { 0x1880, 0x1884 }, - { 0x1887, 0x18a8 }, - { 0x18aa, 0x18aa }, + { 0x1880, 0x18aa }, { 0x18b0, 0x18f5 }, { 0x1900, 0x191e }, + { 0x1920, 0x192b }, + { 0x1930, 0x1938 }, { 0x1950, 0x196d }, { 0x1970, 0x1974 }, { 0x1980, 0x19ab }, { 0x19b0, 0x19c9 }, - { 0x1a00, 0x1a16 }, - { 0x1a20, 0x1a54 }, + { 0x1a00, 0x1a1b }, + { 0x1a20, 0x1a5e }, + { 0x1a61, 0x1a74 }, { 0x1aa7, 0x1aa7 }, - { 0x1b05, 0x1b33 }, + { 0x1abf, 0x1ac0 }, + { 0x1acc, 0x1ace }, + { 0x1b00, 0x1b33 }, + { 0x1b35, 0x1b43 }, { 0x1b45, 0x1b4c }, - { 0x1b83, 0x1ba0 }, - { 0x1bae, 0x1baf }, + { 0x1b80, 0x1ba9 }, + { 0x1bac, 0x1baf }, { 0x1bba, 0x1be5 }, - { 0x1c00, 0x1c23 }, + { 0x1be7, 0x1bf1 }, + { 0x1c00, 0x1c36 }, { 0x1c4d, 0x1c4f }, { 0x1c5a, 0x1c7d }, - { 0x1c80, 0x1c88 }, + { 0x1c80, 0x1c8a }, { 0x1c90, 0x1cba }, { 0x1cbd, 0x1cbf }, { 0x1ce9, 0x1cec }, @@ -2995,6 +3146,7 @@ inline constexpr CharRange unicode_letter[] = { { 0x1cf5, 0x1cf6 }, { 0x1cfa, 0x1cfa }, { 0x1d00, 0x1dbf }, + { 0x1dd3, 0x1df4 }, { 0x1e00, 0x1f15 }, { 0x1f18, 0x1f1d }, { 0x1f20, 0x1f45 }, @@ -3030,7 +3182,8 @@ inline constexpr CharRange unicode_letter[] = { { 0x213c, 0x213f }, { 0x2145, 0x2149 }, { 0x214e, 0x214e }, - { 0x2183, 0x2184 }, + { 0x2160, 0x2188 }, + { 0x24b6, 0x24e9 }, { 0x2c00, 0x2ce4 }, { 0x2ceb, 0x2cee }, { 0x2cf2, 0x2cf3 }, @@ -3048,10 +3201,12 @@ inline constexpr CharRange unicode_letter[] = { { 0x2dc8, 0x2dce }, { 0x2dd0, 0x2dd6 }, { 0x2dd8, 0x2dde }, + { 0x2de0, 0x2dff }, { 0x2e2f, 0x2e2f }, - { 0x3005, 0x3006 }, + { 0x3005, 0x3007 }, + { 0x3021, 0x3029 }, { 0x3031, 0x3035 }, - { 0x303b, 0x303c }, + { 0x3038, 0x303c }, { 0x3041, 0x3096 }, { 0x309d, 0x309f }, { 0x30a1, 0x30fa }, @@ -3067,45 +3222,39 @@ inline constexpr CharRange unicode_letter[] = { { 0xa610, 0xa61f }, { 0xa62a, 0xa62b }, { 0xa640, 0xa66e }, - { 0xa67f, 0xa69d }, - { 0xa6a0, 0xa6e5 }, + { 0xa674, 0xa67b }, + { 0xa67f, 0xa6ef }, { 0xa717, 0xa71f }, { 0xa722, 0xa788 }, - { 0xa78b, 0xa7ca }, + { 0xa78b, 0xa7cd }, { 0xa7d0, 0xa7d1 }, { 0xa7d3, 0xa7d3 }, - { 0xa7d5, 0xa7d9 }, - { 0xa7f2, 0xa801 }, - { 0xa803, 0xa805 }, - { 0xa807, 0xa80a }, - { 0xa80c, 0xa822 }, + { 0xa7d5, 0xa7dc }, + { 0xa7f2, 0xa805 }, + { 0xa807, 0xa827 }, { 0xa840, 0xa873 }, - { 0xa882, 0xa8b3 }, + { 0xa880, 0xa8c3 }, + { 0xa8c5, 0xa8c5 }, { 0xa8f2, 0xa8f7 }, { 0xa8fb, 0xa8fb }, - { 0xa8fd, 0xa8fe }, - { 0xa90a, 0xa925 }, - { 0xa930, 0xa946 }, + { 0xa8fd, 0xa8ff }, + { 0xa90a, 0xa92a }, + { 0xa930, 0xa952 }, { 0xa960, 0xa97c }, - { 0xa984, 0xa9b2 }, + { 0xa980, 0xa9b2 }, + { 0xa9b4, 0xa9bf }, { 0xa9cf, 0xa9cf }, - { 0xa9e0, 0xa9e4 }, - { 0xa9e6, 0xa9ef }, + { 0xa9e0, 0xa9ef }, { 0xa9fa, 0xa9fe }, - { 0xaa00, 0xaa28 }, - { 0xaa40, 0xaa42 }, - { 0xaa44, 0xaa4b }, + { 0xaa00, 0xaa36 }, + { 0xaa40, 0xaa4d }, { 0xaa60, 0xaa76 }, - { 0xaa7a, 0xaa7a }, - { 0xaa7e, 0xaaaf }, - { 0xaab1, 0xaab1 }, - { 0xaab5, 0xaab6 }, - { 0xaab9, 0xaabd }, + { 0xaa7a, 0xaabe }, { 0xaac0, 0xaac0 }, { 0xaac2, 0xaac2 }, { 0xaadb, 0xaadd }, - { 0xaae0, 0xaaea }, - { 0xaaf2, 0xaaf4 }, + { 0xaae0, 0xaaef }, + { 0xaaf2, 0xaaf5 }, { 0xab01, 0xab06 }, { 0xab09, 0xab0e }, { 0xab11, 0xab16 }, @@ -3113,7 +3262,7 @@ inline constexpr CharRange unicode_letter[] = { { 0xab28, 0xab2e }, { 0xab30, 0xab5a }, { 0xab5c, 0xab69 }, - { 0xab70, 0xabe2 }, + { 0xab70, 0xabea }, { 0xac00, 0xd7a3 }, { 0xd7b0, 0xd7c6 }, { 0xd7cb, 0xd7fb }, @@ -3121,8 +3270,7 @@ inline constexpr CharRange unicode_letter[] = { { 0xfa70, 0xfad9 }, { 0xfb00, 0xfb06 }, { 0xfb13, 0xfb17 }, - { 0xfb1d, 0xfb1d }, - { 0xfb1f, 0xfb28 }, + { 0xfb1d, 0xfb28 }, { 0xfb2a, 0xfb36 }, { 0xfb38, 0xfb3c }, { 0xfb3e, 0xfb3e }, @@ -3149,15 +3297,16 @@ inline constexpr CharRange unicode_letter[] = { { 0x1003f, 0x1004d }, { 0x10050, 0x1005d }, { 0x10080, 0x100fa }, + { 0x10140, 0x10174 }, { 0x10280, 0x1029c }, { 0x102a0, 0x102d0 }, { 0x10300, 0x1031f }, - { 0x1032d, 0x10340 }, - { 0x10342, 0x10349 }, - { 0x10350, 0x10375 }, + { 0x1032d, 0x1034a }, + { 0x10350, 0x1037a }, { 0x10380, 0x1039d }, { 0x103a0, 0x103c3 }, { 0x103c8, 0x103cf }, + { 0x103d1, 0x103d5 }, { 0x10400, 0x1049d }, { 0x104b0, 0x104d3 }, { 0x104d8, 0x104fb }, @@ -3171,6 +3320,7 @@ inline constexpr CharRange unicode_letter[] = { { 0x105a3, 0x105b1 }, { 0x105b3, 0x105b9 }, { 0x105bb, 0x105bc }, + { 0x105c0, 0x105f3 }, { 0x10600, 0x10736 }, { 0x10740, 0x10755 }, { 0x10760, 0x10767 }, @@ -3191,8 +3341,9 @@ inline constexpr CharRange unicode_letter[] = { { 0x10920, 0x10939 }, { 0x10980, 0x109b7 }, { 0x109be, 0x109bf }, - { 0x10a00, 0x10a00 }, - { 0x10a10, 0x10a13 }, + { 0x10a00, 0x10a03 }, + { 0x10a05, 0x10a06 }, + { 0x10a0c, 0x10a13 }, { 0x10a15, 0x10a17 }, { 0x10a19, 0x10a35 }, { 0x10a60, 0x10a7c }, @@ -3206,104 +3357,143 @@ inline constexpr CharRange unicode_letter[] = { { 0x10c00, 0x10c48 }, { 0x10c80, 0x10cb2 }, { 0x10cc0, 0x10cf2 }, - { 0x10d00, 0x10d23 }, + { 0x10d00, 0x10d27 }, + { 0x10d4a, 0x10d65 }, + { 0x10d69, 0x10d69 }, + { 0x10d6f, 0x10d85 }, { 0x10e80, 0x10ea9 }, + { 0x10eab, 0x10eac }, { 0x10eb0, 0x10eb1 }, + { 0x10ec2, 0x10ec4 }, + { 0x10efc, 0x10efc }, { 0x10f00, 0x10f1c }, { 0x10f27, 0x10f27 }, { 0x10f30, 0x10f45 }, { 0x10f70, 0x10f81 }, { 0x10fb0, 0x10fc4 }, { 0x10fe0, 0x10ff6 }, - { 0x11003, 0x11037 }, - { 0x11071, 0x11072 }, - { 0x11075, 0x11075 }, - { 0x11083, 0x110af }, + { 0x11000, 0x11045 }, + { 0x11071, 0x11075 }, + { 0x11080, 0x110b8 }, + { 0x110c2, 0x110c2 }, { 0x110d0, 0x110e8 }, - { 0x11103, 0x11126 }, - { 0x11144, 0x11144 }, - { 0x11147, 0x11147 }, + { 0x11100, 0x11132 }, + { 0x11144, 0x11147 }, { 0x11150, 0x11172 }, { 0x11176, 0x11176 }, - { 0x11183, 0x111b2 }, + { 0x11180, 0x111bf }, { 0x111c1, 0x111c4 }, + { 0x111ce, 0x111cf }, { 0x111da, 0x111da }, { 0x111dc, 0x111dc }, { 0x11200, 0x11211 }, - { 0x11213, 0x1122b }, - { 0x1123f, 0x11240 }, + { 0x11213, 0x11234 }, + { 0x11237, 0x11237 }, + { 0x1123e, 0x11241 }, { 0x11280, 0x11286 }, { 0x11288, 0x11288 }, { 0x1128a, 0x1128d }, { 0x1128f, 0x1129d }, { 0x1129f, 0x112a8 }, - { 0x112b0, 0x112de }, + { 0x112b0, 0x112e8 }, + { 0x11300, 0x11303 }, { 0x11305, 0x1130c }, { 0x1130f, 0x11310 }, { 0x11313, 0x11328 }, { 0x1132a, 0x11330 }, { 0x11332, 0x11333 }, { 0x11335, 0x11339 }, - { 0x1133d, 0x1133d }, + { 0x1133d, 0x11344 }, + { 0x11347, 0x11348 }, + { 0x1134b, 0x1134c }, { 0x11350, 0x11350 }, - { 0x1135d, 0x11361 }, - { 0x11400, 0x11434 }, + { 0x11357, 0x11357 }, + { 0x1135d, 0x11363 }, + { 0x11380, 0x11389 }, + { 0x1138b, 0x1138b }, + { 0x1138e, 0x1138e }, + { 0x11390, 0x113b5 }, + { 0x113b7, 0x113c0 }, + { 0x113c2, 0x113c2 }, + { 0x113c5, 0x113c5 }, + { 0x113c7, 0x113ca }, + { 0x113cc, 0x113cd }, + { 0x113d1, 0x113d1 }, + { 0x113d3, 0x113d3 }, + { 0x11400, 0x11441 }, + { 0x11443, 0x11445 }, { 0x11447, 0x1144a }, { 0x1145f, 0x11461 }, - { 0x11480, 0x114af }, + { 0x11480, 0x114c1 }, { 0x114c4, 0x114c5 }, { 0x114c7, 0x114c7 }, - { 0x11580, 0x115ae }, - { 0x115d8, 0x115db }, - { 0x11600, 0x1162f }, + { 0x11580, 0x115b5 }, + { 0x115b8, 0x115be }, + { 0x115d8, 0x115dd }, + { 0x11600, 0x1163e }, + { 0x11640, 0x11640 }, { 0x11644, 0x11644 }, - { 0x11680, 0x116aa }, + { 0x11680, 0x116b5 }, { 0x116b8, 0x116b8 }, { 0x11700, 0x1171a }, + { 0x1171d, 0x1172a }, { 0x11740, 0x11746 }, - { 0x11800, 0x1182b }, + { 0x11800, 0x11838 }, { 0x118a0, 0x118df }, { 0x118ff, 0x11906 }, { 0x11909, 0x11909 }, { 0x1190c, 0x11913 }, { 0x11915, 0x11916 }, - { 0x11918, 0x1192f }, - { 0x1193f, 0x1193f }, - { 0x11941, 0x11941 }, + { 0x11918, 0x11935 }, + { 0x11937, 0x11938 }, + { 0x1193b, 0x1193c }, + { 0x1193f, 0x11942 }, { 0x119a0, 0x119a7 }, - { 0x119aa, 0x119d0 }, + { 0x119aa, 0x119d7 }, + { 0x119da, 0x119df }, { 0x119e1, 0x119e1 }, - { 0x119e3, 0x119e3 }, - { 0x11a00, 0x11a00 }, - { 0x11a0b, 0x11a32 }, - { 0x11a3a, 0x11a3a }, - { 0x11a50, 0x11a50 }, - { 0x11a5c, 0x11a89 }, + { 0x119e3, 0x119e4 }, + { 0x11a00, 0x11a32 }, + { 0x11a35, 0x11a3e }, + { 0x11a50, 0x11a97 }, { 0x11a9d, 0x11a9d }, { 0x11ab0, 0x11af8 }, + { 0x11bc0, 0x11be0 }, { 0x11c00, 0x11c08 }, - { 0x11c0a, 0x11c2e }, + { 0x11c0a, 0x11c36 }, + { 0x11c38, 0x11c3e }, { 0x11c40, 0x11c40 }, { 0x11c72, 0x11c8f }, + { 0x11c92, 0x11ca7 }, + { 0x11ca9, 0x11cb6 }, { 0x11d00, 0x11d06 }, { 0x11d08, 0x11d09 }, - { 0x11d0b, 0x11d30 }, - { 0x11d46, 0x11d46 }, + { 0x11d0b, 0x11d36 }, + { 0x11d3a, 0x11d3a }, + { 0x11d3c, 0x11d3d }, + { 0x11d3f, 0x11d41 }, + { 0x11d43, 0x11d43 }, + { 0x11d46, 0x11d47 }, { 0x11d60, 0x11d65 }, { 0x11d67, 0x11d68 }, - { 0x11d6a, 0x11d89 }, + { 0x11d6a, 0x11d8e }, + { 0x11d90, 0x11d91 }, + { 0x11d93, 0x11d96 }, { 0x11d98, 0x11d98 }, - { 0x11ee0, 0x11ef2 }, - { 0x11f02, 0x11f02 }, - { 0x11f04, 0x11f10 }, - { 0x11f12, 0x11f33 }, + { 0x11ee0, 0x11ef6 }, + { 0x11f00, 0x11f10 }, + { 0x11f12, 0x11f3a }, + { 0x11f3e, 0x11f40 }, { 0x11fb0, 0x11fb0 }, { 0x12000, 0x12399 }, + { 0x12400, 0x1246e }, { 0x12480, 0x12543 }, { 0x12f90, 0x12ff0 }, { 0x13000, 0x1342f }, { 0x13441, 0x13446 }, + { 0x13460, 0x143fa }, { 0x14400, 0x14646 }, + { 0x16100, 0x1612e }, { 0x16800, 0x16a38 }, { 0x16a40, 0x16a5e }, { 0x16a70, 0x16abe }, @@ -3312,15 +3502,17 @@ inline constexpr CharRange unicode_letter[] = { { 0x16b40, 0x16b43 }, { 0x16b63, 0x16b77 }, { 0x16b7d, 0x16b8f }, + { 0x16d40, 0x16d6c }, { 0x16e40, 0x16e7f }, { 0x16f00, 0x16f4a }, - { 0x16f50, 0x16f50 }, - { 0x16f93, 0x16f9f }, + { 0x16f4f, 0x16f87 }, + { 0x16f8f, 0x16f9f }, { 0x16fe0, 0x16fe1 }, { 0x16fe3, 0x16fe3 }, + { 0x16ff0, 0x16ff1 }, { 0x17000, 0x187f7 }, { 0x18800, 0x18cd5 }, - { 0x18d00, 0x18d08 }, + { 0x18cff, 0x18d08 }, { 0x1aff0, 0x1aff3 }, { 0x1aff5, 0x1affb }, { 0x1affd, 0x1affe }, @@ -3334,6 +3526,7 @@ inline constexpr CharRange unicode_letter[] = { { 0x1bc70, 0x1bc7c }, { 0x1bc80, 0x1bc88 }, { 0x1bc90, 0x1bc99 }, + { 0x1bc9e, 0x1bc9e }, { 0x1d400, 0x1d454 }, { 0x1d456, 0x1d49c }, { 0x1d49e, 0x1d49f }, @@ -3366,19 +3559,28 @@ inline constexpr CharRange unicode_letter[] = { { 0x1d7c4, 0x1d7cb }, { 0x1df00, 0x1df1e }, { 0x1df25, 0x1df2a }, + { 0x1e000, 0x1e006 }, + { 0x1e008, 0x1e018 }, + { 0x1e01b, 0x1e021 }, + { 0x1e023, 0x1e024 }, + { 0x1e026, 0x1e02a }, { 0x1e030, 0x1e06d }, + { 0x1e08f, 0x1e08f }, { 0x1e100, 0x1e12c }, { 0x1e137, 0x1e13d }, { 0x1e14e, 0x1e14e }, { 0x1e290, 0x1e2ad }, { 0x1e2c0, 0x1e2eb }, { 0x1e4d0, 0x1e4eb }, + { 0x1e5d0, 0x1e5ed }, + { 0x1e5f0, 0x1e5f0 }, { 0x1e7e0, 0x1e7e6 }, { 0x1e7e8, 0x1e7eb }, { 0x1e7ed, 0x1e7ee }, { 0x1e7f0, 0x1e7fe }, { 0x1e800, 0x1e8c4 }, { 0x1e900, 0x1e943 }, + { 0x1e947, 0x1e947 }, { 0x1e94b, 0x1e94b }, { 0x1ee00, 0x1ee03 }, { 0x1ee05, 0x1ee1f }, @@ -3413,6 +3615,9 @@ inline constexpr CharRange unicode_letter[] = { { 0x1eea1, 0x1eea3 }, { 0x1eea5, 0x1eea9 }, { 0x1eeab, 0x1eebb }, + { 0x1f130, 0x1f149 }, + { 0x1f150, 0x1f169 }, + { 0x1f170, 0x1f189 }, { 0x20000, 0x2a6df }, { 0x2a700, 0x2b739 }, { 0x2b740, 0x2b81d }, diff --git a/core/string/char_utils.h b/core/string/char_utils.h index 4acb81253fcd..62ab4e9584d4 100644 --- a/core/string/char_utils.h +++ b/core/string/char_utils.h @@ -38,97 +38,97 @@ #define BSEARCH_CHAR_RANGE(m_array) \ int low = 0; \ int high = sizeof(m_array) / sizeof(m_array[0]) - 1; \ - int middle; \ + int middle = (low + high) / 2; \ \ while (low <= high) { \ - middle = (low + high) / 2; \ - \ - if (c < m_array[middle].start) { \ + if (p_char < m_array[middle].start) { \ high = middle - 1; \ - } else if (c > m_array[middle].end) { \ + } else if (p_char > m_array[middle].end) { \ low = middle + 1; \ } else { \ return true; \ } \ + \ + middle = (low + high) / 2; \ } \ \ return false -static _FORCE_INLINE_ bool is_unicode_identifier_start(char32_t c) { +constexpr bool is_unicode_identifier_start(char32_t p_char) { BSEARCH_CHAR_RANGE(xid_start); } -static _FORCE_INLINE_ bool is_unicode_identifier_continue(char32_t c) { +constexpr bool is_unicode_identifier_continue(char32_t p_char) { BSEARCH_CHAR_RANGE(xid_continue); } -static _FORCE_INLINE_ bool is_unicode_upper_case(char32_t c) { +constexpr bool is_unicode_upper_case(char32_t p_char) { BSEARCH_CHAR_RANGE(uppercase_letter); } -static _FORCE_INLINE_ bool is_unicode_lower_case(char32_t c) { +constexpr bool is_unicode_lower_case(char32_t p_char) { BSEARCH_CHAR_RANGE(lowercase_letter); } -static _FORCE_INLINE_ bool is_unicode_letter(char32_t c) { +constexpr bool is_unicode_letter(char32_t p_char) { BSEARCH_CHAR_RANGE(unicode_letter); } #undef BSEARCH_CHAR_RANGE -static _FORCE_INLINE_ bool is_ascii_upper_case(char32_t c) { - return (c >= 'A' && c <= 'Z'); +constexpr bool is_ascii_upper_case(char32_t p_char) { + return (p_char >= 'A' && p_char <= 'Z'); } -static _FORCE_INLINE_ bool is_ascii_lower_case(char32_t c) { - return (c >= 'a' && c <= 'z'); +constexpr bool is_ascii_lower_case(char32_t p_char) { + return (p_char >= 'a' && p_char <= 'z'); } -static _FORCE_INLINE_ bool is_digit(char32_t c) { - return (c >= '0' && c <= '9'); +constexpr bool is_digit(char32_t p_char) { + return (p_char >= '0' && p_char <= '9'); } -static _FORCE_INLINE_ bool is_hex_digit(char32_t c) { - return (is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')); +constexpr bool is_hex_digit(char32_t p_char) { + return (is_digit(p_char) || (p_char >= 'a' && p_char <= 'f') || (p_char >= 'A' && p_char <= 'F')); } -static _FORCE_INLINE_ bool is_binary_digit(char32_t c) { - return (c == '0' || c == '1'); +constexpr bool is_binary_digit(char32_t p_char) { + return (p_char == '0' || p_char == '1'); } -static _FORCE_INLINE_ bool is_ascii_alphabet_char(char32_t c) { - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +constexpr bool is_ascii_alphabet_char(char32_t p_char) { + return (p_char >= 'a' && p_char <= 'z') || (p_char >= 'A' && p_char <= 'Z'); } -static _FORCE_INLINE_ bool is_ascii_alphanumeric_char(char32_t c) { - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9'); +constexpr bool is_ascii_alphanumeric_char(char32_t p_char) { + return (p_char >= 'a' && p_char <= 'z') || (p_char >= 'A' && p_char <= 'Z') || (p_char >= '0' && p_char <= '9'); } -static _FORCE_INLINE_ bool is_ascii_identifier_char(char32_t c) { - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_'; +constexpr bool is_ascii_identifier_char(char32_t p_char) { + return (p_char >= 'a' && p_char <= 'z') || (p_char >= 'A' && p_char <= 'Z') || (p_char >= '0' && p_char <= '9') || p_char == '_'; } -static _FORCE_INLINE_ bool is_symbol(char32_t c) { - return c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t' || c == ' '); +constexpr bool is_symbol(char32_t p_char) { + return p_char != '_' && ((p_char >= '!' && p_char <= '/') || (p_char >= ':' && p_char <= '@') || (p_char >= '[' && p_char <= '`') || (p_char >= '{' && p_char <= '~') || p_char == '\t' || p_char == ' '); } -static _FORCE_INLINE_ bool is_control(char32_t p_char) { +constexpr bool is_control(char32_t p_char) { return (p_char <= 0x001f) || (p_char >= 0x007f && p_char <= 0x009f); } -static _FORCE_INLINE_ bool is_whitespace(char32_t p_char) { +constexpr bool is_whitespace(char32_t p_char) { return (p_char == ' ') || (p_char == 0x00a0) || (p_char == 0x1680) || (p_char >= 0x2000 && p_char <= 0x200a) || (p_char == 0x202f) || (p_char == 0x205f) || (p_char == 0x3000) || (p_char == 0x2028) || (p_char == 0x2029) || (p_char >= 0x0009 && p_char <= 0x000d) || (p_char == 0x0085); } -static _FORCE_INLINE_ bool is_linebreak(char32_t p_char) { +constexpr bool is_linebreak(char32_t p_char) { return (p_char >= 0x000a && p_char <= 0x000d) || (p_char == 0x0085) || (p_char == 0x2028) || (p_char == 0x2029); } -static _FORCE_INLINE_ bool is_punct(char32_t p_char) { +constexpr bool is_punct(char32_t p_char) { return (p_char >= ' ' && p_char <= '/') || (p_char >= ':' && p_char <= '@') || (p_char >= '[' && p_char <= '^') || (p_char == '`') || (p_char >= '{' && p_char <= '~') || (p_char >= 0x2000 && p_char <= 0x206f) || (p_char >= 0x3000 && p_char <= 0x303f); } -static _FORCE_INLINE_ bool is_underscore(char32_t p_char) { +constexpr bool is_underscore(char32_t p_char) { return (p_char == '_'); }