From d3b2e8a438486662e956e6999599b1c77b704fb6 Mon Sep 17 00:00:00 2001 From: "Node.js GitHub Bot" Date: Mon, 10 Apr 2023 18:57:10 +0100 Subject: [PATCH] deps: update simdutf to 3.2.7 PR-URL: https://github.com/nodejs/node/pull/47473 Reviewed-By: Yagiz Nizipli Reviewed-By: Luigi Pinca --- deps/simdutf/simdutf.cpp | 1808 +++++++++++++++++++++----------------- deps/simdutf/simdutf.h | 8 +- 2 files changed, 1029 insertions(+), 787 deletions(-) diff --git a/deps/simdutf/simdutf.cpp b/deps/simdutf/simdutf.cpp index 3065bcdfbb3857..b6613d69256a5d 100644 --- a/deps/simdutf/simdutf.cpp +++ b/deps/simdutf/simdutf.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2023-03-30 20:31:03 -0400. Do not edit! */ +/* auto-generated on 2023-04-08 11:21:57 -0400. Do not edit! */ // dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf.cpp /* begin file src/simdutf.cpp */ #include "simdutf.h" @@ -4812,7 +4812,7 @@ simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * return get_active_implementation()->convert_utf16be_to_utf8_with_errors(buf, len, utf8_buffer); } simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) noexcept { - #if BIG_ENDIAN + #if SIMDUTF_IS_BIG_ENDIAN return convert_valid_utf16be_to_utf8(buf, len, utf8_buffer); #else return convert_valid_utf16le_to_utf8(buf, len, utf8_buffer); @@ -5294,31 +5294,31 @@ const uint8_t shufutf8[209][16] = /* number of two + three bytes : 145 */ /* number of two + three + four bytes : 209 */ const uint8_t utf8bigindex[4096][2] = -{ {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, +{ {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {146, 4}, - {0, 12}, + {209, 12}, {149, 4}, {161, 4}, {64, 4}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {147, 5}, - {0, 12}, + {209, 
12}, {150, 5}, {162, 5}, {65, 5}, - {0, 12}, + {209, 12}, {153, 5}, {165, 5}, {67, 5}, @@ -5326,15 +5326,15 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {148, 6}, - {0, 12}, + {209, 12}, {151, 6}, {163, 6}, {66, 6}, - {0, 12}, + {209, 12}, {154, 6}, {166, 6}, {68, 6}, @@ -5342,7 +5342,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {64, 4}, - {0, 12}, + {209, 12}, {157, 6}, {169, 6}, {70, 6}, @@ -5358,15 +5358,15 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {152, 7}, {164, 7}, {145, 3}, - {0, 12}, + {209, 12}, {155, 7}, {167, 7}, {69, 7}, @@ -5374,7 +5374,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {170, 7}, {71, 7}, @@ -5390,8 +5390,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {185, 7}, @@ -5406,7 +5406,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -5422,15 +5422,15 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, - {0, 12}, + {209, 12}, {156, 8}, {168, 8}, {146, 4}, @@ -5438,7 +5438,7 @@ const uint8_t utf8bigindex[4096][2] = {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {159, 8}, {171, 8}, {72, 8}, @@ -5454,8 +5454,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {174, 8}, {148, 6}, {186, 8}, @@ -5470,7 +5470,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {3, 8}, - {0, 12}, + {209, 12}, {157, 6}, {110, 8}, {70, 
6}, @@ -5486,10 +5486,10 @@ const uint8_t utf8bigindex[4096][2] = {17, 8}, {33, 8}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {189, 8}, {152, 7}, {164, 7}, @@ -5502,7 +5502,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {112, 8}, {71, 7}, @@ -5518,8 +5518,8 @@ const uint8_t utf8bigindex[4096][2] = {18, 8}, {34, 8}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {136, 8}, @@ -5534,7 +5534,7 @@ const uint8_t utf8bigindex[4096][2] = {20, 8}, {36, 8}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -5550,23 +5550,23 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {146, 4}, - {0, 12}, + {209, 12}, {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {160, 9}, {172, 9}, {147, 5}, @@ -5582,8 +5582,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {175, 9}, {148, 6}, {187, 9}, @@ -5598,7 +5598,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {64, 4}, - {0, 12}, + {209, 12}, {157, 6}, {111, 9}, {70, 6}, @@ -5614,10 +5614,10 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {190, 9}, {152, 7}, {164, 7}, @@ -5630,7 +5630,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {113, 9}, {71, 7}, @@ -5646,8 +5646,8 @@ const uint8_t utf8bigindex[4096][2] = {19, 9}, {35, 9}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {137, 9}, @@ -5662,7 +5662,7 @@ const uint8_t utf8bigindex[4096][2] = 
{21, 9}, {37, 9}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -5678,13 +5678,13 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, {205, 9}, {156, 8}, @@ -5694,7 +5694,7 @@ const uint8_t utf8bigindex[4096][2] = {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {159, 8}, {115, 9}, {72, 8}, @@ -5710,8 +5710,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {174, 8}, {148, 6}, {139, 9}, @@ -5726,7 +5726,7 @@ const uint8_t utf8bigindex[4096][2] = {22, 9}, {38, 9}, {3, 8}, - {0, 12}, + {209, 12}, {157, 6}, {110, 8}, {70, 6}, @@ -5742,10 +5742,10 @@ const uint8_t utf8bigindex[4096][2] = {17, 8}, {33, 8}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {189, 8}, {152, 7}, {164, 7}, @@ -5758,7 +5758,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {112, 8}, {71, 7}, @@ -5774,8 +5774,8 @@ const uint8_t utf8bigindex[4096][2] = {18, 8}, {34, 8}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {136, 8}, @@ -5790,7 +5790,7 @@ const uint8_t utf8bigindex[4096][2] = {20, 8}, {36, 8}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -5806,31 +5806,31 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {146, 4}, - {0, 12}, + {209, 12}, {149, 4}, {161, 4}, {64, 4}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {147, 5}, - {0, 12}, + {209, 12}, {150, 5}, {162, 5}, {65, 
5}, - {0, 12}, + {209, 12}, {153, 5}, {165, 5}, {67, 5}, @@ -5838,8 +5838,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {176, 10}, {148, 6}, {188, 10}, @@ -5854,7 +5854,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {64, 4}, - {0, 12}, + {209, 12}, {157, 6}, {169, 6}, {70, 6}, @@ -5870,10 +5870,10 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {191, 10}, {152, 7}, {164, 7}, @@ -5886,7 +5886,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {114, 10}, {71, 7}, @@ -5902,8 +5902,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {138, 10}, @@ -5918,7 +5918,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -5934,13 +5934,13 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, {206, 10}, {156, 8}, @@ -5950,7 +5950,7 @@ const uint8_t utf8bigindex[4096][2] = {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {159, 8}, {116, 10}, {72, 8}, @@ -5966,8 +5966,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {174, 8}, {148, 6}, {140, 10}, @@ -5982,7 +5982,7 @@ const uint8_t utf8bigindex[4096][2] = {23, 10}, {39, 10}, {3, 8}, - {0, 12}, + {209, 12}, {157, 6}, {110, 8}, {70, 6}, @@ -5998,10 +5998,10 @@ const uint8_t utf8bigindex[4096][2] = {17, 8}, {33, 8}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {189, 8}, {152, 7}, {164, 7}, @@ -6014,7 +6014,7 @@ 
const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {112, 8}, {71, 7}, @@ -6030,8 +6030,8 @@ const uint8_t utf8bigindex[4096][2] = {18, 8}, {34, 8}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {136, 8}, @@ -6046,7 +6046,7 @@ const uint8_t utf8bigindex[4096][2] = {20, 8}, {36, 8}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -6062,23 +6062,23 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {146, 4}, - {0, 12}, + {209, 12}, {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {160, 9}, {172, 9}, {147, 5}, @@ -6094,8 +6094,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {175, 9}, {148, 6}, {142, 10}, @@ -6110,7 +6110,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {64, 4}, - {0, 12}, + {209, 12}, {157, 6}, {111, 9}, {70, 6}, @@ -6126,10 +6126,10 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {190, 9}, {152, 7}, {164, 7}, @@ -6142,7 +6142,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {113, 9}, {71, 7}, @@ -6158,8 +6158,8 @@ const uint8_t utf8bigindex[4096][2] = {19, 9}, {35, 9}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {137, 9}, @@ -6174,7 +6174,7 @@ const uint8_t utf8bigindex[4096][2] = {21, 9}, {37, 9}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -6190,13 +6190,13 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 
12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, {205, 9}, {156, 8}, @@ -6206,7 +6206,7 @@ const uint8_t utf8bigindex[4096][2] = {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {159, 8}, {115, 9}, {72, 8}, @@ -6222,8 +6222,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {174, 8}, {148, 6}, {139, 9}, @@ -6238,7 +6238,7 @@ const uint8_t utf8bigindex[4096][2] = {22, 9}, {38, 9}, {3, 8}, - {0, 12}, + {209, 12}, {157, 6}, {110, 8}, {70, 6}, @@ -6254,10 +6254,10 @@ const uint8_t utf8bigindex[4096][2] = {17, 8}, {33, 8}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {189, 8}, {152, 7}, {164, 7}, @@ -6270,7 +6270,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {112, 8}, {71, 7}, @@ -6286,8 +6286,8 @@ const uint8_t utf8bigindex[4096][2] = {18, 8}, {34, 8}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {136, 8}, @@ -6302,7 +6302,7 @@ const uint8_t utf8bigindex[4096][2] = {20, 8}, {36, 8}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -6318,31 +6318,31 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {146, 4}, - {0, 12}, + {209, 12}, {149, 4}, {161, 4}, {64, 4}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {147, 5}, - {0, 12}, + {209, 12}, {150, 5}, {162, 5}, {65, 5}, - {0, 12}, + {209, 12}, {153, 5}, {165, 5}, {67, 5}, @@ -6350,15 +6350,15 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {148, 6}, - {0, 12}, 
+ {209, 12}, {151, 6}, {163, 6}, {66, 6}, - {0, 12}, + {209, 12}, {154, 6}, {166, 6}, {68, 6}, @@ -6366,7 +6366,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {64, 4}, - {0, 12}, + {209, 12}, {157, 6}, {169, 6}, {70, 6}, @@ -6382,10 +6382,10 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {192, 11}, {152, 7}, {164, 7}, @@ -6398,7 +6398,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {170, 7}, {71, 7}, @@ -6414,8 +6414,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {185, 7}, @@ -6430,7 +6430,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -6446,13 +6446,13 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, {207, 11}, {156, 8}, @@ -6462,7 +6462,7 @@ const uint8_t utf8bigindex[4096][2] = {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {159, 8}, {117, 11}, {72, 8}, @@ -6478,8 +6478,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {174, 8}, {148, 6}, {141, 11}, @@ -6494,7 +6494,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {3, 8}, - {0, 12}, + {209, 12}, {157, 6}, {110, 8}, {70, 6}, @@ -6510,10 +6510,10 @@ const uint8_t utf8bigindex[4096][2] = {17, 8}, {33, 8}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {189, 8}, {152, 7}, {164, 7}, @@ -6526,7 +6526,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {112, 8}, {71, 7}, @@ -6542,8 
+6542,8 @@ const uint8_t utf8bigindex[4096][2] = {18, 8}, {34, 8}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {136, 8}, @@ -6558,7 +6558,7 @@ const uint8_t utf8bigindex[4096][2] = {20, 8}, {36, 8}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -6574,23 +6574,23 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {146, 4}, - {0, 12}, + {209, 12}, {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {160, 9}, {172, 9}, {147, 5}, @@ -6606,8 +6606,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {175, 9}, {148, 6}, {143, 11}, @@ -6622,7 +6622,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {64, 4}, - {0, 12}, + {209, 12}, {157, 6}, {111, 9}, {70, 6}, @@ -6638,10 +6638,10 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {190, 9}, {152, 7}, {164, 7}, @@ -6654,7 +6654,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {113, 9}, {71, 7}, @@ -6670,8 +6670,8 @@ const uint8_t utf8bigindex[4096][2] = {19, 9}, {35, 9}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {137, 9}, @@ -6686,7 +6686,7 @@ const uint8_t utf8bigindex[4096][2] = {21, 9}, {37, 9}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -6702,13 +6702,13 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, {205, 9}, {156, 8}, @@ 
-6718,7 +6718,7 @@ const uint8_t utf8bigindex[4096][2] = {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {159, 8}, {115, 9}, {72, 8}, @@ -6734,8 +6734,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {174, 8}, {148, 6}, {139, 9}, @@ -6750,7 +6750,7 @@ const uint8_t utf8bigindex[4096][2] = {22, 9}, {38, 9}, {3, 8}, - {0, 12}, + {209, 12}, {157, 6}, {110, 8}, {70, 6}, @@ -6766,10 +6766,10 @@ const uint8_t utf8bigindex[4096][2] = {17, 8}, {33, 8}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {189, 8}, {152, 7}, {164, 7}, @@ -6782,7 +6782,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {112, 8}, {71, 7}, @@ -6798,8 +6798,8 @@ const uint8_t utf8bigindex[4096][2] = {18, 8}, {34, 8}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {136, 8}, @@ -6814,7 +6814,7 @@ const uint8_t utf8bigindex[4096][2] = {20, 8}, {36, 8}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -6830,31 +6830,31 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {146, 4}, - {0, 12}, + {209, 12}, {149, 4}, {161, 4}, {64, 4}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {147, 5}, - {0, 12}, + {209, 12}, {150, 5}, {162, 5}, {65, 5}, - {0, 12}, + {209, 12}, {153, 5}, {165, 5}, {67, 5}, @@ -6862,8 +6862,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {176, 10}, {148, 6}, {188, 10}, @@ -6878,7 +6878,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {64, 4}, - {0, 12}, + {209, 12}, {157, 6}, {169, 6}, {70, 6}, @@ 
-6894,10 +6894,10 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {191, 10}, {152, 7}, {164, 7}, @@ -6910,7 +6910,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {114, 10}, {71, 7}, @@ -6926,8 +6926,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {138, 10}, @@ -6942,7 +6942,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -6958,13 +6958,13 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, {206, 10}, {156, 8}, @@ -6974,7 +6974,7 @@ const uint8_t utf8bigindex[4096][2] = {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {159, 8}, {116, 10}, {72, 8}, @@ -6990,8 +6990,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {174, 8}, {148, 6}, {140, 10}, @@ -7006,7 +7006,7 @@ const uint8_t utf8bigindex[4096][2] = {23, 10}, {39, 10}, {3, 8}, - {0, 12}, + {209, 12}, {157, 6}, {110, 8}, {70, 6}, @@ -7022,10 +7022,10 @@ const uint8_t utf8bigindex[4096][2] = {17, 8}, {33, 8}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {189, 8}, {152, 7}, {164, 7}, @@ -7038,7 +7038,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {112, 8}, {71, 7}, @@ -7054,8 +7054,8 @@ const uint8_t utf8bigindex[4096][2] = {18, 8}, {34, 8}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {136, 8}, @@ -7070,7 +7070,7 @@ const uint8_t utf8bigindex[4096][2] = {20, 8}, {36, 8}, {2, 7}, - 
{0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -7086,23 +7086,23 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {146, 4}, - {0, 12}, + {209, 12}, {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {160, 9}, {172, 9}, {147, 5}, @@ -7118,8 +7118,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {175, 9}, {148, 6}, {142, 10}, @@ -7134,7 +7134,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {64, 4}, - {0, 12}, + {209, 12}, {157, 6}, {111, 9}, {70, 6}, @@ -7150,10 +7150,10 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {190, 9}, {152, 7}, {164, 7}, @@ -7166,7 +7166,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {113, 9}, {71, 7}, @@ -7182,8 +7182,8 @@ const uint8_t utf8bigindex[4096][2] = {19, 9}, {35, 9}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {137, 9}, @@ -7198,7 +7198,7 @@ const uint8_t utf8bigindex[4096][2] = {21, 9}, {37, 9}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -7214,13 +7214,13 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, {205, 9}, {156, 8}, @@ -7230,7 +7230,7 @@ const uint8_t utf8bigindex[4096][2] = {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {159, 8}, {115, 9}, {72, 8}, @@ -7246,8 +7246,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - 
{0, 12}, + {209, 12}, + {209, 12}, {174, 8}, {148, 6}, {139, 9}, @@ -7262,7 +7262,7 @@ const uint8_t utf8bigindex[4096][2] = {22, 9}, {38, 9}, {3, 8}, - {0, 12}, + {209, 12}, {157, 6}, {110, 8}, {70, 6}, @@ -7278,10 +7278,10 @@ const uint8_t utf8bigindex[4096][2] = {17, 8}, {33, 8}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {189, 8}, {152, 7}, {164, 7}, @@ -7294,7 +7294,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {112, 8}, {71, 7}, @@ -7310,8 +7310,8 @@ const uint8_t utf8bigindex[4096][2] = {18, 8}, {34, 8}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {136, 8}, @@ -7326,7 +7326,7 @@ const uint8_t utf8bigindex[4096][2] = {20, 8}, {36, 8}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -7342,31 +7342,31 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {146, 4}, - {0, 12}, + {209, 12}, {149, 4}, {161, 4}, {64, 4}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {147, 5}, - {0, 12}, + {209, 12}, {150, 5}, {162, 5}, {65, 5}, - {0, 12}, + {209, 12}, {153, 5}, {165, 5}, {67, 5}, @@ -7374,15 +7374,15 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {148, 6}, - {0, 12}, + {209, 12}, {151, 6}, {163, 6}, {66, 6}, - {0, 12}, + {209, 12}, {154, 6}, {166, 6}, {68, 6}, @@ -7390,7 +7390,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {64, 4}, - {0, 12}, + {209, 12}, {157, 6}, {169, 6}, {70, 6}, @@ -7406,15 +7406,15 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, 
- {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {152, 7}, {164, 7}, {145, 3}, - {0, 12}, + {209, 12}, {155, 7}, {167, 7}, {69, 7}, @@ -7422,7 +7422,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {170, 7}, {71, 7}, @@ -7438,8 +7438,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {185, 7}, @@ -7454,7 +7454,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -7470,13 +7470,13 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, {208, 12}, {156, 8}, @@ -7486,7 +7486,7 @@ const uint8_t utf8bigindex[4096][2] = {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {159, 8}, {171, 8}, {72, 8}, @@ -7502,8 +7502,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {174, 8}, {148, 6}, {186, 8}, @@ -7518,7 +7518,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {3, 8}, - {0, 12}, + {209, 12}, {157, 6}, {110, 8}, {70, 6}, @@ -7534,10 +7534,10 @@ const uint8_t utf8bigindex[4096][2] = {17, 8}, {33, 8}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {189, 8}, {152, 7}, {164, 7}, @@ -7550,7 +7550,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {112, 8}, {71, 7}, @@ -7566,8 +7566,8 @@ const uint8_t utf8bigindex[4096][2] = {18, 8}, {34, 8}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {136, 8}, @@ -7582,7 +7582,7 @@ const uint8_t utf8bigindex[4096][2] = {20, 8}, {36, 8}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ 
-7598,23 +7598,23 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {146, 4}, - {0, 12}, + {209, 12}, {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {160, 9}, {172, 9}, {147, 5}, @@ -7630,8 +7630,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {175, 9}, {148, 6}, {144, 12}, @@ -7646,7 +7646,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {64, 4}, - {0, 12}, + {209, 12}, {157, 6}, {111, 9}, {70, 6}, @@ -7662,10 +7662,10 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {190, 9}, {152, 7}, {164, 7}, @@ -7678,7 +7678,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {113, 9}, {71, 7}, @@ -7694,8 +7694,8 @@ const uint8_t utf8bigindex[4096][2] = {19, 9}, {35, 9}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {137, 9}, @@ -7710,7 +7710,7 @@ const uint8_t utf8bigindex[4096][2] = {21, 9}, {37, 9}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -7726,13 +7726,13 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, {205, 9}, {156, 8}, @@ -7742,7 +7742,7 @@ const uint8_t utf8bigindex[4096][2] = {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {159, 8}, {115, 9}, {72, 8}, @@ -7758,8 +7758,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {174, 8}, {148, 6}, 
{139, 9}, @@ -7774,7 +7774,7 @@ const uint8_t utf8bigindex[4096][2] = {22, 9}, {38, 9}, {3, 8}, - {0, 12}, + {209, 12}, {157, 6}, {110, 8}, {70, 6}, @@ -7790,10 +7790,10 @@ const uint8_t utf8bigindex[4096][2] = {17, 8}, {33, 8}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {189, 8}, {152, 7}, {164, 7}, @@ -7806,7 +7806,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {112, 8}, {71, 7}, @@ -7822,8 +7822,8 @@ const uint8_t utf8bigindex[4096][2] = {18, 8}, {34, 8}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {136, 8}, @@ -7838,7 +7838,7 @@ const uint8_t utf8bigindex[4096][2] = {20, 8}, {36, 8}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -7854,31 +7854,31 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {146, 4}, - {0, 12}, + {209, 12}, {149, 4}, {161, 4}, {64, 4}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {147, 5}, - {0, 12}, + {209, 12}, {150, 5}, {162, 5}, {65, 5}, - {0, 12}, + {209, 12}, {153, 5}, {165, 5}, {67, 5}, @@ -7886,8 +7886,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {176, 10}, {148, 6}, {188, 10}, @@ -7902,7 +7902,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {64, 4}, - {0, 12}, + {209, 12}, {157, 6}, {169, 6}, {70, 6}, @@ -7918,10 +7918,10 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {191, 10}, {152, 7}, {164, 7}, @@ -7934,7 +7934,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 
4}, - {0, 12}, + {209, 12}, {158, 7}, {114, 10}, {71, 7}, @@ -7950,8 +7950,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {138, 10}, @@ -7966,7 +7966,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -7982,13 +7982,13 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, {206, 10}, {156, 8}, @@ -7998,7 +7998,7 @@ const uint8_t utf8bigindex[4096][2] = {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {159, 8}, {116, 10}, {72, 8}, @@ -8014,8 +8014,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {174, 8}, {148, 6}, {140, 10}, @@ -8030,7 +8030,7 @@ const uint8_t utf8bigindex[4096][2] = {23, 10}, {39, 10}, {3, 8}, - {0, 12}, + {209, 12}, {157, 6}, {110, 8}, {70, 6}, @@ -8046,10 +8046,10 @@ const uint8_t utf8bigindex[4096][2] = {17, 8}, {33, 8}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {189, 8}, {152, 7}, {164, 7}, @@ -8062,7 +8062,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {112, 8}, {71, 7}, @@ -8078,8 +8078,8 @@ const uint8_t utf8bigindex[4096][2] = {18, 8}, {34, 8}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {136, 8}, @@ -8094,7 +8094,7 @@ const uint8_t utf8bigindex[4096][2] = {20, 8}, {36, 8}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -8110,23 +8110,23 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, 
+ {209, 12}, {145, 3}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {146, 4}, - {0, 12}, + {209, 12}, {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {160, 9}, {172, 9}, {147, 5}, @@ -8142,8 +8142,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {175, 9}, {148, 6}, {142, 10}, @@ -8158,7 +8158,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {64, 4}, - {0, 12}, + {209, 12}, {157, 6}, {111, 9}, {70, 6}, @@ -8174,10 +8174,10 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {190, 9}, {152, 7}, {164, 7}, @@ -8190,7 +8190,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {113, 9}, {71, 7}, @@ -8206,8 +8206,8 @@ const uint8_t utf8bigindex[4096][2] = {19, 9}, {35, 9}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {137, 9}, @@ -8222,7 +8222,7 @@ const uint8_t utf8bigindex[4096][2] = {21, 9}, {37, 9}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -8238,13 +8238,13 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, {205, 9}, {156, 8}, @@ -8254,7 +8254,7 @@ const uint8_t utf8bigindex[4096][2] = {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {159, 8}, {115, 9}, {72, 8}, @@ -8270,8 +8270,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {174, 8}, {148, 6}, {139, 9}, @@ -8286,7 +8286,7 @@ const uint8_t utf8bigindex[4096][2] = {22, 9}, {38, 9}, {3, 8}, - {0, 12}, + {209, 12}, {157, 6}, {110, 8}, {70, 6}, @@ -8302,10 +8302,10 @@ const uint8_t utf8bigindex[4096][2] = {17, 8}, {33, 8}, {0, 6}, - 
{0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {189, 8}, {152, 7}, {164, 7}, @@ -8318,7 +8318,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {112, 8}, {71, 7}, @@ -8334,8 +8334,8 @@ const uint8_t utf8bigindex[4096][2] = {18, 8}, {34, 8}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {136, 8}, @@ -8350,7 +8350,7 @@ const uint8_t utf8bigindex[4096][2] = {20, 8}, {36, 8}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -8366,31 +8366,31 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {146, 4}, - {0, 12}, + {209, 12}, {149, 4}, {161, 4}, {64, 4}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {147, 5}, - {0, 12}, + {209, 12}, {150, 5}, {162, 5}, {65, 5}, - {0, 12}, + {209, 12}, {153, 5}, {165, 5}, {67, 5}, @@ -8398,15 +8398,15 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {148, 6}, - {0, 12}, + {209, 12}, {151, 6}, {163, 6}, {66, 6}, - {0, 12}, + {209, 12}, {154, 6}, {166, 6}, {68, 6}, @@ -8414,7 +8414,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {64, 4}, - {0, 12}, + {209, 12}, {157, 6}, {169, 6}, {70, 6}, @@ -8430,10 +8430,10 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {192, 11}, {152, 7}, {164, 7}, @@ -8446,7 +8446,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {170, 7}, {71, 7}, @@ -8462,8 +8462,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, 
{91, 5}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {185, 7}, @@ -8478,7 +8478,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -8494,13 +8494,13 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, {207, 11}, {156, 8}, @@ -8510,7 +8510,7 @@ const uint8_t utf8bigindex[4096][2] = {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {159, 8}, {117, 11}, {72, 8}, @@ -8526,8 +8526,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {174, 8}, {148, 6}, {141, 11}, @@ -8542,7 +8542,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {3, 8}, - {0, 12}, + {209, 12}, {157, 6}, {110, 8}, {70, 6}, @@ -8558,10 +8558,10 @@ const uint8_t utf8bigindex[4096][2] = {17, 8}, {33, 8}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {189, 8}, {152, 7}, {164, 7}, @@ -8574,7 +8574,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {112, 8}, {71, 7}, @@ -8590,8 +8590,8 @@ const uint8_t utf8bigindex[4096][2] = {18, 8}, {34, 8}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {136, 8}, @@ -8606,7 +8606,7 @@ const uint8_t utf8bigindex[4096][2] = {20, 8}, {36, 8}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -8622,23 +8622,23 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {146, 4}, - {0, 12}, + {209, 12}, 
{149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {160, 9}, {172, 9}, {147, 5}, @@ -8654,8 +8654,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {175, 9}, {148, 6}, {143, 11}, @@ -8670,7 +8670,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {64, 4}, - {0, 12}, + {209, 12}, {157, 6}, {111, 9}, {70, 6}, @@ -8686,10 +8686,10 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {190, 9}, {152, 7}, {164, 7}, @@ -8702,7 +8702,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {113, 9}, {71, 7}, @@ -8718,8 +8718,8 @@ const uint8_t utf8bigindex[4096][2] = {19, 9}, {35, 9}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {137, 9}, @@ -8734,7 +8734,7 @@ const uint8_t utf8bigindex[4096][2] = {21, 9}, {37, 9}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -8750,13 +8750,13 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, {205, 9}, {156, 8}, @@ -8766,7 +8766,7 @@ const uint8_t utf8bigindex[4096][2] = {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {159, 8}, {115, 9}, {72, 8}, @@ -8782,8 +8782,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {174, 8}, {148, 6}, {139, 9}, @@ -8798,7 +8798,7 @@ const uint8_t utf8bigindex[4096][2] = {22, 9}, {38, 9}, {3, 8}, - {0, 12}, + {209, 12}, {157, 6}, {110, 8}, {70, 6}, @@ -8814,10 +8814,10 @@ const uint8_t utf8bigindex[4096][2] = {17, 8}, {33, 8}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {189, 8}, {152, 7}, {164, 7}, @@ 
-8830,7 +8830,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {112, 8}, {71, 7}, @@ -8846,8 +8846,8 @@ const uint8_t utf8bigindex[4096][2] = {18, 8}, {34, 8}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {136, 8}, @@ -8862,7 +8862,7 @@ const uint8_t utf8bigindex[4096][2] = {20, 8}, {36, 8}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -8878,31 +8878,31 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {146, 4}, - {0, 12}, + {209, 12}, {149, 4}, {161, 4}, {64, 4}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {147, 5}, - {0, 12}, + {209, 12}, {150, 5}, {162, 5}, {65, 5}, - {0, 12}, + {209, 12}, {153, 5}, {165, 5}, {67, 5}, @@ -8910,8 +8910,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {176, 10}, {148, 6}, {188, 10}, @@ -8926,7 +8926,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {64, 4}, - {0, 12}, + {209, 12}, {157, 6}, {169, 6}, {70, 6}, @@ -8942,10 +8942,10 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {191, 10}, {152, 7}, {164, 7}, @@ -8958,7 +8958,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {114, 10}, {71, 7}, @@ -8974,8 +8974,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {138, 10}, @@ -8990,7 +8990,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ 
-9006,13 +9006,13 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, {206, 10}, {156, 8}, @@ -9022,7 +9022,7 @@ const uint8_t utf8bigindex[4096][2] = {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {159, 8}, {116, 10}, {72, 8}, @@ -9038,8 +9038,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {174, 8}, {148, 6}, {140, 10}, @@ -9054,7 +9054,7 @@ const uint8_t utf8bigindex[4096][2] = {23, 10}, {39, 10}, {3, 8}, - {0, 12}, + {209, 12}, {157, 6}, {110, 8}, {70, 6}, @@ -9070,10 +9070,10 @@ const uint8_t utf8bigindex[4096][2] = {17, 8}, {33, 8}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {189, 8}, {152, 7}, {164, 7}, @@ -9086,7 +9086,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {112, 8}, {71, 7}, @@ -9102,8 +9102,8 @@ const uint8_t utf8bigindex[4096][2] = {18, 8}, {34, 8}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {136, 8}, @@ -9118,7 +9118,7 @@ const uint8_t utf8bigindex[4096][2] = {20, 8}, {36, 8}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -9134,23 +9134,23 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {146, 4}, - {0, 12}, + {209, 12}, {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {160, 9}, {172, 9}, {147, 5}, @@ -9166,8 +9166,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {175, 9}, {148, 
6}, {142, 10}, @@ -9182,7 +9182,7 @@ const uint8_t utf8bigindex[4096][2] = {74, 6}, {92, 6}, {64, 4}, - {0, 12}, + {209, 12}, {157, 6}, {111, 9}, {70, 6}, @@ -9198,10 +9198,10 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {190, 9}, {152, 7}, {164, 7}, @@ -9214,7 +9214,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {113, 9}, {71, 7}, @@ -9230,8 +9230,8 @@ const uint8_t utf8bigindex[4096][2] = {19, 9}, {35, 9}, {1, 7}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {137, 9}, @@ -9246,7 +9246,7 @@ const uint8_t utf8bigindex[4096][2] = {21, 9}, {37, 9}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -9262,13 +9262,13 @@ const uint8_t utf8bigindex[4096][2] = {16, 7}, {32, 7}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {145, 3}, {205, 9}, {156, 8}, @@ -9278,7 +9278,7 @@ const uint8_t utf8bigindex[4096][2] = {149, 4}, {161, 4}, {64, 4}, - {0, 12}, + {209, 12}, {159, 8}, {115, 9}, {72, 8}, @@ -9294,8 +9294,8 @@ const uint8_t utf8bigindex[4096][2] = {73, 5}, {91, 5}, {64, 4}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, {174, 8}, {148, 6}, {139, 9}, @@ -9310,7 +9310,7 @@ const uint8_t utf8bigindex[4096][2] = {22, 9}, {38, 9}, {3, 8}, - {0, 12}, + {209, 12}, {157, 6}, {110, 8}, {70, 6}, @@ -9326,10 +9326,10 @@ const uint8_t utf8bigindex[4096][2] = {17, 8}, {33, 8}, {0, 6}, - {0, 12}, - {0, 12}, - {0, 12}, - {0, 12}, + {209, 12}, + {209, 12}, + {209, 12}, + {209, 12}, {189, 8}, {152, 7}, {164, 7}, @@ -9342,7 +9342,7 @@ const uint8_t utf8bigindex[4096][2] = {75, 7}, {93, 7}, {64, 4}, - {0, 12}, + {209, 12}, {158, 7}, {112, 8}, {71, 7}, @@ -9358,8 +9358,8 @@ const uint8_t utf8bigindex[4096][2] = {18, 8}, {34, 8}, {1, 7}, - {0, 12}, - 
{0, 12}, + {209, 12}, + {209, 12}, {173, 7}, {148, 6}, {136, 8}, @@ -9374,7 +9374,7 @@ const uint8_t utf8bigindex[4096][2] = {20, 8}, {36, 8}, {2, 7}, - {0, 12}, + {209, 12}, {157, 6}, {109, 7}, {70, 6}, @@ -11299,8 +11299,10 @@ template inline result rewind_and_convert_with_errors(size_t prior_bytes, const char* buf, size_t len, char16_t* utf16_output) { size_t extra_len{0}; // We potentially need to go back in time and find a leading byte. - size_t how_far_back = 3; // 3 bytes in the past + current position - if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; } + // In theory '3' would be sufficient, but sometimes the error can go back quite far. + size_t how_far_back = prior_bytes; + // size_t how_far_back = 3; // 3 bytes in the past + current position + // if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; } bool found_leading_bytes{false}; // important: it is i <= how_far_back and not 'i < how_far_back'. for(size_t i = 0; i <= how_far_back; i++) { @@ -12206,6 +12208,14 @@ size_t convert_masked_utf8_to_utf16(const char *input, utf16_output += 4; } else if (idx < 209) { // TWO (2) input code-words + ////////////// + // There might be garbage inputs where a leading byte masquerades as a four-byte + // leading byte (by being followed by 3 continuation bytes), but is less than + // 0xf0. This could trigger a buffer overflow if we only counted leading + // bytes of the form 0xf0 as generating surrogate pairs, without further UTF-8 validation. + // Thus we must be careful to ensure that only leading bytes at least as large as 0xf0 generate surrogate pairs. + // We do so at the cost of an extra mask.
+ ///////////// uint8x16_t sh = vld1q_u8(reinterpret_cast(simdutf::tables::utf8_to_utf16::shufutf8[idx])); uint8x16_t perm = vqtbl1q_u8(in, sh); uint8x16_t ascii = vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x7f))); @@ -12217,8 +12227,14 @@ size_t convert_masked_utf8_to_utf16(const char *input, vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x400000)))), 1)); middlehighbyte = veorq_u8(correct, middlehighbyte); uint8x16_t middlehighbyte_shifted = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(middlehighbyte), 4)); - uint8x16_t highbyte = vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x07000000))); - uint8x16_t highbyte_shifted =vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(highbyte), 6)); + // We deliberately carry the leading four bits if they are present; we remove + // them later when computing hightenbits. + uint8x16_t highbyte = vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0xff000000))); + uint8x16_t highbyte_shifted = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(highbyte), 6)); + // When we need to generate a surrogate pair (leading byte >= 0xF0), then + // the corresponding 32-bit value in 'composed' will be greater than + // (0xf0000000>>6), i.e., > 0x3c00000. This can be used later to identify the + // location of the surrogate pairs.
uint8x16_t composed = vorrq_u8(vorrq_u8(ascii, middlebyte_shifted), vorrq_u8(highbyte_shifted, middlehighbyte_shifted)); @@ -12226,7 +12242,8 @@ size_t convert_masked_utf8_to_utf16(const char *input, vsubq_u32(vreinterpretq_u32_u8(composed), vmovq_n_u32(0x10000)); uint32x4_t lowtenbits = vandq_u32(composedminus, vmovq_n_u32(0x3ff)); - uint32x4_t hightenbits = vshrq_n_u32(composedminus, 10); + // Notice the 0x3ff mask: + uint32x4_t hightenbits = vandq_u32(vshrq_n_u32(composedminus, 10), vmovq_n_u32(0x3ff)); uint32x4_t lowtenbitsadd = vaddq_u32(lowtenbits, vmovq_n_u32(0xDC00)); uint32x4_t hightenbitsadd = @@ -12244,13 +12261,13 @@ size_t convert_masked_utf8_to_utf16(const char *input, uint32_t surrogate_buffer[4]; vst1q_u32(surrogate_buffer, surrogates); for (size_t i = 0; i < 3; i++) { - if (basic_buffer[i] < 65536) { - utf16_output[0] = !match_system(big_endian) ? uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]); - utf16_output++; - } else { + if(basic_buffer[i] > 0x3c00000) { utf16_output[0] = uint16_t(surrogate_buffer[i] & 0xffff); utf16_output[1] = uint16_t(surrogate_buffer[i] >> 16); utf16_output += 2; + } else { + utf16_output[0] = !match_system(big_endian) ? uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]); + utf16_output++; } } } else { @@ -14231,7 +14248,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size, utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case). In practice we expect an // 85% to 90% efficiency. } @@ -14377,7 +14394,19 @@ using namespace simd; simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) { size_t pos = 0; char16_t* start{utf16_output}; - const size_t safety_margin = 16; // to avoid overruns! 
+ // In the worst case, we have the haswell kernel which can cause an overflow of + // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes, + // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate + // much more than 8 bytes. However, you cannot generally assume that you have valid + // UTF-8 input, so we are going to go back from the end counting 8 leading bytes, + // to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for(; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin-1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! while(pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if(input.is_ascii()) { @@ -14422,7 +14451,7 @@ using namespace simd; utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case). In practice we expect an // 85% to 90% efficiency. } @@ -14440,7 +14469,19 @@ using namespace simd; simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) { size_t pos = 0; char16_t* start{utf16_output}; - const size_t safety_margin = 16; // to avoid overruns! + // In the worst case, we have the haswell kernel which can cause an overflow of + // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes, + // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate + // much more than 8 bytes. However, you cannot generally assume that you have valid + // UTF-8 input, so we are going to go back from the end counting 8 leading bytes, + // to give us a good margin. 
+ size_t leading_byte = 0; + size_t margin = size; + for(; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin-1]) > -65); + } + // If the input is long enough, then in[margin] is the eighth-to-last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! while(pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if(input.is_ascii()) { @@ -14492,7 +14533,7 @@ using namespace simd; utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case). In practice we expect an // 85% to 90% efficiency. } @@ -14707,7 +14748,19 @@ using namespace simd; simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) { size_t pos = 0; char32_t* start{utf32_output}; - const size_t safety_margin = 16; // to avoid overruns! + // In the worst case, we have the haswell kernel which can cause an overflow of + // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes, + // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate + // much more than 8 bytes. However, you cannot generally assume that you have valid + // UTF-8 input, so we are going to go back from the end counting 4 leading bytes, + // to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for(; margin > 0 && leading_byte < 4; margin--) { + leading_byte += (int8_t(in[margin-1]) > -65); + } + // If the input is long enough, then in[margin] is the fourth-to-last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns!
while(pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if(input.is_ascii()) { @@ -14752,7 +14805,7 @@ using namespace simd; utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case). In practice we expect an // 85% to 90% efficiency. } @@ -14769,7 +14822,19 @@ using namespace simd; simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) { size_t pos = 0; char32_t* start{utf32_output}; - const size_t safety_margin = 16; // to avoid overruns! + // In the worst case, we have the haswell kernel which can cause an overflow of + // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes, + // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate + // much more than 8 bytes. However, you cannot generally assume that you have valid + // UTF-8 input, so we are going to go back from the end counting 4 leading bytes, + // to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for(; margin > 0 && leading_byte < 4; margin--) { + leading_byte += (int8_t(in[margin-1]) > -65); + } + // If the input is long enough, then in[margin] is the fourth-to-last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! while(pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if(input.is_ascii()) { @@ -14819,7 +14884,7 @@ using namespace simd; utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case).
In practice we expect an // 85% to 90% efficiency. } @@ -19797,7 +19862,7 @@ size_t convert_masked_utf8_to_utf16(const char *input, __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); if (big_endian) composed = _mm_shuffle_epi8(composed, swap); _mm_storeu_si128((__m128i *)utf16_output, composed); - utf16_output += 6; // We wrote 12 bytes, 6 code points. + utf16_output += 6; // We wrote 12 bytes, 6 code points. There is a potential overflow of 4 bytes. } else if (idx < 145) { // FOUR (4) input code-words const __m128i sh = @@ -19816,9 +19881,17 @@ size_t convert_masked_utf8_to_utf16(const char *input, __m128i composed_repacked = _mm_packus_epi32(composed, composed); if (big_endian) composed_repacked = _mm_shuffle_epi8(composed_repacked, swap); _mm_storeu_si128((__m128i *)utf16_output, composed_repacked); - utf16_output += 4; + utf16_output += 4; // Here we overflow by 8 bytes. } else if (idx < 209) { // TWO (2) input code-words + ////////////// + // There might be garbage inputs where a leading byte masquerades as a four-byte + // leading byte (by being followed by 3 continuation bytes), but is less than + // 0xf0. This could trigger a buffer overflow if we only counted leading + // bytes of the form 0xf0 as generating surrogate pairs, without further UTF-8 validation. + // Thus we must be careful to ensure that only leading bytes at least as large as 0xf0 generate surrogate pairs. + // We do so at the cost of an extra mask.
+ ///////////// const __m128i sh = _mm_loadu_si128((const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]); const __m128i perm = _mm_shuffle_epi8(in, sh); @@ -19831,8 +19904,14 @@ size_t convert_masked_utf8_to_utf16(const char *input, _mm_srli_epi32(_mm_and_si128(perm, _mm_set1_epi32(0x400000)), 1); middlehighbyte = _mm_xor_si128(correct, middlehighbyte); const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0x07000000)); + // We deliberately carry the leading four bits in highbyte if they are present; + // we remove them later when computing hightenbits. + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0xff000000)); const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6); + // When we need to generate a surrogate pair (leading byte >= 0xF0), then + // the corresponding 32-bit value in 'composed' will be greater than + // (0xf0000000>>6), i.e., > 0x3c00000. This can be used later to identify the + // location of the surrogate pairs. const __m128i composed = _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), _mm_or_si128(highbyte_shifted, middlehighbyte_shifted)); @@ -19840,7 +19919,8 @@ size_t convert_masked_utf8_to_utf16(const char *input, _mm_sub_epi32(composed, _mm_set1_epi32(0x10000)); const __m128i lowtenbits = _mm_and_si128(composedminus, _mm_set1_epi32(0x3ff)); - const __m128i hightenbits = _mm_srli_epi32(composedminus, 10); + // Notice the 0x3ff mask: + const __m128i hightenbits = _mm_and_si128(_mm_srli_epi32(composedminus, 10), _mm_set1_epi32(0x3ff)); const __m128i lowtenbitsadd = _mm_add_epi32(lowtenbits, _mm_set1_epi32(0xDC00)); const __m128i hightenbitsadd = @@ -19858,13 +19938,13 @@ size_t convert_masked_utf8_to_utf16(const char *input, uint32_t surrogate_buffer[4]; _mm_storeu_si128((__m128i *)surrogate_buffer, surrogates); for (size_t i = 0; i < 3; i++) { - if (basic_buffer[i] < 65536) { - utf16_output[0] = big_endian ?
uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]); - utf16_output++; - } else { + if(basic_buffer[i] > 0x3c00000) { utf16_output[0] = uint16_t(surrogate_buffer[i] & 0xffff); utf16_output[1] = uint16_t(surrogate_buffer[i] >> 16); utf16_output += 2; + } else { + utf16_output[0] = big_endian ? uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]); + utf16_output++; } } } else { @@ -19955,7 +20035,8 @@ size_t convert_masked_utf8_to_utf32(const char *input, const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); _mm256_storeu_si256((__m256i *)utf32_output, _mm256_cvtepu16_epi32(composed)); - utf32_output += 6; // We wrote 12 bytes, 6 code points. + utf32_output += 6; // We wrote 24 bytes, 6 code points. There is a potential + // overflow of 32 - 24 = 8 bytes. } else if (idx < 145) { // FOUR (4) input code-words const __m128i sh = @@ -19993,7 +20074,7 @@ size_t convert_masked_utf8_to_utf32(const char *input, _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), _mm_or_si128(highbyte_shifted, middlehighbyte_shifted)); _mm_storeu_si128((__m128i *)utf32_output, composed); - utf32_output += 3; + utf32_output += 3; // We wrote 3 * 4 bytes, there is a potential overflow of 4 bytes. } else { // here we know that there is an error but we do not handle errors } @@ -21810,7 +21891,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size, utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case). In practice we expect an // 85% to 90% efficiency. 
} @@ -21956,7 +22037,19 @@ using namespace simd; simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) { size_t pos = 0; char16_t* start{utf16_output}; - const size_t safety_margin = 16; // to avoid overruns! + // In the worst case, we have the haswell kernel which can cause an overflow of + // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes, + // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate + // much more than 8 bytes. However, you cannot generally assume that you have valid + // UTF-8 input, so we are going to go back from the end counting 8 leading bytes, + // to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for(; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin-1]) > -65); + } + // If the input is long enough, then in[margin] is the eighth-to-last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! while(pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if(input.is_ascii()) { @@ -22001,7 +22094,7 @@ using namespace simd; utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case). In practice we expect an // 85% to 90% efficiency. } @@ -22019,7 +22112,19 @@ using namespace simd; simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) { size_t pos = 0; char16_t* start{utf16_output}; - const size_t safety_margin = 16; // to avoid overruns! + // In the worst case, we have the haswell kernel which can cause an overflow of + // 8 bytes when calling convert_masked_utf8_to_utf16.
If you skip the last 16 bytes, + // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate + // much more than 8 bytes. However, you cannot generally assume that you have valid + // UTF-8 input, so we are going to go back from the end counting 8 leading bytes, + // to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for(; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin-1]) > -65); + } + // If the input is long enough, then in[margin] is the eighth-to-last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! while(pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if(input.is_ascii()) { @@ -22071,7 +22176,7 @@ using namespace simd; utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case). In practice we expect an // 85% to 90% efficiency. } @@ -22286,7 +22391,19 @@ using namespace simd; simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) { size_t pos = 0; char32_t* start{utf32_output}; - const size_t safety_margin = 16; // to avoid overruns! + // In the worst case, we have the haswell kernel which can cause an overflow of + // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes, + // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate + // much more than 8 bytes. However, you cannot generally assume that you have valid + // UTF-8 input, so we are going to go back from the end counting 4 leading bytes, + // to give us a good margin.
+ size_t leading_byte = 0; + size_t margin = size; + for(; margin > 0 && leading_byte < 4; margin--) { + leading_byte += (int8_t(in[margin-1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the fourth last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! while(pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if(input.is_ascii()) { @@ -22331,7 +22448,7 @@ using namespace simd; utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case). In practice we expect an // 85% to 90% efficiency. } @@ -22348,7 +22465,19 @@ using namespace simd; simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) { size_t pos = 0; char32_t* start{utf32_output}; - const size_t safety_margin = 16; // to avoid overruns! + // In the worst case, we have the haswell kernel which can cause an overflow of + // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes, + // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate + // much more than 8 bytes. However, you cannot generally assume that you have valid + // UTF-8 input, so we are going to go back from the end counting 4 leading bytes, + // to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for(; margin > 0 && leading_byte < 4; margin--) { + leading_byte += (int8_t(in[margin-1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the fourth last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! 
while(pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if(input.is_ascii()) { @@ -22398,7 +22527,7 @@ using namespace simd; utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case). In practice we expect an // 85% to 90% efficiency. } @@ -23539,7 +23668,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size, utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case). In practice we expect an // 85% to 90% efficiency. } @@ -23685,7 +23814,19 @@ using namespace simd; simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) { size_t pos = 0; char16_t* start{utf16_output}; - const size_t safety_margin = 16; // to avoid overruns! + // In the worst case, we have the haswell kernel which can cause an overflow of + // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes, + // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate + // much more than 8 bytes. However, you cannot generally assume that you have valid + // UTF-8 input, so we are going to go back from the end counting 8 leading bytes, + // to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for(; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin-1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! 
while(pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if(input.is_ascii()) { @@ -23730,7 +23871,7 @@ using namespace simd; utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case). In practice we expect an // 85% to 90% efficiency. } @@ -23748,7 +23889,19 @@ using namespace simd; simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) { size_t pos = 0; char16_t* start{utf16_output}; - const size_t safety_margin = 16; // to avoid overruns! + // In the worst case, we have the haswell kernel which can cause an overflow of + // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes, + // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate + // much more than 8 bytes. However, you cannot generally assume that you have valid + // UTF-8 input, so we are going to go back from the end counting 8 leading bytes, + // to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for(; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin-1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! while(pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if(input.is_ascii()) { @@ -23800,7 +23953,7 @@ using namespace simd; utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case). 
In practice we expect an // 85% to 90% efficiency. } @@ -24015,7 +24168,19 @@ using namespace simd; simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) { size_t pos = 0; char32_t* start{utf32_output}; - const size_t safety_margin = 16; // to avoid overruns! + // In the worst case, we have the haswell kernel which can cause an overflow of + // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes, + // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate + // much more than 8 bytes. However, you cannot generally assume that you have valid + // UTF-8 input, so we are going to go back from the end counting 4 leading bytes, + // to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for(; margin > 0 && leading_byte < 4; margin--) { + leading_byte += (int8_t(in[margin-1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the fourth last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! while(pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if(input.is_ascii()) { @@ -24060,7 +24225,7 @@ using namespace simd; utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case). In practice we expect an // 85% to 90% efficiency. } @@ -24077,7 +24242,19 @@ using namespace simd; simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) { size_t pos = 0; char32_t* start{utf32_output}; - const size_t safety_margin = 16; // to avoid overruns! + // In the worst case, we have the haswell kernel which can cause an overflow of + // 8 bytes when calling convert_masked_utf8_to_utf32. 
If you skip the last 16 bytes, + // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate + // much more than 8 bytes. However, you cannot generally assume that you have valid + // UTF-8 input, so we are going to go back from the end counting 4 leading bytes, + // to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for(; margin > 0 && leading_byte < 4; margin--) { + leading_byte += (int8_t(in[margin-1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the fourth last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! while(pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if(input.is_ascii()) { @@ -24127,7 +24304,7 @@ using namespace simd; utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case). In practice we expect an // 85% to 90% efficiency. } @@ -25144,6 +25321,14 @@ size_t convert_masked_utf8_to_utf16(const char *input, utf16_output += 4; } else if (idx < 209) { // TWO (2) input code-words + ////////////// + // There might be garbage inputs where a leading byte masquerades as a four-byte + // leading byte (by being followed by 3 continuation bytes), but is less than + // 0xf0. This could trigger a buffer overflow if we only counted leading + // bytes of the form 0xf0 as generating surrogate pairs, without further UTF-8 validation. + // Thus we must be careful to ensure that only leading bytes at least as large as 0xf0 generate surrogate pairs. + // We do so at the cost of an extra mask.
+ ///////////// const __m128i sh = _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); const __m128i perm = _mm_shuffle_epi8(in, sh); @@ -25156,8 +25341,14 @@ size_t convert_masked_utf8_to_utf16(const char *input, _mm_srli_epi32(_mm_and_si128(perm, _mm_set1_epi32(0x400000)), 1); middlehighbyte = _mm_xor_si128(correct, middlehighbyte); const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0x07000000)); + // We deliberately carry the leading four bits in highbyte if they are present, + // we remove them later when computing hightenbits. + const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0xff000000)); const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6); + // When we need to generate a surrogate pair (leading byte >= 0xF0), then + // the corresponding 32-bit value in 'composed' will be greater than + // (0xf0000000>>6), that is, > 0x3c00000. This can be used later to identify the + // location of the surrogate pairs. const __m128i composed = _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), _mm_or_si128(highbyte_shifted, middlehighbyte_shifted)); @@ -25165,7 +25356,8 @@ size_t convert_masked_utf8_to_utf16(const char *input, _mm_sub_epi32(composed, _mm_set1_epi32(0x10000)); const __m128i lowtenbits = _mm_and_si128(composedminus, _mm_set1_epi32(0x3ff)); - const __m128i hightenbits = _mm_srli_epi32(composedminus, 10); + // Notice the 0x3ff mask: + const __m128i hightenbits = _mm_and_si128(_mm_srli_epi32(composedminus, 10), _mm_set1_epi32(0x3ff)); const __m128i lowtenbitsadd = _mm_add_epi32(lowtenbits, _mm_set1_epi32(0xDC00)); const __m128i hightenbitsadd = @@ -25183,13 +25375,13 @@ size_t convert_masked_utf8_to_utf16(const char *input, uint32_t surrogate_buffer[4]; _mm_storeu_si128((__m128i *)surrogate_buffer, surrogates); for (size_t i = 0; i < 3; i++) { - if (basic_buffer[i] < 65536) { - utf16_output[0] = big_endian ?
uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]); - utf16_output++; - } else { + if(basic_buffer[i] > 0x3c00000) { utf16_output[0] = uint16_t(surrogate_buffer[i] & 0xffff); utf16_output[1] = uint16_t(surrogate_buffer[i] >> 16); utf16_output += 2; + } else { + utf16_output[0] = big_endian ? uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]); + utf16_output++; } } } else { @@ -27140,7 +27332,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size, utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case). In practice we expect an // 85% to 90% efficiency. } @@ -27286,7 +27478,19 @@ using namespace simd; simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) { size_t pos = 0; char16_t* start{utf16_output}; - const size_t safety_margin = 16; // to avoid overruns! + // In the worst case, we have the haswell kernel which can cause an overflow of + // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes, + // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate + // much more than 8 bytes. However, you cannot generally assume that you have valid + // UTF-8 input, so we are going to go back from the end counting 8 leading bytes, + // to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for(; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin-1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! 
while(pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if(input.is_ascii()) { @@ -27331,7 +27535,7 @@ using namespace simd; utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case). In practice we expect an // 85% to 90% efficiency. } @@ -27349,7 +27553,19 @@ using namespace simd; simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) { size_t pos = 0; char16_t* start{utf16_output}; - const size_t safety_margin = 16; // to avoid overruns! + // In the worst case, we have the haswell kernel which can cause an overflow of + // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes, + // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate + // much more than 8 bytes. However, you cannot generally assume that you have valid + // UTF-8 input, so we are going to go back from the end counting 8 leading bytes, + // to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for(; margin > 0 && leading_byte < 8; margin--) { + leading_byte += (int8_t(in[margin-1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the eight last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! while(pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if(input.is_ascii()) { @@ -27401,7 +27617,7 @@ using namespace simd; utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case). 
In practice we expect an // 85% to 90% efficiency. } @@ -27616,7 +27832,19 @@ using namespace simd; simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) { size_t pos = 0; char32_t* start{utf32_output}; - const size_t safety_margin = 16; // to avoid overruns! + // In the worst case, we have the haswell kernel which can cause an overflow of + // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes, + // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate + // much more than 8 bytes. However, you cannot generally assume that you have valid + // UTF-8 input, so we are going to go back from the end counting 4 leading bytes, + // to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for(; margin > 0 && leading_byte < 4; margin--) { + leading_byte += (int8_t(in[margin-1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the fourth last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! while(pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if(input.is_ascii()) { @@ -27661,7 +27889,7 @@ using namespace simd; utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case). In practice we expect an // 85% to 90% efficiency. } @@ -27678,7 +27906,19 @@ using namespace simd; simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) { size_t pos = 0; char32_t* start{utf32_output}; - const size_t safety_margin = 16; // to avoid overruns! + // In the worst case, we have the haswell kernel which can cause an overflow of + // 8 bytes when calling convert_masked_utf8_to_utf32. 
If you skip the last 16 bytes, + // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate + // much more than 8 bytes. However, you cannot generally assume that you have valid + // UTF-8 input, so we are going to go back from the end counting 4 leading bytes, + // to give us a good margin. + size_t leading_byte = 0; + size_t margin = size; + for(; margin > 0 && leading_byte < 4; margin--) { + leading_byte += (int8_t(in[margin-1]) > -65); + } + // If the input is long enough, then we have that margin-1 is the fourth last leading byte. + const size_t safety_margin = size - margin + 1; // to avoid overruns! while(pos + 64 + safety_margin <= size) { simd8x64 input(reinterpret_cast(in + pos)); if(input.is_ascii()) { @@ -27728,7 +27968,7 @@ using namespace simd; utf8_end_of_code_point_mask >>= consumed; } // At this point there may remain between 0 and 12 bytes in the - // 64-byte block.These bytes will be processed again. So we have an + // 64-byte block. These bytes will be processed again. So we have an // 80% efficiency (in the worst case). In practice we expect an // 85% to 90% efficiency. } diff --git a/deps/simdutf/simdutf.h b/deps/simdutf/simdutf.h index bc18418fbf52cb..05e8985540655a 100644 --- a/deps/simdutf/simdutf.h +++ b/deps/simdutf/simdutf.h @@ -1,4 +1,4 @@ -/* auto-generated on 2023-03-30 20:31:03 -0400. Do not edit! */ +/* auto-generated on 2023-04-08 11:21:57 -0400. Do not edit! */ // dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf.h /* begin file include/simdutf.h */ #ifndef SIMDUTF_H @@ -144,6 +144,8 @@ // POWER processors. Please see https://github.com/lemire/simdutf/issues/51 #elif defined(__s390__) // s390 IBM system. Big endian. 
+#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64 +// RISC-V 64-bit #else // The simdutf library is designed // for 64-bit processors and it seems that you are not @@ -572,7 +574,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS #define SIMDUTF_SIMDUTF_VERSION_H /** The version of simdutf being used (major.minor.revision) */ -#define SIMDUTF_VERSION "3.2.3" +#define SIMDUTF_VERSION "3.2.7" namespace simdutf { enum { @@ -587,7 +589,7 @@ enum { /** * The revision (major.minor.REVISION) of simdutf being used. */ - SIMDUTF_VERSION_REVISION = 3 + SIMDUTF_VERSION_REVISION = 7 }; } // namespace simdutf