|
1 | | -/* auto-generated on 2024-11-14 14:52:31 -0500. Do not edit! */ |
| 1 | +/* auto-generated on 2024-11-21 10:33:28 -0500. Do not edit! */ |
2 | 2 | /* begin file src/simdutf.cpp */ |
3 | 3 | #include "simdutf.h" |
4 | 4 | // We include base64_tables once. |
@@ -23495,7 +23495,7 @@ size_t encode_base64(char *dst, const char *src, size_t srclen, |
23495 | 23495 | } |
23496 | 23496 |
|
23497 | 23497 | template <bool base64_url> |
23498 | | -static inline uint64_t to_base64_mask(block64 *b, bool *error) { |
| 23498 | +static inline uint64_t to_base64_mask(block64 *b, uint64_t *error) { |
23499 | 23499 | __m512i input = b->chunks[0]; |
23500 | 23500 | const __m512i ascii_space_tbl = _mm512_set_epi8( |
23501 | 23501 | 0, 0, 13, 12, 0, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 13, 12, 0, 10, |
@@ -23538,7 +23538,7 @@ static inline uint64_t to_base64_mask(block64 *b, bool *error) { |
23538 | 23538 | if (mask) { |
23539 | 23539 | const __mmask64 spaces = _mm512_cmpeq_epi8_mask( |
23540 | 23540 | _mm512_shuffle_epi8(ascii_space_tbl, input), input); |
23541 | | - *error |= (mask != spaces); |
| 23541 | + *error = (mask ^ spaces); |
23542 | 23542 | } |
23543 | 23543 | b->chunks[0] = translated; |
23544 | 23544 |
|
@@ -23646,16 +23646,13 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, |
23646 | 23646 | block64 b; |
23647 | 23647 | load_block(&b, src); |
23648 | 23648 | src += 64; |
23649 | | - bool error = false; |
| 23649 | + uint64_t error = 0; |
23650 | 23650 | uint64_t badcharmask = to_base64_mask<base64_url>(&b, &error); |
23651 | 23651 | if (error) { |
23652 | 23652 | src -= 64; |
23653 | | - while (src < srcend && scalar::base64::is_eight_byte(*src) && |
23654 | | - to_base64[uint8_t(*src)] <= 64) { |
23655 | | - src++; |
23656 | | - } |
23657 | | - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), |
23658 | | - size_t(dst - dstinit)}; |
| 23653 | + size_t error_offset = _tzcnt_u64(error); |
| 23654 | + return {error_code::INVALID_BASE64_CHARACTER, |
| 23655 | + size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; |
23659 | 23656 | } |
23660 | 23657 | if (badcharmask != 0) { |
23661 | 23658 | // optimization opportunity: check for simple masks like those made of |
@@ -28240,7 +28237,7 @@ struct block64 { |
28240 | 28237 | }; |
28241 | 28238 |
|
28242 | 28239 | template <bool base64_url> |
28243 | | -static inline uint32_t to_base64_mask(__m256i *src, bool *error) { |
| 28240 | +static inline uint32_t to_base64_mask(__m256i *src, uint32_t *error) { |
28244 | 28241 | const __m256i ascii_space_tbl = |
28245 | 28242 | _mm256_setr_epi8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa, |
28246 | 28243 | 0x0, 0xc, 0xd, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x0, 0x0, |
@@ -28324,17 +28321,19 @@ static inline uint32_t to_base64_mask(__m256i *src, bool *error) { |
28324 | 28321 | if (mask) { |
28325 | 28322 | __m256i ascii_space = |
28326 | 28323 | _mm256_cmpeq_epi8(_mm256_shuffle_epi8(ascii_space_tbl, *src), *src); |
28327 | | - *error |= (mask != _mm256_movemask_epi8(ascii_space)); |
| 28324 | + *error = (mask ^ _mm256_movemask_epi8(ascii_space)); |
28328 | 28325 | } |
28329 | 28326 | *src = out; |
28330 | 28327 | return (uint32_t)mask; |
28331 | 28328 | } |
28332 | 28329 |
|
// Diff hunk for the block64 overload of to_base64_mask in the kernel whose
// block64 is processed as two chunks. Rows marked with a minus sign are the
// removed version; rows marked with a plus sign are the replacement.
//
// Removed version: took a bool error flag, reset it to 0, and passed the same
// pointer to both per-chunk calls.
//
// Replacement: gives each per-chunk call its own 32-bit error word (both
// start at 0), then packs them into *error as err0 | (err1 << 32). The
// returned mask is combined the same way: m0 | (m1 << 32).
//
// NOTE(review): presumably bit i of *error corresponds to byte i of the
// 64-byte block; the compress_decode_base64 hunk in this diff adds the
// trailing-zero count of the error word to the source offset - confirm.
28333 | 28330 | template <bool base64_url> |
28334 | | -static inline uint64_t to_base64_mask(block64 *b, bool *error) { |
28335 | | - *error = 0; |
28336 | | - uint64_t m0 = to_base64_mask<base64_url>(&b->chunks[0], error); |
28337 | | - uint64_t m1 = to_base64_mask<base64_url>(&b->chunks[1], error); |
| 28331 | +static inline uint64_t to_base64_mask(block64 *b, uint64_t *error) { |
| 28332 | + uint32_t err0 = 0; |
| 28333 | + uint32_t err1 = 0; |
| 28334 | + uint64_t m0 = to_base64_mask<base64_url>(&b->chunks[0], &err0); |
| 28335 | + uint64_t m1 = to_base64_mask<base64_url>(&b->chunks[1], &err1); |
| 28336 | + *error = err0 | ((uint64_t)err1 << 32); |
28338 | 28337 | return m0 | (m1 << 32); |
28339 | 28338 | } |
28340 | 28339 |
|
@@ -28466,16 +28465,13 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, |
28466 | 28465 | block64 b; |
28467 | 28466 | load_block(&b, src); |
28468 | 28467 | src += 64; |
28469 | | - bool error = false; |
| 28468 | + uint64_t error = 0; |
28470 | 28469 | uint64_t badcharmask = to_base64_mask<base64_url>(&b, &error); |
28471 | 28470 | if (error) { |
28472 | 28471 | src -= 64; |
28473 | | - while (src < srcend && scalar::base64::is_eight_byte(*src) && |
28474 | | - to_base64[uint8_t(*src)] <= 64) { |
28475 | | - src++; |
28476 | | - } |
28477 | | - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), |
28478 | | - size_t(dst - dstinit)}; |
| 28472 | + size_t error_offset = _tzcnt_u64(error); |
| 28473 | + return {error_code::INVALID_BASE64_CHARACTER, |
| 28474 | + size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; |
28479 | 28475 | } |
28480 | 28476 | if (badcharmask != 0) { |
28481 | 28477 | // optimization opportunity: check for simple masks like those made of |
@@ -37992,7 +37988,7 @@ struct block64 { |
37992 | 37988 | }; |
37993 | 37989 |
|
37994 | 37990 | template <bool base64_url> |
37995 | | -static inline uint16_t to_base64_mask(__m128i *src, bool *error) { |
| 37991 | +static inline uint16_t to_base64_mask(__m128i *src, uint32_t *error) { |
37996 | 37992 | const __m128i ascii_space_tbl = |
37997 | 37993 | _mm_setr_epi8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa, 0x0, |
37998 | 37994 | 0xc, 0xd, 0x0, 0x0); |
@@ -38059,22 +38055,42 @@ static inline uint16_t to_base64_mask(__m128i *src, bool *error) { |
38059 | 38055 | if (mask) { |
38060 | 38056 | __m128i ascii_space = |
38061 | 38057 | _mm_cmpeq_epi8(_mm_shuffle_epi8(ascii_space_tbl, *src), *src); |
38062 | | - *error |= (mask != _mm_movemask_epi8(ascii_space)); |
| 38058 | + *error = (mask ^ _mm_movemask_epi8(ascii_space)); |
38063 | 38059 | } |
38064 | 38060 | *src = out; |
38065 | 38061 | return (uint16_t)mask; |
38066 | 38062 | } |
38067 | 38063 |
|
// Diff hunk for the block64 overload of to_base64_mask in the kernel whose
// block64 is processed as four chunks. Rows marked with a minus sign are the
// removed version; rows marked with a plus sign are the replacement.
//
// Removed version: took a bool error flag, reset it to 0, and passed the same
// pointer to all four per-chunk calls.
//
// Replacement: gives each per-chunk call its own error word (all start at 0),
// then packs them into *error at bit offsets 0, 16, 32 and 48. The returned
// mask combines m0..m3 with the same shifts.
//
// NOTE(review): the per-chunk overload takes a uint32_t error word but returns
// a uint16_t mask; the 16-bit packing assumes each error word only has its low
// 16 bits set - TODO confirm against the per-chunk implementation.
38068 | 38064 | template <bool base64_url> |
38069 | | -static inline uint64_t to_base64_mask(block64 *b, bool *error) { |
38070 | | - *error = 0; |
38071 | | - uint64_t m0 = to_base64_mask<base64_url>(&b->chunks[0], error); |
38072 | | - uint64_t m1 = to_base64_mask<base64_url>(&b->chunks[1], error); |
38073 | | - uint64_t m2 = to_base64_mask<base64_url>(&b->chunks[2], error); |
38074 | | - uint64_t m3 = to_base64_mask<base64_url>(&b->chunks[3], error); |
| 38065 | +static inline uint64_t to_base64_mask(block64 *b, uint64_t *error) { |
| 38066 | + uint32_t err0 = 0; |
| 38067 | + uint32_t err1 = 0; |
| 38068 | + uint32_t err2 = 0; |
| 38069 | + uint32_t err3 = 0; |
| 38070 | + uint64_t m0 = to_base64_mask<base64_url>(&b->chunks[0], &err0); |
| 38071 | + uint64_t m1 = to_base64_mask<base64_url>(&b->chunks[1], &err1); |
| 38072 | + uint64_t m2 = to_base64_mask<base64_url>(&b->chunks[2], &err2); |
| 38073 | + uint64_t m3 = to_base64_mask<base64_url>(&b->chunks[3], &err3); |
| 38074 | + *error = (err0) | ((uint64_t)err1 << 16) | ((uint64_t)err2 << 32) | |
| 38075 | + ((uint64_t)err3 << 48); |
38075 | 38076 | return m0 | (m1 << 16) | (m2 << 32) | (m3 << 48); |
38076 | 38077 | } |
38077 | 38078 |
|
/**
 * Counts the trailing zero bits of a 64-bit value.
 *
 * @param num value to scan
 * @return index of the least significant set bit of num (0..63), or 64 when
 *         num is zero
 *
 * Zero is handled up front because neither _BitScanForward64 nor
 * __builtin_ctzll defines a result for a zero input.
 */
static inline size_t simdutf_tzcnt_u64(uint64_t num) {
  if (num == 0) {
    return 64;
  }
#if defined(_MSC_VER) && !defined(__clang__)
  unsigned long index;
  _BitScanForward64(&index, num);
  return static_cast<size_t>(index);
#else // GCC or Clang
  // __builtin_ctzll returns a signed int; cast explicitly so the conversion
  // to size_t is visible and does not trigger sign-conversion warnings.
  return static_cast<size_t>(__builtin_ctzll(num));
#endif
}
| 38093 | + |
38078 | 38094 | static inline void copy_block(block64 *b, char *output) { |
38079 | 38095 | _mm_storeu_si128(reinterpret_cast<__m128i *>(output), b->chunks[0]); |
38080 | 38096 | _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 16), b->chunks[1]); |
@@ -38222,16 +38238,13 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen, |
38222 | 38238 | block64 b; |
38223 | 38239 | load_block(&b, src); |
38224 | 38240 | src += 64; |
38225 | | - bool error = false; |
| 38241 | + uint64_t error = 0; |
38226 | 38242 | uint64_t badcharmask = to_base64_mask<base64_url>(&b, &error); |
38227 | 38243 | if (error) { |
38228 | 38244 | src -= 64; |
38229 | | - while (src < srcend && scalar::base64::is_eight_byte(*src) && |
38230 | | - to_base64[uint8_t(*src)] <= 64) { |
38231 | | - src++; |
38232 | | - } |
38233 | | - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), |
38234 | | - size_t(dst - dstinit)}; |
| 38245 | + size_t error_offset = simdutf_tzcnt_u64(error); |
| 38246 | + return {error_code::INVALID_BASE64_CHARACTER, |
| 38247 | + size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; |
38235 | 38248 | } |
38236 | 38249 | if (badcharmask != 0) { |
38237 | 38250 | // optimization opportunity: check for simple masks like those made of |
|
0 commit comments