1- /* auto-generated on 2022-12-13 18:33:40 -0500. Do not edit! */
1+ /* auto-generated on 2022-12-15 12:13:17 -0500. Do not edit! */
22// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf.cpp
33/* begin file src/simdutf.cpp */
44#include "simdutf.h"
@@ -3895,7 +3895,9 @@ SIMDUTF_POP_DISABLE_WARNINGS
38953895
38963896#if __GNUC__ == 8
38973897#define SIMDUTF_GCC8 1
3898- #endif // __GNUC__ == 8
3898+ #elif __GNUC__ == 9
3899+ #define SIMDUTF_GCC9 1
3900+ #endif // __GNUC__ == 8 || __GNUC__ == 9
38993901
39003902#endif // defined(__GNUC__) && !defined(__clang__)
39013903
@@ -15730,8 +15732,13 @@ simdutf_really_inline bool process_block_utf8_to_utf16(const char *&in, char16_t
1573015732 __mmask64 bxorleading = _kxor_mask64(b, leading);
1573115733 if (_kshiftli_mask64(m234, 1) != bxorleading) { return false; }
1573215734 }
15733- in += 64 - _lzcnt_u64(_pdep_u64(0xFFFFFFFF, continuation_or_ascii));
15734-
15735+ //
15736+ if (tail == SIMDUTF_FULL) {
15737+ in += 32;
15738+ if(int8_t(*in) <= int8_t(0xc0)) in++;
15739+ } else {
15740+ in += 64 - _lzcnt_u64(_pdep_u64(0xFFFFFFFF, continuation_or_ascii));
15741+ }
1573515742 __m512i lead = _mm512_maskz_compress_epi8(leading, leading2byte); // will contain zero for ascii, and the data
1573615743 lead = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(lead)); // ... zero extended into words
1573715744 __m512i follow = _mm512_maskz_compress_epi8(continuation_or_ascii, input); // the last bytes of each sequence
@@ -15742,8 +15749,9 @@ simdutf_really_inline bool process_block_utf8_to_utf16(const char *&in, char16_t
1574215749 if(big_endian) { final = _mm512_shuffle_epi8(final, byteflip); }
1574315750 if (tail == SIMDUTF_FULL) {
1574415751 // Next part is UTF-16 specific and can be generalized to UTF-32.
15745- _mm512_storeu_si512(out, final);
15746- out += 32; // UTF-8 to UTF-16 is only expansionary in this case.
15752+ int nout = _mm_popcnt_u32(uint32_t(leading));
15753+ _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), final);
15754+ out += nout; // UTF-8 to UTF-16 is only expansionary in this case.
1574715755 } else {
1574815756 int nout = int(_mm_popcnt_u64(_pdep_u64(0xFFFFFFFF, leading)));
1574915757 _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), final);
@@ -15909,12 +15917,12 @@ __m512i prev(__m512i input, __m512i previous) {
1590915917 static_assert(N<=32, "N must be no larger than 32");
1591015918 const __m512i movemask = _mm512_setr_epi32(28,29,30,31,0,1,2,3,4,5,6,7,8,9,10,11);
1591115919 const __m512i rotated = _mm512_permutex2var_epi32(input, movemask, previous);
15912- #if SIMDUTF_GCC8
15913- constexpr int shift = 16-N; // workaround for GCC8
15920+ #if SIMDUTF_GCC8 || SIMDUTF_GCC9
15921+ constexpr int shift = 16-N; // workaround for GCC8,9
1591415922 return _mm512_alignr_epi8(input, rotated, shift);
1591515923#else
1591615924 return _mm512_alignr_epi8(input, rotated, 16-N);
15917- #endif // SIMDUTF_GCC8
15925+ #endif // SIMDUTF_GCC8 || SIMDUTF_GCC9
1591815926}
1591915927
1592015928template <unsigned idx0, unsigned idx1, unsigned idx2, unsigned idx3>
0 commit comments