1
- /* auto-generated on 2022-12-13 18:33:40 -0500. Do not edit! */
1
+ /* auto-generated on 2022-12-15 12:13:17 -0500. Do not edit! */
2
2
// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf.cpp
3
3
/* begin file src/simdutf.cpp */
4
4
#include "simdutf.h"
@@ -3895,7 +3895,9 @@ SIMDUTF_POP_DISABLE_WARNINGS
3895
3895
3896
3896
#if __GNUC__ == 8
3897
3897
#define SIMDUTF_GCC8 1
3898
- #endif // __GNUC__ == 8
3898
+ #elif __GNUC__ == 9
3899
+ #define SIMDUTF_GCC9 1
3900
+ #endif // __GNUC__ == 8 || __GNUC__ == 9
3899
3901
3900
3902
#endif // defined(__GNUC__) && !defined(__clang__)
3901
3903
@@ -15730,8 +15732,13 @@ simdutf_really_inline bool process_block_utf8_to_utf16(const char *&in, char16_t
15730
15732
__mmask64 bxorleading = _kxor_mask64(b, leading);
15731
15733
if (_kshiftli_mask64(m234, 1) != bxorleading) { return false; }
15732
15734
}
15733
- in += 64 - _lzcnt_u64(_pdep_u64(0xFFFFFFFF, continuation_or_ascii));
15734
-
15735
+ //
15736
+ if (tail == SIMDUTF_FULL) {
15737
+ in += 32;
15738
+ if(int8_t(*in) <= int8_t(0xc0)) in++;
15739
+ } else {
15740
+ in += 64 - _lzcnt_u64(_pdep_u64(0xFFFFFFFF, continuation_or_ascii));
15741
+ }
15735
15742
__m512i lead = _mm512_maskz_compress_epi8(leading, leading2byte); // will contain zero for ascii, and the data
15736
15743
lead = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(lead)); // ... zero extended into words
15737
15744
__m512i follow = _mm512_maskz_compress_epi8(continuation_or_ascii, input); // the last bytes of each sequence
@@ -15742,8 +15749,9 @@ simdutf_really_inline bool process_block_utf8_to_utf16(const char *&in, char16_t
15742
15749
if(big_endian) { final = _mm512_shuffle_epi8(final, byteflip); }
15743
15750
if (tail == SIMDUTF_FULL) {
15744
15751
// Next part is UTF-16 specific and can be generalized to UTF-32.
15745
- _mm512_storeu_si512(out, final);
15746
- out += 32; // UTF-8 to UTF-16 is only expansionary in this case.
15752
+ int nout = _mm_popcnt_u32(uint32_t(leading));
15753
+ _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), final);
15754
+ out += nout; // UTF-8 to UTF-16 is only expansionary in this case.
15747
15755
} else {
15748
15756
int nout = int(_mm_popcnt_u64(_pdep_u64(0xFFFFFFFF, leading)));
15749
15757
_mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), final);
@@ -15909,12 +15917,12 @@ __m512i prev(__m512i input, __m512i previous) {
15909
15917
static_assert(N<=32, "N must be no larger than 32");
15910
15918
const __m512i movemask = _mm512_setr_epi32(28,29,30,31,0,1,2,3,4,5,6,7,8,9,10,11);
15911
15919
const __m512i rotated = _mm512_permutex2var_epi32(input, movemask, previous);
15912
- #if SIMDUTF_GCC8
15913
- constexpr int shift = 16-N; // workaround for GCC8
15920
+ #if SIMDUTF_GCC8 || SIMDUTF_GCC9
15921
+ constexpr int shift = 16-N; // workaround for GCC8,9
15914
15922
return _mm512_alignr_epi8(input, rotated, shift);
15915
15923
#else
15916
15924
return _mm512_alignr_epi8(input, rotated, 16-N);
15917
- #endif // SIMDUTF_GCC8
15925
+ #endif // SIMDUTF_GCC8 || SIMDUTF_GCC9
15918
15926
}
15919
15927
15920
15928
template <unsigned idx0, unsigned idx1, unsigned idx2, unsigned idx3>
0 commit comments