1- /* auto-generated on 2024-04-11 09:56:55 -0400. Do not edit! */
1+ /* auto-generated on 2024-04-24 01:28:18 -0400. Do not edit! */
22/* begin file src/simdutf.cpp */
33#include "simdutf.h"
44// We include base64_tables once.
@@ -5999,8 +5999,8 @@ static const implementation* get_single_implementation() {
59995999 */
60006000class detect_best_supported_implementation_on_first_use final : public implementation {
60016001public:
6002- const std::string & name() const noexcept final { return set_best()->name(); }
6003- const std::string & description() const noexcept final { return set_best()->description(); }
6002+ std::string name() const noexcept final { return set_best()->name(); }
6003+ std::string description() const noexcept final { return set_best()->description(); }
60046004 uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); }
60056005
60066006 simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept override {
@@ -6333,6 +6333,8 @@ class detect_best_supported_implementation_on_first_use final : public implement
63336333 const implementation *set_best() const noexcept;
63346334};
63356335
6336+ static_assert(std::is_trivially_destructible<detect_best_supported_implementation_on_first_use>::value, "detect_best_supported_implementation_on_first_use should be trivially destructible");
6337+
63366338static const std::initializer_list<const implementation *>& get_available_implementation_pointers() {
63376339 static const std::initializer_list<const implementation *> available_implementation_pointers {
63386340#if SIMDUTF_IMPLEMENTATION_ICELAKE
@@ -6695,7 +6697,11 @@ class unsupported_implementation final : public implementation {
66956697 unsupported_implementation() : implementation("unsupported", "Unsupported CPU (no detected SIMD instructions)", 0) {}
66966698};
66976699
6698- const unsupported_implementation unsupported_singleton{};
6700+ const unsupported_implementation* get_unsupported_singleton() { // accessor for the fallback "unsupported" implementation object
6701+ static const unsupported_implementation unsupported_singleton{}; // function-local static: constructed the first time this function is called
6702+ return &unsupported_singleton; // every call returns the address of that one object
6703+ }
6704+ static_assert(std::is_trivially_destructible<unsupported_implementation>::value, "unsupported_singleton should be trivially destructible");
66996705
67006706size_t available_implementation_list::size() const noexcept {
67016707 return internal::get_available_implementation_pointers().size();
@@ -6713,7 +6719,7 @@ const implementation *available_implementation_list::detect_best_supported() con
67136719 uint32_t required_instruction_sets = impl->required_instruction_sets();
67146720 if ((supported_instruction_sets & required_instruction_sets) == required_instruction_sets) { return impl; }
67156721 }
6716- return &unsupported_singleton ; // this should never happen?
6722+ return get_unsupported_singleton() ; // this should never happen?
67176723}
67186724
67196725const implementation *detect_best_supported_implementation_on_first_use::set_best() const noexcept {
@@ -6728,7 +6734,7 @@ const implementation *detect_best_supported_implementation_on_first_use::set_bes
67286734 return get_active_implementation() = force_implementation;
67296735 } else {
67306736 // Note: abort() and stderr usage within the library is forbidden.
6731- return get_active_implementation() = &unsupported_singleton ;
6737+ return get_active_implementation() = get_unsupported_singleton() ;
67326738 }
67336739 }
67346740 return get_active_implementation() = get_available_implementations().detect_best_supported();
@@ -6747,8 +6753,8 @@ SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list& get_avail
67476753}
67486754
67496755/**
6750- * The active implementation.
6751- */
6756+ * The active implementation.
6757+ */
67526758SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation>& get_active_implementation() {
67536759#if SIMDUTF_SINGLE_IMPLEMENTATION
67546760 // skip runtime detection
@@ -26119,7 +26125,7 @@ std::pair<result, char*> avx2_convert_utf16_to_utf8_with_errors(const char16_t*
2611926125 1. an input register contains no surrogates and each value
2612026126 is in range 0x0000 .. 0x07ff.
2612126127 2. an input register contains no surrogates and values are
26122- is in range 0x0000 .. 0xffff.
26128+ in range 0x0000 .. 0xffff.
2612326129 3. an input register contains surrogates --- i.e. codepoints
2612426130 can have 16 or 32 bits.
2612526131
@@ -32395,6 +32401,8 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(const cha
3239532401
3239632402/* end file src/rvv/rvv_utf8_to.inl.cpp */
3239732403/* begin file src/rvv/rvv_utf16_to.inl.cpp */
32404+ #include <cstdio>
32405+
3239832406template<simdutf_ByteFlip bflip>
3239932407simdutf_really_inline static result rvv_utf16_to_latin1_with_errors(const char16_t *src, size_t len, char *dst) {
3240032408 const char16_t *const beg = src;
@@ -32609,47 +32617,95 @@ simdutf_really_inline static result rvv_utf16_to_utf32_with_errors(const char16_
3260932617 const char16_t *const srcBeg = src;
3261032618 char32_t *const dstBeg = dst;
3261132619
32620+ constexpr const uint16_t ANY_SURROGATE_MASK = 0xf800;
32621+ constexpr const uint16_t ANY_SURROGATE_VALUE = 0xd800;
32622+ constexpr const uint16_t LO_SURROGATE_MASK = 0xfc00;
32623+ constexpr const uint16_t LO_SURROGATE_VALUE = 0xdc00;
32624+ constexpr const uint16_t HI_SURROGATE_MASK = 0xfc00;
32625+ constexpr const uint16_t HI_SURROGATE_VALUE = 0xd800;
32626+
3261232627 uint16_t last = 0;
32613- for (size_t vl, vlOut; len > 0; len -= vl, src += vl, dst += vlOut, last = simdutf_byteflip<bflip>(src[-1])) {
32614- vl = __riscv_vsetvl_e16m2(len);
32615- vuint16m2_t v1 = __riscv_vle16_v_u16m2((uint16_t const*)src, vl);
32616- v1 = simdutf_byteflip<bflip>(v1, vl);
32617- vuint16m2_t v0 = __riscv_vslide1up_vx_u16m2(v1, last, vl);
32628+ while (len > 0) {
32629+ size_t vl = __riscv_vsetvl_e16m2(len);
32630+ vuint16m2_t v0 = __riscv_vle16_v_u16m2((uint16_t const*)src, vl);
32631+ v0 = simdutf_byteflip<bflip>(v0, vl);
32632+
32633+ { // check fast-path
32634+ const vuint16m2_t v = __riscv_vand_vx_u16m2(v0, ANY_SURROGATE_MASK, vl);
32635+ const vbool8_t any_surrogate = __riscv_vmseq_vx_u16m2_b8(v, ANY_SURROGATE_VALUE, vl);
32636+ if (__riscv_vfirst_m_b8(any_surrogate, vl) < 0) {
32637+ /* no surrogates */
32638+ __riscv_vse32_v_u32m4((uint32_t*)dst, __riscv_vzext_vf2_u32m4(v0, vl), vl);
32639+ len -= vl;
32640+ src += vl;
32641+ dst += vl;
32642+ continue;
32643+ }
32644+ }
3261832645
32619- vbool8_t surhi0 = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v0, 0xFC00, vl), 0xD800, vl);
32620- vbool8_t surlo1 = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v1, 0xFC00, vl), 0xDC00, vl);
32646+ if ((simdutf_byteflip<bflip>(src[0]) & LO_SURROGATE_MASK) == LO_SURROGATE_VALUE) {
32647+ return result(error_code::SURROGATE, src - srcBeg);
32648+ }
3262132649
32622- /* no surrogates */
32623- if (__riscv_vfirst_m_b8(__riscv_vmor_mm_b8(surhi0, surlo1 , vl), vl) < 0) {
32624- vlOut = vl ;
32625- __riscv_vse32_v_u32m4((uint32_t*)dst, __riscv_vzext_vf2_u32m4(v1, vl), vl);
32626- continue ;
32650+ // decode surrogates
32651+ vuint16m2_t v1 = __riscv_vslide1down_vx_u16m2(v0, 0 , vl);
32652+ vl = __riscv_vsetvl_e16m2(vl - 1) ;
32653+ if (vl == 0) {
32654+ return result(error_code::SURROGATE, src - srcBeg) ;
3262732655 }
3262832656
32629- long idx = __riscv_vfirst_m_b8(__riscv_vmxor_mm_b8(surhi0, surlo1, vl), vl);
32630- if (idx >= 0) {
32631- last = idx > 0 ? simdutf_byteflip<bflip>(src[idx-1]) : last;
32632- return result(error_code::SURROGATE, src - srcBeg + idx - (last - 0xD800u < 0x400u));
32657+ const vbool8_t surhi = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v0, HI_SURROGATE_MASK, vl), HI_SURROGATE_VALUE, vl);
32658+ const vbool8_t surlo = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v1, LO_SURROGATE_MASK, vl), LO_SURROGATE_VALUE, vl);
32659+
32660+ // compress everything but lo surrogates
32661+ const vbool8_t compress = __riscv_vmsne_vx_u16m2_b8(__riscv_vand_vx_u16m2(v0, LO_SURROGATE_MASK, vl), LO_SURROGATE_VALUE, vl);
32662+
32663+ {
32664+ const vbool8_t diff = __riscv_vmxor_mm_b8(surhi, surlo, vl);
32665+ const long idx = __riscv_vfirst_m_b8(diff, vl);
32666+ if (idx >= 0) {
32667+ return result(error_code::SURROGATE, src - srcBeg + idx + 1);
32668+ }
3263332669 }
3263432670
32635- vbool8_t surhi1 = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v1, 0xFC00, vl), 0xD800, vl);
32636- uint16_t next = vl < len ? simdutf_byteflip<bflip>(src[vl]) : 0;
32671+ last = simdutf_byteflip<bflip>(src[vl]);
32672+ vuint32m4_t utf32 = __riscv_vzext_vf2_u32m4(v0, vl);
32673+
32674+ // v0 = 110110yyyyyyyyyy (0xd800 + yyyyyyyyyy) --- hi surrogate
32675+ // v1 = 110111xxxxxxxxxx (0xdc00 + xxxxxxxxxx) --- lo surrogate
32676+
32678+ // t0 = u32(0000_0000_0000_0000_0000_00yy_yyyy_yyyy)
32678+ const vuint32m4_t t0 = __riscv_vzext_vf2_u32m4(__riscv_vand_vx_u16m2(v0, 0x03ff, vl), vl);
32679+ // t1 = u32(0000_0000_0000_yyyy_yyyy_yy00_0000_0000)
32680+ const vuint32m4_t t1 = __riscv_vsll_vx_u32m4(t0, 10, vl);
32681+
32682+ // t2 = u32(0000_0000_0000_0000_0000_00xx_xxxx_xxxx)
32683+ const vuint32m4_t t2 = __riscv_vzext_vf2_u32m4(__riscv_vand_vx_u16m2(v1, 0x03ff, vl), vl);
32684+
32685+ // t3 = u32(0000_0000_0000_yyyy_yyyy_yyxx_xxxx_xxxx)
32686+ const vuint32m4_t t3 = __riscv_vor_vv_u32m4(t1, t2, vl);
32687+
32688+ // t4 = utf32 from surrogate pairs
32689+ const vuint32m4_t t4 = __riscv_vadd_vx_u32m4(t3, 0x10000, vl);
3263732690
32638- vuint32m4_t wide = __riscv_vzext_vf2_u32m4(v1, vl);
32639- vuint32m4_t slided = __riscv_vslide1down_vx_u32m4(wide, next, vl);
32640- vuint32m4_t aligned = __riscv_vsll_vx_u32m4_mu(surhi1, wide, wide, 10, vl);
32641- vuint32m4_t added = __riscv_vadd_vv_u32m4_mu(surhi1, aligned, aligned, slided, vl);
32642- vuint32m4_t utf32 = __riscv_vadd_vx_u32m4_mu(surhi1, added, added, 0xFCA02400, vl);
32643- vbool8_t m = __riscv_vmnot_m_b8(surlo1, vl);
32644- vlOut = __riscv_vcpop_m_b8(m, vl);
32645- vuint32m4_t comp = __riscv_vcompress_vm_u32m4(utf32, m, vl);
32691+ const vuint32m4_t result = __riscv_vmerge_vvm_u32m4(utf32, t4, surhi, vl);
32692+
32693+ const vuint32m4_t comp = __riscv_vcompress_vm_u32m4(result, compress, vl);
32694+ const size_t vlOut = __riscv_vcpop_m_b8(compress, vl);
3264632695 __riscv_vse32_v_u32m4((uint32_t*)dst, comp, vlOut);
32696+
32697+ len -= vl;
32698+ src += vl;
32699+ dst += vlOut;
32700+
32701+ if ((last & LO_SURROGATE_MASK) == LO_SURROGATE_VALUE) {
32702+ // the last element is a lo surrogate that was already consumed as the second half of a pair
32703+ len -= 1;
32704+ src += 1;
32705+ }
3264732706 }
3264832707
32649- if (last - 0xD800u < 0x400u)
32650- return result(error_code::SURROGATE, src - srcBeg - 1); /* end on high surrogate */
32651- else
32652- return result(error_code::SUCCESS, dst - dstBeg);
32708+ return result(error_code::SUCCESS, dst - dstBeg);
3265332709}
3265432710
3265532711simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t *src, size_t len, char32_t *dst) const noexcept {
0 commit comments