1
- /* auto-generated on 2024-04-11 09:56:55 -0400. Do not edit! */
1
+ /* auto-generated on 2024-04-24 01:28:18 -0400. Do not edit! */
2
2
/* begin file src/simdutf.cpp */
3
3
#include "simdutf.h"
4
4
// We include base64_tables once.
@@ -5999,8 +5999,8 @@ static const implementation* get_single_implementation() {
5999
5999
*/
6000
6000
class detect_best_supported_implementation_on_first_use final : public implementation {
6001
6001
public:
6002
- const std::string & name() const noexcept final { return set_best()->name(); }
6003
- const std::string & description() const noexcept final { return set_best()->description(); }
6002
+ std::string name() const noexcept final { return set_best()->name(); }
6003
+ std::string description() const noexcept final { return set_best()->description(); }
6004
6004
uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); }
6005
6005
6006
6006
simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept override {
@@ -6333,6 +6333,8 @@ class detect_best_supported_implementation_on_first_use final : public implement
6333
6333
const implementation *set_best() const noexcept;
6334
6334
};
6335
6335
6336
+ static_assert(std::is_trivially_destructible<detect_best_supported_implementation_on_first_use>::value, "detect_best_supported_implementation_on_first_use should be trivially destructible");
6337
+
6336
6338
static const std::initializer_list<const implementation *>& get_available_implementation_pointers() {
6337
6339
static const std::initializer_list<const implementation *> available_implementation_pointers {
6338
6340
#if SIMDUTF_IMPLEMENTATION_ICELAKE
@@ -6695,7 +6697,11 @@ class unsupported_implementation final : public implementation {
6695
6697
unsupported_implementation() : implementation("unsupported", "Unsupported CPU (no detected SIMD instructions)", 0) {}
6696
6698
};
6697
6699
6698
- const unsupported_implementation unsupported_singleton{};
6700
+ const unsupported_implementation* get_unsupported_singleton() { // accessor for the fallback "unsupported" implementation object
6701
+ static const unsupported_implementation unsupported_singleton{}; // function-local static instead of a namespace-scope global
6702
+ return &unsupported_singleton; // callers receive a pointer to that single shared instance
6703
+ }
6704
+ static_assert(std::is_trivially_destructible<unsupported_implementation>::value, "unsupported_singleton should be trivially destructible"); // compile-time check that the class has a trivial destructor
6699
6705
6700
6706
size_t available_implementation_list::size() const noexcept {
6701
6707
return internal::get_available_implementation_pointers().size();
@@ -6713,7 +6719,7 @@ const implementation *available_implementation_list::detect_best_supported() con
6713
6719
uint32_t required_instruction_sets = impl->required_instruction_sets();
6714
6720
if ((supported_instruction_sets & required_instruction_sets) == required_instruction_sets) { return impl; }
6715
6721
}
6716
- return &unsupported_singleton ; // this should never happen?
6722
+ return get_unsupported_singleton() ; // this should never happen?
6717
6723
}
6718
6724
6719
6725
const implementation *detect_best_supported_implementation_on_first_use::set_best() const noexcept {
@@ -6728,7 +6734,7 @@ const implementation *detect_best_supported_implementation_on_first_use::set_bes
6728
6734
return get_active_implementation() = force_implementation;
6729
6735
} else {
6730
6736
// Note: abort() and stderr usage within the library is forbidden.
6731
- return get_active_implementation() = &unsupported_singleton ;
6737
+ return get_active_implementation() = get_unsupported_singleton() ;
6732
6738
}
6733
6739
}
6734
6740
return get_active_implementation() = get_available_implementations().detect_best_supported();
@@ -6747,8 +6753,8 @@ SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list& get_avail
6747
6753
}
6748
6754
6749
6755
/**
6750
- * The active implementation.
6751
- */
6756
+ * The active implementation.
6757
+ */
6752
6758
SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation>& get_active_implementation() {
6753
6759
#if SIMDUTF_SINGLE_IMPLEMENTATION
6754
6760
// skip runtime detection
@@ -26119,7 +26125,7 @@ std::pair<result, char*> avx2_convert_utf16_to_utf8_with_errors(const char16_t*
26119
26125
1. an input register contains no surrogates and each value
26120
26126
is in range 0x0000 .. 0x07ff.
26121
26127
2. an input register contains no surrogates and values are
26122
- is in range 0x0000 .. 0xffff.
26128
+ in range 0x0000 .. 0xffff.
26123
26129
3. an input register contains surrogates --- i.e. codepoints
26124
26130
can have 16 or 32 bits.
26125
26131
@@ -32395,6 +32401,8 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(const cha
32395
32401
32396
32402
/* end file src/rvv/rvv_utf8_to.inl.cpp */
32397
32403
/* begin file src/rvv/rvv_utf16_to.inl.cpp */
32404
+ #include <cstdio>
32405
+
32398
32406
template<simdutf_ByteFlip bflip>
32399
32407
simdutf_really_inline static result rvv_utf16_to_latin1_with_errors(const char16_t *src, size_t len, char *dst) {
32400
32408
const char16_t *const beg = src;
@@ -32609,47 +32617,95 @@ simdutf_really_inline static result rvv_utf16_to_utf32_with_errors(const char16_
32609
32617
const char16_t *const srcBeg = src;
32610
32618
char32_t *const dstBeg = dst;
32611
32619
32620
+ constexpr const uint16_t ANY_SURROGATE_MASK = 0xf800;
32621
+ constexpr const uint16_t ANY_SURROGATE_VALUE = 0xd800;
32622
+ constexpr const uint16_t LO_SURROGATE_MASK = 0xfc00;
32623
+ constexpr const uint16_t LO_SURROGATE_VALUE = 0xdc00;
32624
+ constexpr const uint16_t HI_SURROGATE_MASK = 0xfc00;
32625
+ constexpr const uint16_t HI_SURROGATE_VALUE = 0xd800;
32626
+
32612
32627
uint16_t last = 0;
32613
- for (size_t vl, vlOut; len > 0; len -= vl, src += vl, dst += vlOut, last = simdutf_byteflip<bflip>(src[-1])) {
32614
- vl = __riscv_vsetvl_e16m2(len);
32615
- vuint16m2_t v1 = __riscv_vle16_v_u16m2((uint16_t const*)src, vl);
32616
- v1 = simdutf_byteflip<bflip>(v1, vl);
32617
- vuint16m2_t v0 = __riscv_vslide1up_vx_u16m2(v1, last, vl);
32628
+ while (len > 0) {
32629
+ size_t vl = __riscv_vsetvl_e16m2(len);
32630
+ vuint16m2_t v0 = __riscv_vle16_v_u16m2((uint16_t const*)src, vl);
32631
+ v0 = simdutf_byteflip<bflip>(v0, vl);
32632
+
32633
+ { // check fast-path
32634
+ const vuint16m2_t v = __riscv_vand_vx_u16m2(v0, ANY_SURROGATE_MASK, vl);
32635
+ const vbool8_t any_surrogate = __riscv_vmseq_vx_u16m2_b8(v, ANY_SURROGATE_VALUE, vl);
32636
+ if (__riscv_vfirst_m_b8(any_surrogate, vl) < 0) {
32637
+ /* no surrogates */
32638
+ __riscv_vse32_v_u32m4((uint32_t*)dst, __riscv_vzext_vf2_u32m4(v0, vl), vl);
32639
+ len -= vl;
32640
+ src += vl;
32641
+ dst += vl;
32642
+ continue;
32643
+ }
32644
+ }
32618
32645
32619
- vbool8_t surhi0 = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v0, 0xFC00, vl), 0xD800, vl);
32620
- vbool8_t surlo1 = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v1, 0xFC00, vl), 0xDC00, vl);
32646
+ if ((simdutf_byteflip<bflip>(src[0]) & LO_SURROGATE_MASK) == LO_SURROGATE_VALUE) {
32647
+ return result(error_code::SURROGATE, src - srcBeg);
32648
+ }
32621
32649
32622
- /* no surrogates */
32623
- if (__riscv_vfirst_m_b8(__riscv_vmor_mm_b8(surhi0, surlo1 , vl), vl) < 0) {
32624
- vlOut = vl ;
32625
- __riscv_vse32_v_u32m4((uint32_t*)dst, __riscv_vzext_vf2_u32m4(v1, vl), vl);
32626
- continue ;
32650
+ // decode surrogates
32651
+ vuint16m2_t v1 = __riscv_vslide1down_vx_u16m2(v0, 0 , vl);
32652
+ vl = __riscv_vsetvl_e16m2(vl - 1) ;
32653
+ if (vl == 0) {
32654
+ return result(error_code::SURROGATE, src - srcBeg) ;
32627
32655
}
32628
32656
32629
- long idx = __riscv_vfirst_m_b8(__riscv_vmxor_mm_b8(surhi0, surlo1, vl), vl);
32630
- if (idx >= 0) {
32631
- last = idx > 0 ? simdutf_byteflip<bflip>(src[idx-1]) : last;
32632
- return result(error_code::SURROGATE, src - srcBeg + idx - (last - 0xD800u < 0x400u));
32657
+ const vbool8_t surhi = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v0, HI_SURROGATE_MASK, vl), HI_SURROGATE_VALUE, vl);
32658
+ const vbool8_t surlo = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v1, LO_SURROGATE_MASK, vl), LO_SURROGATE_VALUE, vl);
32659
+
32660
+ // compress everything but lo surrogates
32661
+ const vbool8_t compress = __riscv_vmsne_vx_u16m2_b8(__riscv_vand_vx_u16m2(v0, LO_SURROGATE_MASK, vl), LO_SURROGATE_VALUE, vl);
32662
+
32663
+ {
32664
+ const vbool8_t diff = __riscv_vmxor_mm_b8(surhi, surlo, vl);
32665
+ const long idx = __riscv_vfirst_m_b8(diff, vl);
32666
+ if (idx >= 0) {
32667
+ return result(error_code::SURROGATE, src - srcBeg + idx + 1);
32668
+ }
32633
32669
}
32634
32670
32635
- vbool8_t surhi1 = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v1, 0xFC00, vl), 0xD800, vl);
32636
- uint16_t next = vl < len ? simdutf_byteflip<bflip>(src[vl]) : 0;
32671
+ last = simdutf_byteflip<bflip>(src[vl]);
32672
+ vuint32m4_t utf32 = __riscv_vzext_vf2_u32m4(v0, vl);
32673
+
32674
+ // v0 = 110110yyyyyyyyyy (0xd800 + yyyyyyyyyy) --- hi surrogate
32675
+ // v1 = 110111xxxxxxxxxx (0xdc00 + xxxxxxxxxx) --- lo surrogate
32676
+
32677
+ // t0 = u32(0000_0000_0000_0000_0000_00yy_yyyy_yyyy)
32678
+ const vuint32m4_t t0 = __riscv_vzext_vf2_u32m4(__riscv_vand_vx_u16m2(v0, 0x03ff, vl), vl);
32679
+ // t1 = u32(0000_0000_0000_yyyy_yyyy_yy00_0000_0000)
32680
+ const vuint32m4_t t1 = __riscv_vsll_vx_u32m4(t0, 10, vl);
32681
+
32682
+ // t2 = u32(0000_0000_0000_0000_0000_00xx_xxxx_xxxx)
32683
+ const vuint32m4_t t2 = __riscv_vzext_vf2_u32m4(__riscv_vand_vx_u16m2(v1, 0x03ff, vl), vl);
32684
+
32685
+ // t3 = u32(0000_0000_0000_yyyy_yyyy_yyxx_xxxx_xxxx)
32686
+ const vuint32m4_t t3 = __riscv_vor_vv_u32m4(t1, t2, vl);
32687
+
32688
+ // t4 = utf32 from surrogate pairs
32689
+ const vuint32m4_t t4 = __riscv_vadd_vx_u32m4(t3, 0x10000, vl);
32637
32690
32638
- vuint32m4_t wide = __riscv_vzext_vf2_u32m4(v1, vl);
32639
- vuint32m4_t slided = __riscv_vslide1down_vx_u32m4(wide, next, vl);
32640
- vuint32m4_t aligned = __riscv_vsll_vx_u32m4_mu(surhi1, wide, wide, 10, vl);
32641
- vuint32m4_t added = __riscv_vadd_vv_u32m4_mu(surhi1, aligned, aligned, slided, vl);
32642
- vuint32m4_t utf32 = __riscv_vadd_vx_u32m4_mu(surhi1, added, added, 0xFCA02400, vl);
32643
- vbool8_t m = __riscv_vmnot_m_b8(surlo1, vl);
32644
- vlOut = __riscv_vcpop_m_b8(m, vl);
32645
- vuint32m4_t comp = __riscv_vcompress_vm_u32m4(utf32, m, vl);
32691
+ const vuint32m4_t result = __riscv_vmerge_vvm_u32m4(utf32, t4, surhi, vl);
32692
+
32693
+ const vuint32m4_t comp = __riscv_vcompress_vm_u32m4(result, compress, vl);
32694
+ const size_t vlOut = __riscv_vcpop_m_b8(compress, vl);
32646
32695
__riscv_vse32_v_u32m4((uint32_t*)dst, comp, vlOut);
32696
+
32697
+ len -= vl;
32698
+ src += vl;
32699
+ dst += vlOut;
32700
+
32701
+ if ((last & LO_SURROGATE_MASK) == LO_SURROGATE_VALUE) {
32702
+ // last item is a lo surrogate and was already consumed
32703
+ len -= 1;
32704
+ src += 1;
32705
+ }
32647
32706
}
32648
32707
32649
- if (last - 0xD800u < 0x400u)
32650
- return result(error_code::SURROGATE, src - srcBeg - 1); /* end on high surrogate */
32651
- else
32652
- return result(error_code::SUCCESS, dst - dstBeg);
32708
+ return result(error_code::SUCCESS, dst - dstBeg);
32653
32709
}
32654
32710
32655
32711
simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t *src, size_t len, char32_t *dst) const noexcept {
0 commit comments