Skip to content

Commit e399360

Browse files
nodejs-github-bot authored and targos committed
deps: update simdutf to 5.2.6
PR-URL: #52727
Reviewed-By: Rafael Gonzaga <rafael.nunu@hotmail.com>
Reviewed-By: Marco Ippolito <marcoippolito54@gmail.com>
Reviewed-By: Luigi Pinca <luigipinca@gmail.com>
Reviewed-By: Antoine du Hamel <duhamelantoine1995@gmail.com>
1 parent 9102255 commit e399360

File tree

2 files changed

+112
-51
lines changed

2 files changed

+112
-51
lines changed

deps/simdutf/simdutf.cpp

+95 -39
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2024-04-11 09:56:55 -0400. Do not edit! */
1+
/* auto-generated on 2024-04-24 01:28:18 -0400. Do not edit! */
22
/* begin file src/simdutf.cpp */
33
#include "simdutf.h"
44
// We include base64_tables once.
@@ -5999,8 +5999,8 @@ static const implementation* get_single_implementation() {
59995999
*/
60006000
class detect_best_supported_implementation_on_first_use final : public implementation {
60016001
public:
6002-
const std::string &name() const noexcept final { return set_best()->name(); }
6003-
const std::string &description() const noexcept final { return set_best()->description(); }
6002+
std::string name() const noexcept final { return set_best()->name(); }
6003+
std::string description() const noexcept final { return set_best()->description(); }
60046004
uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); }
60056005

60066006
simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept override {
@@ -6333,6 +6333,8 @@ class detect_best_supported_implementation_on_first_use final : public implement
63336333
const implementation *set_best() const noexcept;
63346334
};
63356335

6336+
static_assert(std::is_trivially_destructible<detect_best_supported_implementation_on_first_use>::value, "detect_best_supported_implementation_on_first_use should be trivially destructible");
6337+
63366338
static const std::initializer_list<const implementation *>& get_available_implementation_pointers() {
63376339
static const std::initializer_list<const implementation *> available_implementation_pointers {
63386340
#if SIMDUTF_IMPLEMENTATION_ICELAKE
@@ -6695,7 +6697,11 @@ class unsupported_implementation final : public implementation {
66956697
unsupported_implementation() : implementation("unsupported", "Unsupported CPU (no detected SIMD instructions)", 0) {}
66966698
};
66976699

6698-
const unsupported_implementation unsupported_singleton{};
6700+
const unsupported_implementation* get_unsupported_singleton() {
6701+
static const unsupported_implementation unsupported_singleton{};
6702+
return &unsupported_singleton;
6703+
}
6704+
static_assert(std::is_trivially_destructible<unsupported_implementation>::value, "unsupported_singleton should be trivially destructible");
66996705

67006706
size_t available_implementation_list::size() const noexcept {
67016707
return internal::get_available_implementation_pointers().size();
@@ -6713,7 +6719,7 @@ const implementation *available_implementation_list::detect_best_supported() con
67136719
uint32_t required_instruction_sets = impl->required_instruction_sets();
67146720
if ((supported_instruction_sets & required_instruction_sets) == required_instruction_sets) { return impl; }
67156721
}
6716-
return &unsupported_singleton; // this should never happen?
6722+
return get_unsupported_singleton(); // this should never happen?
67176723
}
67186724

67196725
const implementation *detect_best_supported_implementation_on_first_use::set_best() const noexcept {
@@ -6728,7 +6734,7 @@ const implementation *detect_best_supported_implementation_on_first_use::set_bes
67286734
return get_active_implementation() = force_implementation;
67296735
} else {
67306736
// Note: abort() and stderr usage within the library is forbidden.
6731-
return get_active_implementation() = &unsupported_singleton;
6737+
return get_active_implementation() = get_unsupported_singleton();
67326738
}
67336739
}
67346740
return get_active_implementation() = get_available_implementations().detect_best_supported();
@@ -6747,8 +6753,8 @@ SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list& get_avail
67476753
}
67486754

67496755
/**
6750-
* The active implementation.
6751-
*/
6756+
* The active implementation.
6757+
*/
67526758
SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation>& get_active_implementation() {
67536759
#if SIMDUTF_SINGLE_IMPLEMENTATION
67546760
// skip runtime detection
@@ -26119,7 +26125,7 @@ std::pair<result, char*> avx2_convert_utf16_to_utf8_with_errors(const char16_t*
2611926125
1. an input register contains no surrogates and each value
2612026126
is in range 0x0000 .. 0x07ff.
2612126127
2. an input register contains no surrogates and values are
26122-
is in range 0x0000 .. 0xffff.
26128+
in range 0x0000 .. 0xffff.
2612326129
3. an input register contains surrogates --- i.e. codepoints
2612426130
can have 16 or 32 bits.
2612526131

@@ -32395,6 +32401,8 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(const cha
3239532401

3239632402
/* end file src/rvv/rvv_utf8_to.inl.cpp */
3239732403
/* begin file src/rvv/rvv_utf16_to.inl.cpp */
32404+
#include <cstdio>
32405+
3239832406
template<simdutf_ByteFlip bflip>
3239932407
simdutf_really_inline static result rvv_utf16_to_latin1_with_errors(const char16_t *src, size_t len, char *dst) {
3240032408
const char16_t *const beg = src;
@@ -32609,47 +32617,95 @@ simdutf_really_inline static result rvv_utf16_to_utf32_with_errors(const char16_
3260932617
const char16_t *const srcBeg = src;
3261032618
char32_t *const dstBeg = dst;
3261132619

32620+
constexpr const uint16_t ANY_SURROGATE_MASK = 0xf800;
32621+
constexpr const uint16_t ANY_SURROGATE_VALUE = 0xd800;
32622+
constexpr const uint16_t LO_SURROGATE_MASK = 0xfc00;
32623+
constexpr const uint16_t LO_SURROGATE_VALUE = 0xdc00;
32624+
constexpr const uint16_t HI_SURROGATE_MASK = 0xfc00;
32625+
constexpr const uint16_t HI_SURROGATE_VALUE = 0xd800;
32626+
3261232627
uint16_t last = 0;
32613-
for (size_t vl, vlOut; len > 0; len -= vl, src += vl, dst += vlOut, last = simdutf_byteflip<bflip>(src[-1])) {
32614-
vl = __riscv_vsetvl_e16m2(len);
32615-
vuint16m2_t v1 = __riscv_vle16_v_u16m2((uint16_t const*)src, vl);
32616-
v1 = simdutf_byteflip<bflip>(v1, vl);
32617-
vuint16m2_t v0 = __riscv_vslide1up_vx_u16m2(v1, last, vl);
32628+
while (len > 0) {
32629+
size_t vl = __riscv_vsetvl_e16m2(len);
32630+
vuint16m2_t v0 = __riscv_vle16_v_u16m2((uint16_t const*)src, vl);
32631+
v0 = simdutf_byteflip<bflip>(v0, vl);
32632+
32633+
{ // check fast-path
32634+
const vuint16m2_t v = __riscv_vand_vx_u16m2(v0, ANY_SURROGATE_MASK, vl);
32635+
const vbool8_t any_surrogate = __riscv_vmseq_vx_u16m2_b8(v, ANY_SURROGATE_VALUE, vl);
32636+
if (__riscv_vfirst_m_b8(any_surrogate, vl) < 0) {
32637+
/* no surrogates */
32638+
__riscv_vse32_v_u32m4((uint32_t*)dst, __riscv_vzext_vf2_u32m4(v0, vl), vl);
32639+
len -= vl;
32640+
src += vl;
32641+
dst += vl;
32642+
continue;
32643+
}
32644+
}
3261832645

32619-
vbool8_t surhi0 = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v0, 0xFC00, vl), 0xD800, vl);
32620-
vbool8_t surlo1 = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v1, 0xFC00, vl), 0xDC00, vl);
32646+
if ((simdutf_byteflip<bflip>(src[0]) & LO_SURROGATE_MASK) == LO_SURROGATE_VALUE) {
32647+
return result(error_code::SURROGATE, src - srcBeg);
32648+
}
3262132649

32622-
/* no surrogates */
32623-
if (__riscv_vfirst_m_b8(__riscv_vmor_mm_b8(surhi0, surlo1, vl), vl) < 0) {
32624-
vlOut = vl;
32625-
__riscv_vse32_v_u32m4((uint32_t*)dst, __riscv_vzext_vf2_u32m4(v1, vl), vl);
32626-
continue;
32650+
// decode surrogates
32651+
vuint16m2_t v1 = __riscv_vslide1down_vx_u16m2(v0, 0, vl);
32652+
vl = __riscv_vsetvl_e16m2(vl - 1);
32653+
if (vl == 0) {
32654+
return result(error_code::SURROGATE, src - srcBeg);
3262732655
}
3262832656

32629-
long idx = __riscv_vfirst_m_b8(__riscv_vmxor_mm_b8(surhi0, surlo1, vl), vl);
32630-
if (idx >= 0) {
32631-
last = idx > 0 ? simdutf_byteflip<bflip>(src[idx-1]) : last;
32632-
return result(error_code::SURROGATE, src - srcBeg + idx - (last - 0xD800u < 0x400u));
32657+
const vbool8_t surhi = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v0, HI_SURROGATE_MASK, vl), HI_SURROGATE_VALUE, vl);
32658+
const vbool8_t surlo = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v1, LO_SURROGATE_MASK, vl), LO_SURROGATE_VALUE, vl);
32659+
32660+
// compress everything but lo surrogates
32661+
const vbool8_t compress = __riscv_vmsne_vx_u16m2_b8(__riscv_vand_vx_u16m2(v0, LO_SURROGATE_MASK, vl), LO_SURROGATE_VALUE, vl);
32662+
32663+
{
32664+
const vbool8_t diff = __riscv_vmxor_mm_b8(surhi, surlo, vl);
32665+
const long idx = __riscv_vfirst_m_b8(diff, vl);
32666+
if (idx >= 0) {
32667+
return result(error_code::SURROGATE, src - srcBeg + idx + 1);
32668+
}
3263332669
}
3263432670

32635-
vbool8_t surhi1 = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v1, 0xFC00, vl), 0xD800, vl);
32636-
uint16_t next = vl < len ? simdutf_byteflip<bflip>(src[vl]) : 0;
32671+
last = simdutf_byteflip<bflip>(src[vl]);
32672+
vuint32m4_t utf32 = __riscv_vzext_vf2_u32m4(v0, vl);
32673+
32674+
// v0 = 110110yyyyyyyyyy (0xd800 + yyyyyyyyyy) --- hi surrogate
32675+
// v1 = 110111xxxxxxxxxx (0xdc00 + xxxxxxxxxx) --- lo surrogate
32676+
32677+
// t0 = u16( 0000_00yy_yyyy_yyyy)
32678+
const vuint32m4_t t0 = __riscv_vzext_vf2_u32m4(__riscv_vand_vx_u16m2(v0, 0x03ff, vl), vl);
32679+
// t1 = u32(0000_0000_0000_yyyy_yyyy_yy00_0000_0000)
32680+
const vuint32m4_t t1 = __riscv_vsll_vx_u32m4(t0, 10, vl);
32681+
32682+
// t2 = u32(0000_0000_0000_0000_0000_00xx_xxxx_xxxx)
32683+
const vuint32m4_t t2 = __riscv_vzext_vf2_u32m4(__riscv_vand_vx_u16m2(v1, 0x03ff, vl), vl);
32684+
32685+
// t3 = u32(0000_0000_0000_yyyy_yyyy_yyxx_xxxx_xxxx)
32686+
const vuint32m4_t t3 = __riscv_vor_vv_u32m4(t1, t2, vl);
32687+
32688+
// t4 = utf32 from surrogate pairs
32689+
const vuint32m4_t t4 = __riscv_vadd_vx_u32m4(t3, 0x10000, vl);
3263732690

32638-
vuint32m4_t wide = __riscv_vzext_vf2_u32m4(v1, vl);
32639-
vuint32m4_t slided = __riscv_vslide1down_vx_u32m4(wide, next, vl);
32640-
vuint32m4_t aligned = __riscv_vsll_vx_u32m4_mu(surhi1, wide, wide, 10, vl);
32641-
vuint32m4_t added = __riscv_vadd_vv_u32m4_mu(surhi1, aligned, aligned, slided, vl);
32642-
vuint32m4_t utf32 = __riscv_vadd_vx_u32m4_mu(surhi1, added, added, 0xFCA02400, vl);
32643-
vbool8_t m = __riscv_vmnot_m_b8(surlo1, vl);
32644-
vlOut = __riscv_vcpop_m_b8(m, vl);
32645-
vuint32m4_t comp = __riscv_vcompress_vm_u32m4(utf32, m, vl);
32691+
const vuint32m4_t result = __riscv_vmerge_vvm_u32m4(utf32, t4, surhi, vl);
32692+
32693+
const vuint32m4_t comp = __riscv_vcompress_vm_u32m4(result, compress, vl);
32694+
const size_t vlOut = __riscv_vcpop_m_b8(compress, vl);
3264632695
__riscv_vse32_v_u32m4((uint32_t*)dst, comp, vlOut);
32696+
32697+
len -= vl;
32698+
src += vl;
32699+
dst += vlOut;
32700+
32701+
if ((last & LO_SURROGATE_MASK) == LO_SURROGATE_VALUE) {
32702+
// last item is lo surrogate and got already consumed
32703+
len -= 1;
32704+
src += 1;
32705+
}
3264732706
}
3264832707

32649-
if (last - 0xD800u < 0x400u)
32650-
return result(error_code::SURROGATE, src - srcBeg - 1); /* end on high surrogate */
32651-
else
32652-
return result(error_code::SUCCESS, dst - dstBeg);
32708+
return result(error_code::SUCCESS, dst - dstBeg);
3265332709
}
3265432710

3265532711
simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t *src, size_t len, char32_t *dst) const noexcept {

deps/simdutf/simdutf.h

+17 -12
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2024-04-11 09:56:55 -0400. Do not edit! */
1+
/* auto-generated on 2024-04-24 01:28:18 -0400. Do not edit! */
22
/* begin file include/simdutf.h */
33
#ifndef SIMDUTF_H
44
#define SIMDUTF_H
@@ -149,7 +149,7 @@
149149
#define SIMDUTF_HAS_RVV_TARGET_REGION 1
150150
#endif
151151

152-
#if __riscv_v_intrinsic >= 11000 && !(__GNUC__ == 13 && __GNUC_MINOR__ == 2 && __GNUC_PATCHLEVEL__ == 0)
152+
#if __riscv_v_intrinsic >= 11000
153153
#define SIMDUTF_HAS_RVV_INTRINSICS 1
154154
#endif
155155

@@ -594,7 +594,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS
594594
#define SIMDUTF_SIMDUTF_VERSION_H
595595

596596
/** The version of simdutf being used (major.minor.revision) */
597-
#define SIMDUTF_VERSION "5.2.4"
597+
#define SIMDUTF_VERSION "5.2.6"
598598

599599
namespace simdutf {
600600
enum {
@@ -609,7 +609,7 @@ enum {
609609
/**
610610
* The revision (major.minor.REVISION) of simdutf being used.
611611
*/
612-
SIMDUTF_VERSION_REVISION = 4
612+
SIMDUTF_VERSION_REVISION = 6
613613
};
614614
} // namespace simdutf
615615

@@ -717,6 +717,7 @@ static inline uint32_t detect_supported_architectures() {
717717
#elif SIMDUTF_IS_RISCV64
718718

719719
#if defined(__linux__)
720+
720721
#include <unistd.h>
721722
// We define these our selfs, for backwards compatibility
722723
struct simdutf_riscv_hwprobe { int64_t key; uint64_t value; };
@@ -744,6 +745,10 @@ static inline uint32_t detect_supported_architectures() {
744745
if (extensions & SIMDUTF_RISCV_HWPROBE_EXT_ZVBB)
745746
host_isa |= instruction_set::ZVBB;
746747
}
748+
#endif
749+
#if defined(RUN_IN_SPIKE_SIMULATOR)
750+
// Proxy Kernel does not implement yet hwprobe syscall
751+
host_isa |= instruction_set::RVV;
747752
#endif
748753
return host_isa;
749754
}
@@ -2454,7 +2459,7 @@ class implementation {
24542459
*
24552460
* @return the name of the implementation, e.g. "haswell", "westmere", "arm64"
24562461
*/
2457-
virtual const std::string &name() const { return _name; }
2462+
virtual std::string name() const { return std::string(_name); }
24582463

24592464
/**
24602465
* The description of this implementation.
@@ -2464,7 +2469,7 @@ class implementation {
24642469
*
24652470
* @return the name of the implementation, e.g. "haswell", "westmere", "arm64"
24662471
*/
2467-
virtual const std::string &description() const { return _description; }
2472+
virtual std::string description() const { return std::string(_description); }
24682473

24692474
/**
24702475
* The instruction sets this implementation is compiled against
@@ -3602,27 +3607,27 @@ class implementation {
36023607
protected:
36033608
/** @private Construct an implementation with the given name and description. For subclasses. */
36043609
simdutf_really_inline implementation(
3605-
std::string name,
3606-
std::string description,
3610+
const char* name,
3611+
const char* description,
36073612
uint32_t required_instruction_sets
36083613
) :
36093614
_name(name),
36103615
_description(description),
36113616
_required_instruction_sets(required_instruction_sets)
36123617
{
36133618
}
3614-
virtual ~implementation()=default;
3615-
3619+
protected:
3620+
~implementation() = default;
36163621
private:
36173622
/**
36183623
* The name of this implementation.
36193624
*/
3620-
const std::string _name;
3625+
const char* _name;
36213626

36223627
/**
36233628
* The description of this implementation.
36243629
*/
3625-
const std::string _description;
3630+
const char* _description;
36263631

36273632
/**
36283633
* Instruction sets required for this implementation.

0 commit comments

Comments
 (0)