1515void * memchr (const void * src , int c , size_t n )
1616{
1717#if defined(__wasm_simd128__ ) && defined(__wasilibc_simd_string )
18+ // Skip Clang 19 and Clang 20 which have a bug (llvm/llvm-project#146574)
19+ // which results in an ICE when inline assembly is used with a vector result.
20+ #if __clang_major__ != 19 && __clang_major__ != 20
1821 // When n is zero, a function that locates a character finds no occurrence.
1922 // Otherwise, decrement n to ensure sub_overflow overflows
2023 // when n would go equal-to-or-below zero.
2124 if (!n -- ) {
2225 return NULL ;
2326 }
2427
25- // memchr must behave as if it reads characters sequentially
26- // and stops as soon as a match is found.
27- // Aligning ensures loads beyond the first match are safe.
28- // Casting through uintptr_t makes this implementation-defined,
29- // rather than undefined behavior.
28+ // Note that reading before/after the allocation of a pointer is UB in
29+ // C, so inline assembly is used to generate the exact machine
30+ // instruction we want with opaque semantics to the compiler to avoid
31+ // the UB.
3032 uintptr_t align = (uintptr_t )src % sizeof (v128_t );
31- const v128_t * v = (v128_t * )(( uintptr_t )src - align ) ;
32- const v128_t vc = wasm_i8x16_splat (c );
33+ uintptr_t addr = (uintptr_t )src - align ;
34+ v128_t vc = wasm_i8x16_splat (c );
3335
3436 for (;;) {
35- const v128_t cmp = wasm_i8x16_eq (* v , vc );
37+ v128_t v ;
38+ __asm__ (
39+ "local.get %1\n"
40+ "v128.load 0\n"
41+ "local.set %0\n"
42+ : "=r" (v )
43+ : "r" (addr )
44+ : "memory" );
45+ v128_t cmp = wasm_i8x16_eq (v , vc );
3646 // Bitmask is slow on AArch64, any_true is much faster.
3747 if (wasm_v128_any_true (cmp )) {
3848 // Clear the bits corresponding to align (little-endian)
@@ -48,16 +58,18 @@ void *memchr(const void *src, int c, size_t n)
4858 // That's a match, unless it is beyond the end of the object.
4959 // Recall that we decremented n, so less-than-or-equal-to is correct.
5060 size_t ctz = __builtin_ctz (mask );
51- return ctz - align <= n ? (char * )v + ctz : NULL ;
61+ return ctz - align <= n ? (char * )src + (addr + ctz - (uintptr_t )src )
62+ : NULL ;
5263 }
5364 }
5465 // Decrement n; if it overflows we're done.
5566 if (__builtin_sub_overflow (n , sizeof (v128_t ) - align , & n )) {
5667 return NULL ;
5768 }
5869 align = 0 ;
59- v ++ ;
70+ addr += sizeof ( v128_t ) ;
6071 }
72+ #endif
6173#endif
6274
6375 const unsigned char * s = src ;
0 commit comments