Closed
Description
https://rust.godbolt.org/z/b4cdaqTcP
/// Baseline under test: answers "is the whole mask set?" by delegating to the
/// mask type's own `all()` reduction.
///
/// `mask8x32` is declared outside this snippet — presumably the portable-SIMD
/// 32-lane mask with 8-bit lanes; confirm against the linked Godbolt example.
/// This is the version whose generated code contains the extra `psllw`.
pub fn mask8x32_all_v1(m: mask8x32) -> bool {
m.all()
}
/// Hand-written SSE2 comparison point for `mask8x32_all_v1`: splits the mask
/// into two 128-bit halves, ANDs them, and checks that the byte-wise sign-bit
/// mask of the result has all 16 bits set.
///
/// This only matches `m.all()` if every lane of the mask is stored as a byte
/// whose top bit reflects the lane's truth value (e.g. all-ones / all-zeros) —
/// NOTE(review): that representation is assumed here, not shown in the snippet.
///
/// # Safety
///
/// NOTE(review): the snippet does not spell out the contract. Presumably the
/// function is `unsafe` because of the `transmute` and the SSE2 intrinsics, so
/// the caller must ensure SSE2 is available — confirm before reusing.
pub unsafe fn mask8x32_all_v2(m: mask8x32) -> bool {
// Reinterpret the mask's bytes as two `__m128i` halves; `transmute` only
// compiles when source and destination types have the same size.
let [a, b]: [__m128i; 2] = transmute(m);
// AND the halves, gather the top bit of each of the 16 bytes into an integer,
// then view the low 16 bits as `i16`: `-1` means all 16 bits are set.
_mm_movemask_epi8(_mm_and_si128(a, b)) as i16 == -1
}
Compiler flags: `-C opt-level=3 --edition 2021 --target x86_64-unknown-linux-gnu -C target-feature=+sse2`
# Compiler output for `mask8x32_all_v1` (the `m.all()` version).
example::mask8x32_all_v1:
# Load the 16 bytes at [rdi + 16].
movdqa xmm0, xmmword ptr [rdi + 16]
# AND them with the 16 bytes at [rdi].
pand xmm0, xmmword ptr [rdi]
# Shift every 16-bit word left by 7, moving bit 0 of each byte into that
# byte's top bit. This is the instruction the report calls unnecessary.
psllw xmm0, 7
# Gather the top bit of each of the 16 bytes into the low 16 bits of eax.
pmovmskb eax, xmm0
# All 16 gathered bits set?
cmp ax, -1
# Materialize the comparison result in al.
sete al
ret
# Compiler output for `mask8x32_all_v2` (the hand-written intrinsics version).
example::mask8x32_all_v2:
# Load the 16 bytes at [rdi + 16].
movdqa xmm0, xmmword ptr [rdi + 16]
# AND them with the 16 bytes at [rdi].
pand xmm0, xmmword ptr [rdi]
# Gather the top bit of each of the 16 bytes into the low 16 bits of eax
# (no preceding shift here).
pmovmskb eax, xmm0
# 65535 == 0xFFFF: all 16 gathered bits set?
cmp eax, 65535
# Materialize the comparison result in al.
sete al
ret
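`mask8x32_all_v1` generates an extra `psllw` instruction, which is unnecessary: assuming every mask lane is already all-ones or all-zeros (as `mask8x32_all_v2` relies on), the top bit of each byte that `pmovmskb` reads is already correct without the shift.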
Same bug for wasm32 simd128: https://rust.godbolt.org/z/7r1fKhsM9
Metadata
Metadata
Assignees
Labels
No labels