Description
clang: https://godbolt.org/z/xW5s7s35q
gcc: https://godbolt.org/z/P5Wv7Y4zh
code
#include <arm_neon.h>
extern void abort (void);
/* Reproducer: store lane 6 of a two-vector NEON structure with
   vst2_lane_u8 and check the stored bytes against the source lanes.

   data: pointer to at least 16 readable bytes; the loop below reads two
         consecutive 8-byte vectors from it.

   Returns 0 when both stored bytes equal lane 6 of their source vector,
   1 on the first mismatch.  */
int test_vst2_lane_u8 (const uint8_t *data) {
uint8x8x2_t vectors;
/* Fill val[0] and val[1] from consecutive 8-byte chunks of data.  */
for (int i = 0; i < 2; i++, data += 8) {
vectors.val[i] = vld1_u8 (data);
}
/* temp is deliberately left uninitialized: the store below is the only
   write to it before it is read back.  */
uint8_t temp[2];
vst2_lane_u8 (temp, vectors, 6);
/* Debug output, disabled; enable to print the stored and expected bytes.  */
// printf("temp[0]: %d\n", temp[0]);
// printf("temp[1]: %d\n", temp[1]);
//printf("vectors.val[0][6]: %d\n", vget_lane_u8(vectors.val[0], 6));
//printf("vectors.val[1][6]: %d\n", vget_lane_u8(vectors.val[1], 6));
/* Each stored byte must match lane 6 of the corresponding vector.  */
for (int i = 0; i < 2; i++) {
if (temp[i] != vget_lane_u8 (vectors.val[i], 6)) /* stored byte differs from the source lane */
return 1;
}
return 0;
}
/* Driver for the reproducer: feeds a fixed byte pattern to
   test_vst2_lane_u8 and aborts if the check reports a mismatch.

   Returns 0 when the check passes; otherwise the process is terminated
   via abort ().  */
int main (int argc, char **argv)
{
  /* The test reads only the first 16 bytes (the two initialized words);
     the remaining elements are zero-initialized.  */
  uint64_t orig_data[8] = {
    0x1234567890abcdefULL, 0x13579bdf02468aceULL,
  };

  /* The command line is not used.  */
  (void) argc;
  (void) argv;

  if (test_vst2_lane_u8 ((const uint8_t *) orig_data))
    abort ();
  return 0;
}
The compiler generates the following assembly for `test_vst2_lane_u8`:
// Generated code for the C function test_vst2_lane_u8 (x0 = data, result in w0).
// temp[0] is at [sp, #12] and temp[1] at [sp, #13].
test_vst2_lane_u8: // @test_vst2_lane_u8
sub sp, sp, #16 // reserve 16 bytes of stack
ldp d0, d1, [x0] // load the two 8-byte vectors from data
add x8, sp, #12 // x8 = &temp[0]
ldrb w11, [sp, #13] // temp[1] -- read BEFORE the st2 below writes it (the reported problem)
st2 { v0.b, v1.b }[6], [x8] // vst2_lane_u8 (temp, vectors, 6);
umov w8, v0.b[6] // expected value for temp[0]
ldrb w9, [sp, #12] // temp[0] -- read after the store
umov w10, v1.b[6] // expected value for temp[1]
cmp w9, w8, uxtb // temp[0] vs. lane 6 of v0
ccmp w11, w10, #0, eq // if equal so far, temp[1] (stale value) vs. lane 6 of v1
cset w0, ne // return 1 on mismatch, 0 otherwise
add sp, sp, #16 // release the stack frame
ret
The failure occurs because `temp[1]` is loaded before the `st2` store executes, so the load reads a stale value and the comparison fails.
I moved the load of `temp[1]` after the `st2`, like this:
st2 { v0.b, v1.b }[6], [x8] // vst2_lane_u8 (temp, vectors, 6);
umov w8, v0.b[6]
ldrb w11, [sp, #13] // temp[1]
ldrb w9, [sp, #12] // temp[0]
After rebuilding with this change, the test passes.
Does anyone have an idea how to fix this?
Metadata
Metadata
Assignees
Type
Projects
Status
Done