Open
Description
test: https://godbolt.org/z/7Y6a4EvW9
/*
 * Reproducer: a loop whose body takes one of two goto-selected paths per
 * iteration, depending on the sign of c[i].
 *   - c[i] <  0 : only b[i] is written (label L20).
 *   - otherwise : only a[i] is written, then control skips to L10.
 * The arrays a, b, c, d, e, the type real_t and LEN_1D are declared outside
 * this snippet.
 */
void s1161(void)
{
/* Iterates over indices 0 .. LEN_1D-2 (the last element is not visited). */
for (int i = 0; i < LEN_1D-1; ++i) {
if (c[i] < (real_t)0.) {
goto L20; /* negative c[i]: take the b[] update path */
}
/* Non-negative path: a[i] is overwritten from c, d and e. */
a[i] = c[i] + d[i] * e[i];
goto L10; /* skip the b[] update */
L20:
/* Negative path: b[i] is computed from the current a[i] and d[i] squared. */
b[i] = a[i] + d[i] * d[i];
L10:
; /* common join point; empty statement so the label has a target */
}
}
- gcc: generates a vectorized SVE loop
// GCC output: predicated SVE loop body. Both branches of the C loop are
// computed with per-lane predicates instead of branches.
// Register roles below are inferred by matching the data flow against the C
// source (TODO confirm against the full listing): x9 = c, x8 = d, x7 = a,
// x5 = e, x6 = b, x0 = element index, p7 = loop predicate on entry.
.L2:
lsl x1, x0, 3 // x1 = x0 * 8: byte offset of the current element index
ld1d z31.d, p7/z, [x9, x0, lsl 3] // z31 = value tested against 0.0 below (presumably c[i])
ld1d z29.d, p7/z, [x8, x0, lsl 3] // z29 = multiplicand used by both fmla (presumably d[i])
add x2, x7, x1 // x2 = x7 + byte offset (presumably &a[i]); loaded from and stored to below
fcmlt p6.d, p7/z, z31.d, #0.0 // p6 = active lanes where z31 < 0.0
ld1d z30.d, p7/z, [x2] // z30 = current contents at x2 (presumably a[i])
not p6.b, p7/z, p6.b // p6 = active lanes where NOT (z31 < 0.0)
add x4, x5, x1 // x4 = x5 + byte offset (presumably &e[i])
add x1, x6, x1 // x1 = x6 + byte offset (presumably &b[i]); reuses x1
ld1d z28.d, p7/z, [x4] // z28 = second multiplicand for the p6 lanes (presumably e[i])
fcmlt p7.d, p7/z, z31.d, #0.0 // p7 = active lanes where z31 < 0.0 (same compare as above; overwrites the loop predicate)
fmla z31.d, p6/m, z29.d, z28.d // p6 lanes: z31 += z29 * z28  (c + d*e)
fmla z30.d, p7/m, z29.d, z29.d // p7 lanes: z30 += z29 * z29  (a + d*d)
st1d z31.d, p6, [x2] // store the p6 lanes to x2 (the a[i] update)
st1d z30.d, p7, [x1] // store the p7 lanes to x1 (the b[i] update)
add x0, x0, x10 // advance the element index by x10 (presumably elements per vector)
whilelo p7.d, w0, w3 // rebuild the loop predicate: lanes whose index is below w3 (unsigned)
b.any .L2 // repeat while at least one lane is still active
- llvm: fails to vectorize the loop (scalar code is generated)
// LLVM output: scalar loop, one element per iteration, with a real branch on
// the sign of the tested value. x8 is a byte offset advanced by 8 per
// iteration and compared against x9. Base-register roles are inferred from
// the C source (TODO confirm): x10 = c, x11 = a, x12 = d, x13 = b, x14 = e.
// The excerpt is truncated: the exit label .LBB0_4 is not shown.
.LBB0_1: // in Loop: Header=BB0_2 Depth=1
// Taken path for a negative tested value (the b[i] update).
ldr d0, [x11, x8] // d0 = addend (presumably a[i])
mov x15, x13 // x15 is not read anywhere in this excerpt
ldr d1, [x12, x8] // d1 = multiplicand (presumably d[i])
fmadd d0, d1, d1, d0 // d0 = d1 * d1 + d0
str d0, [x13, x8] // store the result (presumably to b[i])
add x8, x8, #8 // advance the byte offset by one 8-byte element
cmp x8, x9
b.eq .LBB0_4 // leave the loop when the end offset is reached, else fall into the header
.LBB0_2: // =>This Inner Loop Header: Depth=1
ldr d0, [x10, x8] // d0 = value tested against 0.0 (presumably c[i])
fcmp d0, #0.0
b.mi .LBB0_1 // taken only when d0 < 0.0
// %bb.3: // in Loop: Header=BB0_2 Depth=1
// Fall-through path for a non-negative tested value (the a[i] update).
ldr d1, [x12, x8] // d1 = first multiplicand (presumably d[i])
mov x15, x11 // x15 is not read anywhere in this excerpt
ldr d2, [x14, x8] // d2 = second multiplicand (presumably e[i])
fmadd d0, d1, d2, d0 // d0 = d1 * d2 + d0
str d0, [x11, x8] // store the result (presumably to a[i])
add x8, x8, #8 // advance the byte offset by one 8-byte element
cmp x8, x9
b.ne .LBB0_2 // loop back to the header until the end offset is reached