Open
Description
https://compiler-explorer.com/z/bcnocK139
#include <stddef.h>
struct Foo { int a, b; };
void f(struct Foo *x, size_t n) {
for (size_t i = 0; i < n; i++) {
x[i].a += 1;
x[i].b += 2;
}
}
void g(struct Foo *x, size_t n) {
for (size_t i = 0; i < n; i++)
x[i].a += x[i].b;
}
In the loop f, the loop vectorizer is able to use a scalar address throughout the segmented accesses.
In g, where not every segmented field is written back to, it's kept around as a widened vector of pointers, even though we only use the first element of it.
legalizeAndOptimizeInductions
in VPlanTransforms.cpp may not be catching this case.
f:
beqz a1, .LBB0_8
csrr a2, vlenb
srli a7, a2, 1
bgeu a1, a7, .LBB0_3
li a6, 0
j .LBB0_6
.LBB0_3:
srli a2, a2, 3
neg a4, a7
and a6, a1, a4
slli a4, a2, 5
mv a5, a0
mv a2, a6
vsetvli a3, zero, e32, m2, ta, ma
.LBB0_4:
vlseg2e32.v v8, (a5)
sub a2, a2, a7
vadd.vi v8, v8, 1
vadd.vi v10, v10, 2
vsseg2e32.v v8, (a5)
add a5, a5, a4
bnez a2, .LBB0_4
beq a1, a6, .LBB0_8
.LBB0_6:
sh3add a2, a6, a0
sh3add a1, a1, a0
addi a0, a2, 4
addi a1, a1, 4
.LBB0_7:
lw a2, -4(a0)
lw a3, 0(a0)
addi a2, a2, 1
addi a3, a3, 2
sw a2, -4(a0)
sw a3, 0(a0)
addi a0, a0, 8
bne a0, a1, .LBB0_7
.LBB0_8:
ret
g:
beqz a1, .LBB1_8
csrr a3, vlenb
srli a2, a3, 1
bgeu a1, a2, .LBB1_3
li a6, 0
j .LBB1_6
.LBB1_3:
srli a3, a3, 3
neg a4, a2
vsetvli a5, zero, e64, m4, ta, ma
vid.v v8
and a6, a1, a4
slli t0, a3, 5
li a7, 8
mv a4, a0
mv a5, a6
.LBB1_4:
vsetvli zero, zero, e64, m4, ta, ma
vsll.vi v12, v8, 3
vadd.vx v12, v12, a0
vmv.x.s a3, v12
vsetvli zero, zero, e32, m2, ta, ma
vlseg2e32.v v12, (a3)
vsetvli zero, zero, e64, m4, ta, ma
vadd.vx v8, v8, a2
sub a5, a5, a2
vsetvli zero, zero, e32, m2, ta, ma
vadd.vv v12, v12, v14
vsse32.v v12, (a4), a7
add a4, a4, t0
bnez a5, .LBB1_4
beq a1, a6, .LBB1_8
.LBB1_6:
sh3add a2, a6, a0
sh3add a0, a1, a0
.LBB1_7:
lw a1, 0(a2)
lw a3, 4(a2)
add a1, a1, a3
sw a1, 0(a2)
addi a2, a2, 8
bne a2, a0, .LBB1_7
.LBB1_8:
ret