Vectorization of loop reduction introduces an aligned store incorrectly #65212
Closed
Description
; Reproducer module, built for the x86-64 Linux target named in the triple below.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; @fn: zero the i16 at %hbuf, then add the i16 at %ref into it once per loop
; iteration, for %i = 0, 1, ... while %i < %height (signed compare).
; Neither pointer parameter carries an `align` attribute and every load/store
; below is marked `align 1`, so this IR only ever claims 1-byte alignment for
; %hbuf and %ref.
define void @fn(ptr %hbuf, ptr %ref, i32 %height) {
entry:
; Initialise the in-memory accumulator to 0.
store i16 0, ptr %hbuf, align 1
br label %for.cond
for.cond:
; %i is the induction variable: 0 on entry, %inc on the back edge.
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; Signed comparison, so the body never runs when %height <= 0.
%cmp = icmp slt i32 %i, %height
br i1 %cmp, label %for.body, label %for.end
for.body:
; Both addresses are plain function arguments, unchanged across iterations.
%0 = load i16, ptr %ref, align 1
%1 = load i16, ptr %hbuf, align 1
%add = add i16 %1, %0
; The running sum is written back through memory on every iteration.
store i16 %add, ptr %hbuf, align 1
%inc = add i32 %i, 1
br label %for.cond
for.end:
ret void
}
Every load and store in the function above is marked `align 1`, but the loop vectorizer emits the final store of the reduction result (in `middle.block`) with `align 2`:
middle.block:
%bin.rdx = add <8 x i16> %2, %1
%4 = tail call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %bin.rdx)
store i16 %4, ptr %hbuf, align 2 ; <------------------- should be align 1
%cmp.n = icmp eq i32 %n.vec, %height
br i1 %cmp.n, label %for.end, label %for.body.preheader7