Skip to content

Vectorization of loop reduction introduces an aligned store incorrectly #65212

Closed
@nunoplopes

Description

; Module-level target settings for the reproducer; the triple below selects
; x86-64 Linux.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Reproducer input. Zeroes the i16 at %hbuf, then on every loop iteration adds
; the i16 loaded from %ref to the i16 stored at %hbuf. The loop runs while
; %i < %height (signed compare), with %i starting at 0 and stepping by 1.
; Every load and store here is annotated `align 1`; nothing in this function
; states that %hbuf or %ref is 2-byte aligned.
define void @fn(ptr %hbuf, ptr %ref, i32 %height) {
entry:
  ; Initialize the in-memory accumulator to 0.
  store i16 0, ptr %hbuf, align 1
  br label %for.cond

for.cond:
  ; %i is 0 on entry and %inc (the previous %i plus 1) on the back edge.
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  ; Signed comparison: continue only while %i < %height.
  %cmp = icmp slt i32 %i, %height
  br i1 %cmp, label %for.body, label %for.end

for.body:
  ; Read the addend from %ref and the running sum from %hbuf, both align 1.
  %0 = load i16, ptr %ref, align 1
  %1 = load i16, ptr %hbuf, align 1
  %add = add i16 %1, %0
  ; Write the updated sum back with align 1. The issue reports that the
  ; vectorized output stores to this same pointer with align 2 instead.
  store i16 %add, ptr %hbuf, align 1
  %inc = add i32 %i, 1
  br label %for.cond

for.end:
  ret void
}

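All memory accesses in the function above use align 1, but after loop vectorization the reduced value is stored to %hbuf with align 2. Nothing in the input establishes that %hbuf is 2-byte aligned, and a store that overstates its pointer's alignment is undefined behavior, so this is a miscompilation: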

middle.block:
  %bin.rdx = add <8 x i16> %2, %1
  %4 = tail call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %bin.rdx)
  store i16 %4, ptr %hbuf, align 2 ; <-------------------  should be align 1
  %cmp.n = icmp eq i32 %n.vec, %height
  br i1 %cmp.n, label %for.end, label %for.body.preheader7

cc @fhan @david-arm @igogo-x86 @lucic71

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions