Skip to content

Commit 4ff96e5

Browse files
committed
[LSV] Avoid adding vectors of pointers as candidates
Summary: We no longer add vectors of pointers as candidates for load/store vectorization. It does not seem to work anyway, but without this patch we can end up in asserts when trying to create casts between an integer type and the pointer of vectors type. The test case I've added used to assert like this when trying to cast between i64 and <2 x i16*>: opt: ../lib/IR/Instructions.cpp:2565: Assertion `castIsValid(op, S, Ty) && "Invalid cast!"' failed. #0 PrintStackTraceSignalHandler(void*) #1 SignalHandler(int) #2 __restore_rt #3 __GI_raise #4 __GI_abort #5 __GI___assert_fail #6 llvm::CastInst::Create(llvm::Instruction::CastOps, llvm::Value*, llvm::Type*, llvm::Twine const&, llvm::Instruction*) #7 llvm::IRBuilder<llvm::ConstantFolder, llvm::IRBuilderDefaultInserter>::CreateBitOrPointerCast(llvm::Value*, llvm::Type*, llvm::Twine const&) #8 Vectorizer::vectorizeStoreChain(llvm::ArrayRef<llvm::Instruction*>, llvm::SmallPtrSet<llvm::Instruction*, 16u>*) Reviewers: arsenm Reviewed By: arsenm Subscribers: nhaehnle, llvm-commits Differential Revision: https://reviews.llvm.org/D39296 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316665 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 941b1f1 commit 4ff96e5

File tree

2 files changed

+37
-3
lines changed

2 files changed

+37
-3
lines changed

lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,13 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
616616
if ((TySize % 8) != 0)
617617
continue;
618618

619+
// Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
620+
// functions are currently using an integer type for the vectorized
621+
// load/store, and does not support casting between the integer type and a
622+
// vector of pointers (e.g. i64 to <2 x i16*>)
623+
if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
624+
continue;
625+
619626
Value *Ptr = LI->getPointerOperand();
620627
unsigned AS = Ptr->getType()->getPointerAddressSpace();
621628
unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
@@ -646,6 +653,13 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
646653
if (!VectorType::isValidElementType(Ty->getScalarType()))
647654
continue;
648655

656+
// Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
657+
// functions are currently using an integer type for the vectorized
658+
// load/store, and does not support casting between the integer type and a
659+
// vector of pointers (e.g. i64 to <2 x i16*>)
660+
if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
661+
continue;
662+
649663
// Skip weird non-byte sizes. They probably aren't worth the effort of
650664
// handling correctly.
651665
unsigned TySize = DL.getTypeSizeInBits(Ty);
@@ -701,8 +715,8 @@ bool Vectorizer::vectorizeInstructions(ArrayRef<Instruction *> Instrs) {
701715
SmallVector<int, 16> Heads, Tails;
702716
int ConsecutiveChain[64];
703717

704-
// Do a quadratic search on all of the given stores and find all of the pairs
705-
// of stores that follow each other.
718+
// Do a quadratic search on all of the given loads/stores and find all of the
719+
// pairs of loads/stores that follow each other.
706720
for (int i = 0, e = Instrs.size(); i < e; ++i) {
707721
ConsecutiveChain[i] = -1;
708722
for (int j = e - 1; j >= 0; --j) {
@@ -769,7 +783,7 @@ bool Vectorizer::vectorizeStoreChain(
769783
SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
770784
StoreInst *S0 = cast<StoreInst>(Chain[0]);
771785

772-
// If the vector has an int element, default to int for the whole load.
786+
// If the vector has an int element, default to int for the whole store.
773787
Type *StoreTy;
774788
for (Instruction *I : Chain) {
775789
StoreTy = cast<StoreInst>(I)->getValueOperand()->getType();

test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -632,6 +632,26 @@ define amdgpu_kernel void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias
632632
ret void
633633
}
634634

635+
; Verify that we no longer hit asserts for this test case. No change expected.
636+
; CHECK-LABEL: @copy_vec_of_ptrs
637+
; CHECK: %in.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %in, i32 1
638+
; CHECK: %vec1 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in.gep.1
639+
; CHECK: %vec2 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in, align 4
640+
; CHECK: %out.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %out, i32 1
641+
; CHECK: store <2 x i16*> %vec1, <2 x i16*> addrspace(1)* %out.gep.1
642+
; CHECK: store <2 x i16*> %vec2, <2 x i16*> addrspace(1)* %out, align 4
643+
define amdgpu_kernel void @copy_vec_of_ptrs(<2 x i16*> addrspace(1)* %out,
644+
<2 x i16*> addrspace(1)* %in ) #0 {
645+
%in.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %in, i32 1
646+
%vec1 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in.gep.1
647+
%vec2 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in, align 4
648+
649+
%out.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %out, i32 1
650+
store <2 x i16*> %vec1, <2 x i16*> addrspace(1)* %out.gep.1
651+
store <2 x i16*> %vec2, <2 x i16*> addrspace(1)* %out, align 4
652+
ret void
653+
}
654+
635655
declare void @llvm.amdgcn.s.barrier() #1
636656

637657
attributes #0 = { nounwind }

0 commit comments

Comments
 (0)