Skip to content

Commit 4dd7f80

Browse files
committed
Fix crash when the shuffle mask is all poison
1 parent db67a66 commit 4dd7f80

File tree

2 files changed

+36
-1
lines changed

2 files changed

+36
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8703,7 +8703,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
8703 8703
auto CheckPerRegistersShuffle = [&](MutableArrayRef<int> Mask,
8704 8704
SmallVectorImpl<unsigned> &Indices)
8705 8705
-> std::optional<TTI::ShuffleKind> {
8706-
if (NumElts <= EltsPerVector)
8706+
if (NumElts <= EltsPerVector ||
8707+
all_of(Mask, [](int I) { return I == PoisonMaskElem; }))
8707 8708
return std::nullopt;
8708 8709
int OffsetReg0 =
8709 8710
alignDown(std::accumulate(Mask.begin(), Mask.end(), INT_MAX,
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
3+
; RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s
4+
5+
define void @test(i8 %0, i8 %1) {
6+
; CHECK-LABEL: define void @test(
7+
; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) {
8+
; CHECK-NEXT: [[ENTRY:.*:]]
9+
; CHECK-NEXT: [[L:%.*]] = load <4 x i8>, ptr getelementptr (i8, ptr null, i32 8), align 1
10+
; CHECK-NEXT: [[LI15:%.*]] = extractelement <4 x i8> [[L]], i64 15
11+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i8 [[TMP0]], 0
12+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i8 [[TMP1]], 0
13+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i8 [[TMP0]], 0
14+
; CHECK-NEXT: [[DOTI15:%.*]] = icmp ne i8 [[LI15]], 0
15+
; CHECK-NEXT: [[I0244:%.*]] = insertelement <4 x i1> zeroinitializer, i1 [[TMP2]], i64 0
16+
; CHECK-NEXT: [[I1245:%.*]] = insertelement <4 x i1> [[I0244]], i1 [[TMP3]], i64 1
17+
; CHECK-NEXT: [[I2246:%.*]] = insertelement <4 x i1> [[I1245]], i1 [[TMP4]], i64 2
18+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i1> [[I2246]], i1 [[DOTI15]], i64 3
19+
; CHECK-NEXT: ret void
20+
;
21+
entry:
22+
%l = load <4 x i8>, ptr getelementptr (i8, ptr null, i32 8), align 1
23+
%li15 = extractelement <4 x i8> %l, i64 15
24+
%2 = icmp ne i8 %0, 0
25+
%3 = icmp ne i8 %1, 0
26+
%4 = icmp ne i8 %0, 0
27+
%.i15 = icmp ne i8 %li15, 0
28+
29+
%i0244 = insertelement <4 x i1> zeroinitializer, i1 %2, i64 0
30+
%i1245 = insertelement <4 x i1> %i0244, i1 %3, i64 1
31+
%i2246 = insertelement <4 x i1> %i1245, i1 %4, i64 2
32+
%14 = insertelement <4 x i1> %i2246, i1 %.i15, i64 3
33+
ret void
34+
}

0 commit comments

Comments
 (0)