Skip to content

Commit 38e64b1

Browse files
committed
[SLP] Fix minbitwidth analysis for gather nodes with SIToFP users
If a buildvector node has a cast-to-float user, it cannot be considered safe for truncation; the original bitwidth must be used instead. Fixes #135410
1 parent db4ad46 commit 38e64b1

File tree

2 files changed

+5
-4
lines changed

2 files changed

+5
-4
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19974,8 +19974,10 @@ void BoUpSLP::computeMinimumValueSizes() {
1997419974
return false;
1997519975
if (!isa<CastInst, BinaryOperator, FreezeInst, PHINode,
1997619976
SelectInst>(U) ||
19977+
isa<SIToFPInst, UIToFPInst>(U) ||
1997719978
!isa<CastInst, BinaryOperator, FreezeInst, PHINode,
19978-
SelectInst>(UserTE->getMainOp()))
19979+
SelectInst>(UserTE->getMainOp()) ||
19980+
isa<SIToFPInst, UIToFPInst>(UserTE->getMainOp()))
1997919981
return true;
1998019982
unsigned UserTESz = DL->getTypeSizeInBits(
1998119983
UserTE->Scalars.front()->getType());

llvm/test/Transforms/SLPVectorizer/X86/trunced-buildvector-scalar-extended.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@ define <4 x float> @test(i64 %0) {
55
; CHECK-LABEL: define <4 x float> @test(
66
; CHECK-SAME: i64 [[TMP0:%.*]]) {
77
; CHECK-NEXT: [[ENTRY:.*:]]
8-
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
9-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP1]], i32 2
10-
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
8+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> <i64 0, i64 0, i64 poison, i64 0>, i64 [[TMP0]], i32 2
9+
; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32>
1110
; CHECK-NEXT: [[TMP4:%.*]] = sitofp <4 x i64> [[TMP3]] to <4 x float>
1211
; CHECK-NEXT: [[TMP5:%.*]] = sitofp <4 x i32> [[TMP2]] to <4 x float>
1312
; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]]

0 commit comments

Comments
 (0)