Skip to content

Commit 4bf5ab4

Browse files
committed
[VPlan] Set flags when constructing truncs using VPWidenCastRecipe.
VPWidenCastRecipes with Trunc opcodes were missing the correct OpType for IR flags. Update createWidenCast to set the correct flags for truncs, and use it consistently. Fixes #162374.
1 parent ab71b77 commit 4bf5ab4

File tree

4 files changed

+113
-27
lines changed

4 files changed

+113
-27
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,11 @@ class VPBuilder {
322322

323323
VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
324324
Type *ResultTy) {
325-
return tryInsertInstruction(new VPWidenCastRecipe(Opcode, Op, ResultTy));
325+
VPIRFlags Flags;
326+
if (Opcode == Instruction::Trunc)
327+
Flags = VPIRFlags::TruncFlagsTy(false, false);
328+
return tryInsertInstruction(
329+
new VPWidenCastRecipe(Opcode, Op, ResultTy, Flags));
326330
}
327331

328332
VPScalarIVStepsRecipe *

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8774,13 +8774,19 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
87748774
assert(!RecurrenceDescriptor::isMinMaxRecurrenceKind(RecurrenceKind) &&
87758775
"Unexpected truncated min-max recurrence!");
87768776
Type *RdxTy = RdxDesc.getRecurrenceType();
8777-
auto *Trunc =
8778-
new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy);
8777+
VPWidenCastRecipe *Trunc;
87798778
Instruction::CastOps ExtendOpc =
87808779
RdxDesc.isSigned() ? Instruction::SExt : Instruction::ZExt;
8781-
auto *Extnd = new VPWidenCastRecipe(ExtendOpc, Trunc, PhiTy);
8782-
Trunc->insertAfter(NewExitingVPV->getDefiningRecipe());
8783-
Extnd->insertAfter(Trunc);
8780+
VPWidenCastRecipe *Extnd;
8781+
{
8782+
VPBuilder::InsertPointGuard Guard(Builder);
8783+
Builder.setInsertPoint(
8784+
NewExitingVPV->getDefiningRecipe()->getParent(),
8785+
std::next(NewExitingVPV->getDefiningRecipe()->getIterator()));
8786+
Trunc =
8787+
Builder.createWidenCast(Instruction::Trunc, NewExitingVPV, RdxTy);
8788+
Extnd = Builder.createWidenCast(ExtendOpc, Trunc, PhiTy);
8789+
}
87848790
if (PhiR->getOperand(1) == NewExitingVPV)
87858791
PhiR->setOperand(1, Extnd->getVPSingleValue());
87868792

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,6 +1026,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
10261026
PredPHI->replaceAllUsesWith(Op);
10271027
}
10281028

1029+
VPBuilder Builder(Def);
10291030
VPValue *A;
10301031
if (match(Def, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
10311032
Type *TruncTy = TypeInfo.inferScalarType(Def);
@@ -1041,18 +1042,16 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
10411042
unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
10421043
? Instruction::SExt
10431044
: Instruction::ZExt;
1044-
auto *VPC =
1045-
new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
1045+
auto *Ext = Builder.createWidenCast(Instruction::CastOps(ExtOpcode), A,
1046+
TruncTy);
10461047
if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
10471048
// UnderlyingExt has distinct return type, used to retain legacy cost.
1048-
VPC->setUnderlyingValue(UnderlyingExt);
1049+
Ext->setUnderlyingValue(UnderlyingExt);
10491050
}
1050-
VPC->insertBefore(&R);
1051-
Def->replaceAllUsesWith(VPC);
1051+
Def->replaceAllUsesWith(Ext);
10521052
} else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
1053-
auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
1054-
VPC->insertBefore(&R);
1055-
Def->replaceAllUsesWith(VPC);
1053+
auto *Trunc = Builder.createWidenCast(Instruction::Trunc, A, TruncTy);
1054+
Def->replaceAllUsesWith(Trunc);
10561055
}
10571056
}
10581057
#ifndef NDEBUG
@@ -1098,7 +1097,6 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
10981097
return Def->replaceAllUsesWith(Def->getOperand(1));
10991098

11001099
// (x && y) || (x && z) -> x && (y || z)
1101-
VPBuilder Builder(Def);
11021100
if (match(Def, m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
11031101
m_LogicalAnd(m_Deferred(X), m_VPValue(Z)))) &&
11041102
// Simplify only if one of the operands has one use to avoid creating an
@@ -2206,20 +2204,20 @@ void VPlanTransforms::truncateToMinimalBitwidths(
22062204
continue;
22072205
assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
22082206
auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);
2209-
VPWidenCastRecipe *NewOp =
2210-
IterIsEmpty
2211-
? new VPWidenCastRecipe(Instruction::Trunc, Op, NewResTy,
2212-
VPIRFlags::TruncFlagsTy(false, false))
2213-
: ProcessedIter->second;
2214-
R.setOperand(Idx, NewOp);
2215-
if (!IterIsEmpty)
2207+
if (!IterIsEmpty) {
2208+
R.setOperand(Idx, ProcessedIter->second);
22162209
continue;
2217-
ProcessedIter->second = NewOp;
2218-
if (!Op->isLiveIn()) {
2219-
NewOp->insertBefore(&R);
2220-
} else {
2221-
PH->appendRecipe(NewOp);
22222210
}
2211+
2212+
VPBuilder Builder;
2213+
if (Op->isLiveIn())
2214+
Builder.setInsertPoint(PH);
2215+
else
2216+
Builder.setInsertPoint(&R);
2217+
VPWidenCastRecipe *NewOp =
2218+
Builder.createWidenCast(Instruction::Trunc, Op, NewResTy);
2219+
ProcessedIter->second = NewOp;
2220+
R.setOperand(Idx, NewOp);
22232221
}
22242222

22252223
}

llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,86 @@ loop: ; preds = %loop, %entry
4444
exit: ; preds = %loop
4545
ret void
4646
}
47+
48+
; Test case for https://github.com/llvm/llvm-project/issues/162374.
49+
define void @truncate_i16_to_i8_cse(ptr noalias %src, ptr noalias %dst) {
50+
; CHECK-LABEL: define void @truncate_i16_to_i8_cse(
51+
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0]] {
52+
; CHECK-NEXT: [[ENTRY:.*]]:
53+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
54+
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
55+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4294967296, [[TMP1]]
56+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
57+
; CHECK: [[VECTOR_PH]]:
58+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
59+
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
60+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 4294967296, [[TMP3]]
61+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 4294967296, [[N_MOD_VF]]
62+
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[N_VEC]] to i32
63+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
64+
; CHECK: [[VECTOR_BODY]]:
65+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
66+
; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[SRC]], align 2
67+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[TMP5]], i64 0
68+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
69+
; CHECK-NEXT: [[TMP6:%.*]] = trunc <vscale x 8 x i16> [[BROADCAST_SPLAT]] to <vscale x 8 x i8>
70+
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
71+
; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 8
72+
; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], 1
73+
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <vscale x 8 x i8> [[TMP6]], i32 [[TMP9]]
74+
; CHECK-NEXT: store i8 [[TMP10]], ptr null, align 1
75+
; CHECK-NEXT: store i8 [[TMP10]], ptr [[DST]], align 1
76+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
77+
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
78+
; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
79+
; CHECK: [[MIDDLE_BLOCK]]:
80+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 4294967296, [[N_VEC]]
81+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
82+
; CHECK: [[SCALAR_PH]]:
83+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
84+
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
85+
; CHECK-NEXT: br label %[[LOOP:.*]]
86+
; CHECK: [[LOOP]]:
87+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
88+
; CHECK-NEXT: [[COUNT:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[COUNT_NEXT:%.*]], %[[LOOP]] ]
89+
; CHECK-NEXT: [[VAL:%.*]] = load i16, ptr [[SRC]], align 2
90+
; CHECK-NEXT: [[VAL_ZEXT:%.*]] = zext i16 [[VAL]] to i64
91+
; CHECK-NEXT: [[VAL_TRUNC_ZEXT:%.*]] = trunc i64 [[VAL_ZEXT]] to i8
92+
; CHECK-NEXT: store i8 [[VAL_TRUNC_ZEXT]], ptr null, align 1
93+
; CHECK-NEXT: [[VAL_TRUNC:%.*]] = trunc i16 [[VAL]] to i8
94+
; CHECK-NEXT: store i8 [[VAL_TRUNC]], ptr [[DST]], align 1
95+
; CHECK-NEXT: [[COUNT_NEXT]] = add i32 [[COUNT]], 1
96+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[COUNT_NEXT]], 0
97+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
98+
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
99+
; CHECK: [[EXIT]]:
100+
; CHECK-NEXT: ret void
101+
;
102+
entry:
103+
br label %loop
104+
105+
loop: ; preds = %loop, %entry
106+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
107+
%count = phi i32 [ 0, %entry ], [ %count.next, %loop ]
108+
%val = load i16, ptr %src, align 2
109+
%val.zext = zext i16 %val to i64
110+
%val.trunc.zext = trunc i64 %val.zext to i8
111+
store i8 %val.trunc.zext, ptr null, align 1
112+
%val.trunc = trunc i16 %val to i8
113+
store i8 %val.trunc, ptr %dst, align 1
114+
%count.next = add i32 %count, 1
115+
%exitcond = icmp eq i32 %count.next, 0
116+
%iv.next = add i64 %iv, 1
117+
br i1 %exitcond, label %exit, label %loop
118+
119+
exit: ; preds = %loop
120+
ret void
121+
}
122+
47123
;.
48124
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
49125
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
50126
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
127+
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
128+
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]}
51129
;.

0 commit comments

Comments
 (0)