Skip to content

[SLP] Model single unique value insert + shuffle as splat + select, where profitable #136590

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
[𝘀𝗽𝗿] initial version
Created using spr 1.3.5
  • Loading branch information
alexey-bataev committed Apr 21, 2025
commit 655bd2478f305439be0795390c32b698fb0bebd5
96 changes: 81 additions & 15 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12613,11 +12613,13 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
InstructionCost createFreeze(InstructionCost Cost) { return Cost; }
/// Finalize emission of the shuffles.
InstructionCost
finalize(ArrayRef<int> ExtMask,
ArrayRef<std::pair<const TreeEntry *, unsigned>> SubVectors,
ArrayRef<int> SubVectorsMask, unsigned VF = 0,
function_ref<void(Value *&, SmallVectorImpl<int> &)> Action = {}) {
InstructionCost finalize(
ArrayRef<int> ExtMask,
ArrayRef<std::pair<const TreeEntry *, unsigned>> SubVectors,
ArrayRef<int> SubVectorsMask, unsigned VF = 0,
function_ref<void(Value *&, SmallVectorImpl<int> &,
function_ref<Value *(Value *, Value *, ArrayRef<int>)>)>
Action = {}) {
IsFinalized = true;
if (Action) {
const PointerUnion<Value *, const TreeEntry *> &Vec = InVectors.front();
Expand All @@ -12629,7 +12631,10 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
assert(VF > 0 &&
"Expected vector length for the final value before action.");
Value *V = cast<Value *>(Vec);
Action(V, CommonMask);
Action(V, CommonMask, [this](Value *V1, Value *V2, ArrayRef<int> Mask) {
Cost += createShuffle(V1, V2, Mask);
return V1;
});
InVectors.front() = V;
}
if (!SubVectors.empty()) {
Expand Down Expand Up @@ -16592,11 +16597,13 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
/// Finalize emission of the shuffles.
/// \param Action the action (if any) to be performed before the \p ExtMask
/// mask is finally applied.
Value *
finalize(ArrayRef<int> ExtMask,
ArrayRef<std::pair<const TreeEntry *, unsigned>> SubVectors,
ArrayRef<int> SubVectorsMask, unsigned VF = 0,
function_ref<void(Value *&, SmallVectorImpl<int> &)> Action = {}) {
Value *finalize(
ArrayRef<int> ExtMask,
ArrayRef<std::pair<const TreeEntry *, unsigned>> SubVectors,
ArrayRef<int> SubVectorsMask, unsigned VF = 0,
function_ref<void(Value *&, SmallVectorImpl<int> &,
function_ref<Value *(Value *, Value *, ArrayRef<int>)>)>
Action = {}) {
IsFinalized = true;
if (Action) {
Value *Vec = InVectors.front();
Expand All @@ -16615,7 +16622,9 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), VecVF), 0);
Vec = createShuffle(Vec, nullptr, ResizeMask);
}
Action(Vec, CommonMask);
Action(Vec, CommonMask, [this](Value *V1, Value *V2, ArrayRef<int> Mask) {
return createShuffle(V1, V2, Mask);
});
InVectors.front() = Vec;
}
if (!SubVectors.empty()) {
Expand Down Expand Up @@ -17277,9 +17286,66 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
else
Res = ShuffleBuilder.finalize(
E->ReuseShuffleIndices, SubVectors, SubVectorsMask, E->Scalars.size(),
[&](Value *&Vec, SmallVectorImpl<int> &Mask) {
TryPackScalars(NonConstants, Mask, /*IsRootPoison=*/false);
Vec = ShuffleBuilder.gather(NonConstants, Mask.size(), Vec);
[&](Value *&Vec, SmallVectorImpl<int> &Mask, auto CreateShuffle) {
bool IsSplat = isSplat(NonConstants);
SmallVector<int> BVMask(Mask.size(), PoisonMaskElem);
TryPackScalars(NonConstants, BVMask, /*IsRootPoison=*/false);
auto CheckIfSplatIsProfitable = [&]() {
// Estimate the cost of splatting + shuffle and compare with
// insert + shuffle.
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
Value *V = *find_if_not(NonConstants, IsaPred<UndefValue>);
if (isa<ExtractElementInst>(V) || isVectorized(V))
return false;
InstructionCost SplatCost = TTI->getVectorInstrCost(
Instruction::InsertElement, VecTy, CostKind, /*Index=*/0,
PoisonValue::get(VecTy), V);
SmallVector<int> NewMask(Mask.begin(), Mask.end());
for (auto [Idx, I] : enumerate(BVMask))
if (I != PoisonMaskElem)
NewMask[Idx] = Mask.size();
SplatCost += ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, VecTy,
NewMask, CostKind);
InstructionCost BVCost = TTI->getVectorInstrCost(
Instruction::InsertElement, VecTy, CostKind,
*find_if(Mask, [](int I) { return I != PoisonMaskElem; }),
Vec, V);
// Shuffle required?
if (count(BVMask, PoisonMaskElem) <
static_cast<int>(BVMask.size() - 1)) {
SmallVector<int> NewMask(Mask.begin(), Mask.end());
for (auto [Idx, I] : enumerate(BVMask))
if (I != PoisonMaskElem)
NewMask[Idx] = I;
BVCost += ::getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc,
VecTy, NewMask, CostKind);
}
return SplatCost <= BVCost;
};
if (!IsSplat || Mask.size() <= 2 || !CheckIfSplatIsProfitable()) {
for (auto [Idx, I] : enumerate(BVMask))
if (I != PoisonMaskElem)
Mask[Idx] = I;
Vec = ShuffleBuilder.gather(NonConstants, Mask.size(), Vec);
} else {
Value *V = *find_if_not(NonConstants, IsaPred<UndefValue>);
SmallVector<Value *> Values(NonConstants.size(), PoisonValue::get(ScalarTy));
Values[0] = V;
Value *BV = ShuffleBuilder.gather(Values, BVMask.size());
SmallVector<int> SplatMask(BVMask.size(), PoisonMaskElem);
transform(BVMask, SplatMask.begin(), [](int I) {
return I == PoisonMaskElem ? PoisonMaskElem : 0;
});
if (!ShuffleVectorInst::isIdentityMask(SplatMask, VF))
BV = CreateShuffle(BV, nullptr, SplatMask);
for (auto [Idx, I] : enumerate(BVMask))
if (I != PoisonMaskElem)
Mask[Idx] = BVMask.size() + Idx;
Vec = CreateShuffle(Vec, BV, Mask);
for (auto [Idx, I] : enumerate(Mask))
if (I != PoisonMaskElem)
Mask[Idx] = Idx;
}
});
} else if (!allConstant(GatheredScalars)) {
// Gather unique scalars and all constants.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ define void @test() {
; CHECK-NEXT: br i1 poison, label %[[BB167:.*]], label %[[BB77:.*]]
; CHECK: [[BB77]]:
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 14, i32 15, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x float> [[TMP12]], float [[I70]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x float> poison, float [[I70]], i32 0
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> [[TMP17]], <8 x i32> <i32 8, i32 poison, i32 poison, i32 poison, i32 4, i32 5, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x float> poison, float [[I70]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x float> [[TMP14]], float [[I68]], i32 2
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x float> [[TMP19]], float [[I66]], i32 3
Expand All @@ -48,7 +49,7 @@ define void @test() {
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x float> [[TMP39]], <16 x float> [[TMP25]], <16 x i32> <i32 poison, i32 poison, i32 2, i32 3, i32 18, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 19, i32 poison, i32 poison>
; CHECK-NEXT: br label %[[BB78:.*]]
; CHECK: [[BB78]]:
; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP17]], %[[BB77]] ], [ [[TMP36:%.*]], %[[BB78]] ]
; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP23]], %[[BB77]] ], [ [[TMP36:%.*]], %[[BB78]] ]
; CHECK-NEXT: [[TMP22:%.*]] = phi <8 x float> [ [[TMP21]], %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <8 x float> [[TMP22]], <8 x float> poison, <16 x i32> <i32 0, i32 3, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 2, i32 6, i32 2, i32 3, i32 0, i32 7, i32 6, i32 6>
; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 1, i32 3, i32 5, i32 3, i32 1, i32 0, i32 4, i32 5, i32 5>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,9 @@ define void @select_uniform_ugt_16xi8(ptr %ptr, i8 %x) {
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[GEP_12]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i8> [[TMP0]], <8 x i8> [[TMP4]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 [[L_11]], i32 11
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i8> poison, i8 [[L_11]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i8> [[TMP10]], <16 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i8> [[TMP5]], <16 x i8> [[TMP11]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 8, i32 9, i32 10, i32 27, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP0]], i64 0)
; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP7]], <4 x i8> [[TMP3]], i64 12)
; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <16 x i8> [[TMP8]], splat (i8 -1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ define i32 @test() {
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> zeroinitializer, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[G_228_PROMOTED166_I1105_I]], i32 7
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[G_228_PROMOTED166_I1105_I]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP7]], <16 x i32> [[TMP9]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 23, i32 8, i32 9, i32 10, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v12i32(<16 x i32> poison, <12 x i32> [[TMP3]], i64 0)
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP8]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 24, i32 25, i32 26, i32 7, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt <16 x i32> [[TMP11]], zeroinitializer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ define i32 @test(i64 %l.549) {
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[CONV3]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 0, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[TMP3]], i64 0, i32 1
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i64> poison, i64 [[L_549]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>
; CHECK-NEXT: br label %[[IF_THEN19:.*]]
; CHECK: [[P:.*]]:
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i64> [ zeroinitializer, %[[IF_END29:.*]] ], [ [[TMP13:%.*]], %[[IF_END25:.*]] ]
Expand All @@ -23,20 +25,20 @@ define i32 @test(i64 %l.549) {
; CHECK: [[LOR_LHS_FALSE]]:
; CHECK-NEXT: br i1 false, label %[[LAND_LHS_TRUE]], label %[[S]]
; CHECK: [[R]]:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i64> [ [[TMP7]], %[[Q]] ], [ [[TMP16:%.*]], %[[IF_THEN19]] ]
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i64> [ [[TMP7]], %[[Q]] ], [ [[TMP16:%.*]], %[[IF_THEN19]] ]
; CHECK-NEXT: br i1 false, label %[[S]], label %[[LAND_LHS_TRUE]]
; CHECK: [[LAND_LHS_TRUE]]:
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i64> [ [[TMP8]], %[[R]] ], [ zeroinitializer, %[[LOR_LHS_FALSE]] ]
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i64> [ [[TMP18]], %[[R]] ], [ zeroinitializer, %[[LOR_LHS_FALSE]] ]
; CHECK-NEXT: br i1 false, label %[[Q]], label %[[S]]
; CHECK: [[S]]:
; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP9]], %[[LAND_LHS_TRUE]] ], [ [[TMP8]], %[[R]] ], [ [[TMP7]], %[[LOR_LHS_FALSE]] ], [ [[TMP17]], %[[P]] ]
; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP19]], %[[LAND_LHS_TRUE]] ], [ [[TMP18]], %[[R]] ], [ [[TMP7]], %[[LOR_LHS_FALSE]] ], [ [[TMP17]], %[[P]] ]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: br label %[[IF_THEN19]]
; CHECK: [[IF_THEN19]]:
; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i64> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP11]], %[[S]] ]
; CHECK-NEXT: [[TMP13]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[L_549]], i32 1
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i64> [[TMP14]], <4 x i64> [[TMP9]], <4 x i32> <i32 0, i32 5, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP16]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> [[TMP15]], <2 x i64> [[TMP2]], i64 2)
; CHECK-NEXT: br i1 false, label %[[R]], label %[[IF_END25]]
; CHECK: [[IF_END25]]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,18 @@ define i32 @test(i32 %s.0) {
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> <i32 0, i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison>, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 poison>
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i32> [[TMP14]], i32 [[J_4]], i32 7
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i32> poison, i32 [[J_4]], i32 0
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <8 x i32> [[TMP15]], <8 x i32> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>
; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <8 x i32> [[TMP14]], <8 x i32> [[TMP16]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
; CHECK-NEXT: br label %[[IF_END24]]
; CHECK: [[IF_THEN18:.*]]:
; CHECK-NEXT: br label %[[T]]
; CHECK: [[T]]:
; CHECK-NEXT: [[TMP16:%.*]] = phi <8 x i32> [ [[TMP27:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ]
; CHECK-NEXT: [[TMP30:%.*]] = phi <8 x i32> [ [[TMP27:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ]
; CHECK-NEXT: [[TMP17]] = extractelement <4 x i32> [[TMP23:%.*]], i32 0
; CHECK-NEXT: br i1 false, label %[[IF_END24]], label %[[K]]
; CHECK: [[IF_END24]]:
; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP15]], %[[IF_THEN11]] ], [ [[TMP11]], %[[IF_END6]] ], [ [[TMP16]], %[[T]] ]
; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP29]], %[[IF_THEN11]] ], [ [[TMP11]], %[[IF_END6]] ], [ [[TMP30]], %[[T]] ]
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <2 x i32> <i32 7, i32 1>
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 6>
Expand Down
11 changes: 6 additions & 5 deletions llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll
Original file line number Diff line number Diff line change
Expand Up @@ -124,23 +124,23 @@ define double @preserve_loop_info(ptr %arg, i1 %arg2) {
; CHECK: outer.header:
; CHECK-NEXT: br label [[INNER:%.*]]
; CHECK: inner:
; CHECK-NEXT: br i1 %arg2, label [[OUTER_LATCH:%.*]], label [[INNER]]
; CHECK-NEXT: br i1 [[ARG2:%.*]], label [[OUTER_LATCH:%.*]], label [[INNER]]
; CHECK: outer.latch:
; CHECK-NEXT: br i1 %arg2, label [[BB:%.*]], label [[OUTER_HEADER]]
; CHECK-NEXT: br i1 [[ARG2]], label [[BB:%.*]], label [[OUTER_HEADER]]
; CHECK: bb:
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr undef, align 8
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x double], ptr [[TMP]], i64 0, i64 1
; CHECK-NEXT: br label [[LOOP_3HEADER:%.*]]
; CHECK: loop.3header:
; CHECK-NEXT: br i1 %arg2, label [[LOOP_3LATCH:%.*]], label [[BB9:%.*]]
; CHECK-NEXT: br i1 [[ARG2]], label [[LOOP_3LATCH:%.*]], label [[BB9:%.*]]
; CHECK: bb9:
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x double], ptr [[TMP5]], i64 undef, i64 1
; CHECK-NEXT: store double undef, ptr [[TMP]], align 16
; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP10]], align 8
; CHECK-NEXT: store double [[TMP12]], ptr [[TMP7]], align 8
; CHECK-NEXT: br label [[LOOP_3LATCH]]
; CHECK: loop.3latch:
; CHECK-NEXT: br i1 %arg2, label [[BB14:%.*]], label [[LOOP_3HEADER]]
; CHECK-NEXT: br i1 [[ARG2]], label [[BB14:%.*]], label [[LOOP_3HEADER]]
; CHECK: bb14:
; CHECK-NEXT: [[TMP15:%.*]] = call double undef(ptr [[TMP]], ptr [[ARG:%.*]])
; CHECK-NEXT: ret double undef
Expand Down Expand Up @@ -189,7 +189,8 @@ define void @gather_sequence_crash(<2 x float> %arg, ptr %arg1, float %arg2, ptr
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[ARG1:%.*]], i32 3
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x float> [[ARG:%.*]], <2 x float> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 poison>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> <float poison, float poison, float poison, float 0.000000e+00>, <4 x i32> <i32 poison, i32 1, i32 2, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[ARG2:%.*]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> poison, float [[ARG2:%.*]], i32 0
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it actually the case for x86 that a vector select with a broadcast argument is cheaper than an insert to element 0 with a passthru?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is at least as cheap; the cost estimation shows it.

; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP6]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], zeroinitializer
; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP8]], align 4
; CHECK-NEXT: ret void
Expand Down
Loading
Loading