Skip to content

Commit ef89e3e

Browse files
committed
[VPlan] Collect ephemeral values for VPlan.
Port collectEphemeralValues to VPlan as collectEphemeralRecipesForVPlan, use it in willGenerateVectors. This fixes a regression caused by 29b8b72 for loops where the only vector values are ephemeral.
1 parent 0b53e7b commit ef89e3e

File tree

4 files changed

+51
-73
lines changed

4 files changed

+51
-73
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4790,11 +4790,15 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
47904790
assert(VF.isVector() && "Checking a scalar VF?");
47914791
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(),
47924792
Plan.getCanonicalIV()->getScalarType()->getContext());
4793+
DenseSet<VPRecipeBase *> EphemeralRecipes;
4794+
collectEphemeralRecipesForVPlan(Plan, EphemeralRecipes);
47934795
// Set of already visited types.
47944796
DenseSet<Type *> Visited;
47954797
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
47964798
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
47974799
for (VPRecipeBase &R : *VPBB) {
4800+
if (EphemeralRecipes.contains(&R))
4801+
continue;
47984802
// Continue early if the recipe is considered to not produce a vector
47994803
// result. Note that this includes VPInstruction where some opcodes may
48004804
// produce a vector, to preserve existing behavior as VPInstructions model

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@
88

99
#include "VPlanAnalysis.h"
1010
#include "VPlan.h"
11+
#include "VPlanCFG.h"
1112
#include "llvm/ADT/TypeSwitch.h"
1213
#include "llvm/IR/Instruction.h"
14+
#include "llvm/IR/PatternMatch.h"
1315

1416
using namespace llvm;
1517

@@ -278,3 +280,39 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
278280
CachedTypes[V] = ResultTy;
279281
return ResultTy;
280282
}
283+
284+
/// Collect the ephemeral recipes of \p Plan's vector loop region into
/// \p EphRecipes.
///
/// Seeds are VPReplicateRecipes whose underlying instruction matches the
/// llvm.assume intrinsic. Starting from those seeds, the recipe defining an
/// operand is added as well when it may not have side effects and every user
/// of that operand is a recipe already in \p EphRecipes.
///
/// \param Plan       The VPlan whose vector loop region is scanned.
/// \param EphRecipes Output set; recipes are only ever inserted into it, so
///                   entries already present on entry are kept.
void llvm::collectEphemeralRecipesForVPlan(
    VPlan &Plan, DenseSet<VPRecipeBase *> &EphRecipes) {
  // First, collect seed recipes which are operands of assumes.
  SmallVector<VPRecipeBase *> Worklist;
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
    for (VPRecipeBase &R : *VPBB) {
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      if (!RepR || !match(RepR->getUnderlyingInstr(),
                          PatternMatch::m_Intrinsic<Intrinsic::assume>()))
        continue;
      Worklist.push_back(RepR);
      EphRecipes.insert(RepR);
    }
  }

  // Process operands of candidates in worklist and add them to the set of
  // ephemeral recipes, if they don't have side-effects and are only used by
  // other ephemeral recipes.
  while (!Worklist.empty()) {
    VPRecipeBase *Cur = Worklist.pop_back_val();
    for (VPValue *Op : Cur->operands()) {
      auto *OpR = Op->getDefiningRecipe();
      // Skip operands without a defining recipe, recipes that may have side
      // effects, and recipes that were already classified as ephemeral.
      if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.contains(OpR))
        continue;
      // Capture the set by reference: capturing it by value would copy the
      // entire DenseSet for every operand that reaches this check.
      // NOTE(review): only the users of Op itself are inspected here; if OpR
      // can define further values, their users are not checked -- confirm
      // that this is intended.
      if (any_of(Op->users(), [&EphRecipes](VPUser *U) {
            auto *UR = dyn_cast<VPRecipeBase>(U);
            return !UR || !EphRecipes.contains(UR);
          }))
        continue;
      EphRecipes.insert(OpR);
      Worklist.push_back(OpR);
    }
  }
}

llvm/lib/Transforms/Vectorize/VPlanAnalysis.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define LLVM_TRANSFORMS_VECTORIZE_VPLANANALYSIS_H
1111

1212
#include "llvm/ADT/DenseMap.h"
13+
#include "llvm/ADT/DenseSet.h"
1314

1415
namespace llvm {
1516

@@ -23,6 +24,8 @@ class VPWidenIntOrFpInductionRecipe;
2324
class VPWidenMemoryRecipe;
2425
struct VPWidenSelectRecipe;
2526
class VPReplicateRecipe;
27+
class VPRecipeBase;
28+
class VPlan;
2629
class Type;
2730

2831
/// An analysis for type-inference for VPValues.
@@ -61,6 +64,9 @@ class VPTypeAnalysis {
6164
LLVMContext &getContext() { return Ctx; }
6265
};
6366

67+
// Collect a VPlan's ephemeral recipes: assumes in the vector loop region and
// the side-effect-free recipes whose results only (transitively) feed them.
68+
void collectEphemeralRecipesForVPlan(VPlan &Plan,
69+
DenseSet<VPRecipeBase *> &EphRecipes);
6470
} // end namespace llvm
6571

6672
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANANALYSIS_H

llvm/test/Transforms/LoopVectorize/X86/ephemeral-recipes.ll

Lines changed: 3 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -335,90 +335,22 @@ exit:
335335
ret i32 %final.load
336336
}
337337

338-
; FIXME: shouldn't be vectorized, as the only vector values generated are
339-
; ephemeral.
340338
define i32 @ephemeral_load_and_compare_another_load_used_outside(ptr %start, ptr %end) #0 {
341339
; CHECK-LABEL: define i32 @ephemeral_load_and_compare_another_load_used_outside(
342340
; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
343341
; CHECK-NEXT: [[ENTRY:.*]]:
344-
; CHECK-NEXT: [[END2:%.*]] = ptrtoint ptr [[END]] to i64
345-
; CHECK-NEXT: [[START1:%.*]] = ptrtoint ptr [[START]] to i64
346-
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[START1]], [[END2]]
347-
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
348-
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
349-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 16
350-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
351-
; CHECK: [[VECTOR_PH]]:
352-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16
353-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
354-
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], -8
355-
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]]
356-
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
357-
; CHECK: [[VECTOR_BODY]]:
358-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
359-
; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[END]], align 8
360-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP4]], i64 0
361-
; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT9]], <4 x ptr> poison, <4 x i32> zeroinitializer
362-
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[BROADCAST_SPLAT10]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
363-
; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[BROADCAST_SPLAT10]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
364-
; CHECK-NEXT: [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[BROADCAST_SPLAT10]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
365-
; CHECK-NEXT: [[WIDE_MASKED_GATHER11:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[BROADCAST_SPLAT10]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
366-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer
367-
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <4 x i32> [[WIDE_MASKED_GATHER5]], zeroinitializer
368-
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <4 x i32> [[WIDE_MASKED_GATHER8]], zeroinitializer
369-
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <4 x i32> [[WIDE_MASKED_GATHER11]], zeroinitializer
370-
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
371-
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP9]])
372-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1
373-
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP10]])
374-
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2
375-
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP11]])
376-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3
377-
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP12]])
378-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
379-
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP13]])
380-
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
381-
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP14]])
382-
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2
383-
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP15]])
384-
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
385-
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP16]])
386-
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
387-
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP17]])
388-
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
389-
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP18]])
390-
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
391-
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP19]])
392-
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
393-
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP20]])
394-
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
395-
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP21]])
396-
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1
397-
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP22]])
398-
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2
399-
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP23]])
400-
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3
401-
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP24]])
402-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
403-
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
404-
; CHECK-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
405-
; CHECK: [[MIDDLE_BLOCK]]:
406-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
407-
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
408-
; CHECK: [[SCALAR_PH]]:
409-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
410342
; CHECK-NEXT: br label %[[LOOP:.*]]
411343
; CHECK: [[LOOP]]:
412-
; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT2:%.*]], %[[LOOP]] ]
344+
; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT2:%.*]], %[[LOOP]] ]
413345
; CHECK-NEXT: [[IV_NEXT2]] = getelementptr nusw i8, ptr [[IV]], i64 -8
414346
; CHECK-NEXT: [[L1:%.*]] = load ptr, ptr [[END]], align 8
415347
; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[L1]], align 4
416348
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[L2]], 0
417349
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
418350
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[IV]], [[END]]
419-
; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
351+
; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
420352
; CHECK: [[EXIT]]:
421-
; CHECK-NEXT: [[L1_LCSSA:%.*]] = phi ptr [ [[L1]], %[[LOOP]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ]
353+
; CHECK-NEXT: [[L1_LCSSA:%.*]] = phi ptr [ [[L1]], %[[LOOP]] ]
422354
; CHECK-NEXT: [[FINAL_LOAD:%.*]] = load i32, ptr [[L1_LCSSA]], align 4
423355
; CHECK-NEXT: ret i32 [[FINAL_LOAD]]
424356
;
@@ -448,6 +380,4 @@ attributes #0 = { "target-cpu"="skylake-avx512" }
448380
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
449381
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
450382
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
451-
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
452-
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
453383
;.

0 commit comments

Comments
 (0)