Skip to content

[SYCL] Disable loop passes in SYCL optimization mode #2414

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 66 additions & 57 deletions llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -411,46 +411,50 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createReassociatePass()); // Reassociate expressions

// Begin the loop pass pipeline.
if (EnableSimpleLoopUnswitch) {
// The simple loop unswitch pass relies on separate cleanup passes. Schedule
// them first so when we re-process a loop they run before other loop
// passes.
MPM.add(createLoopInstSimplifyPass());
MPM.add(createLoopSimplifyCFGPass());
// Do not run loop pass pipeline in "SYCL Optimization Mode". Loop
// optimizations rely on TTI, which is not accurate for SPIR target.
if (!SYCLOptimizationMode) {
// Begin the loop pass pipeline.
if (EnableSimpleLoopUnswitch) {
// The simple loop unswitch pass relies on separate cleanup passes.
// Schedule them first so when we re-process a loop they run before other
// loop passes.
MPM.add(createLoopInstSimplifyPass());
MPM.add(createLoopSimplifyCFGPass());
}
// Rotate Loop - disable header duplication at -Oz
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
// TODO: Investigate promotion cap for O1.
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
if (EnableSimpleLoopUnswitch)
MPM.add(createSimpleLoopUnswitchLegacyPass());
else
MPM.add(
createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
// FIXME: We break the loop pass pipeline here in order to do full
// simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace
// the need for this.
MPM.add(createCFGSimplificationPass());
MPM.add(createInstructionCombiningPass());
// We resume loop passes creating a second loop pipeline here.
// TODO: this pass hurts performance because it promotes induction
// variables from 32-bit to 64-bit values. Presumably this happens because
// SPIR is a virtual target with an unlimited number of registers, and the
// pass doesn't take into account that on real HW this promotion is not
// beneficial.
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
addExtensionsToPM(EP_LateLoopOptimizations, MPM);
MPM.add(createLoopDeletionPass()); // Delete dead loops

if (EnableLoopInterchange)
MPM.add(createLoopInterchangePass()); // Interchange loops

// Unroll small loops
MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
ForgetAllSCEVInLoopUnroll));
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
// This ends the loop pass pipelines.
}
// Rotate Loop - disable header duplication at -Oz
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
// TODO: Investigate promotion cap for O1.
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
if (EnableSimpleLoopUnswitch)
MPM.add(createSimpleLoopUnswitchLegacyPass());
else
MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
// FIXME: We break the loop pass pipeline here in order to do full
// simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace the
// need for this.
MPM.add(createCFGSimplificationPass());
MPM.add(createInstructionCombiningPass());
// We resume loop passes creating a second loop pipeline here.
// TODO: this pass hurts performance because it promotes induction variables
// from 32-bit to 64-bit values. Presumably this happens because SPIR is a
// virtual target with an unlimited number of registers, and the pass doesn't
// take into account that on real HW this promotion is not beneficial.
if (!SYCLOptimizationMode)
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
addExtensionsToPM(EP_LateLoopOptimizations, MPM);
MPM.add(createLoopDeletionPass()); // Delete dead loops

if (EnableLoopInterchange)
MPM.add(createLoopInterchangePass()); // Interchange loops

// Unroll small loops
MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
ForgetAllSCEVInLoopUnroll));
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
// This ends the loop pass pipelines.

if (OptLevel > 1) {
MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
Expand Down Expand Up @@ -819,19 +823,21 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createLoopUnrollAndJamPass(OptLevel));
}

// Unroll small loops
MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
ForgetAllSCEVInLoopUnroll));
if (!SYCLOptimizationMode) {
// Unroll small loops
MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
ForgetAllSCEVInLoopUnroll));

if (!DisableUnrollLoops) {
// LoopUnroll may generate some redundancy to clean up.
MPM.add(createInstructionCombiningPass());
if (!DisableUnrollLoops) {
// LoopUnroll may generate some redundancy to clean up.
MPM.add(createInstructionCombiningPass());

// Runtime unrolling introduces a runtime check in the loop prologue. If
// the unrolled loop is an inner loop, the prologue will be inside the
// outer loop. The LICM pass can help hoist the runtime check out if the
// checked value is loop invariant.
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
// Runtime unrolling introduces a runtime check in the loop prologue. If
// the unrolled loop is an inner loop, the prologue will be inside the
// outer loop. The LICM pass can help hoist the runtime check out if the
// checked value is loop invariant.
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
}
}

MPM.add(createWarnMissedTransformationsPass());
Expand Down Expand Up @@ -1034,13 +1040,16 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
if (EnableLoopInterchange)
PM.add(createLoopInterchangePass());

// Unroll small loops
PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
ForgetAllSCEVInLoopUnroll));
PM.add(createLoopVectorizePass(true, !LoopVectorize));
// The vectorizer may have significantly shortened a loop body; unroll again.
PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
ForgetAllSCEVInLoopUnroll));
if (!SYCLOptimizationMode) {
// Unroll small loops
PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
ForgetAllSCEVInLoopUnroll));
PM.add(createLoopVectorizePass(true, !LoopVectorize));
// The vectorizer may have significantly shortened a loop body; unroll
// again.
PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
ForgetAllSCEVInLoopUnroll));
}

PM.add(createWarnMissedTransformationsPass());

Expand Down
1 change: 0 additions & 1 deletion sycl/test/sub_group/broadcast.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// XFAIL: cpu
// UNSUPPORTED: cuda
// CUDA compilation and runtime do not yet support sub-groups.

Expand Down
1 change: 0 additions & 1 deletion sycl/test/sub_group/broadcast_fp64.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// XFAIL: cpu
// UNSUPPORTED: cuda
// CUDA compilation and runtime do not yet support sub-groups.

Expand Down