From 390300d9f41df6ad71f0f4196ef4885d4bd5dc48 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Sun, 3 Nov 2024 23:25:29 -0500 Subject: [PATCH] [PassBuilder] Add `ThinOrFullLTOPhase` to optimizer pipeline (#114577) --- clang/lib/CodeGen/BackendUtil.cpp | 22 +++++++++-------- llvm/include/llvm/Passes/PassBuilder.h | 20 +++++++++++----- llvm/lib/Passes/PassBuilderPipelines.cpp | 24 +++++++++++-------- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 23 +++++++++++------- .../CodeGen/AMDGPU/print-pipeline-passes.ll | 1 + llvm/tools/opt/NewPMDriver.cpp | 4 ++-- 6 files changed, 57 insertions(+), 37 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 47a30f00612eb7..70035a5e069a90 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -674,7 +674,7 @@ static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts, // Ensure we lower KCFI operand bundles with -O0. PB.registerOptimizerLastEPCallback( - [&](ModulePassManager &MPM, OptimizationLevel Level) { + [&](ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase) { if (Level == OptimizationLevel::O0 && LangOpts.Sanitize.has(SanitizerKind::KCFI)) MPM.addPass(createModuleToFunctionPassAdaptor(KCFIPass())); @@ -693,8 +693,8 @@ static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts, static void addSanitizers(const Triple &TargetTriple, const CodeGenOptions &CodeGenOpts, const LangOptions &LangOpts, PassBuilder &PB) { - auto SanitizersCallback = [&](ModulePassManager &MPM, - OptimizationLevel Level) { + auto SanitizersCallback = [&](ModulePassManager &MPM, OptimizationLevel Level, + ThinOrFullLTOPhase) { if (CodeGenOpts.hasSanitizeCoverage()) { auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts); MPM.addPass(SanitizerCoveragePass( @@ -778,9 +778,10 @@ static void addSanitizers(const Triple &TargetTriple, }; if (ClSanitizeOnOptimizerEarlyEP) { PB.registerOptimizerEarlyEPCallback( - [SanitizersCallback](ModulePassManager &MPM, OptimizationLevel Level) { + [SanitizersCallback](ModulePassManager &MPM, OptimizationLevel Level, + ThinOrFullLTOPhase Phase) { ModulePassManager NewMPM; - SanitizersCallback(NewMPM, Level); + SanitizersCallback(NewMPM, Level, Phase); if (!NewMPM.isEmpty()) { // Sanitizers can abandon. NewMPM.addPass(RequireAnalysisPass()); @@ -1058,11 +1059,12 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // TODO: Consider passing the MemoryProfileOutput to the pass builder via // the PGOOptions, and set this up there. if (!CodeGenOpts.MemoryProfileOutput.empty()) { - PB.registerOptimizerLastEPCallback( - [](ModulePassManager &MPM, OptimizationLevel Level) { - MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass())); - MPM.addPass(ModuleMemProfilerPass()); - }); + PB.registerOptimizerLastEPCallback([](ModulePassManager &MPM, + OptimizationLevel Level, + ThinOrFullLTOPhase) { + MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass())); + MPM.addPass(ModuleMemProfilerPass()); + }); } if (CodeGenOpts.FatLTO) { diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 565fd2ab2147e5..e7bc3a58f414f1 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -490,7 +490,8 @@ class PassBuilder { /// This extension point allows adding optimizations before the function /// optimization pipeline. void registerOptimizerEarlyEPCallback( - const std::function &C) { + const std::function &C) { OptimizerEarlyEPCallbacks.push_back(C); } @@ -499,7 +500,8 @@ class PassBuilder { /// This extension point allows adding optimizations at the very end of the /// function optimization pipeline. void registerOptimizerLastEPCallback( - const std::function &C) { + const std::function &C) { OptimizerLastEPCallbacks.push_back(C); } @@ -630,9 +632,11 @@ class PassBuilder { void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level); void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, - OptimizationLevel Level); + OptimizationLevel Level, + ThinOrFullLTOPhase Phase); void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, - OptimizationLevel Level); + OptimizationLevel Level, + ThinOrFullLTOPhase Phase); void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level); void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, @@ -756,9 +760,13 @@ class PassBuilder { SmallVector, 2> VectorizerStartEPCallbacks; // Module callbacks - SmallVector, 2> + SmallVector, + 2> OptimizerEarlyEPCallbacks; - SmallVector, 2> + SmallVector, + 2> OptimizerLastEPCallbacks; SmallVector, 2> FullLinkTimeOptimizationEarlyEPCallbacks; diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 9c90accd9c376b..16fe9a74bb9c0d 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -359,14 +359,16 @@ void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, C(FPM, Level); } void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, - OptimizationLevel Level) { + OptimizationLevel Level, + ThinOrFullLTOPhase Phase) { for (auto &C : OptimizerEarlyEPCallbacks) - C(MPM, Level); + C(MPM, Level, Phase); } void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, - OptimizationLevel Level) { + OptimizationLevel Level, + ThinOrFullLTOPhase Phase) { for (auto &C : OptimizerLastEPCallbacks) - C(MPM, Level); + C(MPM, Level, Phase); } void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks( ModulePassManager &MPM, OptimizationLevel Level) { @@ -1464,7 +1466,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, if (EnableGlobalAnalyses) MPM.addPass(RecomputeGlobalsAAPass()); - invokeOptimizerEarlyEPCallbacks(MPM, Level); + invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase); FunctionPassManager OptimizePM; // Scheduling LoopVersioningLICM when inlining is over, because after that @@ -1559,7 +1561,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM), PTO.EagerlyInvalidateAnalyses)); - invokeOptimizerLastEPCallbacks(MPM, Level); + invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase); // Split out cold code. Splitting is done late to avoid hiding context from // other optimizations and inadvertently regressing performance. The tradeoff @@ -1716,8 +1718,10 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual // optimization is going to be done in PostLink stage, but clang can't add // callbacks there in case of in-process ThinLTO called by linker. - invokeOptimizerEarlyEPCallbacks(MPM, Level); - invokeOptimizerLastEPCallbacks(MPM, Level); + invokeOptimizerEarlyEPCallbacks(MPM, Level, + /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink); + invokeOptimizerLastEPCallbacks(MPM, Level, + /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink); // Emit annotation remarks. addAnnotationRemarksPass(MPM); @@ -2198,7 +2202,7 @@ PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } - invokeOptimizerEarlyEPCallbacks(MPM, Level); + invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase); if (!VectorizerStartEPCallbacks.empty()) { FunctionPassManager FPM; @@ -2216,7 +2220,7 @@ PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, CoroPM.addPass(GlobalDCEPass()); MPM.addPass(CoroConditionalWrapper(std::move(CoroPM))); - invokeOptimizerLastEPCallbacks(MPM, Level); + invokeOptimizerLastEPCallbacks(MPM, Level, Phase); if (isLTOPreLink(Phase)) addRequiredLTOPreLinkPasses(MPM); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 6e063756045a80..86d8dbe4d803cd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -258,6 +258,11 @@ static WWMRegisterRegAlloc createGreedyWWMRegisterAllocator); static WWMRegisterRegAlloc fastRegAllocWWMReg("fast", "fast register allocator", createFastWWMRegisterAllocator); + +static bool isLTOPreLink(ThinOrFullLTOPhase Phase) { + return Phase == ThinOrFullLTOPhase::FullLTOPreLink || + Phase == ThinOrFullLTOPhase::ThinLTOPreLink; +} } // anonymous namespace static cl::opt @@ -755,9 +760,7 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { PM.addPass(AMDGPUUnifyMetadataPass()); // We don't want to run internalization at per-module stage. - bool LTOPreLink = Phase == ThinOrFullLTOPhase::FullLTOPreLink || - Phase == ThinOrFullLTOPhase::ThinLTOPreLink; - if (InternalizeSymbols && !LTOPreLink) { + if (InternalizeSymbols && !isLTOPreLink(Phase)) { PM.addPass(InternalizePass(mustPreserveGV)); PM.addPass(GlobalDCEPass()); } @@ -809,12 +812,14 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { }); // FIXME: Why is AMDGPUAttributor not in CGSCC? - PB.registerOptimizerLastEPCallback( - [this](ModulePassManager &MPM, OptimizationLevel Level) { - if (Level != OptimizationLevel::O0) { - MPM.addPass(AMDGPUAttributorPass(*this)); - } - }); + PB.registerOptimizerLastEPCallback([this](ModulePassManager &MPM, + OptimizationLevel Level, + ThinOrFullLTOPhase Phase) { + if (Level != OptimizationLevel::O0) { + if (!isLTOPreLink(Phase)) + MPM.addPass(AMDGPUAttributorPass(*this)); + } + }); PB.registerFullLinkTimeOptimizationLastEPCallback( [this](ModulePassManager &PM, OptimizationLevel Level) { diff --git a/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll b/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll index 13e38f1bdd3330..c68143f44866f3 100644 --- a/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll +++ b/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll @@ -13,6 +13,7 @@ ; O0-NOT: amdgpu-attributor ; PRE-NOT: internalize +; PRE-NOT: amdgpu-attributor define amdgpu_kernel void @kernel() { entry: diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp index 3f1092433d9f31..ea300fb3dbeb13 100644 --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -300,13 +300,13 @@ static void registerEPCallbacks(PassBuilder &PB) { }); if (tryParsePipelineText(PB, OptimizerEarlyEPPipeline)) PB.registerOptimizerEarlyEPCallback( - [&PB](ModulePassManager &PM, OptimizationLevel) { + [&PB](ModulePassManager &PM, OptimizationLevel, ThinOrFullLTOPhase) { ExitOnError Err("Unable to parse OptimizerEarlyEP pipeline: "); Err(PB.parsePassPipeline(PM, OptimizerEarlyEPPipeline)); }); if (tryParsePipelineText(PB, OptimizerLastEPPipeline)) PB.registerOptimizerLastEPCallback( - [&PB](ModulePassManager &PM, OptimizationLevel) { + [&PB](ModulePassManager &PM, OptimizationLevel, ThinOrFullLTOPhase) { ExitOnError Err("Unable to parse OptimizerLastEP pipeline: "); Err(PB.parsePassPipeline(PM, OptimizerLastEPPipeline)); });