Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PassBuilder] Add ThinOrFullLTOPhase to optimizer pipeline #114577

Merged
shiltian merged 1 commit into main from users/shiltian/no-amdgpu-attributor-in-pre-link
Nov 4, 2024

Conversation

shiltian
Copy link
Contributor

@shiltian shiltian commented Nov 1, 2024

No description provided.

Copy link
Contributor Author

shiltian commented Nov 1, 2024

@llvmbot
Copy link
Member

llvmbot commented Nov 1, 2024

@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-clang

@llvm/pr-subscribers-clang-codegen

Author: Shilei Tian (shiltian)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/114577.diff

6 Files Affected:

  • (modified) clang/lib/CodeGen/BackendUtil.cpp (+12-10)
  • (modified) llvm/include/llvm/Passes/PassBuilder.h (+14-6)
  • (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+14-10)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+10-6)
  • (modified) llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll (+1)
  • (modified) llvm/tools/opt/NewPMDriver.cpp (+2-2)
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 47a30f00612eb7..70035a5e069a90 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -674,7 +674,7 @@ static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts,
 
   // Ensure we lower KCFI operand bundles with -O0.
   PB.registerOptimizerLastEPCallback(
-      [&](ModulePassManager &MPM, OptimizationLevel Level) {
+      [&](ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase) {
         if (Level == OptimizationLevel::O0 &&
             LangOpts.Sanitize.has(SanitizerKind::KCFI))
           MPM.addPass(createModuleToFunctionPassAdaptor(KCFIPass()));
@@ -693,8 +693,8 @@ static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts,
 static void addSanitizers(const Triple &TargetTriple,
                           const CodeGenOptions &CodeGenOpts,
                           const LangOptions &LangOpts, PassBuilder &PB) {
-  auto SanitizersCallback = [&](ModulePassManager &MPM,
-                                OptimizationLevel Level) {
+  auto SanitizersCallback = [&](ModulePassManager &MPM, OptimizationLevel Level,
+                                ThinOrFullLTOPhase) {
     if (CodeGenOpts.hasSanitizeCoverage()) {
       auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts);
       MPM.addPass(SanitizerCoveragePass(
@@ -778,9 +778,10 @@ static void addSanitizers(const Triple &TargetTriple,
   };
   if (ClSanitizeOnOptimizerEarlyEP) {
     PB.registerOptimizerEarlyEPCallback(
-        [SanitizersCallback](ModulePassManager &MPM, OptimizationLevel Level) {
+        [SanitizersCallback](ModulePassManager &MPM, OptimizationLevel Level,
+                             ThinOrFullLTOPhase Phase) {
           ModulePassManager NewMPM;
-          SanitizersCallback(NewMPM, Level);
+          SanitizersCallback(NewMPM, Level, Phase);
           if (!NewMPM.isEmpty()) {
             // Sanitizers can abandon<GlobalsAA>.
             NewMPM.addPass(RequireAnalysisPass<GlobalsAA, llvm::Module>());
@@ -1058,11 +1059,12 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
     // TODO: Consider passing the MemoryProfileOutput to the pass builder via
     // the PGOOptions, and set this up there.
     if (!CodeGenOpts.MemoryProfileOutput.empty()) {
-      PB.registerOptimizerLastEPCallback(
-          [](ModulePassManager &MPM, OptimizationLevel Level) {
-            MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
-            MPM.addPass(ModuleMemProfilerPass());
-          });
+      PB.registerOptimizerLastEPCallback([](ModulePassManager &MPM,
+                                            OptimizationLevel Level,
+                                            ThinOrFullLTOPhase) {
+        MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
+        MPM.addPass(ModuleMemProfilerPass());
+      });
     }
 
     if (CodeGenOpts.FatLTO) {
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index 565fd2ab2147e5..e7bc3a58f414f1 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -490,7 +490,8 @@ class PassBuilder {
   /// This extension point allows adding optimizations before the function
   /// optimization pipeline.
   void registerOptimizerEarlyEPCallback(
-      const std::function<void(ModulePassManager &, OptimizationLevel)> &C) {
+      const std::function<void(ModulePassManager &, OptimizationLevel,
+                               ThinOrFullLTOPhase Phase)> &C) {
     OptimizerEarlyEPCallbacks.push_back(C);
   }
 
@@ -499,7 +500,8 @@ class PassBuilder {
   /// This extension point allows adding optimizations at the very end of the
   /// function optimization pipeline.
   void registerOptimizerLastEPCallback(
-      const std::function<void(ModulePassManager &, OptimizationLevel)> &C) {
+      const std::function<void(ModulePassManager &, OptimizationLevel,
+                               ThinOrFullLTOPhase)> &C) {
     OptimizerLastEPCallbacks.push_back(C);
   }
 
@@ -630,9 +632,11 @@ class PassBuilder {
   void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
                                         OptimizationLevel Level);
   void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
-                                       OptimizationLevel Level);
+                                       OptimizationLevel Level,
+                                       ThinOrFullLTOPhase Phase);
   void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
-                                      OptimizationLevel Level);
+                                      OptimizationLevel Level,
+                                      ThinOrFullLTOPhase Phase);
   void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM,
                                                       OptimizationLevel Level);
   void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM,
@@ -756,9 +760,13 @@ class PassBuilder {
   SmallVector<std::function<void(FunctionPassManager &, OptimizationLevel)>, 2>
       VectorizerStartEPCallbacks;
   // Module callbacks
-  SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
+  SmallVector<std::function<void(ModulePassManager &, OptimizationLevel,
+                                 ThinOrFullLTOPhase)>,
+              2>
       OptimizerEarlyEPCallbacks;
-  SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
+  SmallVector<std::function<void(ModulePassManager &, OptimizationLevel,
+                                 ThinOrFullLTOPhase)>,
+              2>
       OptimizerLastEPCallbacks;
   SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
       FullLinkTimeOptimizationEarlyEPCallbacks;
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 9c90accd9c376b..16fe9a74bb9c0d 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -359,14 +359,16 @@ void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
     C(FPM, Level);
 }
 void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
-                                                  OptimizationLevel Level) {
+                                                  OptimizationLevel Level,
+                                                  ThinOrFullLTOPhase Phase) {
   for (auto &C : OptimizerEarlyEPCallbacks)
-    C(MPM, Level);
+    C(MPM, Level, Phase);
 }
 void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
-                                                 OptimizationLevel Level) {
+                                                 OptimizationLevel Level,
+                                                 ThinOrFullLTOPhase Phase) {
   for (auto &C : OptimizerLastEPCallbacks)
-    C(MPM, Level);
+    C(MPM, Level, Phase);
 }
 void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks(
     ModulePassManager &MPM, OptimizationLevel Level) {
@@ -1464,7 +1466,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   if (EnableGlobalAnalyses)
     MPM.addPass(RecomputeGlobalsAAPass());
 
-  invokeOptimizerEarlyEPCallbacks(MPM, Level);
+  invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
 
   FunctionPassManager OptimizePM;
   // Scheduling LoopVersioningLICM when inlining is over, because after that
@@ -1559,7 +1561,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
                                                 PTO.EagerlyInvalidateAnalyses));
 
-  invokeOptimizerLastEPCallbacks(MPM, Level);
+  invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
 
   // Split out cold code. Splitting is done late to avoid hiding context from
   // other optimizations and inadvertently regressing performance. The tradeoff
@@ -1716,8 +1718,10 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
   // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
   // optimization is going to be done in PostLink stage, but clang can't add
   // callbacks there in case of in-process ThinLTO called by linker.
-  invokeOptimizerEarlyEPCallbacks(MPM, Level);
-  invokeOptimizerLastEPCallbacks(MPM, Level);
+  invokeOptimizerEarlyEPCallbacks(MPM, Level,
+                                  /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink);
+  invokeOptimizerLastEPCallbacks(MPM, Level,
+                                 /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink);
 
   // Emit annotation remarks.
   addAnnotationRemarksPass(MPM);
@@ -2198,7 +2202,7 @@ PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
       MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
   }
 
-  invokeOptimizerEarlyEPCallbacks(MPM, Level);
+  invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase);
 
   if (!VectorizerStartEPCallbacks.empty()) {
     FunctionPassManager FPM;
@@ -2216,7 +2220,7 @@ PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
   CoroPM.addPass(GlobalDCEPass());
   MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));
 
-  invokeOptimizerLastEPCallbacks(MPM, Level);
+  invokeOptimizerLastEPCallbacks(MPM, Level, Phase);
 
   if (isLTOPreLink(Phase))
     addRequiredLTOPreLinkPasses(MPM);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 5492d0e589973a..2b02f4527e23fb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -809,12 +809,16 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
       });
 
   // FIXME: Why is AMDGPUAttributor not in CGSCC?
-  PB.registerOptimizerLastEPCallback(
-      [this](ModulePassManager &MPM, OptimizationLevel Level) {
-        if (Level != OptimizationLevel::O0) {
-          MPM.addPass(AMDGPUAttributorPass(*this));
-        }
-      });
+  PB.registerOptimizerLastEPCallback([this](ModulePassManager &MPM,
+                                            OptimizationLevel Level,
+                                            ThinOrFullLTOPhase Phase) {
+    if (Level != OptimizationLevel::O0) {
+      bool LTOPreLink = Phase == ThinOrFullLTOPhase::FullLTOPreLink ||
+                        Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
+      if (!LTOPreLink)
+        MPM.addPass(AMDGPUAttributorPass(*this));
+    }
+  });
 
   PB.registerFullLinkTimeOptimizationLastEPCallback(
       [this](ModulePassManager &PM, OptimizationLevel Level) {
diff --git a/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll b/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll
index 13e38f1bdd3330..c68143f44866f3 100644
--- a/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll
+++ b/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll
@@ -13,6 +13,7 @@
 ; O0-NOT: amdgpu-attributor
 
 ; PRE-NOT: internalize
+; PRE-NOT: amdgpu-attributor
 
 define amdgpu_kernel void @kernel() {
 entry:
diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp
index 3f1092433d9f31..ea300fb3dbeb13 100644
--- a/llvm/tools/opt/NewPMDriver.cpp
+++ b/llvm/tools/opt/NewPMDriver.cpp
@@ -300,13 +300,13 @@ static void registerEPCallbacks(PassBuilder &PB) {
         });
   if (tryParsePipelineText<ModulePassManager>(PB, OptimizerEarlyEPPipeline))
     PB.registerOptimizerEarlyEPCallback(
-        [&PB](ModulePassManager &PM, OptimizationLevel) {
+        [&PB](ModulePassManager &PM, OptimizationLevel, ThinOrFullLTOPhase) {
           ExitOnError Err("Unable to parse OptimizerEarlyEP pipeline: ");
           Err(PB.parsePassPipeline(PM, OptimizerEarlyEPPipeline));
         });
   if (tryParsePipelineText<ModulePassManager>(PB, OptimizerLastEPPipeline))
     PB.registerOptimizerLastEPCallback(
-        [&PB](ModulePassManager &PM, OptimizationLevel) {
+        [&PB](ModulePassManager &PM, OptimizationLevel, ThinOrFullLTOPhase) {
           ExitOnError Err("Unable to parse OptimizerLastEP pipeline: ");
           Err(PB.parsePassPipeline(PM, OptimizerLastEPPipeline));
         });

@shiltian shiltian force-pushed the users/shiltian/lto-pre-link-bool branch from 912283a to 56d807b Compare November 1, 2024 19:17
@shiltian shiltian force-pushed the users/shiltian/no-amdgpu-attributor-in-pre-link branch from dc94afc to 488643c Compare November 1, 2024 19:17
@shiltian shiltian force-pushed the users/shiltian/lto-pre-link-bool branch from 56d807b to 3da52a0 Compare November 4, 2024 02:58
@shiltian shiltian force-pushed the users/shiltian/no-amdgpu-attributor-in-pre-link branch from 488643c to a931d1a Compare November 4, 2024 02:58
Base automatically changed from users/shiltian/lto-pre-link-bool to main November 4, 2024 04:24
@shiltian shiltian force-pushed the users/shiltian/no-amdgpu-attributor-in-pre-link branch from a931d1a to 76d44f7 Compare November 4, 2024 04:25
@shiltian shiltian merged commit 390300d into main Nov 4, 2024
5 of 8 checks passed
@shiltian shiltian deleted the users/shiltian/no-amdgpu-attributor-in-pre-link branch November 4, 2024 04:25
@llvm-ci
Copy link
Collaborator

llvm-ci commented Nov 4, 2024

LLVM Buildbot has detected a new failure on builder openmp-offload-libc-amdgpu-runtime running on omp-vega20-1 while building clang,llvm at step 11 "Add check check-libc-amdgcn-amd-amdhsa".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/7977

Here is the relevant piece of the build log for reference:
Step 11 (Add check check-libc-amdgcn-amd-amdhsa) failure: test (failure)
...
[       OK ] LlvmLibcAcosfTest.SpecialNumbers (6 us)
Ran 1 tests.  PASS: 1  FAIL: 0
[2410/2687] Linking CXX executable libc/test/src/string/libc.test.src.string.strcpy_test.__hermetic__.__build__
[2411/2687] Running hermetic test libc.test.src.math.smoke.atanf_test.__hermetic__
[==========] Running 1 test from 1 test suite.
[ RUN      ] LlvmLibcAtanfTest.SpecialNumbers
[       OK ] LlvmLibcAtanfTest.SpecialNumbers (3 us)
Ran 1 tests.  PASS: 1  FAIL: 0
[2412/2687] Linking CXX executable libc/test/src/inttypes/libc.test.src.inttypes.imaxdiv_test.__hermetic__.__build__
[2413/2687] Running integration test libc.test.integration.src.stdio.sprintf_size_test
FAILED: libc/test/integration/src/stdio/CMakeFiles/libc.test.integration.src.stdio.sprintf_size_test /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-amdgcn-amd-amdhsa-bins/libc/test/integration/src/stdio/CMakeFiles/libc.test.integration.src.stdio.sprintf_size_test 
cd /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-amdgcn-amd-amdhsa-bins/libc/test/integration/src/stdio && /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/bin/amdhsa-loader /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-amdgcn-amd-amdhsa-bins/libc/test/integration/src/stdio/libc.test.integration.src.stdio.sprintf_size_test.__build__ %s\ %c\ %d First\ arg a 0
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/test/integration/src/stdio/sprintf_size_test.cpp:57: Expected 'my_streq(buf, "First arg a 48")' to be true, but is false
[2414/2687] Running integration test libc.test.integration.src.stdio.gpu.printf_test
[2415/2687] Running hermetic test libc.test.src.math.smoke.atan2f_test.__hermetic__
[==========] Running 1 test from 1 test suite.
[ RUN      ] LlvmLibcAtan2fTest.SpecialNumbers
[       OK ] LlvmLibcAtan2fTest.SpecialNumbers (8 us)
Ran 1 tests.  PASS: 1  FAIL: 0
[2416/2687] Linking CXX executable libc/test/src/string/libc.test.src.string.strrchr_test.__hermetic__.__build__
[2417/2687] Linking CXX executable libc/test/src/string/libc.test.src.string.strsep_test.__hermetic__.__build__
[2418/2687] Linking CXX executable libc/test/src/string/libc.test.src.string.bcmp_test.__hermetic__.__build__
[2419/2687] Linking CXX executable libc/test/include/libc.test.include.signbitf_test.__hermetic__.__build__
[2420/2687] Linking CXX executable libc/test/include/libc.test.include.signbit_test.__hermetic__.__build__
[2421/2687] Linking CXX executable libc/test/src/string/libc.test.src.string.strncat_test.__hermetic__.__build__
[2422/2687] Linking CXX executable libc/test/include/libc.test.include.signbitl_test.__hermetic__.__build__
[2423/2687] Linking CXX executable libc/test/include/libc.test.include.fpclassify_test.__hermetic__.__build__
[2424/2687] Linking CXX executable libc/test/src/string/libc.test.src.string.memcmp_test.__hermetic__.__build__
[2425/2687] Linking CXX executable libc/test/include/libc.test.include.iszerof_test.__hermetic__.__build__
[2426/2687] Linking CXX executable libc/test/include/libc.test.include.iszero_test.__hermetic__.__build__
[2427/2687] Linking CXX executable libc/test/src/string/libc.test.src.string.strlcat_test.__hermetic__.__build__
[2428/2687] Linking CXX executable libc/test/include/libc.test.include.fpclassifyl_test.__hermetic__.__build__
[2429/2687] Linking CXX executable libc/test/include/libc.test.include.fpclassifyf_test.__hermetic__.__build__
[2430/2687] Linking CXX executable libc/test/src/time/libc.test.src.time.clock_test.__hermetic__.__build__
[2431/2687] Linking CXX executable libc/test/include/libc.test.include.iszerol_test.__hermetic__.__build__
[2432/2687] Linking CXX executable libc/test/src/inttypes/libc.test.src.inttypes.strtoumax_test.__hermetic__.__build__
[2433/2687] Linking CXX executable libc/test/src/string/libc.test.src.string.strxfrm_test.__hermetic__.__build__
[2434/2687] Linking CXX executable libc/test/src/inttypes/libc.test.src.inttypes.strtoimax_test.__hermetic__.__build__
[2435/2687] Linking CXX executable libc/test/include/libc.test.include.isnan_test.__hermetic__.__build__
[2436/2687] Linking CXX executable libc/test/include/libc.test.include.isinfl_test.__hermetic__.__build__
[2437/2687] Linking CXX executable libc/test/include/libc.test.include.isfinitef_test.__hermetic__.__build__
[2438/2687] Linking CXX executable libc/test/include/libc.test.include.isnanl_test.__hermetic__.__build__
[2439/2687] Linking CXX executable libc/test/include/libc.test.include.isnanf_test.__hermetic__.__build__
[2440/2687] Linking CXX executable libc/test/include/libc.test.include.isinf_test.__hermetic__.__build__
[2441/2687] Linking CXX executable libc/test/include/libc.test.include.isfinitel_test.__hermetic__.__build__
[2442/2687] Linking CXX executable libc/test/include/libc.test.include.isinff_test.__hermetic__.__build__
[2443/2687] Linking CXX executable libc/test/src/time/libc.test.src.time.clock_gettime_test.__hermetic__.__build__
[2444/2687] Linking CXX executable libc/test/include/libc.test.include.isfinite_test.__hermetic__.__build__
[2445/2687] Linking CXX executable libc/test/src/stdio/libc.test.src.stdio.asprintf_test.__hermetic__.__build__

PhilippRados pushed a commit to PhilippRados/llvm-project that referenced this pull request Nov 6, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU, clang:codegen, clang (Clang issues not falling into any other category)
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants