Skip to content

Conversation

shiltian
Copy link
Contributor

@shiltian shiltian commented Nov 1, 2024

No description provided.

@llvmbot llvmbot added labels Nov 1, 2024: clang (Clang issues not falling into any other category); backend:AMDGPU; clang:codegen (IR generation bugs: mangling, exceptions, etc.)
Copy link
Contributor Author

shiltian commented Nov 1, 2024

@llvmbot
Copy link
Member

llvmbot commented Nov 1, 2024

@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-clang

@llvm/pr-subscribers-clang-codegen

Author: Shilei Tian (shiltian)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/114577.diff

6 Files Affected:

  • (modified) clang/lib/CodeGen/BackendUtil.cpp (+12-10)
  • (modified) llvm/include/llvm/Passes/PassBuilder.h (+14-6)
  • (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+14-10)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+10-6)
  • (modified) llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll (+1)
  • (modified) llvm/tools/opt/NewPMDriver.cpp (+2-2)
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 47a30f00612eb7..70035a5e069a90 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -674,7 +674,7 @@ static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts,
 
   // Ensure we lower KCFI operand bundles with -O0.
   PB.registerOptimizerLastEPCallback(
-      [&](ModulePassManager &MPM, OptimizationLevel Level) {
+      [&](ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase) {
         if (Level == OptimizationLevel::O0 &&
             LangOpts.Sanitize.has(SanitizerKind::KCFI))
           MPM.addPass(createModuleToFunctionPassAdaptor(KCFIPass()));
@@ -693,8 +693,8 @@ static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts,
 static void addSanitizers(const Triple &TargetTriple,
                           const CodeGenOptions &CodeGenOpts,
                           const LangOptions &LangOpts, PassBuilder &PB) {
-  auto SanitizersCallback = [&](ModulePassManager &MPM,
-                                OptimizationLevel Level) {
+  auto SanitizersCallback = [&](ModulePassManager &MPM, OptimizationLevel Level,
+                                ThinOrFullLTOPhase) {
     if (CodeGenOpts.hasSanitizeCoverage()) {
       auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts);
       MPM.addPass(SanitizerCoveragePass(
@@ -778,9 +778,10 @@ static void addSanitizers(const Triple &TargetTriple,
   };
   if (ClSanitizeOnOptimizerEarlyEP) {
     PB.registerOptimizerEarlyEPCallback(
-        [SanitizersCallback](ModulePassManager &MPM, OptimizationLevel Level) {
+        [SanitizersCallback](ModulePassManager &MPM, OptimizationLevel Level,
+                             ThinOrFullLTOPhase Phase) {
           ModulePassManager NewMPM;
-          SanitizersCallback(NewMPM, Level);
+          SanitizersCallback(NewMPM, Level, Phase);
           if (!NewMPM.isEmpty()) {
             // Sanitizers can abandon<GlobalsAA>.
             NewMPM.addPass(RequireAnalysisPass<GlobalsAA, llvm::Module>());
@@ -1058,11 +1059,12 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
     // TODO: Consider passing the MemoryProfileOutput to the pass builder via
     // the PGOOptions, and set this up there.
     if (!CodeGenOpts.MemoryProfileOutput.empty()) {
-      PB.registerOptimizerLastEPCallback(
-          [](ModulePassManager &MPM, OptimizationLevel Level) {
-            MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
-            MPM.addPass(ModuleMemProfilerPass());
-          });
+      PB.registerOptimizerLastEPCallback([](ModulePassManager &MPM,
+                                            OptimizationLevel Level,
+                                            ThinOrFullLTOPhase) {
+        MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
+        MPM.addPass(ModuleMemProfilerPass());
+      });
     }
 
     if (CodeGenOpts.FatLTO) {
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index 565fd2ab2147e5..e7bc3a58f414f1 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -490,7 +490,8 @@ class PassBuilder {
   /// This extension point allows adding optimizations before the function
   /// optimization pipeline.
   void registerOptimizerEarlyEPCallback(
-      const std::function<void(ModulePassManager &, OptimizationLevel)> &C) {
+      const std::function<void(ModulePassManager &, OptimizationLevel,
+                               ThinOrFullLTOPhase Phase)> &C) {
     OptimizerEarlyEPCallbacks.push_back(C);
   }
 
@@ -499,7 +500,8 @@ class PassBuilder {
   /// This extension point allows adding optimizations at the very end of the
   /// function optimization pipeline.
   void registerOptimizerLastEPCallback(
-      const std::function<void(ModulePassManager &, OptimizationLevel)> &C) {
+      const std::function<void(ModulePassManager &, OptimizationLevel,
+                               ThinOrFullLTOPhase)> &C) {
     OptimizerLastEPCallbacks.push_back(C);
   }
 
@@ -630,9 +632,11 @@ class PassBuilder {
   void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
                                         OptimizationLevel Level);
   void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
-                                       OptimizationLevel Level);
+                                       OptimizationLevel Level,
+                                       ThinOrFullLTOPhase Phase);
   void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
-                                      OptimizationLevel Level);
+                                      OptimizationLevel Level,
+                                      ThinOrFullLTOPhase Phase);
   void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM,
                                                       OptimizationLevel Level);
   void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM,
@@ -756,9 +760,13 @@ class PassBuilder {
   SmallVector<std::function<void(FunctionPassManager &, OptimizationLevel)>, 2>
       VectorizerStartEPCallbacks;
   // Module callbacks
-  SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
+  SmallVector<std::function<void(ModulePassManager &, OptimizationLevel,
+                                 ThinOrFullLTOPhase)>,
+              2>
       OptimizerEarlyEPCallbacks;
-  SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
+  SmallVector<std::function<void(ModulePassManager &, OptimizationLevel,
+                                 ThinOrFullLTOPhase)>,
+              2>
       OptimizerLastEPCallbacks;
   SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
       FullLinkTimeOptimizationEarlyEPCallbacks;
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 9c90accd9c376b..16fe9a74bb9c0d 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -359,14 +359,16 @@ void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
     C(FPM, Level);
 }
 void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
-                                                  OptimizationLevel Level) {
+                                                  OptimizationLevel Level,
+                                                  ThinOrFullLTOPhase Phase) {
   for (auto &C : OptimizerEarlyEPCallbacks)
-    C(MPM, Level);
+    C(MPM, Level, Phase);
 }
 void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
-                                                 OptimizationLevel Level) {
+                                                 OptimizationLevel Level,
+                                                 ThinOrFullLTOPhase Phase) {
   for (auto &C : OptimizerLastEPCallbacks)
-    C(MPM, Level);
+    C(MPM, Level, Phase);
 }
 void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks(
     ModulePassManager &MPM, OptimizationLevel Level) {
@@ -1464,7 +1466,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   if (EnableGlobalAnalyses)
     MPM.addPass(RecomputeGlobalsAAPass());
 
-  invokeOptimizerEarlyEPCallbacks(MPM, Level);
+  invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
 
   FunctionPassManager OptimizePM;
   // Scheduling LoopVersioningLICM when inlining is over, because after that
@@ -1559,7 +1561,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
                                                 PTO.EagerlyInvalidateAnalyses));
 
-  invokeOptimizerLastEPCallbacks(MPM, Level);
+  invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
 
   // Split out cold code. Splitting is done late to avoid hiding context from
   // other optimizations and inadvertently regressing performance. The tradeoff
@@ -1716,8 +1718,10 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
   // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
   // optimization is going to be done in PostLink stage, but clang can't add
   // callbacks there in case of in-process ThinLTO called by linker.
-  invokeOptimizerEarlyEPCallbacks(MPM, Level);
-  invokeOptimizerLastEPCallbacks(MPM, Level);
+  invokeOptimizerEarlyEPCallbacks(MPM, Level,
+                                  /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink);
+  invokeOptimizerLastEPCallbacks(MPM, Level,
+                                 /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink);
 
   // Emit annotation remarks.
   addAnnotationRemarksPass(MPM);
@@ -2198,7 +2202,7 @@ PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
       MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
   }
 
-  invokeOptimizerEarlyEPCallbacks(MPM, Level);
+  invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase);
 
   if (!VectorizerStartEPCallbacks.empty()) {
     FunctionPassManager FPM;
@@ -2216,7 +2220,7 @@ PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
   CoroPM.addPass(GlobalDCEPass());
   MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));
 
-  invokeOptimizerLastEPCallbacks(MPM, Level);
+  invokeOptimizerLastEPCallbacks(MPM, Level, Phase);
 
   if (isLTOPreLink(Phase))
     addRequiredLTOPreLinkPasses(MPM);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 5492d0e589973a..2b02f4527e23fb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -809,12 +809,16 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
       });
 
   // FIXME: Why is AMDGPUAttributor not in CGSCC?
-  PB.registerOptimizerLastEPCallback(
-      [this](ModulePassManager &MPM, OptimizationLevel Level) {
-        if (Level != OptimizationLevel::O0) {
-          MPM.addPass(AMDGPUAttributorPass(*this));
-        }
-      });
+  PB.registerOptimizerLastEPCallback([this](ModulePassManager &MPM,
+                                            OptimizationLevel Level,
+                                            ThinOrFullLTOPhase Phase) {
+    if (Level != OptimizationLevel::O0) {
+      bool LTOPreLink = Phase == ThinOrFullLTOPhase::FullLTOPreLink ||
+                        Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
+      if (!LTOPreLink)
+        MPM.addPass(AMDGPUAttributorPass(*this));
+    }
+  });
 
   PB.registerFullLinkTimeOptimizationLastEPCallback(
       [this](ModulePassManager &PM, OptimizationLevel Level) {
diff --git a/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll b/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll
index 13e38f1bdd3330..c68143f44866f3 100644
--- a/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll
+++ b/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll
@@ -13,6 +13,7 @@
 ; O0-NOT: amdgpu-attributor
 
 ; PRE-NOT: internalize
+; PRE-NOT: amdgpu-attributor
 
 define amdgpu_kernel void @kernel() {
 entry:
diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp
index 3f1092433d9f31..ea300fb3dbeb13 100644
--- a/llvm/tools/opt/NewPMDriver.cpp
+++ b/llvm/tools/opt/NewPMDriver.cpp
@@ -300,13 +300,13 @@ static void registerEPCallbacks(PassBuilder &PB) {
         });
   if (tryParsePipelineText<ModulePassManager>(PB, OptimizerEarlyEPPipeline))
     PB.registerOptimizerEarlyEPCallback(
-        [&PB](ModulePassManager &PM, OptimizationLevel) {
+        [&PB](ModulePassManager &PM, OptimizationLevel, ThinOrFullLTOPhase) {
           ExitOnError Err("Unable to parse OptimizerEarlyEP pipeline: ");
           Err(PB.parsePassPipeline(PM, OptimizerEarlyEPPipeline));
         });
   if (tryParsePipelineText<ModulePassManager>(PB, OptimizerLastEPPipeline))
     PB.registerOptimizerLastEPCallback(
-        [&PB](ModulePassManager &PM, OptimizationLevel) {
+        [&PB](ModulePassManager &PM, OptimizationLevel, ThinOrFullLTOPhase) {
           ExitOnError Err("Unable to parse OptimizerLastEP pipeline: ");
           Err(PB.parsePassPipeline(PM, OptimizerLastEPPipeline));
         });

@shiltian shiltian force-pushed the users/shiltian/lto-pre-link-bool branch from 912283a to 56d807b Compare November 1, 2024 19:17
@shiltian shiltian force-pushed the users/shiltian/no-amdgpu-attributor-in-pre-link branch from dc94afc to 488643c Compare November 1, 2024 19:17
@shiltian shiltian force-pushed the users/shiltian/lto-pre-link-bool branch from 56d807b to 3da52a0 Compare November 4, 2024 02:58
@shiltian shiltian force-pushed the users/shiltian/no-amdgpu-attributor-in-pre-link branch from 488643c to a931d1a Compare November 4, 2024 02:58
Base automatically changed from users/shiltian/lto-pre-link-bool to main November 4, 2024 04:24
@shiltian shiltian force-pushed the users/shiltian/no-amdgpu-attributor-in-pre-link branch from a931d1a to 76d44f7 Compare November 4, 2024 04:25
@shiltian shiltian merged commit 390300d into main Nov 4, 2024
5 of 8 checks passed
@shiltian shiltian deleted the users/shiltian/no-amdgpu-attributor-in-pre-link branch November 4, 2024 04:25
@llvm-ci
Copy link
Collaborator

llvm-ci commented Nov 4, 2024

LLVM Buildbot has detected a new failure on builder openmp-offload-libc-amdgpu-runtime, running on omp-vega20-1, while building clang and llvm, at step 11 "Add check check-libc-amdgcn-amd-amdhsa".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/7977

Here is the relevant piece of the build log for reference:
Step 11 (Add check check-libc-amdgcn-amd-amdhsa) failure: test (failure)
...
[       OK ] LlvmLibcAcosfTest.SpecialNumbers (6 us)
Ran 1 tests.  PASS: 1  FAIL: 0
[2410/2687] Linking CXX executable libc/test/src/string/libc.test.src.string.strcpy_test.__hermetic__.__build__
[2411/2687] Running hermetic test libc.test.src.math.smoke.atanf_test.__hermetic__
[==========] Running 1 test from 1 test suite.
[ RUN      ] LlvmLibcAtanfTest.SpecialNumbers
[       OK ] LlvmLibcAtanfTest.SpecialNumbers (3 us)
Ran 1 tests.  PASS: 1  FAIL: 0
[2412/2687] Linking CXX executable libc/test/src/inttypes/libc.test.src.inttypes.imaxdiv_test.__hermetic__.__build__
[2413/2687] Running integration test libc.test.integration.src.stdio.sprintf_size_test
FAILED: libc/test/integration/src/stdio/CMakeFiles/libc.test.integration.src.stdio.sprintf_size_test /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-amdgcn-amd-amdhsa-bins/libc/test/integration/src/stdio/CMakeFiles/libc.test.integration.src.stdio.sprintf_size_test 
cd /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-amdgcn-amd-amdhsa-bins/libc/test/integration/src/stdio && /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/bin/amdhsa-loader /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/runtimes/runtimes-amdgcn-amd-amdhsa-bins/libc/test/integration/src/stdio/libc.test.integration.src.stdio.sprintf_size_test.__build__ %s\ %c\ %d First\ arg a 0
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/test/integration/src/stdio/sprintf_size_test.cpp:57: Expected 'my_streq(buf, "First arg a 48")' to be true, but is false
[2414/2687] Running integration test libc.test.integration.src.stdio.gpu.printf_test
[2415/2687] Running hermetic test libc.test.src.math.smoke.atan2f_test.__hermetic__
[==========] Running 1 test from 1 test suite.
[ RUN      ] LlvmLibcAtan2fTest.SpecialNumbers
[       OK ] LlvmLibcAtan2fTest.SpecialNumbers (8 us)
Ran 1 tests.  PASS: 1  FAIL: 0
[2416/2687] Linking CXX executable libc/test/src/string/libc.test.src.string.strrchr_test.__hermetic__.__build__
[2417/2687] Linking CXX executable libc/test/src/string/libc.test.src.string.strsep_test.__hermetic__.__build__
[2418/2687] Linking CXX executable libc/test/src/string/libc.test.src.string.bcmp_test.__hermetic__.__build__
[2419/2687] Linking CXX executable libc/test/include/libc.test.include.signbitf_test.__hermetic__.__build__
[2420/2687] Linking CXX executable libc/test/include/libc.test.include.signbit_test.__hermetic__.__build__
[2421/2687] Linking CXX executable libc/test/src/string/libc.test.src.string.strncat_test.__hermetic__.__build__
[2422/2687] Linking CXX executable libc/test/include/libc.test.include.signbitl_test.__hermetic__.__build__
[2423/2687] Linking CXX executable libc/test/include/libc.test.include.fpclassify_test.__hermetic__.__build__
[2424/2687] Linking CXX executable libc/test/src/string/libc.test.src.string.memcmp_test.__hermetic__.__build__
[2425/2687] Linking CXX executable libc/test/include/libc.test.include.iszerof_test.__hermetic__.__build__
[2426/2687] Linking CXX executable libc/test/include/libc.test.include.iszero_test.__hermetic__.__build__
[2427/2687] Linking CXX executable libc/test/src/string/libc.test.src.string.strlcat_test.__hermetic__.__build__
[2428/2687] Linking CXX executable libc/test/include/libc.test.include.fpclassifyl_test.__hermetic__.__build__
[2429/2687] Linking CXX executable libc/test/include/libc.test.include.fpclassifyf_test.__hermetic__.__build__
[2430/2687] Linking CXX executable libc/test/src/time/libc.test.src.time.clock_test.__hermetic__.__build__
[2431/2687] Linking CXX executable libc/test/include/libc.test.include.iszerol_test.__hermetic__.__build__
[2432/2687] Linking CXX executable libc/test/src/inttypes/libc.test.src.inttypes.strtoumax_test.__hermetic__.__build__
[2433/2687] Linking CXX executable libc/test/src/string/libc.test.src.string.strxfrm_test.__hermetic__.__build__
[2434/2687] Linking CXX executable libc/test/src/inttypes/libc.test.src.inttypes.strtoimax_test.__hermetic__.__build__
[2435/2687] Linking CXX executable libc/test/include/libc.test.include.isnan_test.__hermetic__.__build__
[2436/2687] Linking CXX executable libc/test/include/libc.test.include.isinfl_test.__hermetic__.__build__
[2437/2687] Linking CXX executable libc/test/include/libc.test.include.isfinitef_test.__hermetic__.__build__
[2438/2687] Linking CXX executable libc/test/include/libc.test.include.isnanl_test.__hermetic__.__build__
[2439/2687] Linking CXX executable libc/test/include/libc.test.include.isnanf_test.__hermetic__.__build__
[2440/2687] Linking CXX executable libc/test/include/libc.test.include.isinf_test.__hermetic__.__build__
[2441/2687] Linking CXX executable libc/test/include/libc.test.include.isfinitel_test.__hermetic__.__build__
[2442/2687] Linking CXX executable libc/test/include/libc.test.include.isinff_test.__hermetic__.__build__
[2443/2687] Linking CXX executable libc/test/src/time/libc.test.src.time.clock_gettime_test.__hermetic__.__build__
[2444/2687] Linking CXX executable libc/test/include/libc.test.include.isfinite_test.__hermetic__.__build__
[2445/2687] Linking CXX executable libc/test/src/stdio/libc.test.src.stdio.asprintf_test.__hermetic__.__build__

PhilippRados pushed a commit to PhilippRados/llvm-project that referenced this pull request Nov 6, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU; clang:codegen (IR generation bugs: mangling, exceptions, etc.); clang (Clang issues not falling into any other category)
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants