-
Notifications
You must be signed in to change notification settings - Fork 14.1k
[NFC][AMDGPU] Make AMDGPUSplitModule a ModulePass #95773
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Making AMDGPUSplitModule a ModulePass allows it to access TTI correctly, and opens the door to accessing more analyses in the future. I went back and forth between this and also making the default SplitModule a pass to keep things uniform, but I decided against it because it would just be a needless complication. Neither llvm-split nor LTOBackend has a pass manager (PM) ready to use, so we need to create one anyway. Let's keep all the mess hidden in the AMDGPU version for now to keep this change more self-contained.
@llvm/pr-subscribers-backend-amdgpu — Author: Pierre van Houtryve (Pierre-vh). Changes: Making AMDGPUSplitModule a ModulePass allows it to access TTI correctly, and opens the door to accessing more analyses in the future. I went back and forth between this and also making the default SplitModule a pass to keep things uniform, but I decided against it because it would just be a needless complication. Neither llvm-split nor LTOBackend has a pass manager (PM) ready to use, so we need to create one anyway. Let's keep all the mess hidden in the AMDGPU version for now to keep this change more self-contained. Full diff: https://github.com/llvm/llvm-project/pull/95773.diff 5 Files Affected:
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index 1ba99730ca702..e72045391bf31 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -431,7 +431,7 @@ class TargetMachine {
/// and \p M has not been modified.
virtual bool splitModule(
Module &M, unsigned NumParts,
- function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) const {
+ function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) {
return false;
}
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
index 2449fa581842a..3033b7f58f1a2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
@@ -98,6 +98,7 @@ static cl::opt<bool>
using CostType = InstructionCost::CostType;
using PartitionID = unsigned;
+using GetTTIFn = function_ref<const TargetTransformInfo &(Function &)>;
static bool isEntryPoint(const Function *F) {
return AMDGPU::isEntryFunctionCC(F->getCallingConv());
@@ -214,13 +215,12 @@ static SplitModuleLogger &operator<<(SplitModuleLogger &SML, const Ty &Val) {
/// Calculate the cost of each function in \p M
/// \param SML Log Helper
-/// \param TM TargetMachine instance used to retrieve TargetTransformInfo.
+/// \param GetTTI Abstract getter for TargetTransformInfo.
/// \param M Module to analyze.
/// \param CostMap[out] Resulting Function -> Cost map.
/// \return The module's total cost.
static CostType
-calculateFunctionCosts(SplitModuleLogger &SML, const AMDGPUTargetMachine &TM,
- Module &M,
+calculateFunctionCosts(SplitModuleLogger &SML, GetTTIFn GetTTI, Module &M,
DenseMap<const Function *, CostType> &CostMap) {
CostType ModuleCost = 0;
CostType KernelCost = 0;
@@ -230,8 +230,7 @@ calculateFunctionCosts(SplitModuleLogger &SML, const AMDGPUTargetMachine &TM,
continue;
CostType FnCost = 0;
- TargetTransformInfo TTI = TM.getTargetTransformInfo(Fn);
-
+ const auto &TTI = GetTTI(Fn);
for (const auto &BB : Fn) {
for (const auto &I : BB) {
auto Cost =
@@ -438,8 +437,9 @@ doPartitioning(SplitModuleLogger &SML, Module &M, unsigned NumParts,
// assign X to a partition as usual, but when we get to Y, we check if it's
// worth also putting it in Y's partition.
const CostType LargeKernelThreshold =
- LargeKernelFactor ? CostType(((ModuleCost / NumParts) * LargeKernelFactor))
- : std::numeric_limits<CostType>::max();
+ LargeKernelFactor
+ ? CostType(((ModuleCost / NumParts) * LargeKernelFactor))
+ : std::numeric_limits<CostType>::max();
std::vector<DenseSet<const Function *>> Partitions;
Partitions.resize(NumParts);
@@ -604,10 +604,9 @@ static void externalize(GlobalValue &GV) {
if (!GV.hasName())
GV.setName("__llvmsplit_unnamed");
}
-} // end anonymous namespace
-void llvm::splitAMDGPUModule(
- const AMDGPUTargetMachine &TM, Module &M, unsigned N,
+static void splitAMDGPUModule(
+ GetTTIFn GetTTI, Module &M, unsigned N,
function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) {
SplitModuleLogger SML(M);
@@ -648,7 +647,7 @@ void llvm::splitAMDGPUModule(
// Start by calculating the cost of every function in the module, as well as
// the module's overall cost.
DenseMap<const Function *, CostType> FnCosts;
- const CostType ModuleCost = calculateFunctionCosts(SML, TM, M, FnCosts);
+ const CostType ModuleCost = calculateFunctionCosts(SML, GetTTI, M, FnCosts);
// Gather every kernel into a WorkList, then sort it by descending total cost
// of the kernel so the biggest kernels are seen first.
@@ -742,3 +741,16 @@ void llvm::splitAMDGPUModule(
<< format("%0.2f", (float(TotalFnImpls) / FnCosts.size()) * 100)
<< "% of original module)\n";
}
+} // namespace
+
+PreservedAnalyses AMDGPUSplitModulePass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ FunctionAnalysisManager &FAM =
+ MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ const auto TTIGetter = [&FAM](Function &F) -> const TargetTransformInfo & {
+ return FAM.getResult<TargetIRAnalysis>(F);
+ };
+ splitAMDGPUModule(TTIGetter, M, N, ModuleCallback);
+ // We don't change the original module.
+ return PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.h b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.h
index 6171643bd4adc..d814dedd6f0c4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.h
@@ -12,18 +12,27 @@
#define LLVM_TARGET_AMDGPUSPLITMODULE_H
#include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/IR/PassManager.h"
#include <memory>
namespace llvm {
-class Module;
-class AMDGPUTargetMachine;
-
/// Splits the module M into N linkable partitions. The function ModuleCallback
/// is called N times passing each individual partition as the MPart argument.
-void splitAMDGPUModule(
- const AMDGPUTargetMachine &TM, Module &M, unsigned N,
- function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback);
+class AMDGPUSplitModulePass : public PassInfoMixin<AMDGPUSplitModulePass> {
+public:
+ using ModuleCreationCallback =
+ function_ref<void(std::unique_ptr<Module> MPart)>;
+
+ AMDGPUSplitModulePass(unsigned N, ModuleCreationCallback ModuleCallback)
+ : N(N), ModuleCallback(ModuleCallback) {}
+
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+
+private:
+ unsigned N;
+ ModuleCreationCallback ModuleCallback;
+};
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index ce997c659094a..3e21d8ee2e2a0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -829,8 +829,24 @@ AMDGPUTargetMachine::getAddressSpaceForPseudoSourceKind(unsigned Kind) const {
bool AMDGPUTargetMachine::splitModule(
Module &M, unsigned NumParts,
- function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) const {
- splitAMDGPUModule(*this, M, NumParts, ModuleCallback);
+ function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) {
+ // FIXME(?): Would be better to use an already existing Analysis/PassManager,
+ // but all current users of this API don't have one ready and would need to
+ // create one anyway. Let's hide the boilerplate for now to keep it simple.
+
+ LoopAnalysisManager LAM;
+ FunctionAnalysisManager FAM;
+ CGSCCAnalysisManager CGAM;
+ ModuleAnalysisManager MAM;
+
+ PassBuilder PB(this);
+ PB.registerModuleAnalyses(MAM);
+ PB.registerFunctionAnalyses(FAM);
+ PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
+
+ ModulePassManager MPM;
+ MPM.addPass(AMDGPUSplitModulePass(NumParts, ModuleCallback));
+ MPM.run(M, MAM);
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 2cfd232483a8a..98b0bc034b5be 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -76,7 +76,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
bool splitModule(Module &M, unsigned NumParts,
function_ref<void(std::unique_ptr<Module> MPart)>
- ModuleCallback) const override;
+ ModuleCallback) override;
};
//===----------------------------------------------------------------------===//
|
Making AMDGPUSplitModule a ModulePass allows it to access TTI correctly, and opens the door to accessing more analyses in the future.
I went back and forth between this and also making the default SplitModule a pass to keep things uniform, but I decided against it because it would just be a needless complication. Neither llvm-split nor LTOBackend has a pass manager (PM) ready to use, so we need to create one anyway. Let's keep all the mess hidden in the AMDGPU version for now to keep this change more self-contained.