Skip to content

Commit d95b82c

Browse files
authored
[NFC][AMDGPU] Make AMDGPUSplitModule a ModulePass (#95773)
This change lets AMDGPUSplitModule access TargetTransformInfo (TTI) correctly, and opens the door to accessing more analyses in the future. I went back and forth between this approach and also making the default SplitModule a pass for uniformity, but I decided against the latter because it would only add needless complication. Neither llvm-split nor LTOBackend has a pass manager (PM) ready to use, so we need to create one anyway. Let's keep all the mess hidden in the AMDGPU version for now to keep this change more self-contained.
1 parent ae0813f commit d95b82c

File tree

5 files changed

+58
-21
lines changed

5 files changed

+58
-21
lines changed

llvm/include/llvm/Target/TargetMachine.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,7 @@ class TargetMachine {
431431
/// and \p M has not been modified.
432432
virtual bool splitModule(
433433
Module &M, unsigned NumParts,
434-
function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) const {
434+
function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) {
435435
return false;
436436
}
437437
};

llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ static cl::opt<bool>
9898

9999
using CostType = InstructionCost::CostType;
100100
using PartitionID = unsigned;
101+
using GetTTIFn = function_ref<const TargetTransformInfo &(Function &)>;
101102

102103
static bool isEntryPoint(const Function *F) {
103104
return AMDGPU::isEntryFunctionCC(F->getCallingConv());
@@ -214,13 +215,12 @@ static SplitModuleLogger &operator<<(SplitModuleLogger &SML, const Ty &Val) {
214215

215216
/// Calculate the cost of each function in \p M
216217
/// \param SML Log Helper
217-
/// \param TM TargetMachine instance used to retrieve TargetTransformInfo.
218+
/// \param GetTTI Abstract getter for TargetTransformInfo.
218219
/// \param M Module to analyze.
219220
/// \param CostMap[out] Resulting Function -> Cost map.
220221
/// \return The module's total cost.
221222
static CostType
222-
calculateFunctionCosts(SplitModuleLogger &SML, const AMDGPUTargetMachine &TM,
223-
Module &M,
223+
calculateFunctionCosts(SplitModuleLogger &SML, GetTTIFn GetTTI, Module &M,
224224
DenseMap<const Function *, CostType> &CostMap) {
225225
CostType ModuleCost = 0;
226226
CostType KernelCost = 0;
@@ -230,8 +230,7 @@ calculateFunctionCosts(SplitModuleLogger &SML, const AMDGPUTargetMachine &TM,
230230
continue;
231231

232232
CostType FnCost = 0;
233-
TargetTransformInfo TTI = TM.getTargetTransformInfo(Fn);
234-
233+
const auto &TTI = GetTTI(Fn);
235234
for (const auto &BB : Fn) {
236235
for (const auto &I : BB) {
237236
auto Cost =
@@ -438,8 +437,9 @@ doPartitioning(SplitModuleLogger &SML, Module &M, unsigned NumParts,
438437
// assign X to a partition as usual, but when we get to Y, we check if it's
439438
// worth also putting it in Y's partition.
440439
const CostType LargeKernelThreshold =
441-
LargeKernelFactor ? CostType(((ModuleCost / NumParts) * LargeKernelFactor))
442-
: std::numeric_limits<CostType>::max();
440+
LargeKernelFactor
441+
? CostType(((ModuleCost / NumParts) * LargeKernelFactor))
442+
: std::numeric_limits<CostType>::max();
443443

444444
std::vector<DenseSet<const Function *>> Partitions;
445445
Partitions.resize(NumParts);
@@ -604,10 +604,9 @@ static void externalize(GlobalValue &GV) {
604604
if (!GV.hasName())
605605
GV.setName("__llvmsplit_unnamed");
606606
}
607-
} // end anonymous namespace
608607

609-
void llvm::splitAMDGPUModule(
610-
const AMDGPUTargetMachine &TM, Module &M, unsigned N,
608+
static void splitAMDGPUModule(
609+
GetTTIFn GetTTI, Module &M, unsigned N,
611610
function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) {
612611

613612
SplitModuleLogger SML(M);
@@ -648,7 +647,7 @@ void llvm::splitAMDGPUModule(
648647
// Start by calculating the cost of every function in the module, as well as
649648
// the module's overall cost.
650649
DenseMap<const Function *, CostType> FnCosts;
651-
const CostType ModuleCost = calculateFunctionCosts(SML, TM, M, FnCosts);
650+
const CostType ModuleCost = calculateFunctionCosts(SML, GetTTI, M, FnCosts);
652651

653652
// Gather every kernel into a WorkList, then sort it by descending total cost
654653
// of the kernel so the biggest kernels are seen first.
@@ -742,3 +741,16 @@ void llvm::splitAMDGPUModule(
742741
<< format("%0.2f", (float(TotalFnImpls) / FnCosts.size()) * 100)
743742
<< "% of original module)\n";
744743
}
744+
} // namespace
745+
746+
PreservedAnalyses AMDGPUSplitModulePass::run(Module &M,
747+
ModuleAnalysisManager &MAM) {
748+
FunctionAnalysisManager &FAM =
749+
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
750+
const auto TTIGetter = [&FAM](Function &F) -> const TargetTransformInfo & {
751+
return FAM.getResult<TargetIRAnalysis>(F);
752+
};
753+
splitAMDGPUModule(TTIGetter, M, N, ModuleCallback);
754+
// We don't change the original module.
755+
return PreservedAnalyses::all();
756+
}

llvm/lib/Target/AMDGPU/AMDGPUSplitModule.h

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,27 @@
1212
#define LLVM_TARGET_AMDGPUSPLITMODULE_H
1313

1414
#include "llvm/ADT/STLFunctionalExtras.h"
15+
#include "llvm/IR/PassManager.h"
1516
#include <memory>
1617

1718
namespace llvm {
1819

19-
class Module;
20-
class AMDGPUTargetMachine;
21-
2220
/// Splits the module M into N linkable partitions. The function ModuleCallback
2321
/// is called N times passing each individual partition as the MPart argument.
24-
void splitAMDGPUModule(
25-
const AMDGPUTargetMachine &TM, Module &M, unsigned N,
26-
function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback);
22+
class AMDGPUSplitModulePass : public PassInfoMixin<AMDGPUSplitModulePass> {
23+
public:
24+
using ModuleCreationCallback =
25+
function_ref<void(std::unique_ptr<Module> MPart)>;
26+
27+
AMDGPUSplitModulePass(unsigned N, ModuleCreationCallback ModuleCallback)
28+
: N(N), ModuleCallback(ModuleCallback) {}
29+
30+
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
31+
32+
private:
33+
unsigned N;
34+
ModuleCreationCallback ModuleCallback;
35+
};
2736

2837
} // end namespace llvm
2938

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -829,8 +829,24 @@ AMDGPUTargetMachine::getAddressSpaceForPseudoSourceKind(unsigned Kind) const {
829829

830830
bool AMDGPUTargetMachine::splitModule(
831831
Module &M, unsigned NumParts,
832-
function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) const {
833-
splitAMDGPUModule(*this, M, NumParts, ModuleCallback);
832+
function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) {
833+
// FIXME(?): Would be better to use an already existing Analysis/PassManager,
834+
// but all current users of this API don't have one ready and would need to
835+
// create one anyway. Let's hide the boilerplate for now to keep it simple.
836+
837+
LoopAnalysisManager LAM;
838+
FunctionAnalysisManager FAM;
839+
CGSCCAnalysisManager CGAM;
840+
ModuleAnalysisManager MAM;
841+
842+
PassBuilder PB(this);
843+
PB.registerModuleAnalyses(MAM);
844+
PB.registerFunctionAnalyses(FAM);
845+
PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
846+
847+
ModulePassManager MPM;
848+
MPM.addPass(AMDGPUSplitModulePass(NumParts, ModuleCallback));
849+
MPM.run(M, MAM);
834850
return true;
835851
}
836852

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
7676

7777
bool splitModule(Module &M, unsigned NumParts,
7878
function_ref<void(std::unique_ptr<Module> MPart)>
79-
ModuleCallback) const override;
79+
ModuleCallback) override;
8080
};
8181

8282
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)