@@ -98,6 +98,7 @@ static cl::opt<bool>
98
98
99
99
// Cost value type, taken from InstructionCost; used in this file for
// per-function and whole-module costs.
using CostType = InstructionCost::CostType;
100
100
// Unsigned alias; presumably identifies a partition (no use is visible in this
// excerpt -- confirm against the partitioning code).
using PartitionID = unsigned ;
101
// Callable reference that takes a Function and returns a const reference to
// that function's TargetTransformInfo.
+ using GetTTIFn = function_ref<const TargetTransformInfo &(Function &)>;
101
102
102
103
static bool isEntryPoint (const Function *F) {
103
104
return AMDGPU::isEntryFunctionCC (F->getCallingConv ());
@@ -214,13 +215,12 @@ static SplitModuleLogger &operator<<(SplitModuleLogger &SML, const Ty &Val) {
214
215
215
216
// / Calculate the cost of each function in \p M
216
217
// / \param SML Log Helper
217
- // / \param TM TargetMachine instance used to retrieve TargetTransformInfo.
218
+ // / \param GetTTI Abstract getter for TargetTransformInfo.
218
219
// / \param M Module to analyze.
219
220
// / \param [out] CostMap Resulting Function -> Cost map.
220
221
// / \return The module's total cost.
221
222
static CostType
222
- calculateFunctionCosts (SplitModuleLogger &SML, const AMDGPUTargetMachine &TM,
223
- Module &M,
223
+ calculateFunctionCosts (SplitModuleLogger &SML, GetTTIFn GetTTI, Module &M,
224
224
DenseMap<const Function *, CostType> &CostMap) {
225
225
CostType ModuleCost = 0 ;
226
226
CostType KernelCost = 0 ;
@@ -230,8 +230,7 @@ calculateFunctionCosts(SplitModuleLogger &SML, const AMDGPUTargetMachine &TM,
230
230
continue ;
231
231
232
232
CostType FnCost = 0 ;
233
- TargetTransformInfo TTI = TM.getTargetTransformInfo (Fn);
234
-
233
+ const auto &TTI = GetTTI (Fn);
235
234
for (const auto &BB : Fn) {
236
235
for (const auto &I : BB) {
237
236
auto Cost =
@@ -438,8 +437,9 @@ doPartitioning(SplitModuleLogger &SML, Module &M, unsigned NumParts,
438
437
// assign X to a partition as usual, but when we get to Y, we check if it's
439
438
// worth also putting it in Y's partition.
440
439
const CostType LargeKernelThreshold =
441
- LargeKernelFactor ? CostType (((ModuleCost / NumParts) * LargeKernelFactor))
442
- : std::numeric_limits<CostType>::max ();
440
+ LargeKernelFactor
441
+ ? CostType (((ModuleCost / NumParts) * LargeKernelFactor))
442
+ : std::numeric_limits<CostType>::max ();
443
443
444
444
std::vector<DenseSet<const Function *>> Partitions;
445
445
Partitions.resize (NumParts);
@@ -604,10 +604,9 @@ static void externalize(GlobalValue &GV) {
604
604
if (!GV.hasName ())
605
605
GV.setName (" __llvmsplit_unnamed" );
606
606
}
607
- } // end anonymous namespace
608
607
609
- void llvm:: splitAMDGPUModule (
610
- const AMDGPUTargetMachine &TM , Module &M, unsigned N,
608
+ static void splitAMDGPUModule (
609
+ GetTTIFn GetTTI , Module &M, unsigned N,
611
610
function_ref<void (std::unique_ptr<Module> MPart)> ModuleCallback) {
612
611
613
612
SplitModuleLogger SML (M);
@@ -648,7 +647,7 @@ void llvm::splitAMDGPUModule(
648
647
// Start by calculating the cost of every function in the module, as well as
649
648
// the module's overall cost.
650
649
DenseMap<const Function *, CostType> FnCosts;
651
- const CostType ModuleCost = calculateFunctionCosts (SML, TM , M, FnCosts);
650
+ const CostType ModuleCost = calculateFunctionCosts (SML, GetTTI , M, FnCosts);
652
651
653
652
// Gather every kernel into a WorkList, then sort it by descending total cost
654
653
// of the kernel so the biggest kernels are seen first.
@@ -742,3 +741,16 @@ void llvm::splitAMDGPUModule(
742
741
<< format (" %0.2f" , (float (TotalFnImpls) / FnCosts.size ()) * 100 )
743
742
<< " % of original module)\n " ;
744
743
}
744
+ } // namespace
745
+
746
// Pass entry point: runs the module splitter on M.
//
// Fetches the FunctionAnalysisManager through the module-level proxy, wraps it
// in a per-function TargetTransformInfo getter, and forwards that getter along
// with M, N and ModuleCallback to splitAMDGPUModule().
//
// N and ModuleCallback are not declared inside this function; presumably they
// are members of AMDGPUSplitModulePass -- confirm in the pass header.
//
// Returns PreservedAnalyses::all() unconditionally.
+ PreservedAnalyses AMDGPUSplitModulePass::run (Module &M,
747
+ ModuleAnalysisManager &MAM) {
748
+ FunctionAnalysisManager &FAM =
749
+ MAM.getResult <FunctionAnalysisManagerModuleProxy>(M).getManager ();
750
// Adapter with the GetTTIFn shape: looks up TargetIRAnalysis for F via FAM,
// which is captured by reference.
+ const auto TTIGetter = [&FAM](Function &F) -> const TargetTransformInfo & {
751
+ return FAM.getResult <TargetIRAnalysis>(F);
752
+ };
753
+ splitAMDGPUModule (TTIGetter, M, N, ModuleCallback);
754
+ // We don't change the original module.
// NOTE(review): externalize() in this file calls setName() on GlobalValues;
// confirm it is never applied to globals owned by M, otherwise reporting
// all() here is too optimistic.
755
+ return PreservedAnalyses::all ();
756
+ }
0 commit comments