@@ -100,6 +100,10 @@ static cl::opt<bool>
100
100
using CostType = InstructionCost::CostType;
101
101
using PartitionID = unsigned ;
102
102
103
/// File-local helper: returns the result of AMDGPU::isEntryFunctionCC applied
/// to the calling convention of \p F. \p F is dereferenced unconditionally, so
/// callers must pass a non-null function.
+ static bool isEntryPoint (const Function *F) {
104
+ return AMDGPU::isEntryFunctionCC (F->getCallingConv ());
105
+ }
106
+
103
107
static std::string getName (const Value &V) {
104
108
static std::optional<bool > HideNames;
105
109
if (!HideNames) {
@@ -176,9 +180,8 @@ class SplitModuleLogger {
176
180
sys::path::append (PathTemplate, LogDir, " Module-%%-%%-%%-%%-%%-%%-%%.txt" );
177
181
if (auto Err =
178
182
sys::fs::createUniqueFile (PathTemplate.str (), Fd, RealPath)) {
179
- std::string Msg =
180
- " Failed to create log file at '" + LogDir + " ': " + Err.message ();
181
- report_fatal_error (StringRef (Msg),
183
+ report_fatal_error (" Failed to create log file at '" + Twine (LogDir) +
184
+ " ': " + Err.message (),
182
185
/* CrashDiag=*/ false );
183
186
}
184
187
@@ -232,7 +235,7 @@ calculateFunctionCosts(SplitModuleLogger &SML, const AMDGPUTargetMachine &TM,
232
235
continue ;
233
236
234
237
CostType FnCost = 0 ;
235
- auto TTI = TM.getTargetTransformInfo (Fn);
238
+ TargetTransformInfo TTI = TM.getTargetTransformInfo (Fn);
236
239
237
240
for (auto &BB : Fn) {
238
241
for (auto &I : BB) {
@@ -253,7 +256,7 @@ calculateFunctionCosts(SplitModuleLogger &SML, const AMDGPUTargetMachine &TM,
253
256
assert ((ModuleCost + FnCost) >= ModuleCost && " Overflow!" );
254
257
ModuleCost += FnCost;
255
258
256
- if (AMDGPU::isKernelCC (&Fn))
259
+ if (isEntryPoint (&Fn))
257
260
KernelCost += FnCost;
258
261
}
259
262
@@ -279,7 +282,7 @@ calculateFunctionCosts(SplitModuleLogger &SML, const AMDGPUTargetMachine &TM,
279
282
static void addAllIndirectCallDependencies (const Module &M,
280
283
DenseSet<const Function *> &Fns) {
281
284
for (const auto &Fn : M) {
282
- if (!Fn.isDeclaration () && !AMDGPU::isEntryFunctionCC (Fn. getCallingConv () ))
285
+ if (!Fn.isDeclaration () && !isEntryPoint (&Fn ))
283
286
Fns.insert (&Fn);
284
287
}
285
288
}
@@ -335,7 +338,7 @@ static void addAllDependencies(SplitModuleLogger &SML, const CallGraph &CG,
335
338
return ;
336
339
}
337
340
338
- assert (!AMDGPU::isKernelCC (Callee));
341
+ assert (!isEntryPoint (Callee));
339
342
if (Callee->isDeclaration ())
340
343
continue ;
341
344
@@ -386,7 +389,7 @@ static float calculateOverlap(const DenseSet<const Function *> &A,
386
389
const DenseSet<const Function *> &B) {
387
390
DenseSet<const Function *> Total;
388
391
for (const auto *F : A) {
389
- if (!AMDGPU::isKernelCC (F))
392
+ if (!isEntryPoint (F))
390
393
Total.insert (F);
391
394
}
392
395
@@ -395,7 +398,7 @@ static float calculateOverlap(const DenseSet<const Function *> &A,
395
398
396
399
unsigned NumCommon = 0 ;
397
400
for (const auto *F : B) {
398
- if (AMDGPU::isKernelCC (F))
401
+ if (isEntryPoint (F))
399
402
continue ;
400
403
401
404
auto [It, Inserted] = Total.insert (F);
@@ -554,13 +557,13 @@ doPartitioning(SplitModuleLogger &SML, Module &M, unsigned NumParts,
554
557
// P0.
555
558
DenseSet<const Function *> AllFunctions;
556
559
for (const auto &[Idx, Part] : enumerate(Partitions)) {
557
- [[maybe_unused]] CostType Cost = 0 ;
560
+ CostType Cost = 0 ;
558
561
for (auto *Fn : Part) {
559
562
// external linkage functions should exclusively be in the first partition
560
563
// at this stage. In theory, we should only ever see external linkage
561
564
// functions here if they're kernels, or if they've been added due to a
562
565
// kernel using indirect calls somewhere in its CallGraph.
563
- assert (Idx == 0 || (!Fn->hasExternalLinkage () || AMDGPU::isKernelCC (Fn)));
566
+ assert (Idx == 0 || (!Fn->hasExternalLinkage () || isEntryPoint (Fn)));
564
567
Cost += FnCosts.at (Fn);
565
568
}
566
569
SML << " P" << Idx << " has a total cost of " << Cost << " ("
@@ -646,7 +649,7 @@ void llvm::splitAMDGPUModule(
646
649
// of the kernel so the biggest kernels are seen first.
647
650
SmallVector<KernelWithDependencies> WorkList;
648
651
for (auto &Fn : M) {
649
- if (AMDGPU::isKernelCC (&Fn) && !Fn.isDeclaration ())
652
+ if (isEntryPoint (&Fn) && !Fn.isDeclaration ())
650
653
WorkList.emplace_back (SML, CG, FnCosts, &Fn);
651
654
}
652
655
sort (WorkList, [&](auto &A, auto &B) {
@@ -695,7 +698,7 @@ void llvm::splitAMDGPUModule(
695
698
// Check we don't import an external linkage function in any
696
699
// partition other than P0.
697
700
#ifndef NDEBUG
698
- if (Fn->hasExternalLinkage () && !AMDGPU::isKernelCC (Fn)) {
701
+ if (Fn->hasExternalLinkage () && !isEntryPoint (Fn)) {
699
702
assert ((I == 0 ) == FnsInPart.contains (Fn));
700
703
}
701
704
#endif
@@ -708,8 +711,6 @@ void llvm::splitAMDGPUModule(
708
711
// Everything else goes in the first partition.
709
712
return I == 0 ;
710
713
}));
711
- if (I != 0 )
712
- MPart->setModuleInlineAsm (" " );
713
714
714
715
// Clean-up conservatively imported GVs without any users.
715
716
for (auto &GV : make_early_inc_range (MPart->globals ())) {
@@ -721,7 +722,7 @@ void llvm::splitAMDGPUModule(
721
722
for (auto &Cur : *MPart) {
722
723
if (!Cur.isDeclaration ()) {
723
724
++NumAllFns;
724
- if (AMDGPU::isKernelCC (&Cur))
725
+ if (isEntryPoint (&Cur))
725
726
++NumKernels;
726
727
}
727
728
}
0 commit comments