Skip to content

Commit a092a3c

Browse files
committed
comments
1 parent cec4259 commit a092a3c

File tree

4 files changed

+24
-23
lines changed

4 files changed

+24
-23
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ static cl::opt<bool>
100100
using CostType = InstructionCost::CostType;
101101
using PartitionID = unsigned;
102102

103+
/// Returns true when \p F's calling convention is reported as an entry
/// function calling convention by AMDGPU::isEntryFunctionCC. \p F is
/// dereferenced unconditionally, so it must not be null.
static bool isEntryPoint(const Function *F) {
104+
return AMDGPU::isEntryFunctionCC(F->getCallingConv());
105+
}
106+
103107
static std::string getName(const Value &V) {
104108
static std::optional<bool> HideNames;
105109
if (!HideNames) {
@@ -176,9 +180,8 @@ class SplitModuleLogger {
176180
sys::path::append(PathTemplate, LogDir, "Module-%%-%%-%%-%%-%%-%%-%%.txt");
177181
if (auto Err =
178182
sys::fs::createUniqueFile(PathTemplate.str(), Fd, RealPath)) {
179-
std::string Msg =
180-
"Failed to create log file at '" + LogDir + "': " + Err.message();
181-
report_fatal_error(StringRef(Msg),
183+
report_fatal_error("Failed to create log file at '" + Twine(LogDir) +
184+
"': " + Err.message(),
182185
/*CrashDiag=*/false);
183186
}
184187

@@ -232,7 +235,7 @@ calculateFunctionCosts(SplitModuleLogger &SML, const AMDGPUTargetMachine &TM,
232235
continue;
233236

234237
CostType FnCost = 0;
235-
auto TTI = TM.getTargetTransformInfo(Fn);
238+
TargetTransformInfo TTI = TM.getTargetTransformInfo(Fn);
236239

237240
for (auto &BB : Fn) {
238241
for (auto &I : BB) {
@@ -253,7 +256,7 @@ calculateFunctionCosts(SplitModuleLogger &SML, const AMDGPUTargetMachine &TM,
253256
assert((ModuleCost + FnCost) >= ModuleCost && "Overflow!");
254257
ModuleCost += FnCost;
255258

256-
if (AMDGPU::isKernelCC(&Fn))
259+
if (isEntryPoint(&Fn))
257260
KernelCost += FnCost;
258261
}
259262

@@ -279,7 +282,7 @@ calculateFunctionCosts(SplitModuleLogger &SML, const AMDGPUTargetMachine &TM,
279282
static void addAllIndirectCallDependencies(const Module &M,
280283
DenseSet<const Function *> &Fns) {
281284
for (const auto &Fn : M) {
282-
if (!Fn.isDeclaration() && !AMDGPU::isEntryFunctionCC(Fn.getCallingConv()))
285+
if (!Fn.isDeclaration() && !isEntryPoint(&Fn))
283286
Fns.insert(&Fn);
284287
}
285288
}
@@ -335,7 +338,7 @@ static void addAllDependencies(SplitModuleLogger &SML, const CallGraph &CG,
335338
return;
336339
}
337340

338-
assert(!AMDGPU::isKernelCC(Callee));
341+
assert(!isEntryPoint(Callee));
339342
if (Callee->isDeclaration())
340343
continue;
341344

@@ -386,7 +389,7 @@ static float calculateOverlap(const DenseSet<const Function *> &A,
386389
const DenseSet<const Function *> &B) {
387390
DenseSet<const Function *> Total;
388391
for (const auto *F : A) {
389-
if (!AMDGPU::isKernelCC(F))
392+
if (!isEntryPoint(F))
390393
Total.insert(F);
391394
}
392395

@@ -395,7 +398,7 @@ static float calculateOverlap(const DenseSet<const Function *> &A,
395398

396399
unsigned NumCommon = 0;
397400
for (const auto *F : B) {
398-
if (AMDGPU::isKernelCC(F))
401+
if (isEntryPoint(F))
399402
continue;
400403

401404
auto [It, Inserted] = Total.insert(F);
@@ -554,13 +557,13 @@ doPartitioning(SplitModuleLogger &SML, Module &M, unsigned NumParts,
554557
// P0.
555558
DenseSet<const Function *> AllFunctions;
556559
for (const auto &[Idx, Part] : enumerate(Partitions)) {
557-
[[maybe_unused]] CostType Cost = 0;
560+
CostType Cost = 0;
558561
for (auto *Fn : Part) {
559562
// external linkage functions should exclusively be in the first partition
560563
// at this stage. In theory, we should only ever see external linkage
561564
// functions here if they're kernels, or if they've been added due to a
562565
// kernel using indirect calls somewhere in its CallGraph.
563-
assert(Idx == 0 || (!Fn->hasExternalLinkage() || AMDGPU::isKernelCC(Fn)));
566+
assert(Idx == 0 || (!Fn->hasExternalLinkage() || isEntryPoint(Fn)));
564567
Cost += FnCosts.at(Fn);
565568
}
566569
SML << "P" << Idx << " has a total cost of " << Cost << " ("
@@ -646,7 +649,7 @@ void llvm::splitAMDGPUModule(
646649
// of the kernel so the biggest kernels are seen first.
647650
SmallVector<KernelWithDependencies> WorkList;
648651
for (auto &Fn : M) {
649-
if (AMDGPU::isKernelCC(&Fn) && !Fn.isDeclaration())
652+
if (isEntryPoint(&Fn) && !Fn.isDeclaration())
650653
WorkList.emplace_back(SML, CG, FnCosts, &Fn);
651654
}
652655
sort(WorkList, [&](auto &A, auto &B) {
@@ -695,7 +698,7 @@ void llvm::splitAMDGPUModule(
695698
// Check we don't import an external linkage function in any
696699
// partition other than P0.
697700
#ifndef NDEBUG
698-
if (Fn->hasExternalLinkage() && !AMDGPU::isKernelCC(Fn)) {
701+
if (Fn->hasExternalLinkage() && !isEntryPoint(Fn)) {
699702
assert((I == 0) == FnsInPart.contains(Fn));
700703
}
701704
#endif
@@ -708,8 +711,6 @@ void llvm::splitAMDGPUModule(
708711
// Everything else goes in the first partition.
709712
return I == 0;
710713
}));
711-
if (I != 0)
712-
MPart->setModuleInlineAsm("");
713714

714715
// Clean-up conservatively imported GVs without any users.
715716
for (auto &GV : make_early_inc_range(MPart->globals())) {
@@ -721,7 +722,7 @@ void llvm::splitAMDGPUModule(
721722
for (auto &Cur : *MPart) {
722723
if (!Cur.isDeclaration()) {
723724
++NumAllFns;
724-
if (AMDGPU::isKernelCC(&Cur))
725+
if (isEntryPoint(&Cur))
725726
++NumKernels;
726727
}
727728
}

llvm/test/tools/llvm-split/AMDGPU/kernels-global-variables-noexternal.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222
; CHECK2: define amdgpu_kernel void @B
2323
; CHECK2-NOT: define
2424

25-
@foo = private constant ptr undef
26-
@bar = internal constant ptr undef
25+
@foo = private constant ptr poison
26+
@bar = internal constant ptr poison
2727

2828
define amdgpu_kernel void @A() {
2929
store i32 42, ptr @foo

llvm/test/tools/llvm-split/AMDGPU/kernels-global-variables.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
; The GVs should be copied in each partition as needed.
88

99
; CHECK0-NOT: define
10-
; CHECK0: @foo = hidden constant ptr undef
11-
; CHECK0: @bar = hidden constant ptr undef
10+
; CHECK0: @foo = hidden constant ptr poison
11+
; CHECK0: @bar = hidden constant ptr poison
1212
; CHECK0: define amdgpu_kernel void @C
1313
; CHECK0-NOT: define
1414

@@ -24,8 +24,8 @@
2424
; CHECK2: define amdgpu_kernel void @B
2525
; CHECK2-NOT: define
2626

27-
@foo = private constant ptr undef
28-
@bar = internal constant ptr undef
27+
@foo = private constant ptr poison
28+
@bar = internal constant ptr poison
2929

3030
define amdgpu_kernel void @A() {
3131
store i32 42, ptr @foo

llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
; NOLARGEKERNELS-CHECK2: define amdgpu_kernel void @A
5151

5252
define internal void @large2() {
53-
store i32 42, ptr undef
53+
store volatile i32 42, ptr null
5454
call void @large2()
5555
ret void
5656
}

0 commit comments

Comments
 (0)