Skip to content

Commit 5e990b0

Browse files
authored
[PowerPC][GlobalMerge] Reduce TOC usage by merging internal and private global data (llvm#101224)
This patch reduces TOC usage by merging internal and private global data. It also adds the GlobalMerge pass to the PPCTargetMachine pipeline; the pass is disabled by default and can be enabled with -ppc-global-merge.
1 parent 1f995b5 commit 5e990b0

File tree

5 files changed

+45
-16
lines changed

5 files changed

+45
-16
lines changed

llvm/include/llvm/CodeGen/GlobalMerge.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ struct GlobalMergeOptions {
2828
bool MergeConst = false;
2929
/// Whether we should merge global variables that have external linkage.
3030
bool MergeExternal = true;
31+
/// Whether we should merge constant global variables.
32+
bool MergeConstantGlobals = false;
3133
/// Whether we should try to optimize for size only.
3234
/// Currently, this applies a dead simple heuristic: only consider globals
3335
/// used in minsize functions for merging.

llvm/include/llvm/CodeGen/Passes.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -476,7 +476,8 @@ namespace llvm {
476476
///
477477
Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset,
478478
bool OnlyOptimizeForSize = false,
479-
bool MergeExternalByDefault = false);
479+
bool MergeExternalByDefault = false,
480+
bool MergeConstantByDefault = false);
480481

481482
/// This pass splits the stack into a safe stack and an unsafe stack to
482483
/// protect against stack-based overflow vulnerabilities.

llvm/lib/CodeGen/GlobalMerge.cpp

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -196,11 +196,13 @@ class GlobalMerge : public FunctionPass {
196196
}
197197

198198
explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset,
199-
bool OnlyOptimizeForSize, bool MergeExternalGlobals)
199+
bool OnlyOptimizeForSize, bool MergeExternalGlobals,
200+
bool MergeConstantGlobals)
200201
: FunctionPass(ID), TM(TM) {
201202
Opt.MaxOffset = MaximalOffset;
202203
Opt.SizeOnly = OnlyOptimizeForSize;
203204
Opt.MergeExternal = MergeExternalGlobals;
205+
Opt.MergeConstantGlobals = MergeConstantGlobals;
204206
initializeGlobalMergePass(*PassRegistry::getPassRegistry());
205207
}
206208

@@ -475,7 +477,8 @@ bool GlobalMergeImpl::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
475477
auto &DL = M.getDataLayout();
476478

477479
LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #"
478-
<< GlobalSet.find_first() << "\n");
480+
<< GlobalSet.find_first() << ", total of " << Globals.size()
481+
<< "\n");
479482

480483
bool Changed = false;
481484
ssize_t i = GlobalSet.find_first();
@@ -551,6 +554,8 @@ bool GlobalMergeImpl::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
551554
MergedGV->setAlignment(MaxAlign);
552555
MergedGV->setSection(Globals[i]->getSection());
553556

557+
LLVM_DEBUG(dbgs() << "MergedGV: " << *MergedGV << "\n");
558+
554559
const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
555560
for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
556561
GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
@@ -700,6 +705,11 @@ bool GlobalMergeImpl::run(Module &M) {
700705
else
701706
Globals[{AddressSpace, Section}].push_back(&GV);
702707
}
708+
LLVM_DEBUG(dbgs() << "GV "
709+
<< ((DL.getTypeAllocSize(Ty) < Opt.MaxOffset)
710+
? "to merge: "
711+
: "not to merge: ")
712+
<< GV << "\n");
703713
}
704714

705715
for (auto &P : Globals)
@@ -710,7 +720,7 @@ bool GlobalMergeImpl::run(Module &M) {
710720
if (P.second.size() > 1)
711721
Changed |= doMerge(P.second, M, false, P.first.first);
712722

713-
if (EnableGlobalMergeOnConst)
723+
if (Opt.MergeConstantGlobals)
714724
for (auto &P : ConstGlobals)
715725
if (P.second.size() > 1)
716726
Changed |= doMerge(P.second, M, true, P.first.first);
@@ -720,8 +730,11 @@ bool GlobalMergeImpl::run(Module &M) {
720730

721731
Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset,
722732
bool OnlyOptimizeForSize,
723-
bool MergeExternalByDefault) {
733+
bool MergeExternalByDefault,
734+
bool MergeConstantByDefault) {
724735
bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ?
725736
MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE);
726-
return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal);
737+
bool MergeConstant = EnableGlobalMergeOnConst || MergeConstantByDefault;
738+
return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal,
739+
MergeConstant);
727740
}

llvm/lib/Target/PowerPC/PPCTargetMachine.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,15 @@ static cl::opt<bool> EnablePPCGenScalarMASSEntries(
111111
"(scalar) entries"),
112112
cl::Hidden);
113113

114+
static cl::opt<bool>
115+
EnableGlobalMerge("ppc-global-merge", cl::Hidden, cl::init(false),
116+
cl::desc("Enable the global merge pass"));
117+
118+
static cl::opt<unsigned>
119+
GlobalMergeMaxOffset("ppc-global-merge-max-offset", cl::Hidden,
120+
cl::init(0x7fff),
121+
cl::desc("Maximum global merge offset"));
122+
114123
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
115124
// Register the targets
116125
RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());
@@ -491,6 +500,10 @@ void PPCPassConfig::addIRPasses() {
491500
}
492501

493502
bool PPCPassConfig::addPreISel() {
503+
if (EnableGlobalMerge)
504+
addPass(
505+
createGlobalMergePass(TM, GlobalMergeMaxOffset, false, false, true));
506+
494507
if (MergeStringPool && getOptLevel() != CodeGenOptLevel::None)
495508
addPass(createPPCMergeStringPoolPass());
496509

llvm/test/CodeGen/PowerPC/merge-private.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 \
3-
; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s \
3+
; RUN: -ppc-asm-full-reg-names -ppc-global-merge=true < %s | FileCheck %s \
44
; RUN: --check-prefix=AIX64
55
; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 \
6-
; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s \
6+
; RUN: -ppc-asm-full-reg-names -ppc-global-merge=true < %s | FileCheck %s \
77
; RUN: --check-prefix=AIX32
88
; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux -mcpu=pwr8 \
9-
; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s \
9+
; RUN: -ppc-asm-full-reg-names -ppc-global-merge=true < %s | FileCheck %s \
1010
; RUN: --check-prefix=LINUX64LE
1111
; RUN: llc -verify-machineinstrs -mtriple powerpc64-unknown-linux -mcpu=pwr8 \
12-
; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s \
12+
; RUN: -ppc-asm-full-reg-names -ppc-global-merge=true < %s | FileCheck %s \
1313
; RUN: --check-prefix=LINUX64BE
1414

1515
@.str = private unnamed_addr constant [15 x i8] c"Private global\00", align 1
@@ -24,7 +24,7 @@ define dso_local void @print_func() {
2424
; AIX64-NEXT: stdu r1, -128(r1)
2525
; AIX64-NEXT: std r0, 144(r1)
2626
; AIX64-NEXT: std r31, 120(r1) # 8-byte Folded Spill
27-
; AIX64-NEXT: ld r31, L..C0(r2) # @__ModuleStringPool
27+
; AIX64-NEXT: ld r31, L..C0(r2) # @_MergedGlobals
2828
; AIX64-NEXT: mr r3, r31
2929
; AIX64-NEXT: bl .puts[PR]
3030
; AIX64-NEXT: nop
@@ -43,7 +43,7 @@ define dso_local void @print_func() {
4343
; AIX32-NEXT: stwu r1, -64(r1)
4444
; AIX32-NEXT: stw r0, 72(r1)
4545
; AIX32-NEXT: stw r31, 60(r1) # 4-byte Folded Spill
46-
; AIX32-NEXT: lwz r31, L..C0(r2) # @__ModuleStringPool
46+
; AIX32-NEXT: lwz r31, L..C0(r2) # @_MergedGlobals
4747
; AIX32-NEXT: mr r3, r31
4848
; AIX32-NEXT: bl .puts[PR]
4949
; AIX32-NEXT: nop
@@ -64,9 +64,9 @@ define dso_local void @print_func() {
6464
; LINUX64LE-NEXT: .cfi_offset r30, -16
6565
; LINUX64LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
6666
; LINUX64LE-NEXT: stdu r1, -48(r1)
67-
; LINUX64LE-NEXT: addis r3, r2, .L__ModuleStringPool@toc@ha
67+
; LINUX64LE-NEXT: addis r3, r2, .L_MergedGlobals@toc@ha
6868
; LINUX64LE-NEXT: std r0, 64(r1)
69-
; LINUX64LE-NEXT: addi r30, r3, .L__ModuleStringPool@toc@l
69+
; LINUX64LE-NEXT: addi r30, r3, .L_MergedGlobals@toc@l
7070
; LINUX64LE-NEXT: mr r3, r30
7171
; LINUX64LE-NEXT: bl puts
7272
; LINUX64LE-NEXT: nop
@@ -87,9 +87,9 @@ define dso_local void @print_func() {
8787
; LINUX64BE-NEXT: .cfi_def_cfa_offset 128
8888
; LINUX64BE-NEXT: .cfi_offset lr, 16
8989
; LINUX64BE-NEXT: .cfi_offset r30, -16
90-
; LINUX64BE-NEXT: addis r3, r2, .L__ModuleStringPool@toc@ha
90+
; LINUX64BE-NEXT: addis r3, r2, .L_MergedGlobals@toc@ha
9191
; LINUX64BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill
92-
; LINUX64BE-NEXT: addi r30, r3, .L__ModuleStringPool@toc@l
92+
; LINUX64BE-NEXT: addi r30, r3, .L_MergedGlobals@toc@l
9393
; LINUX64BE-NEXT: mr r3, r30
9494
; LINUX64BE-NEXT: bl puts
9595
; LINUX64BE-NEXT: nop

0 commit comments

Comments
 (0)