-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[lld] Merge equivalent symbols found during ICF #134342
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-lld-elf @llvm/pr-subscribers-lld Author: Pranav Kant (pranavk) ChangesFixes a correctness issue for AArch64 when ADRP and LDR instructions are outlined in separate sections and sections are fed to ICF for deduplication. See test case (based on #129122) for details. All rodata.* sections are folded into a single section with ICF. This leads to all f2_* function sections getting folded into one (as their relocation target symbols g* belong to .rodata.g* sections that have already been folded into one). Since relocations still refer original g* symbols, we end up creating duplicate GOT entry for all such symbols. This PR addresses that by tracking such folded symbols and create one GOT entry for all such symbols. Fixes #129122 Full diff: https://github.com/llvm/llvm-project/pull/134342.diff 5 Files Affected:
diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp
index 1cdcf6be9d8a9..467487e10f310 100644
--- a/lld/ELF/ICF.cpp
+++ b/lld/ELF/ICF.cpp
@@ -333,6 +333,28 @@ bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) {
: constantEq(a, ra.relas, b, rb.relas);
}
+template <class RelTy>
+static SmallVector<Symbol *> getReloc(const InputSection *sec,
+ Relocs<RelTy> relocs) {
+ SmallVector<Symbol *> syms;
+ for (auto ri = relocs.begin(), re = relocs.end(); ri != re; ++ri) {
+ Symbol &sym = sec->file->getRelocTargetSym(*ri);
+ syms.push_back(&sym);
+ }
+ return syms;
+}
+
+template <class ELFT>
+static SmallVector<Symbol *> getRelocTargetSyms(const InputSection *sec) {
+ const RelsOrRelas<ELFT> rel = sec->template relsOrRelas<ELFT>();
+ if (rel.areRelocsCrel())
+ return getReloc(sec, rel.crels);
+ if (rel.areRelocsRel())
+ return getReloc(sec, rel.rels);
+
+ return getReloc(sec, rel.relas);
+}
+
// Compare two lists of relocations. Returns true if all pairs of
// relocations point to the same section in terms of ICF.
template <class ELFT>
@@ -535,14 +557,35 @@ template <class ELFT> void ICF<ELFT>::run() {
auto print = [&ctx = ctx]() -> ELFSyncStream {
return {ctx, ctx.arg.printIcfSections ? DiagLevel::Msg : DiagLevel::None};
};
+
+ DenseMap<Symbol *, Symbol *> symbolMap;
// Merge sections by the equivalence class.
+ // Merge symbols identified as equivalent during ICF
forEachClassRange(0, sections.size(), [&](size_t begin, size_t end) {
if (end - begin == 1)
return;
print() << "selected section " << sections[begin];
+ SmallVector<Symbol *> syms = getRelocTargetSyms<ELFT>(sections[begin]);
for (size_t i = begin + 1; i < end; ++i) {
print() << " removing identical section " << sections[i];
sections[begin]->replace(sections[i]);
+ SmallVector<Symbol *> replacedSyms =
+ getRelocTargetSyms<ELFT>(sections[i]);
+ assert(syms.size() == replacedSyms.size() &&
+ "Should have same number of syms!");
+ for (size_t i = 0; i < syms.size(); i++) {
+ if (syms[i] == replacedSyms[i] || !syms[i]->isGlobal() ||
+ !replacedSyms[i]->isGlobal())
+ continue;
+ auto [it, inserted] =
+ symbolMap.insert(std::make_pair(replacedSyms[i], syms[i]));
+ print() << " selected symbol: " << syms[i]->getName().data()
+ << "; replaced symbol: " << replacedSyms[i]->getName().data();
+ if (!inserted) {
+ print() << " replacement already exists: "
+ << it->getSecond()->getName().data();
+ }
+ }
// At this point we know sections merged are fully identical and hence
// we want to remove duplicate implicit dependencies such as link order
@@ -561,11 +604,17 @@ template <class ELFT> void ICF<ELFT>::run() {
d->folded = true;
}
};
- for (Symbol *sym : ctx.symtab->getSymbols())
+ for (Symbol *sym : ctx.symtab->getSymbols()) {
fold(sym);
+ if (Symbol *s = symbolMap.lookup(sym))
+ ctx.symtab->redirect(sym, s);
+ }
parallelForEach(ctx.objectFiles, [&](ELFFileBase *file) {
for (Symbol *sym : file->getLocalSymbols())
fold(sym);
+ for (Symbol *&sym : file->getMutableGlobalSymbols())
+ if (Symbol *s = symbolMap.lookup(sym))
+ sym = s;
});
// InputSectionDescription::sections is populated by processSectionCommands().
diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp
index b8a70d4e898fc..2199f159692b0 100644
--- a/lld/ELF/SymbolTable.cpp
+++ b/lld/ELF/SymbolTable.cpp
@@ -29,6 +29,13 @@ using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;
+void SymbolTable::redirect(Symbol *from, Symbol *to) {
+ int &fromIdx = symMap[CachedHashStringRef(from->getName())];
+ const int toIdx = symMap[CachedHashStringRef(to->getName())];
+
+ fromIdx = toIdx;
+}
+
void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) {
// Redirect __real_foo to the original foo and foo to the original __wrap_foo.
int &idx1 = symMap[CachedHashStringRef(sym->getName())];
diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h
index d6443742f7baa..e3a39bac85f97 100644
--- a/lld/ELF/SymbolTable.h
+++ b/lld/ELF/SymbolTable.h
@@ -41,6 +41,7 @@ class SymbolTable {
SymbolTable(Ctx &ctx) : ctx(ctx) {}
ArrayRef<Symbol *> getSymbols() const { return symVector; }
+ void redirect(Symbol *from, Symbol *to);
void wrap(Symbol *sym, Symbol *real, Symbol *wrap);
Symbol *insert(StringRef name);
diff --git a/lld/test/ELF/aarch64-got-merging-icf.s b/lld/test/ELF/aarch64-got-merging-icf.s
new file mode 100644
index 0000000000000..9f359cbf3a0a9
--- /dev/null
+++ b/lld/test/ELF/aarch64-got-merging-icf.s
@@ -0,0 +1,68 @@
+// REQUIRES: aarch64
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %s -o %t
+# RUN: ld.lld %t -o %t2 --icf=all
+# RUN: llvm-objdump --section-headers %t2 | FileCheck %s --check-prefix=EXE
+
+# RUN: ld.lld -shared %t -o %t3 --icf=all
+# RUN: llvm-objdump --section-headers %t3 | FileCheck %s --check-prefix=DSO
+
+## All .rodata.* sections should merge into a single GOT entry
+# EXE: {{.*}}.got 00000008{{.*}}
+
+## When symbols are preemptible in DSO mode, GOT entries wouldn't be merged
+# DSO: {{.*}}.got 00000020{{.*}}
+
+.addrsig
+
+callee:
+ret
+
+.section .rodata.dummy1,"a",@progbits
+sym1:
+.long 111
+.long 122
+.byte 123
+
+.section .rodata.dummy2,"a",@progbits
+sym2:
+.long 111
+.long 122
+sym3:
+.byte 123
+
+.macro f, index
+
+.section .text.f1_\index,"ax",@progbits
+f1_\index:
+adrp x0, :got:g\index
+mov x1, #\index
+b f2_\index
+
+.section .text.f2_\index,"ax",@progbits
+f2_\index:
+ldr x0, [x0, :got_lo12:g\index]
+b callee
+
+.globl g\index
+.section .rodata.g\index,"a",@progbits
+g_\index:
+.long 111
+.long 122
+
+g\index:
+.byte 123
+
+.section .text._start,"ax",@progbits
+bl f1_\index
+
+.endm
+
+.section .text._start,"ax",@progbits
+.globl _start
+_start:
+
+f 0
+f 1
+f 2
+f 3
diff --git a/lld/test/ELF/icf-preemptible.s b/lld/test/ELF/icf-preemptible.s
index 4bd1eca438b19..f79cf73b911ba 100644
--- a/lld/test/ELF/icf-preemptible.s
+++ b/lld/test/ELF/icf-preemptible.s
@@ -11,12 +11,15 @@
# EXE-NOT: {{.}}
# EXE: selected section {{.*}}:(.text.g1)
# EXE-NEXT: removing identical section {{.*}}:(.text.g2)
+# EXE-NEXT: selected symbol: f1; replaced symbol: f2
# EXE-NEXT: removing identical section {{.*}}:(.text.g3)
# EXE-NEXT: selected section {{.*}}:(.text.f1)
# EXE-NEXT: removing identical section {{.*}}:(.text.f2)
# EXE-NEXT: selected section {{.*}}:(.text.h1)
# EXE-NEXT: removing identical section {{.*}}:(.text.h2)
+# EXE-NEXT: selected symbol: g1; replaced symbol: g2
# EXE-NEXT: removing identical section {{.*}}:(.text.h3)
+# EXE-NEXT: selected symbol: g1; replaced symbol: g3
# EXE-NOT: {{.}}
## Definitions are preemptible in a DSO. Only leaf functions can be folded.
|
Once this is merged, I am going to send another PR to fix an issue that was uncovered by James' original diff to disable merging two sections when one refer to a global symbol, while other refer to a local symbol. JFYI. |
(Apologies. I will have limited internet access between April 20th and May 4th, and my response time may be delayed..) |
assert(syms.size() == replacedSyms.size() && | ||
"Should have same number of syms!"); | ||
for (size_t i = 0; i < syms.size(); i++) { | ||
if (syms[i] == replacedSyms[i] || !syms[i]->isGlobal() || |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since you're checking isGlobal only here, and not in variableEq, doesn't that mean that any non-trivial relocation (such as GOT) which references a local symbol will remain broken? Maybe ELF producers don't need to create a GOT reloc pointing to a local symbol, but it's not invalid to do so.
We can't (at least not right now) get rid of the isGlobal check, because we cannot actually canonicalize local symbols across TUs, since code doing loops over getLocalSymbols()
for each file in ctx.objectFiles
expects to see no duplicates. (which was a bug in my original version).
So I think we need to add a check in variableEq, if((!sa.isGlobal() || !sb.isGlobal()) && !ra->"isTrivialRelocType"()) return false;
. (I made up "isTrivialRelocType"; do we have a relevant predicate already that would determine if a relocation has semantics beyond just the offset?)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, as I mentioned in my comment earlier, I plan to do that in a separate PR as it's going to change the behavior of two sections getting ICF-merged (which also changes a icf-ineligible.s
test as you noticed earlier).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK with me to do in another PR, as long as we're agreed the issue hasn't fully been fixed until that's also done.
The change doesn't need to affect icf-ineligible, though, since that test uses only trivial relocations, where we don't need to replace the symbols.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I followed-up here in this PR: #136641
assert(syms.size() == replacedSyms.size() && | ||
"Should have same number of syms!"); | ||
for (size_t i = 0; i < syms.size(); i++) { | ||
if (syms[i] == replacedSyms[i] || !syms[i]->isGlobal() || |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK with me to do in another PR, as long as we're agreed the issue hasn't fully been fixed until that's also done.
The change doesn't need to affect icf-ineligible, though, since that test uses only trivial relocations, where we don't need to replace the symbols.
lld/test/ELF/icf-preemptible.s
Outdated
# EXE-NOT: {{.}} | ||
# EXE-NEXT: Redirecting f2 to f1 | ||
# EXE-NEXT: Redirecting g2 to g1 | ||
# EXE-NEXT: Redirecting g3 to g1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could add EXE-NOT: {{.}}
to ensure that there is no new line below (to catch bugs like printing redirecting
multiple times)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I removed that earlier because we are redirecting these symbols twice -- once in the symtab and again in the ELFfile. I have changed the test and messages to better reflect that. I have also added EXE-NOT: {{.}}
back in.
✅ With the latest revision this PR passed the C/C++ code formatter. |
Fixes a correctness issue for AArch64 when ADRP and LDR instructions are outlined in separate sections and sections are fed to ICF for deduplication.
See test case (based on #129122) for details. All rodata.* sections are folded into a single section with ICF. This leads to all f2_* function sections getting folded into one (as their relocation target symbols g* belong to .rodata.g* sections that have already been folded into one). Since relocations still refer original g* symbols, we end up creating duplicate GOT entry for all such symbols. This PR addresses that by tracking such folded symbols and create one GOT entry for all such symbols.
Fixes #129122
Co-authored by: @jyknight