Skip to content

[lld][ELF] Merge equivalent symbols found during ICF #139493

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 109 additions & 15 deletions lld/ELF/ICF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,12 @@
#include "InputFiles.h"
#include "LinkerScript.h"
#include "OutputSections.h"
#include "Relocations.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/Parallel.h"
Expand All @@ -104,18 +107,23 @@ template <class ELFT> class ICF {
void segregate(size_t begin, size_t end, uint32_t eqClassBase, bool constant);

template <class RelTy>
bool constantEq(const InputSection *a, Relocs<RelTy> relsA,
const InputSection *b, Relocs<RelTy> relsB);
bool constantEq(InputSection *a, Relocs<RelTy> relsA, InputSection *b,
Relocs<RelTy> relsB);

template <class RelTy>
bool variableEq(const InputSection *a, Relocs<RelTy> relsA,
const InputSection *b, Relocs<RelTy> relsB);
bool variableEq(InputSection *a, Relocs<RelTy> relsA, InputSection *b,
Relocs<RelTy> relsB);

bool equalsConstant(const InputSection *a, const InputSection *b);
bool equalsVariable(const InputSection *a, const InputSection *b);
bool equalsConstant(InputSection *a, InputSection *b);
bool equalsVariable(InputSection *a, InputSection *b);

size_t findBoundary(size_t begin, size_t end);

// A relocation with side-effects is considered non-trivial. Eg: relocation
// creates GOT entry or TLS slot.
template <class RelTy>
bool isTrivialRelocation(InputSection *a, Symbol &s, RelTy reloc);

void forEachClassRange(size_t begin, size_t end,
llvm::function_ref<void(size_t, size_t)> fn);

Expand Down Expand Up @@ -234,11 +242,30 @@ void ICF<ELFT>::segregate(size_t begin, size_t end, uint32_t eqClassBase,
}
}

template <class ELFT>
template <class RelTy>
bool ICF<ELFT>::isTrivialRelocation(InputSection *a, Symbol &s, RelTy reloc) {
OffsetGetter getter(*a);
uint64_t offset = getter.get(ctx, reloc.r_offset);
RelExpr expr = ctx.target->getRelExpr(reloc.getType(ctx.arg.isMips64EL), s,
a->content().data() + offset);

if (needsGot(expr) || needsTls(s, expr))
return false;
return true;
}

// Two symbols referenced by relocations can be merged together safely
// when their addends are same.
static bool canMergeSymbols(uint64_t addA, uint64_t addB) {
return addA == addB;
}

// Compare two lists of relocations.
template <class ELFT>
template <class RelTy>
bool ICF<ELFT>::constantEq(const InputSection *secA, Relocs<RelTy> ra,
const InputSection *secB, Relocs<RelTy> rb) {
bool ICF<ELFT>::constantEq(InputSection *secA, Relocs<RelTy> ra,
InputSection *secB, Relocs<RelTy> rb) {
if (ra.size() != rb.size())
return false;
auto rai = ra.begin(), rae = ra.end(), rbi = rb.begin();
Expand Down Expand Up @@ -286,9 +313,14 @@ bool ICF<ELFT>::constantEq(const InputSection *secA, Relocs<RelTy> ra,
// Relocations referring to InputSections are constant-equal if their
// section offsets are equal.
if (isa<InputSection>(da->section)) {
if (da->value + addA == db->value + addB)
if (da->value + addA == db->value + addB) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change was not in #134342 . I have tried to understand why the logic is so complex.
Looks like https://reviews.llvm.org/D34094 , while supporting MergeInputSection, also introduced the untested condition for InputSection

    if (isa<InputSection>(DA->Section))
      return DA->Value + AddA == DB->Value + AddB;

I feel that we should switch to the safer da->value == db->value && addA == addB, which likely doesn't lose anything (I believe it's difficult for clang emitted code to utilize the relaxed condition.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think da->value + addA == db->value + addB is more readable. I can read it as if the address resolves to the same place, sections are considered same. It also takes care of section folding for more cases. Changing it to da->value == db->value && addA == addB would exclude some cases.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changing it to da->value == db->value && addA == addB would exclude some cases.

The excluded cases are likely negligible. This condition is for InputSection (non merge string), where symbols defined not at offset 0 in -ffunction-sections -fdata-sections code are extremely rare. I don't know a case LLVM codegen would do this.

// For non-trivial relocations, if we cannot merge symbols together,
// we must not merge sections either.
if (!isTrivialRelocation(secA, sa, *rai) &&
!canMergeSymbols(addA, addB))
return false;
continue;
return false;
}
}

// Relocations referring to MergeInputSections are constant-equal if their
Expand All @@ -314,7 +346,7 @@ bool ICF<ELFT>::constantEq(const InputSection *secA, Relocs<RelTy> ra,
// Compare "non-moving" part of two InputSections, namely everything
// except relocation targets.
template <class ELFT>
bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) {
bool ICF<ELFT>::equalsConstant(InputSection *a, InputSection *b) {
if (a->flags != b->flags || a->getSize() != b->getSize() ||
a->content() != b->content())
return false;
Expand All @@ -333,12 +365,35 @@ bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) {
: constantEq(a, ra.relas, b, rb.relas);
}

template <class ELFT, class RelTy>
static SmallVector<std::pair<Symbol *, uint64_t>>
getReloc(const InputSection *sec, Relocs<RelTy> relocs) {
SmallVector<std::pair<Symbol *, uint64_t>> syms;
for (auto ri = relocs.begin(), re = relocs.end(); ri != re; ++ri) {
Symbol &sym = sec->file->getRelocTargetSym(*ri);
syms.emplace_back(&sym, getAddend<ELFT>(*ri));
}
return syms;
}

template <class ELFT>
static SmallVector<std::pair<Symbol *, uint64_t>>
getRelocTargetSyms(const InputSection *sec) {
const RelsOrRelas<ELFT> rel = sec->template relsOrRelas<ELFT>();
if (rel.areRelocsCrel())
return getReloc<ELFT>(sec, rel.crels);
if (rel.areRelocsRel())
return getReloc<ELFT>(sec, rel.rels);

return getReloc<ELFT>(sec, rel.relas);
}

// Compare two lists of relocations. Returns true if all pairs of
// relocations point to the same section in terms of ICF.
template <class ELFT>
template <class RelTy>
bool ICF<ELFT>::variableEq(const InputSection *secA, Relocs<RelTy> ra,
const InputSection *secB, Relocs<RelTy> rb) {
bool ICF<ELFT>::variableEq(InputSection *secA, Relocs<RelTy> ra,
InputSection *secB, Relocs<RelTy> rb) {
assert(ra.size() == rb.size());

auto rai = ra.begin(), rae = ra.end(), rbi = rb.begin();
Expand All @@ -352,6 +407,15 @@ bool ICF<ELFT>::variableEq(const InputSection *secA, Relocs<RelTy> ra,
auto *da = cast<Defined>(&sa);
auto *db = cast<Defined>(&sb);

// Prevent sections containing local symbols from merging into sections with
// global symbols, or vice-versa. This is to prevent local-global symbols
// getting merged into each other (done later in ICF). We do this as
// post-ICF passes cannot handle duplicates when iterating over local
// symbols. There are also assertions that prevent this.
if ((!da->isGlobal() || !db->isGlobal()) &&
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No test seems to cover this block . Actually I don't understand why we need this check (#134342 (comment)). If we merge two sections, symbols defined relative to them, even if global, can be merged as well, regardless whether there are GOT/TLS relocations.

If we delete the isTrivialRelocation check from #136641 , we can get rid of the OffsetGetter complexity.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will add the test for this.

Regarding why we need this, the issue was brought to the attention first here in this comment: #131630 (comment)

Quoting part of James' comment:

I also added a new check that ensures local symbols aren't merged with global symbols or vice-versa, otherwise assert(b->isLocal() && "should have been caught in initializeSymbols()"); is triggered by lld/test/ELF/icf-ineligible.s. (Instead, that test now fails because the sections referring to a local symbol aren't merged with those referring to the equivalent global symbol, as the test expects, but maybe that's OK.)

So we are not merging sections (and that automatically implies not merging their symbols) when there are local symbols involved. Note that we are only avoiding merging of such sections when it's a non-trivial relocation as that's when it will cause the original 'GOT merging' problem that motivated this series of changes.

Regarding deleting isTrivialRelocation(), I am currently using it in two places -- here and previous block. Both usages try to merge sections when only trivial relocations are involved as our original 'GOT merging' problem only occurs with non-trivial relocation.

Basically, the invariant we are trying to maintain here is:
When there's a non-trivial relocation, we shouldn't merge the sections unless their symbols can be merged

We can try to satisfy this invariant broadly by removing !isTrivialRelocation() check but that will fold less sections. I believe the current code satisfies the above invariant as tightly as possible to maximize section folding without causing 'GOT merging' problem.

!isTrivialRelocation(secA, sa, *rai))
return false;

// We already dealt with absolute and non-InputSection symbols in
// constantEq, and for InputSections we have already checked everything
// except the equivalence class.
Expand All @@ -375,7 +439,7 @@ bool ICF<ELFT>::variableEq(const InputSection *secA, Relocs<RelTy> ra,

// Compare "moving" part of two InputSections, namely relocation targets.
template <class ELFT>
bool ICF<ELFT>::equalsVariable(const InputSection *a, const InputSection *b) {
bool ICF<ELFT>::equalsVariable(InputSection *a, InputSection *b) {
const RelsOrRelas<ELFT> ra = a->template relsOrRelas<ELFT>();
const RelsOrRelas<ELFT> rb = b->template relsOrRelas<ELFT>();
if (ra.areRelocsCrel() || rb.areRelocsCrel())
Expand Down Expand Up @@ -537,14 +601,29 @@ template <class ELFT> void ICF<ELFT>::run() {
auto print = [&ctx = ctx]() -> ELFSyncStream {
return {ctx, ctx.arg.printIcfSections ? DiagLevel::Msg : DiagLevel::None};
};

EquivalenceClasses<Symbol *> symbolEquivalence;
// Merge sections by the equivalence class.
// Merge symbols identified as equivalent during ICF.
forEachClassRange(0, sections.size(), [&](size_t begin, size_t end) {
if (end - begin == 1)
return;
print() << "selected section " << sections[begin];
SmallVector<std::pair<Symbol *, uint64_t>> syms =
getRelocTargetSyms<ELFT>(sections[begin]);
for (size_t i = begin + 1; i < end; ++i) {
print() << " removing identical section " << sections[i];
sections[begin]->replace(sections[i]);
SmallVector<std::pair<Symbol *, uint64_t>> replacedSyms =
getRelocTargetSyms<ELFT>(sections[i]);
assert(syms.size() == replacedSyms.size() &&
"Should have same number of syms!");
for (size_t j = 0; j < syms.size(); j++) {
if (!syms[j].first->isGlobal() || !replacedSyms[j].first->isGlobal() ||
!canMergeSymbols(syms[j].second, replacedSyms[j].second))
continue;
symbolEquivalence.unionSets(syms[j].first, replacedSyms[j].first);
}

// At this point we know sections merged are fully identical and hence
// we want to remove duplicate implicit dependencies such as link order
Expand All @@ -563,11 +642,26 @@ template <class ELFT> void ICF<ELFT>::run() {
d->folded = true;
}
};
for (Symbol *sym : ctx.symtab->getSymbols())
for (Symbol *sym : ctx.symtab->getSymbols()) {
fold(sym);
auto it = symbolEquivalence.findLeader(sym);
if (it != symbolEquivalence.member_end() && *it != sym) {
print() << "redirecting '" << sym->getName() << "' in symtab to '"
<< (*it)->getName() << "'";
ctx.symtab->redirect(sym, *it);
}
}
parallelForEach(ctx.objectFiles, [&](ELFFileBase *file) {
for (Symbol *sym : file->getLocalSymbols())
fold(sym);
for (Symbol *&sym : file->getMutableGlobalSymbols()) {
auto it = symbolEquivalence.findLeader(sym);
if (it != symbolEquivalence.member_end() && *it != sym) {
print() << "redirecting '" << sym->getName() << "' to '"
<< (*it)->getName() << "'";
sym = *it;
}
}
});

// InputSectionDescription::sections is populated by processSectionCommands().
Expand Down
43 changes: 43 additions & 0 deletions lld/ELF/InputSection.h
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,49 @@ class SyntheticSection : public InputSection {
}
};

class OffsetGetter {
public:
OffsetGetter() = default;
explicit OffsetGetter(InputSectionBase &sec) {
if (auto *eh = dyn_cast<EhInputSection>(&sec)) {
cies = eh->cies;
fdes = eh->fdes;
i = cies.begin();
j = fdes.begin();
}
}

// Translates offsets in input sections to offsets in output sections.
// Given offset must increase monotonically. We assume that Piece is
// sorted by inputOff.
uint64_t get(Ctx &ctx, uint64_t off) {
if (cies.empty())
return off;

while (j != fdes.end() && j->inputOff <= off)
++j;
auto it = j;
if (j == fdes.begin() || j[-1].inputOff + j[-1].size <= off) {
while (i != cies.end() && i->inputOff <= off)
++i;
if (i == cies.begin() || i[-1].inputOff + i[-1].size <= off) {
Err(ctx) << ".eh_frame: relocation is not in any piece";
return 0;
}
it = i;
}

// Offset -1 means that the piece is dead (i.e. garbage collected).
if (it[-1].outputOff == -1)
return -1;
return it[-1].outputOff + (off - it[-1].inputOff);
}

private:
ArrayRef<EhSectionPiece> cies, fdes;
ArrayRef<EhSectionPiece>::iterator i, j;
};

inline bool isStaticRelSecType(uint32_t type) {
return type == llvm::ELF::SHT_RELA || type == llvm::ELF::SHT_CREL ||
type == llvm::ELF::SHT_REL;
Expand Down
77 changes: 1 addition & 76 deletions lld/ELF/Relocations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,29 +127,6 @@ void elf::reportRangeError(Ctx &ctx, uint8_t *loc, int64_t v, int n,
}
}

// Build a bitmask with one bit set for each 64 subset of RelExpr.
static constexpr uint64_t buildMask() { return 0; }

template <typename... Tails>
static constexpr uint64_t buildMask(int head, Tails... tails) {
return (0 <= head && head < 64 ? uint64_t(1) << head : 0) |
buildMask(tails...);
}

// Return true if `Expr` is one of `Exprs`.
// There are more than 64 but less than 128 RelExprs, so we divide the set of
// exprs into [0, 64) and [64, 128) and represent each range as a constant
// 64-bit mask. Then we decide which mask to test depending on the value of
// expr and use a simple shift and bitwise-and to test for membership.
template <RelExpr... Exprs> static bool oneof(RelExpr expr) {
assert(0 <= expr && (int)expr < 128 &&
"RelExpr is too large for 128-bit mask!");

if (expr >= 64)
return (uint64_t(1) << (expr - 64)) & buildMask((Exprs - 64)...);
return (uint64_t(1) << expr) & buildMask(Exprs...);
}

static RelType getMipsPairType(RelType type, bool isLocal) {
switch (type) {
case R_MIPS_HI16:
Expand Down Expand Up @@ -196,15 +173,6 @@ static bool needsPlt(RelExpr expr) {
RE_PPC64_CALL_PLT>(expr);
}

bool lld::elf::needsGot(RelExpr expr) {
return oneof<R_GOT, RE_AARCH64_AUTH_GOT, RE_AARCH64_AUTH_GOT_PC, R_GOT_OFF,
RE_MIPS_GOT_LOCAL_PAGE, RE_MIPS_GOT_OFF, RE_MIPS_GOT_OFF32,
RE_AARCH64_GOT_PAGE_PC, RE_AARCH64_AUTH_GOT_PAGE_PC,
RE_AARCH64_AUTH_GOT_PAGE_PC, R_GOT_PC, R_GOTPLT,
RE_AARCH64_GOT_PAGE, RE_LOONGARCH_GOT, RE_LOONGARCH_GOT_PAGE_PC>(
expr);
}

// True if this expression is of the form Sym - X, where X is a position in the
// file (PC, or GOT for example).
static bool isRelExpr(RelExpr expr) {
Expand Down Expand Up @@ -403,49 +371,6 @@ template <class ELFT> static void addCopyRelSymbol(Ctx &ctx, SharedSymbol &ss) {
//
// For sections other than .eh_frame, this class doesn't do anything.
namespace {
class OffsetGetter {
public:
OffsetGetter() = default;
explicit OffsetGetter(InputSectionBase &sec) {
if (auto *eh = dyn_cast<EhInputSection>(&sec)) {
cies = eh->cies;
fdes = eh->fdes;
i = cies.begin();
j = fdes.begin();
}
}

// Translates offsets in input sections to offsets in output sections.
// Given offset must increase monotonically. We assume that Piece is
// sorted by inputOff.
uint64_t get(Ctx &ctx, uint64_t off) {
if (cies.empty())
return off;

while (j != fdes.end() && j->inputOff <= off)
++j;
auto it = j;
if (j == fdes.begin() || j[-1].inputOff + j[-1].size <= off) {
while (i != cies.end() && i->inputOff <= off)
++i;
if (i == cies.begin() || i[-1].inputOff + i[-1].size <= off) {
Err(ctx) << ".eh_frame: relocation is not in any piece";
return 0;
}
it = i;
}

// Offset -1 means that the piece is dead (i.e. garbage collected).
if (it[-1].outputOff == -1)
return -1;
return it[-1].outputOff + (off - it[-1].inputOff);
}

private:
ArrayRef<EhSectionPiece> cies, fdes;
ArrayRef<EhSectionPiece>::iterator i, j;
};

// This class encapsulates states needed to scan relocations for one
// InputSectionBase.
class RelocationScanner {
Expand Down Expand Up @@ -1593,7 +1518,7 @@ void RelocationScanner::scanOne(typename Relocs<RelTy>::const_iterator &i) {
//
// Some RISCV TLSDESC relocations reference a local NOTYPE symbol,
// but we need to process them in handleTlsRelocation.
if (sym.isTls() || oneof<R_TLSDESC_PC, R_TLSDESC_CALL>(expr)) {
if (needsTls(sym, expr)) {
if (unsigned processed =
handleTlsRelocation(expr, type, offset, sym, addend)) {
i += processed - 1;
Expand Down
Loading