Skip to content

Commit d8283d9

Browse files
committed
[lld-macho][nfc] Give every SyntheticSection a fake InputSection
Previously, it was difficult to write code that handled both synthetic and regular sections generically. We solve this problem by creating a fake InputSection at the start of every SyntheticSection.

This refactor allows us to handle DSOHandle like a regular Defined symbol (since Defined symbols must be attached to an InputSection), and paves the way for supporting `__mh_*header` symbols. Additionally, it simplifies our binding/rebase code.

I did have to extend Defined a little -- it now has a `linkerInternal` flag, to indicate that `___dso_handle` should not be in the final symbol table.

I've also added some additional testing for `___dso_handle`.

Reviewed By: #lld-macho, oontvoo

Differential Revision: https://reviews.llvm.org/D98545
1 parent dc8bee9 commit d8283d9

File tree

9 files changed

+94
-131
lines changed

9 files changed

+94
-131
lines changed

lld/MachO/Driver.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1031,7 +1031,14 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
10311031
}
10321032

10331033
createSyntheticSections();
1034-
symtab->addDSOHandle(in.header);
1034+
1035+
// The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit
1036+
// which does e.g. cleanup of static global variables. The ABI document says
1037+
// that the pointer can point to any address in one of the dylib's segments,
1038+
// but in practice ld64 seems to set it to point to the header, so that's
1039+
// what's implemented here.
1040+
symtab->addSynthetic("___dso_handle", in.header->isec, 0,
1041+
/*privateExtern=*/true, /*linkerInternal=*/true);
10351042

10361043
for (const Arg *arg : args.filtered(OPT_sectcreate)) {
10371044
StringRef segName = arg->getValue(0);

lld/MachO/SymbolTable.cpp

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,9 @@ std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
3737
return {sym, true};
3838
}
3939

40-
Symbol *SymbolTable::addDefined(StringRef name, InputFile *file,
41-
InputSection *isec, uint32_t value,
42-
bool isWeakDef, bool isPrivateExtern) {
40+
Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
41+
InputSection *isec, uint32_t value,
42+
bool isWeakDef, bool isPrivateExtern) {
4343
Symbol *s;
4444
bool wasInserted;
4545
bool overridesWeakDef = false;
@@ -52,7 +52,7 @@ Symbol *SymbolTable::addDefined(StringRef name, InputFile *file,
5252
// If one of them isn't private extern, the merged symbol isn't.
5353
if (defined->isWeakDef())
5454
defined->privateExtern &= isPrivateExtern;
55-
return s;
55+
return defined;
5656
}
5757
if (!defined->isWeakDef()) {
5858
error("duplicate symbol: " + name + "\n>>> defined in " +
@@ -70,7 +70,7 @@ Symbol *SymbolTable::addDefined(StringRef name, InputFile *file,
7070
replaceSymbol<Defined>(s, name, file, isec, value, isWeakDef,
7171
/*isExternal=*/true, isPrivateExtern);
7272
defined->overridesWeakDef = overridesWeakDef;
73-
return s;
73+
return defined;
7474
}
7575

7676
Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file,
@@ -158,18 +158,12 @@ Symbol *SymbolTable::addLazy(StringRef name, ArchiveFile *file,
158158
return s;
159159
}
160160

161-
Symbol *SymbolTable::addDSOHandle(const MachHeaderSection *header) {
162-
Symbol *s;
163-
bool wasInserted;
164-
std::tie(s, wasInserted) = insert(DSOHandle::name);
165-
if (!wasInserted) {
166-
// FIXME: Make every symbol (including absolute symbols) contain a
167-
// reference to their originating file, then add that file name to this
168-
// error message. dynamic_lookup symbols don't have an originating file.
169-
if (isa<Defined>(s))
170-
error("found defined symbol with illegal name " + DSOHandle::name);
171-
}
172-
replaceSymbol<DSOHandle>(s, header);
161+
Defined *SymbolTable::addSynthetic(StringRef name, InputSection *isec,
162+
uint32_t value, bool isPrivateExtern,
163+
bool isLinkerInternal) {
164+
Defined *s = addDefined(name, nullptr, isec, value, /*isWeakDef=*/false,
165+
isPrivateExtern);
166+
s->linkerInternal = isLinkerInternal;
173167
return s;
174168
}
175169

lld/MachO/SymbolTable.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class ObjFile;
2424
class InputSection;
2525
class MachHeaderSection;
2626
class Symbol;
27+
class Defined;
2728
class Undefined;
2829

2930
/*
@@ -34,8 +35,8 @@ class Undefined;
3435
*/
3536
class SymbolTable {
3637
public:
37-
Symbol *addDefined(StringRef name, InputFile *, InputSection *,
38-
uint32_t value, bool isWeakDef, bool isPrivateExtern);
38+
Defined *addDefined(StringRef name, InputFile *, InputSection *,
39+
uint32_t value, bool isWeakDef, bool isPrivateExtern);
3940

4041
Symbol *addUndefined(StringRef name, InputFile *, bool isWeakRef);
4142

@@ -48,7 +49,8 @@ class SymbolTable {
4849
Symbol *addLazy(StringRef name, ArchiveFile *file,
4950
const llvm::object::Archive::Symbol &sym);
5051

51-
Symbol *addDSOHandle(const MachHeaderSection *);
52+
Defined *addSynthetic(StringRef name, InputSection *, uint32_t value,
53+
bool isPrivateExtern, bool isLinkerInternal);
5254

5355
ArrayRef<Symbol *> getSymbols() const { return symVector; }
5456
Symbol *find(StringRef name);

lld/MachO/Symbols.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,3 @@ uint64_t Defined::getFileOffset() const {
4545
}
4646

4747
void LazySymbol::fetchArchiveMember() { getFile()->fetch(sym); }
48-
49-
uint64_t DSOHandle::getVA() const { return header->addr; }
50-
51-
uint64_t DSOHandle::getFileOffset() const { return header->fileOff; }
52-
53-
constexpr StringRef DSOHandle::name;

lld/MachO/Symbols.h

Lines changed: 4 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ class Symbol {
3939
CommonKind,
4040
DylibKind,
4141
LazyKind,
42-
DSOHandleKind,
4342
};
4443

4544
virtual ~Symbol() {}
@@ -100,7 +99,7 @@ class Defined : public Symbol {
10099
bool isWeakDef, bool isExternal, bool isPrivateExtern)
101100
: Symbol(DefinedKind, name, file), isec(isec), value(value),
102101
overridesWeakDef(false), privateExtern(isPrivateExtern),
103-
weakDef(isWeakDef), external(isExternal) {}
102+
linkerInternal(false), weakDef(isWeakDef), external(isExternal) {}
104103

105104
bool isWeakDef() const override { return weakDef; }
106105
bool isExternalWeakDef() const {
@@ -123,7 +122,10 @@ class Defined : public Symbol {
123122
uint32_t value;
124123

125124
bool overridesWeakDef : 1;
125+
// Whether this symbol should appear in the output binary's export trie.
126126
bool privateExtern : 1;
127+
// Whether this symbol is internal to the linker and should therefore be omitted from the output binary's symbol table.
128+
bool linkerInternal : 1;
127129

128130
private:
129131
const bool weakDef : 1;
@@ -228,44 +230,12 @@ class LazySymbol : public Symbol {
228230
const llvm::object::Archive::Symbol sym;
229231
};
230232

231-
// The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit which
232-
// does e.g. cleanup of static global variables. The ABI document says that the
233-
// pointer can point to any address in one of the dylib's segments, but in
234-
// practice ld64 seems to set it to point to the header, so that's what's
235-
// implemented here.
236-
//
237-
// The ARM C++ ABI uses __dso_handle similarly, but I (int3) have not yet
238-
// tested this on an ARM platform.
239-
//
240-
// DSOHandle effectively functions like a Defined symbol, but it doesn't belong
241-
// to an InputSection.
242-
class DSOHandle : public Symbol {
243-
public:
244-
DSOHandle(const MachHeaderSection *header)
245-
: Symbol(DSOHandleKind, name, nullptr), header(header) {}
246-
247-
const MachHeaderSection *header;
248-
249-
uint64_t getVA() const override;
250-
251-
uint64_t getFileOffset() const override;
252-
253-
bool isWeakDef() const override { return false; }
254-
255-
bool isTlv() const override { return false; }
256-
257-
static constexpr StringRef name = "___dso_handle";
258-
259-
static bool classof(const Symbol *s) { return s->kind() == DSOHandleKind; }
260-
};
261-
262233
union SymbolUnion {
263234
alignas(Defined) char a[sizeof(Defined)];
264235
alignas(Undefined) char b[sizeof(Undefined)];
265236
alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
266237
alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
267238
alignas(LazySymbol) char e[sizeof(LazySymbol)];
268-
alignas(DSOHandle) char f[sizeof(DSOHandle)];
269239
};
270240

271241
template <typename T, typename... ArgT>

lld/MachO/SyntheticSections.cpp

Lines changed: 32 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ std::vector<SyntheticSection *> macho::syntheticSections;
4242

4343
SyntheticSection::SyntheticSection(const char *segname, const char *name)
4444
: OutputSection(SyntheticKind, name), segname(segname) {
45+
isec = make<InputSection>();
46+
isec->segname = segname;
47+
isec->name = name;
48+
isec->parent = this;
49+
isec->outSecOff = 0;
4550
syntheticSections.push_back(this);
4651
}
4752

@@ -118,12 +123,6 @@ void MachHeaderSection::writeTo(uint8_t *buf) const {
118123
PageZeroSection::PageZeroSection()
119124
: SyntheticSection(segment_names::pageZero, section_names::pageZero) {}
120125

121-
uint64_t Location::getVA() const {
122-
if (const auto *isec = section.dyn_cast<const InputSection *>())
123-
return isec->getVA() + offset;
124-
return section.get<const OutputSection *>()->addr + offset;
125-
}
126-
127126
RebaseSection::RebaseSection()
128127
: LinkEditSection(segment_names::linkEdit, section_names::rebase) {}
129128

@@ -186,16 +185,11 @@ void RebaseSection::finalizeContents() {
186185
os << static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER);
187186

188187
llvm::sort(locations, [](const Location &a, const Location &b) {
189-
return a.getVA() < b.getVA();
188+
return a.isec->getVA() < b.isec->getVA();
190189
});
191-
for (const Location &loc : locations) {
192-
if (const auto *isec = loc.section.dyn_cast<const InputSection *>()) {
193-
encodeRebase(isec->parent, isec->outSecOff + loc.offset, lastRebase, os);
194-
} else {
195-
const auto *osec = loc.section.get<const OutputSection *>();
196-
encodeRebase(osec, loc.offset, lastRebase, os);
197-
}
198-
}
190+
for (const Location &loc : locations)
191+
encodeRebase(loc.isec->parent, loc.isec->outSecOff + loc.offset, lastRebase,
192+
os);
199193
if (lastRebase.consecutiveCount != 0)
200194
encodeDoRebase(lastRebase, os);
201195

@@ -218,7 +212,7 @@ void NonLazyPointerSectionBase::addEntry(Symbol *sym) {
218212
assert(!sym->isInGot());
219213
sym->gotIndex = entries.size() - 1;
220214

221-
addNonLazyBindingEntries(sym, this, sym->gotIndex * WordSize);
215+
addNonLazyBindingEntries(sym, isec, sym->gotIndex * WordSize);
222216
}
223217
}
224218

@@ -336,14 +330,9 @@ void BindingSection::finalizeContents() {
336330
encodeDylibOrdinal(ordinal, os);
337331
lastBinding.ordinal = ordinal;
338332
}
339-
if (auto *isec = b.target.section.dyn_cast<const InputSection *>()) {
340-
encodeBinding(b.dysym, isec->parent, isec->outSecOff + b.target.offset,
341-
b.addend, /*isWeakBinding=*/false, lastBinding, os);
342-
} else {
343-
auto *osec = b.target.section.get<const OutputSection *>();
344-
encodeBinding(b.dysym, osec, b.target.offset, b.addend,
345-
/*isWeakBinding=*/false, lastBinding, os);
346-
}
333+
encodeBinding(b.dysym, b.target.isec->parent,
334+
b.target.isec->outSecOff + b.target.offset, b.addend,
335+
/*isWeakBinding=*/false, lastBinding, os);
347336
}
348337
if (!bindings.empty())
349338
os << static_cast<uint8_t>(BIND_OPCODE_DONE);
@@ -369,16 +358,10 @@ void WeakBindingSection::finalizeContents() {
369358
[](const WeakBindingEntry &a, const WeakBindingEntry &b) {
370359
return a.target.getVA() < b.target.getVA();
371360
});
372-
for (const WeakBindingEntry &b : bindings) {
373-
if (const auto *isec = b.target.section.dyn_cast<const InputSection *>()) {
374-
encodeBinding(b.symbol, isec->parent, isec->outSecOff + b.target.offset,
375-
b.addend, /*isWeakBinding=*/true, lastBinding, os);
376-
} else {
377-
const auto *osec = b.target.section.get<const OutputSection *>();
378-
encodeBinding(b.symbol, osec, b.target.offset, b.addend,
379-
/*isWeakBinding=*/true, lastBinding, os);
380-
}
381-
}
361+
for (const WeakBindingEntry &b : bindings)
362+
encodeBinding(b.symbol, b.target.isec->parent,
363+
b.target.isec->outSecOff + b.target.offset, b.addend,
364+
/*isWeakBinding=*/true, lastBinding, os);
382365
if (!bindings.empty() || !definitions.empty())
383366
os << static_cast<uint8_t>(BIND_OPCODE_DONE);
384367
}
@@ -396,23 +379,21 @@ bool macho::needsBinding(const Symbol *sym) {
396379
}
397380

398381
void macho::addNonLazyBindingEntries(const Symbol *sym,
399-
SectionPointerUnion section,
400-
uint64_t offset, int64_t addend) {
382+
const InputSection *isec, uint64_t offset,
383+
int64_t addend) {
401384
if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
402-
in.binding->addEntry(dysym, section, offset, addend);
385+
in.binding->addEntry(dysym, isec, offset, addend);
403386
if (dysym->isWeakDef())
404-
in.weakBinding->addEntry(sym, section, offset, addend);
387+
in.weakBinding->addEntry(sym, isec, offset, addend);
405388
} else if (auto *defined = dyn_cast<Defined>(sym)) {
406-
in.rebase->addEntry(section, offset);
389+
in.rebase->addEntry(isec, offset);
407390
if (defined->isExternalWeakDef())
408-
in.weakBinding->addEntry(sym, section, offset, addend);
409-
} else if (!isa<DSOHandle>(sym)) {
391+
in.weakBinding->addEntry(sym, isec, offset, addend);
392+
} else {
410393
// Undefined symbols are filtered out in scanRelocations(); we should never
411394
// get here
412395
llvm_unreachable("cannot bind to an undefined symbol");
413396
}
414-
// TODO: understand the DSOHandle case better.
415-
// Is it bindable? Add a new test?
416397
}
417398

418399
StubsSection::StubsSection()
@@ -538,7 +519,7 @@ void LazyBindingSection::writeTo(uint8_t *buf) const {
538519
void LazyBindingSection::addEntry(DylibSymbol *dysym) {
539520
if (entries.insert(dysym)) {
540521
dysym->stubsHelperIndex = entries.size() - 1;
541-
in.rebase->addEntry(in.lazyPointers, dysym->stubsIndex * WordSize);
522+
in.rebase->addEntry(in.lazyPointers->isec, dysym->stubsIndex * WordSize);
542523
}
543524
}
544525

@@ -572,9 +553,9 @@ void macho::prepareBranchTarget(Symbol *sym) {
572553
if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
573554
if (in.stubs->addEntry(dysym)) {
574555
if (sym->isWeakDef()) {
575-
in.binding->addEntry(dysym, in.lazyPointers,
556+
in.binding->addEntry(dysym, in.lazyPointers->isec,
576557
sym->stubsIndex * WordSize);
577-
in.weakBinding->addEntry(sym, in.lazyPointers,
558+
in.weakBinding->addEntry(sym, in.lazyPointers->isec,
578559
sym->stubsIndex * WordSize);
579560
} else {
580561
in.lazyBinding->addEntry(dysym);
@@ -583,8 +564,8 @@ void macho::prepareBranchTarget(Symbol *sym) {
583564
} else if (auto *defined = dyn_cast<Defined>(sym)) {
584565
if (defined->isExternalWeakDef()) {
585566
if (in.stubs->addEntry(sym)) {
586-
in.rebase->addEntry(in.lazyPointers, sym->stubsIndex * WordSize);
587-
in.weakBinding->addEntry(sym, in.lazyPointers,
567+
in.rebase->addEntry(in.lazyPointers->isec, sym->stubsIndex * WordSize);
568+
in.weakBinding->addEntry(sym, in.lazyPointers->isec,
588569
sym->stubsIndex * WordSize);
589570
}
590571
}
@@ -786,9 +767,10 @@ void SymtabSection::finalizeContents() {
786767

787768
for (Symbol *sym : symtab->getSymbols()) {
788769
if (auto *defined = dyn_cast<Defined>(sym)) {
770+
if (defined->linkerInternal)
771+
continue;
789772
assert(defined->isExternal());
790-
(void)defined;
791-
addSymbol(externalSymbols, sym);
773+
addSymbol(externalSymbols, defined);
792774
} else if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
793775
if (dysym->isReferenced())
794776
addSymbol(undefinedSymbols, sym);

0 commit comments

Comments
 (0)