-
Notifications
You must be signed in to change notification settings - Fork 12.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LLD][COFF] Add support for ARM64EC auxiliary IAT #108304
Conversation
In addition to the regular IAT, ARM64EC also includes an auxiliary IAT. At runtime, the regular IAT is populated with the addresses of imported functions, which may be x86_64 functions or the export thunks of ARM64EC functions. The auxiliary IAT contains versions of functions that are guaranteed to be directly callable by ARM64 code. The linker fills the auxiliary IAT with the addresses of __impchk_ thunks. These thunks perform a call on the IAT address using __icall_helper_arm64ec with the target address from the IAT. If the imported function is an ARM64EC function, the OS may replace the address in the auxiliary IAT with the address of the ARM64EC version of the function (not its export thunk), avoiding the runtime call checker for better performance.
@llvm/pr-subscribers-lld @llvm/pr-subscribers-platform-windows Author: Jacek Caban (cjacek) Changes: In addition to the regular IAT, ARM64EC also includes an auxiliary IAT. At runtime, the regular IAT is populated with the addresses of imported functions, which may be x86_64 functions or the export thunks of ARM64EC functions. The auxiliary IAT contains versions of functions that are guaranteed to be directly callable by ARM64 code. The linker fills the auxiliary IAT with the addresses of __impchk_ thunks. Full diff: https://github.com/llvm/llvm-project/pull/108304.diff 11 Files Affected:
diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index 5f00eaded76d3a..b00a59433319a7 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -142,6 +142,30 @@ class NullChunk : public NonSectionChunk {
size_t size;
};
+// A chunk for ARM64EC auxiliary IAT.
+class AuxImportChunk : public NonSectionChunk {
+public:
+ explicit AuxImportChunk(ImportFile *file) : file(file) {
+ setAlignment(sizeof(uint64_t));
+ }
+ size_t getSize() const override { return sizeof(uint64_t); }
+
+ void writeTo(uint8_t *buf) const override {
+ uint64_t impchkVA = 0;
+ if (file->impchkThunk)
+ impchkVA = file->impchkThunk->getRVA() + file->ctx.config.imageBase;
+ write64le(buf, impchkVA);
+ }
+
+ void getBaserels(std::vector<Baserel> *res) override {
+ if (file->impchkThunk)
+ res->emplace_back(rva, file->ctx.config.machine);
+ }
+
+private:
+ ImportFile *file;
+};
+
static std::vector<std::vector<DefinedImportData *>>
binImports(COFFLinkerContext &ctx,
const std::vector<DefinedImportData *> &imports) {
@@ -160,7 +184,15 @@ binImports(COFFLinkerContext &ctx,
// Sort symbols by name for each group.
std::vector<DefinedImportData *> &syms = kv.second;
llvm::sort(syms, [](DefinedImportData *a, DefinedImportData *b) {
- return a->getName() < b->getName();
+ auto getBaseName = [](DefinedImportData *sym) {
+ StringRef name = sym->getName();
+ name.consume_front("__imp_");
+ // Skip aux_ part of ARM64EC function symbol name.
+ if (sym->file->impchkThunk)
+ name.consume_front("aux_");
+ return name;
+ };
+ return getBaseName(a) < getBaseName(b);
});
v.push_back(std::move(syms));
}
@@ -687,16 +719,24 @@ void IdataContents::create(COFFLinkerContext &ctx) {
if (s->getExternalName().empty()) {
lookups.push_back(make<OrdinalOnlyChunk>(ctx, ord));
addresses.push_back(make<OrdinalOnlyChunk>(ctx, ord));
- continue;
+ } else {
+ auto *c = make<HintNameChunk>(s->getExternalName(), ord);
+ lookups.push_back(make<LookupChunk>(ctx, c));
+ addresses.push_back(make<LookupChunk>(ctx, c));
+ hints.push_back(c);
+ }
+
+ if (s->file->impECSym) {
+ auto chunk = make<AuxImportChunk>(s->file);
+ auxIat.push_back(chunk);
+ s->file->impECSym->setLocation(chunk);
}
- auto *c = make<HintNameChunk>(s->getExternalName(), ord);
- lookups.push_back(make<LookupChunk>(ctx, c));
- addresses.push_back(make<LookupChunk>(ctx, c));
- hints.push_back(c);
}
// Terminate with null values.
lookups.push_back(make<NullChunk>(ctx.config.wordsize));
addresses.push_back(make<NullChunk>(ctx.config.wordsize));
+ if (ctx.config.machine == ARM64EC)
+ auxIat.push_back(make<NullChunk>(ctx.config.wordsize));
for (int i = 0, e = syms.size(); i < e; ++i)
syms[i]->setLocation(addresses[base + i]);
diff --git a/lld/COFF/DLL.h b/lld/COFF/DLL.h
index 7cf71f59d7c7d7..48b0f17ca62aeb 100644
--- a/lld/COFF/DLL.h
+++ b/lld/COFF/DLL.h
@@ -31,6 +31,7 @@ class IdataContents {
std::vector<Chunk *> addresses;
std::vector<Chunk *> hints;
std::vector<Chunk *> dllNames;
+ std::vector<Chunk *> auxIat;
};
// Windows-specific.
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index a1fe6444991a36..9994639d5d364a 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -2447,6 +2447,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
ctx.symtab.addAbsolute("__arm64x_extra_rfe_table_size", 0);
ctx.symtab.addAbsolute("__arm64x_redirection_metadata", 0);
ctx.symtab.addAbsolute("__arm64x_redirection_metadata_count", 0);
+ ctx.symtab.addAbsolute("__hybrid_auxiliary_iat", 0);
ctx.symtab.addAbsolute("__hybrid_code_map", 0);
ctx.symtab.addAbsolute("__hybrid_code_map_count", 0);
ctx.symtab.addAbsolute("__x64_code_ranges_to_entry_points", 0);
diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
index 3dbdf8fe3920dc..569220468e96ad 100644
--- a/lld/COFF/InputFiles.cpp
+++ b/lld/COFF/InputFiles.cpp
@@ -1071,19 +1071,39 @@ void ImportFile::parse() {
this->hdr = hdr;
externalName = extName;
- impSym = ctx.symtab.addImportData(impName, this);
+ bool isCode = hdr->getType() == llvm::COFF::IMPORT_CODE;
+
+ if (ctx.config.machine != ARM64EC) {
+ impSym = ctx.symtab.addImportData(impName, this, location);
+ } else {
+ // In addition to the regular IAT, ARM64EC also contains an auxiliary IAT,
+ // which holds addresses that are guaranteed to be callable directly from
+ // ARM64 code. Function symbol naming is swapped: __imp_ symbols refer to
+ // the auxiliary IAT, while __imp_aux_ symbols refer to the regular IAT. For
+ // data imports, the naming is reversed.
+ StringRef auxImpName = saver().save("__imp_aux_" + name);
+ if (isCode) {
+ impSym = ctx.symtab.addImportData(auxImpName, this, location);
+ impECSym = ctx.symtab.addImportData(impName, this, auxLocation);
+ } else {
+ impSym = ctx.symtab.addImportData(impName, this, location);
+ impECSym = ctx.symtab.addImportData(auxImpName, this, auxLocation);
+ }
+ if (!impECSym)
+ return;
+ }
// If this was a duplicate, we logged an error but may continue;
// in this case, impSym is nullptr.
if (!impSym)
return;
if (hdr->getType() == llvm::COFF::IMPORT_CONST)
- static_cast<void>(ctx.symtab.addImportData(name, this));
+ static_cast<void>(ctx.symtab.addImportData(name, this, location));
// If type is function, we need to create a thunk which jump to an
// address pointed by the __imp_ symbol. (This allows you to call
// DLL functions just like regular non-DLL functions.)
- if (hdr->getType() == llvm::COFF::IMPORT_CODE) {
+ if (isCode) {
if (ctx.config.machine != ARM64EC) {
thunkSym = ctx.symtab.addImportThunk(name, impSym, makeImportThunk());
} else {
diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h
index 3b837017e1c21b..8140a031f71166 100644
--- a/lld/COFF/InputFiles.h
+++ b/lld/COFF/InputFiles.h
@@ -362,6 +362,10 @@ class ImportFile : public InputFile {
const coff_import_header *hdr;
Chunk *location = nullptr;
+ // Auxiliary IAT symbol and chunk on ARM64EC.
+ DefinedImportData *impECSym = nullptr;
+ Chunk *auxLocation = nullptr;
+
// We want to eliminate dllimported symbols if no one actually refers to them.
// These "Live" bits are used to keep track of which import library members
// are actually in use.
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index a6575ecac3bb44..582a856213cbc8 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -584,7 +584,7 @@ void SymbolTable::initializeECThunks() {
Symbol *sym = exitThunks.lookup(file->thunkSym);
if (!sym)
- sym = exitThunks.lookup(file->impSym);
+ sym = exitThunks.lookup(file->impECSym);
file->impchkThunk->exitThunk = dyn_cast_or_null<Defined>(sym);
}
}
@@ -785,11 +785,12 @@ Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size,
return s;
}
-DefinedImportData *SymbolTable::addImportData(StringRef n, ImportFile *f) {
+DefinedImportData *SymbolTable::addImportData(StringRef n, ImportFile *f,
+ Chunk *&location) {
auto [s, wasInserted] = insert(n, nullptr);
s->isUsedInRegularObj = true;
if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
- replaceSymbol<DefinedImportData>(s, n, f);
+ replaceSymbol<DefinedImportData>(s, n, f, location);
return cast<DefinedImportData>(s);
}
diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h
index 13e151e3a8c501..bf97cf442039e0 100644
--- a/lld/COFF/SymbolTable.h
+++ b/lld/COFF/SymbolTable.h
@@ -103,7 +103,8 @@ class SymbolTable {
Symbol *addCommon(InputFile *f, StringRef n, uint64_t size,
const llvm::object::coff_symbol_generic *s = nullptr,
CommonChunk *c = nullptr);
- DefinedImportData *addImportData(StringRef n, ImportFile *f);
+ DefinedImportData *addImportData(StringRef n, ImportFile *f,
+ Chunk *&location);
Symbol *addImportThunk(StringRef name, DefinedImportData *s,
ImportThunkChunk *chunk);
void addLibcall(StringRef name);
diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h
index 724330e4bab958..2df60a01ec813d 100644
--- a/lld/COFF/Symbols.h
+++ b/lld/COFF/Symbols.h
@@ -354,23 +354,23 @@ class Undefined : public Symbol {
// table in an output. The former has "__imp_" prefix.
class DefinedImportData : public Defined {
public:
- DefinedImportData(StringRef n, ImportFile *f)
- : Defined(DefinedImportDataKind, n), file(f) {
- }
+ DefinedImportData(StringRef n, ImportFile *file, Chunk *&location)
+ : Defined(DefinedImportDataKind, n), file(file), location(location) {}
static bool classof(const Symbol *s) {
return s->kind() == DefinedImportDataKind;
}
- uint64_t getRVA() { return file->location->getRVA(); }
- Chunk *getChunk() { return file->location; }
- void setLocation(Chunk *addressTable) { file->location = addressTable; }
+ uint64_t getRVA() { return getChunk()->getRVA(); }
+ Chunk *getChunk() { return location; }
+ void setLocation(Chunk *addressTable) { location = addressTable; }
StringRef getDLLName() { return file->dllName; }
StringRef getExternalName() { return file->externalName; }
uint16_t getOrdinal() { return file->hdr->OrdinalHint; }
ImportFile *file;
+ Chunk *&location;
// This is a pointer to the synthetic symbol associated with the load thunk
// for this symbol that will be called if the DLL is delay-loaded. This is
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index b589a16bca32a3..9a8040008e73ca 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -914,6 +914,8 @@ void Writer::addSyntheticIdata() {
if (!idata.hints.empty())
add(".idata$6", idata.hints);
add(".idata$7", idata.dllNames);
+ if (!idata.auxIat.empty())
+ add(".idata$9", idata.auxIat);
}
void Writer::appendECImportTables() {
@@ -936,6 +938,15 @@ void Writer::appendECImportTables() {
rdataSec->contribSections.insert(rdataSec->contribSections.begin(),
importAddresses);
}
+
+ // The auxiliary IAT is always placed at the end of the .rdata section
+ // and is aligned to 4KB.
+ if (PartialSection *auxIat = findPartialSection(".idata$9", rdata)) {
+ auxIat->chunks.front()->setAlignment(0x1000);
+ rdataSec->chunks.insert(rdataSec->chunks.end(), auxIat->chunks.begin(),
+ auxIat->chunks.end());
+ rdataSec->addContributingPartialSection(auxIat);
+ }
}
// Locate the first Chunk and size of the import directory list and the
@@ -1095,7 +1106,8 @@ void Writer::createSections() {
// ARM64EC has specific placement and alignment requirements for the IAT.
// Delay adding its chunks until appendECImportTables.
- if (isArm64EC(ctx.config.machine) && pSec->name == ".idata$5")
+ if (isArm64EC(ctx.config.machine) &&
+ (pSec->name == ".idata$5" || pSec->name == ".idata$9"))
continue;
OutputSection *sec = createSection(name, outChars);
@@ -2254,6 +2266,11 @@ void Writer::setECSymbols() {
Symbol *entryPointCountSym =
ctx.symtab.findUnderscore("__arm64x_redirection_metadata_count");
cast<DefinedAbsolute>(entryPointCountSym)->setVA(exportThunks.size());
+
+ Symbol *iatSym = ctx.symtab.findUnderscore("__hybrid_auxiliary_iat");
+ replaceSymbol<DefinedSynthetic>(iatSym, "__hybrid_auxiliary_iat",
+ idata.auxIat.empty() ? nullptr
+ : idata.auxIat.front());
}
// Write section contents to a mmap'ed file.
diff --git a/lld/test/COFF/Inputs/loadconfig-arm64ec.s b/lld/test/COFF/Inputs/loadconfig-arm64ec.s
index 75dc6105301d00..8d59d29bbd0212 100644
--- a/lld/test/COFF/Inputs/loadconfig-arm64ec.s
+++ b/lld/test/COFF/Inputs/loadconfig-arm64ec.s
@@ -76,7 +76,7 @@ __chpe_metadata:
.rva __os_arm64x_check_icall
.rva __os_arm64x_check_icall_cfg
.word 0 // __arm64x_native_entrypoint
- .word 0 // __hybrid_auxiliary_iat
+ .rva __hybrid_auxiliary_iat
.word __x64_code_ranges_to_entry_points_count
.word __arm64x_redirection_metadata_count
.rva __os_arm64x_get_x64_information
diff --git a/lld/test/COFF/arm64ec-import.test b/lld/test/COFF/arm64ec-import.test
index 44a84c09e11a36..f8279cefc3bcfb 100644
--- a/lld/test/COFF/arm64ec-import.test
+++ b/lld/test/COFF/arm64ec-import.test
@@ -63,14 +63,37 @@ DISASM-NEXT: 180002000: ff 25 02 10 00 00 jmpq *0x1002(%rip)
RUN: llvm-readobj --hex-dump=.test out.dll | FileCheck --check-prefix=TESTSEC %s
RUN: llvm-readobj --hex-dump=.test out2.dll | FileCheck --check-prefix=TESTSEC %s
-TESTSEC: 0x180006000 08300000 00300000 10300000 20300000
-TESTSEC-NEXT: 0x180006010 08100000 1c100000 00200000
+TESTSEC: 0x180007000 08500000 00300000 10500000 20500000
+TESTSEC-NEXT: 0x180007010 08300000 00500000 10300000 20300000
+TESTSEC-NEXT: 0x180007020 08100000 1c100000 00200000
RUN: llvm-readobj --headers out.dll | FileCheck -check-prefix=HEADERS %s
HEADERS: LoadConfigTableRVA: 0x4010
HEADERS: IATRVA: 0x3000
HEADERS: IATSize: 0x1000
+RUN: llvm-readobj --coff-load-config out.dll | FileCheck -check-prefix=LOADCONFIG %s
+LOADCONFIG: AuxiliaryIAT: 0x5000
+
+RUN: llvm-readobj --hex-dump=.rdata out.dll | FileCheck -check-prefix=RDATA %s
+RDATA: 0x180005000 00000000 00000000 08100080 01000000
+RDATA-NEXT: 0x180005010 1c100080 01000000 00000000 00000000
+RDATA-NEXT: 0x180005020 30100080 01000000 00000000 00000000
+
+RUN: llvm-readobj --coff-basereloc out.dll | FileCheck -check-prefix=BASERELOC %s
+BASERELOC: BaseReloc [
+BASERELOC-NOT: Address: 0x5000
+BASERELOC: Address: 0x5008
+BASERELOC-NEXT: }
+BASERELOC-NEXT: Entry {
+BASERELOC-NEXT: Type: DIR64
+BASERELOC-NEXT: Address: 0x5010
+BASERELOC-NEXT: }
+BASERELOC-NEXT: Entry {
+BASERELOC-NEXT: Type: DIR64
+BASERELOC-NEXT: Address: 0x5020
+BASERELOC-NEXT: }
+
#--- test.s
.section .test, "r"
.globl arm64ec_data_sym
@@ -80,6 +103,10 @@ arm64ec_data_sym:
.rva __imp_data
.rva __imp_func2
.rva __imp_t2func
+ .rva __imp_aux_func
+ .rva __imp_aux_data
+ .rva __imp_aux_func2
+ .rva __imp_aux_t2func
.rva __impchk_func
.rva __impchk_func2
.rva func
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, looks good in general, mostly a couple of discussion points around this.
void writeTo(uint8_t *buf) const override { | ||
uint64_t impchkVA = 0; | ||
if (file->impchkThunk) | ||
impchkVA = file->impchkThunk->getRVA() + file->ctx.config.imageBase; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, so these aux IAT entries contain a full absolute address when on disk? That's unusual, but I guess it doesn't matter (wrt requiring runtime base relocations or not) as the loader needs to fill them in, in any case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, my guess is that it was just more convenient to use regular base relocations.
// which holds addresses that are guaranteed to be callable directly from | ||
// ARM64 code. Function symbol naming is swapped: __imp_ symbols refer to | ||
// the auxiliary IAT, while __imp_aux_ symbols refer to the regular IAT. For | ||
// data imports, the naming is reversed. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wow, this bit is highly surprising!
As there is such a stark difference, I guess it's also quite important that the distinction between what's data and what's code is made in quite exactly the same way as MS link, not only roughly the same way. But I guess as the short import library does contain a declaration of the type, it's not something we need to guess somehow.
As many mingw-w64 import libraries mark functions as "DATA", for cases where we don't want to call them (but still retain them in the import libraries, in the __imp_func form), does this have any practical implication? Not much I guess, as we generally don't expect those functions to be called. (And I think recent cleanups there have moved away from marking functions as DATA.) I guess the implication is that if you'd call a DATA-marked function via the __imp_func symbol, you'll get the x64 thunks, while if you'd call it via __imp_func for a function that isn't marked DATA, you'd get the native arm64ec version?
How does this work with x86_64 code in an arm64ec module? When x86_64 code calls a dllimport function, it'd load the address from __imp_func, won't that be the auxiliary IAT, which contains a native arm64ec version?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The way mingw-w64 handles DATA in msvcrt .def files should mostly work fine. Using _fpreset as an example: __imp__fpreset would point to msvcrt's ARM64EC export thunk, which cannot be called directly. However, since we don’t declare it as dllimport, _fpreset from the static library will be called like any other non-imported function. The _fpreset implementation looks like this:
void _fpreset (void)
{
(* __MINGW_IMP_SYMBOL(_fpreset))();
}
Here, the __imp_ symbol is treated as a pointer from the C compiler's perspective. On ARM64EC, any call via a pointer needs to pass through the call checker, so it's fine that __imp__fpreset points to the thunk. The call checker invoked directly by the _fpreset code will handle this.
However, there are some edge cases where this approach might cause problems. For example, if a caller defines _fpreset with the dllimport attribute, it would crash on ARM64EC, whereas it wouldn't on other platforms. This is a limitation of ARM64EC that we can't easily resolve (although, in this case, we could likely modify _fpreset to avoid using DATA, but there are other situations where that won’t be feasible).
mingw-w64-crt employs various tricks with __imp_ symbols quite extensively. Most of this should work fine, but we should review these cases to ensure compatibility.
On a related note, we will likely need to consider defining __imp_aux_ in mingw-w64-crt as well. I created a Wine MR with an example of how this might be done, but it will be more complex for mingw-w64, where we use these symbols in more creative ways.
How does this work with x86_64 code in an arm64ec module? When x86_64 code calls a dllimport function, it'd load the address from __imp_func, won't that be the auxiliary IAT, which contains a native arm64ec version?
According to my testing, the MSVC linker special-cases these symbols and redirects them to the __imp_aux_ symbol. I have a draft of its implementation queued here.
@@ -362,6 +362,10 @@ class ImportFile : public InputFile { | |||
const coff_import_header *hdr; | |||
Chunk *location = nullptr; | |||
|
|||
// Auxiliary IAT symbol and chunk on ARM64EC. | |||
DefinedImportData *impECSym = nullptr; | |||
Chunk *auxLocation = nullptr; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was about to say that I don't see anything that sets auxLocation within this patch, but I see that we're passing it by reference to DefinedImportData, so I guess it gets set that way?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it's filled with setLocation in IdataContents::create, like other DefinedImportData symbols.
In addition to the regular IAT, ARM64EC also includes an auxiliary IAT. At runtime, the regular IAT is populated with the addresses of imported functions, which may be x86_64 functions or the export thunks of ARM64EC functions. The auxiliary IAT contains versions of functions that are guaranteed to be directly callable by ARM64 code.
The linker fills the auxiliary IAT with the addresses of __impchk_ thunks. These thunks perform a call on the IAT address using __icall_helper_arm64ec with the target address from the IAT. If the imported function is an ARM64EC function, the OS may replace the address in the auxiliary IAT with the address of the ARM64EC version of the function (not its export thunk), avoiding the runtime call checker for better performance.