Skip to content

[LLD][COFF] Add support for ARM64EC import call thunks with extended range #109703

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion lld/COFF/Chunks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1100,6 +1100,13 @@ void CHPERedirectionChunk::writeTo(uint8_t *buf) const {
ImportThunkChunkARM64EC::ImportThunkChunkARM64EC(ImportFile *file)
: ImportThunkChunk(file->ctx, file->impSym), file(file) {}

size_t ImportThunkChunkARM64EC::getSize() const {
if (!extended)
return sizeof(importThunkARM64EC);
// The last instruction is replaced with an inline range extension thunk.
return sizeof(importThunkARM64EC) + sizeof(arm64Thunk) - sizeof(uint32_t);
}

void ImportThunkChunkARM64EC::writeTo(uint8_t *buf) const {
memcpy(buf, importThunkARM64EC, sizeof(importThunkARM64EC));
applyArm64Addr(buf, file->impSym->getRVA(), rva, 12);
Expand All @@ -1116,7 +1123,29 @@ void ImportThunkChunkARM64EC::writeTo(uint8_t *buf) const {
applyArm64Imm(buf + 12, exitThunkRVA & 0xfff, 0);

Defined *helper = cast<Defined>(file->ctx.config.arm64ECIcallHelper);
applyArm64Branch26(buf + 16, helper->getRVA() - rva - 16);
if (extended) {
// Replace last instruction with an inline range extension thunk.
memcpy(buf + 16, arm64Thunk, sizeof(arm64Thunk));
applyArm64Addr(buf + 16, helper->getRVA(), rva + 16, 12);
applyArm64Imm(buf + 20, helper->getRVA() & 0xfff, 0);
} else {
applyArm64Branch26(buf + 16, helper->getRVA() - rva - 16);
}
}

bool ImportThunkChunkARM64EC::verifyRanges() {
if (extended)
return true;
auto helper = cast<Defined>(file->ctx.config.arm64ECIcallHelper);
return isInt<28>(helper->getRVA() - rva - 16);
}

uint32_t ImportThunkChunkARM64EC::extendRanges() {
if (extended || verifyRanges())
return 0;
extended = true;
// The last instruction is replaced with an inline range extension thunk.
return sizeof(arm64Thunk) - sizeof(uint32_t);
}

} // namespace lld::coff
12 changes: 11 additions & 1 deletion lld/COFF/Chunks.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,13 @@ class NonSectionChunk : public Chunk {
// bytes, so this is used only for logging or debugging.
virtual StringRef getDebugName() const { return ""; }

// Verify that chunk relocations are within their ranges.
virtual bool verifyRanges() { return true; };

// If needed, extend the chunk to ensure all relocations are within the
// allowed ranges. Return the additional space required for the extension.
virtual uint32_t extendRanges() { return 0; };

static bool classof(const Chunk *c) { return c->kind() >= OtherKind; }

protected:
Expand Down Expand Up @@ -620,12 +627,15 @@ class ImportThunkChunkARM64 : public ImportThunkChunk {
class ImportThunkChunkARM64EC : public ImportThunkChunk {
public:
explicit ImportThunkChunkARM64EC(ImportFile *file);
size_t getSize() const override { return sizeof(importThunkARM64EC); };
size_t getSize() const override;
MachineTypes getMachine() const override { return ARM64EC; }
void writeTo(uint8_t *buf) const override;
bool verifyRanges() override;
uint32_t extendRanges() override;

Defined *exitThunk;
Defined *sym = nullptr;
bool extended = false;

private:
ImportFile *file;
Expand Down
13 changes: 11 additions & 2 deletions lld/COFF/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -473,8 +473,14 @@ bool Writer::createThunks(OutputSection *os, int margin) {
// elements into it.
for (size_t i = 0; i != os->chunks.size(); ++i) {
SectionChunk *sc = dyn_cast<SectionChunk>(os->chunks[i]);
if (!sc)
if (!sc) {
auto chunk = cast<NonSectionChunk>(os->chunks[i]);
if (uint32_t size = chunk->extendRanges()) {
thunksSize += size;
addressesChanged = true;
}
continue;
}
MachineTypes machine = sc->getMachine();
size_t thunkInsertionSpot = i + 1;

Expand Down Expand Up @@ -607,8 +613,11 @@ void Writer::createECCodeMap() {
bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
for (Chunk *c : chunks) {
SectionChunk *sc = dyn_cast<SectionChunk>(c);
if (!sc)
if (!sc) {
if (!cast<NonSectionChunk>(c)->verifyRanges())
return false;
continue;
}
MachineTypes machine = sc->getMachine();

ArrayRef<coff_relocation> relocs = sc->getRelocs();
Expand Down
39 changes: 39 additions & 0 deletions lld/test/COFF/arm64ec-import-range-ext.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
REQUIRES: aarch64, x86
RUN: split-file %s %t.dir && cd %t.dir

RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test.s -o test.obj
RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj
RUN: llvm-lib -machine:arm64ec -def:test.def -out:test.lib

RUN: lld-link -machine:arm64ec -dll -noentry -out:out.dll loadconfig-arm64ec.obj test.obj test.lib

RUN: llvm-objdump -d out.dll | FileCheck --check-prefix=DISASM %s
DISASM: 0000000180001000 <.text>:
DISASM-NEXT: 180001000: 52800000 mov w0, #0x0 // =0
DISASM-NEXT: 180001004: d65f03c0 ret
DISASM-NEXT: ...
DISASM-NEXT: 188001008: b000000b adrp x11, 0x188002000
DISASM-NEXT: 18800100c: f940016b ldr x11, [x11]
DISASM-NEXT: 188001010: f0fbffea adrp x10, 0x180000000
DISASM-NEXT: 188001014: 9100014a add x10, x10, #0x0
DISASM-NEXT: 188001018: 90fc0010 adrp x16, 0x180001000 <.text>
DISASM-NEXT: 18800101c: 91000210 add x16, x16, #0x0
DISASM-NEXT: 188001020: d61f0200 br x16

#--- test.s
.text
.globl __icall_helper_arm64ec
.p2align 2, 0x0
__icall_helper_arm64ec:
mov w0, #0
ret

.space 0x8000000

.data
.rva __imp_func
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does this test even require a thunk? We're only referencing __imp_func, not func directly, so we should only need the raw IAT entry, no thunks at all? (I guess this is some of the other arm64ec aspects that we've already been through a couple times, that I haven't really internalized yet - sorry about that.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On ARM64EC, __imp_func references the auxiliary IAT, not the regular one. The auxiliary IAT is guaranteed to be directly callable by EC code, so the caller doesn’t need to use __icall_helper_arm64ec, as it would for indirect calls. Initially, the auxiliary IAT is filled with __impchk_* thunks (#107931), which handle the use of __icall_helper_arm64ec to call via the regular IAT. The OS may later update the auxiliary IAT entries to allow direct EC calls if the callee is also EC.

For ARM64EC, func references the x86 thunk that calls the regular IAT (__imp_aux_func), while #func references the ARM thunk that calls the auxiliary IAT (__imp_func). At runtime, if the imported function is x86, two thunks are involved: #func calls __imp_func, which points to __impchk_func, and this ultimately invokes the imported function pointer stored in __imp_aux_func using __icall_helper_arm64ec.


#--- test.def
NAME test.dll
EXPORTS
func
Loading