-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[LLD][COFF] Add support for range extension thunks for ARM64EC targets. #106289
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-lld-coff @llvm/pr-subscribers-platform-windows Author: Jacek Caban (cjacek) Changes: Thunks themselves are the same as regular ARM64 thunks; they just need to report the correct machine type. When processing the code, we also need to use the current chunk's machine type instead of the global one: we don't want to treat x86_64 thunks as ARM64EC, and we need to report the correct machine type in hybrid binaries. Full diff: https://github.com/llvm/llvm-project/pull/106289.diff 4 Files Affected:
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 72a9ad05ca11c1..386012e3ce8237 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -842,14 +842,9 @@ const uint8_t arm64Thunk[] = {
0x00, 0x02, 0x1f, 0xd6, // br x16
};
-size_t RangeExtensionThunkARM64::getSize() const {
- assert(ctx.config.machine == ARM64);
- (void)&ctx;
- return sizeof(arm64Thunk);
-}
+size_t RangeExtensionThunkARM64::getSize() const { return sizeof(arm64Thunk); }
void RangeExtensionThunkARM64::writeTo(uint8_t *buf) const {
- assert(ctx.config.machine == ARM64);
memcpy(buf, arm64Thunk, sizeof(arm64Thunk));
applyArm64Addr(buf + 0, target->getRVA(), rva, 12);
applyArm64Imm(buf + 4, target->getRVA() & 0xfff, 0);
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index fe202008971a54..e8e647b01c74b5 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -617,18 +617,19 @@ class RangeExtensionThunkARM : public NonSectionCodeChunk {
class RangeExtensionThunkARM64 : public NonSectionCodeChunk {
public:
- explicit RangeExtensionThunkARM64(COFFLinkerContext &ctx, Defined *t)
- : target(t), ctx(ctx) {
+ explicit RangeExtensionThunkARM64(MachineTypes machine, Defined *t)
+ : target(t), machine(machine) {
setAlignment(4);
+ assert(llvm::COFF::isAnyArm64(machine));
}
size_t getSize() const override;
void writeTo(uint8_t *buf) const override;
- MachineTypes getMachine() const override { return ARM64; }
+ MachineTypes getMachine() const override { return machine; }
Defined *target;
private:
- COFFLinkerContext &ctx;
+ MachineTypes machine;
};
// Windows-specific.
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 35e0f98926ee8a..4a0eed4d00997e 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -219,10 +219,12 @@ class Writer {
void sortECChunks();
void removeUnusedSections();
void assignAddresses();
- bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin);
+ bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin,
+ MachineTypes machine);
std::pair<Defined *, bool> getThunk(DenseMap<uint64_t, Defined *> &lastThunks,
Defined *target, uint64_t p,
- uint16_t type, int margin);
+ uint16_t type, int margin,
+ MachineTypes machine);
bool createThunks(OutputSection *os, int margin);
bool verifyRanges(const std::vector<Chunk *> chunks);
void createECCodeMap();
@@ -396,8 +398,9 @@ void OutputSection::addContributingPartialSection(PartialSection *sec) {
// Check whether the target address S is in range from a relocation
// of type relType at address P.
-bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
- if (ctx.config.machine == ARMNT) {
+bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin,
+ MachineTypes machine) {
+ if (machine == ARMNT) {
int64_t diff = AbsoluteDifference(s, p + 4) + margin;
switch (relType) {
case IMAGE_REL_ARM_BRANCH20T:
@@ -408,7 +411,7 @@ bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
default:
return true;
}
- } else if (ctx.config.machine == ARM64) {
+ } else if (isAnyArm64(machine)) {
int64_t diff = AbsoluteDifference(s, p) + margin;
switch (relType) {
case IMAGE_REL_ARM64_BRANCH26:
@@ -421,7 +424,7 @@ bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
return true;
}
} else {
- llvm_unreachable("Unexpected architecture");
+ return true;
}
}
@@ -429,17 +432,17 @@ bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
// or create a new one.
std::pair<Defined *, bool>
Writer::getThunk(DenseMap<uint64_t, Defined *> &lastThunks, Defined *target,
- uint64_t p, uint16_t type, int margin) {
+ uint64_t p, uint16_t type, int margin, MachineTypes machine) {
Defined *&lastThunk = lastThunks[target->getRVA()];
- if (lastThunk && isInRange(type, lastThunk->getRVA(), p, margin))
+ if (lastThunk && isInRange(type, lastThunk->getRVA(), p, margin, machine))
return {lastThunk, false};
Chunk *c;
- switch (ctx.config.machine) {
- case ARMNT:
+ switch (getMachineArchType(machine)) {
+ case Triple::thumb:
c = make<RangeExtensionThunkARM>(ctx, target);
break;
- case ARM64:
- c = make<RangeExtensionThunkARM64>(ctx, target);
+ case Triple::aarch64:
+ c = make<RangeExtensionThunkARM64>(machine, target);
break;
default:
llvm_unreachable("Unexpected architecture");
@@ -471,6 +474,7 @@ bool Writer::createThunks(OutputSection *os, int margin) {
SectionChunk *sc = dyn_cast_or_null<SectionChunk>(os->chunks[i]);
if (!sc)
continue;
+ MachineTypes machine = sc->getMachine();
size_t thunkInsertionSpot = i + 1;
// Try to get a good enough estimate of where new thunks will be placed.
@@ -497,11 +501,12 @@ bool Writer::createThunks(OutputSection *os, int margin) {
uint64_t s = sym->getRVA();
- if (isInRange(rel.Type, s, p, margin))
+ if (isInRange(rel.Type, s, p, margin, machine))
continue;
// If the target isn't in range, hook it up to an existing or new thunk.
- auto [thunk, wasNew] = getThunk(lastThunks, sym, p, rel.Type, margin);
+ auto [thunk, wasNew] =
+ getThunk(lastThunks, sym, p, rel.Type, margin, machine);
if (wasNew) {
Chunk *thunkChunk = thunk->getChunk();
thunkChunk->setRVA(
@@ -603,6 +608,7 @@ bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
SectionChunk *sc = dyn_cast_or_null<SectionChunk>(c);
if (!sc)
continue;
+ MachineTypes machine = sc->getMachine();
ArrayRef<coff_relocation> relocs = sc->getRelocs();
for (const coff_relocation &rel : relocs) {
@@ -615,7 +621,7 @@ bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
uint64_t p = sc->getRVA() + rel.VirtualAddress;
uint64_t s = sym->getRVA();
- if (!isInRange(rel.Type, s, p, 0))
+ if (!isInRange(rel.Type, s, p, 0, machine))
return false;
}
}
@@ -625,7 +631,7 @@ bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
// Assign addresses and add thunks if necessary.
void Writer::finalizeAddresses() {
assignAddresses();
- if (ctx.config.machine != ARMNT && ctx.config.machine != ARM64)
+ if (ctx.config.machine != ARMNT && !isAnyArm64(ctx.config.machine))
return;
size_t origNumChunks = 0;
diff --git a/lld/test/COFF/arm64ec-range-thunks.s b/lld/test/COFF/arm64ec-range-thunks.s
new file mode 100644
index 00000000000000..a4705a5208bd01
--- /dev/null
+++ b/lld/test/COFF/arm64ec-range-thunks.s
@@ -0,0 +1,179 @@
+# REQUIRES: aarch64, x86
+# RUN: split-file %s %t.dir && cd %t.dir
+
+# RUN: llvm-mc -filetype=obj -triple=arm64ec-windows funcs.s -o funcs-arm64ec.obj
+# RUN: llvm-mc -filetype=obj -triple=aarch64-windows native-funcs.s -o funcs-aarch64.obj
+# RUN: llvm-mc -filetype=obj -triple=x86_64-windows space.s -o space-x86_64.obj
+# RUN: llvm-mc -filetype=obj -triple=aarch64-windows space.s -o space-aarch64.obj
+# RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj
+
+
+# Test generating range extension thunks for ARM64EC code. Place some x86_64 chunks in a middle
+# and make sure that thunks stay in ARM64EC code range.
+
+# RUN: lld-link -machine:arm64ec -noentry -dll funcs-arm64ec.obj space-x86_64.obj loadconfig-arm64ec.obj -out:test.dll \
+# RUN: -verbose 2>&1 | FileCheck -check-prefix=VERBOSE %s
+# VERBOSE: Added 3 thunks with margin {{.*}} in 1 passes
+
+# RUN: llvm-objdump -d test.dll | FileCheck --check-prefix=DISASM %s
+
+# DISASM: Disassembly of section .code1:
+# DISASM-EMPTY:
+# DISASM-NEXT: 0000000180003000 <.code1>:
+# DISASM-NEXT: 180003000: 36000040 tbz w0, #0x0, 0x180003008 <.code1+0x8>
+# DISASM-NEXT: 180003004: d65f03c0 ret
+# DISASM-NEXT: 180003008: b0000050 adrp x16, 0x18000c000
+# DISASM-NEXT: 18000300c: 91000210 add x16, x16, #0x0
+# DISASM-NEXT: 180003010: d61f0200 br x16
+# DISASM-EMPTY:
+# DISASM-NEXT: Disassembly of section .code2:
+# DISASM-EMPTY:
+# DISASM-NEXT: 0000000180004000 <.code2>:
+# DISASM-NEXT: ...
+# DISASM-EMPTY:
+# DISASM-NEXT: Disassembly of section .code3:
+# DISASM-EMPTY:
+# DISASM-NEXT: 0000000180005000 <.code3>:
+# DISASM-NEXT: ...
+# DISASM-NEXT: 18000c000: 36000060 tbz w0, #0x0, 0x18000c00c <.code3+0x700c>
+# DISASM-NEXT: 18000c004: d65f03c0 ret
+# DISASM-NEXT: 18000c008: 00000000 udf #0x0
+# DISASM-NEXT: 18000c00c: 90000050 adrp x16, 0x180014000 <.code3+0xf000>
+# DISASM-NEXT: 18000c010: 91006210 add x16, x16, #0x18
+# DISASM-NEXT: 18000c014: d61f0200 br x16
+# DISASM-NEXT: ...
+# DISASM-NEXT: 180014018: 36000040 tbz w0, #0x0, 0x180014020 <.code3+0xf020>
+# DISASM-NEXT: 18001401c: d65f03c0 ret
+# DISASM-NEXT: 180014020: f0ffff70 adrp x16, 0x180003000 <.code1>
+# DISASM-NEXT: 180014024: 91000210 add x16, x16, #0x0
+# DISASM-NEXT: 180014028: d61f0200 br x16
+
+# RUN: llvm-readobj --coff-load-config test.dll | FileCheck --check-prefix=LOADCFG %s
+
+# LOADCFG: CodeMap [
+# LOADCFG-NEXT: 0x3000 - 0x3014 ARM64EC
+# LOADCFG-NEXT: 0x4000 - 0x4300 X64
+# LOADCFG-NEXT: 0x5000 - 0x1402C ARM64EC
+# LOADCFG-NEXT: ]
+
+
+# A similar test using a hybrid binary and native placeholder chunks.
+
+# RUN: lld-link -machine:arm64x -noentry -dll funcs-arm64ec.obj space-aarch64.obj loadconfig-arm64ec.obj -out:testx.dll \
+# RUN: -verbose 2>&1 | FileCheck -check-prefix=VERBOSE %s
+# RUN: llvm-objdump -d testx.dll | FileCheck --check-prefix=DISASM %s
+
+# RUN: llvm-readobj --coff-load-config testx.dll | FileCheck --check-prefix=LOADCFGX %s
+
+# LOADCFGX: CodeMap [
+# LOADCFGX-NEXT: 0x3000 - 0x3014 ARM64EC
+# LOADCFGX-NEXT: 0x4000 - 0x4300 ARM64
+# LOADCFGX-NEXT: 0x5000 - 0x1402C ARM64EC
+# LOADCFGX-NEXT: ]
+
+
+# Test a hybrid ARM64X binary which requires range extension thunks for both native and EC relocations.
+
+# RUN: lld-link -machine:arm64x -noentry -dll funcs-arm64ec.obj funcs-aarch64.obj loadconfig-arm64ec.obj -out:testx2.dll \
+# RUN: -verbose 2>&1 | FileCheck -check-prefix=VERBOSEX %s
+# VERBOSEX: Added 5 thunks with margin {{.*}} in 1 passes
+
+# RUN: llvm-objdump -d testx2.dll | FileCheck --check-prefix=DISASMX %s
+
+# DISASMX: Disassembly of section .code1:
+# DISASMX-EMPTY:
+# DISASMX-NEXT: 0000000180003000 <.code1>:
+# DISASMX-NEXT: 180003000: 36000040 tbz w0, #0x0, 0x180003008 <.code1+0x8>
+# DISASMX-NEXT: 180003004: d65f03c0 ret
+# DISASMX-NEXT: 180003008: b0000050 adrp x16, 0x18000c000
+# DISASMX-NEXT: 18000300c: 91000210 add x16, x16, #0x0
+# DISASMX-NEXT: 180003010: d61f0200 br x16
+# DISASMX-EMPTY:
+# DISASMX-NEXT: Disassembly of section .code2:
+# DISASMX-EMPTY:
+# DISASMX-NEXT: 0000000180004000 <.code2>:
+# DISASMX-NEXT: 180004000: 36000040 tbz w0, #0x0, 0x180004008 <.code2+0x8>
+# DISASMX-NEXT: 180004004: d65f03c0 ret
+# DISASMX-NEXT: 180004008: b0000090 adrp x16, 0x180015000
+# DISASMX-NEXT: 18000400c: 91000210 add x16, x16, #0x0
+# DISASMX-NEXT: 180004010: d61f0200 br x16
+# DISASMX-EMPTY:
+# DISASMX-NEXT: Disassembly of section .code3:
+# DISASMX-EMPTY:
+# DISASMX-NEXT: 0000000180005000 <.code3>:
+# DISASMX-NEXT: ...
+# DISASMX-NEXT: 18000c000: 36000060 tbz w0, #0x0, 0x18000c00c <.code3+0x700c>
+# DISASMX-NEXT: 18000c004: d65f03c0 ret
+# DISASMX-NEXT: 18000c008: 00000000 udf #0x0
+# DISASMX-NEXT: 18000c00c: 90000050 adrp x16, 0x180014000 <.code3+0xf000>
+# DISASMX-NEXT: 18000c010: 91006210 add x16, x16, #0x18
+# DISASMX-NEXT: 18000c014: d61f0200 br x16
+# DISASMX-NEXT: ...
+# DISASMX-NEXT: 180014018: 36000040 tbz w0, #0x0, 0x180014020 <.code3+0xf020>
+# DISASMX-NEXT: 18001401c: d65f03c0 ret
+# DISASMX-NEXT: 180014020: f0ffff70 adrp x16, 0x180003000 <.code1>
+# DISASMX-NEXT: 180014024: 91000210 add x16, x16, #0x0
+# DISASMX-NEXT: 180014028: d61f0200 br x16
+# DISASMX-EMPTY:
+# DISASMX-NEXT: Disassembly of section .code4:
+# DISASMX-EMPTY:
+# DISASMX-NEXT: 0000000180015000 <.code4>:
+# DISASMX-NEXT: 180015000: 36000040 tbz w0, #0x0, 0x180015008 <.code4+0x8>
+# DISASMX-NEXT: 180015004: d65f03c0 ret
+# DISASMX-NEXT: 180015008: f0ffff70 adrp x16, 0x180004000 <.code2>
+# DISASMX-NEXT: 18001500c: 91000210 add x16, x16, #0x0
+# DISASMX-NEXT: 180015010: d61f0200 br x16
+
+# RUN: llvm-readobj --coff-load-config testx2.dll | FileCheck --check-prefix=LOADCFGX2 %s
+
+# LOADCFGX2: CodeMap [
+# LOADCFGX2-NEXT: 0x3000 - 0x3014 ARM64EC
+# LOADCFGX2-NEXT: 0x4000 - 0x4014 ARM64
+# LOADCFGX2-NEXT: 0x5000 - 0x1402C ARM64EC
+# LOADCFGX2-NEXT: 0x15000 - 0x15014 ARM64
+# LOADCFGX2-NEXT: ]
+
+
+#--- funcs.s
+ .globl main
+ .globl func1
+ .globl func2
+ .section .code1, "xr"
+main:
+ tbz w0, #0, func1
+ ret
+ .section .code3$a, "xr"
+ .space 0x7000
+ .section .code3$b, "xr"
+func1:
+ tbz w0, #0, func2
+ ret
+ .space 1
+ .section .code3$c, "xr"
+ .space 0x8000
+ .section .code3$d, "xr"
+ .align 2
+func2:
+ tbz w0, #0, main
+ ret
+
+#--- space.s
+ .section .code2$a, "xr"
+ .space 0x100
+ .section .code2$b, "xr"
+ .space 0x100
+ .section .code2$c, "xr"
+ .space 0x100
+
+#--- native-funcs.s
+ .globl nmain
+ .globl nfunc
+ .section .code2, "xr"
+nmain:
+ tbz w0, #0, nfunc
+ ret
+ .section .code4, "xr"
+ .align 2
+nfunc:
+ tbz w0, #0, nmain
+ ret
|
@llvm/pr-subscribers-lld Author: Jacek Caban (cjacek) Changes: Thunks themselves are the same as regular ARM64 thunks; they just need to report the correct machine type. When processing the code, we also need to use the current chunk's machine type instead of the global one: we don't want to treat x86_64 thunks as ARM64EC, and we need to report the correct machine type in hybrid binaries. Full diff: https://github.com/llvm/llvm-project/pull/106289.diff 4 Files Affected:
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 72a9ad05ca11c1..386012e3ce8237 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -842,14 +842,9 @@ const uint8_t arm64Thunk[] = {
0x00, 0x02, 0x1f, 0xd6, // br x16
};
-size_t RangeExtensionThunkARM64::getSize() const {
- assert(ctx.config.machine == ARM64);
- (void)&ctx;
- return sizeof(arm64Thunk);
-}
+size_t RangeExtensionThunkARM64::getSize() const { return sizeof(arm64Thunk); }
void RangeExtensionThunkARM64::writeTo(uint8_t *buf) const {
- assert(ctx.config.machine == ARM64);
memcpy(buf, arm64Thunk, sizeof(arm64Thunk));
applyArm64Addr(buf + 0, target->getRVA(), rva, 12);
applyArm64Imm(buf + 4, target->getRVA() & 0xfff, 0);
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index fe202008971a54..e8e647b01c74b5 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -617,18 +617,19 @@ class RangeExtensionThunkARM : public NonSectionCodeChunk {
class RangeExtensionThunkARM64 : public NonSectionCodeChunk {
public:
- explicit RangeExtensionThunkARM64(COFFLinkerContext &ctx, Defined *t)
- : target(t), ctx(ctx) {
+ explicit RangeExtensionThunkARM64(MachineTypes machine, Defined *t)
+ : target(t), machine(machine) {
setAlignment(4);
+ assert(llvm::COFF::isAnyArm64(machine));
}
size_t getSize() const override;
void writeTo(uint8_t *buf) const override;
- MachineTypes getMachine() const override { return ARM64; }
+ MachineTypes getMachine() const override { return machine; }
Defined *target;
private:
- COFFLinkerContext &ctx;
+ MachineTypes machine;
};
// Windows-specific.
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 35e0f98926ee8a..4a0eed4d00997e 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -219,10 +219,12 @@ class Writer {
void sortECChunks();
void removeUnusedSections();
void assignAddresses();
- bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin);
+ bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin,
+ MachineTypes machine);
std::pair<Defined *, bool> getThunk(DenseMap<uint64_t, Defined *> &lastThunks,
Defined *target, uint64_t p,
- uint16_t type, int margin);
+ uint16_t type, int margin,
+ MachineTypes machine);
bool createThunks(OutputSection *os, int margin);
bool verifyRanges(const std::vector<Chunk *> chunks);
void createECCodeMap();
@@ -396,8 +398,9 @@ void OutputSection::addContributingPartialSection(PartialSection *sec) {
// Check whether the target address S is in range from a relocation
// of type relType at address P.
-bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
- if (ctx.config.machine == ARMNT) {
+bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin,
+ MachineTypes machine) {
+ if (machine == ARMNT) {
int64_t diff = AbsoluteDifference(s, p + 4) + margin;
switch (relType) {
case IMAGE_REL_ARM_BRANCH20T:
@@ -408,7 +411,7 @@ bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
default:
return true;
}
- } else if (ctx.config.machine == ARM64) {
+ } else if (isAnyArm64(machine)) {
int64_t diff = AbsoluteDifference(s, p) + margin;
switch (relType) {
case IMAGE_REL_ARM64_BRANCH26:
@@ -421,7 +424,7 @@ bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
return true;
}
} else {
- llvm_unreachable("Unexpected architecture");
+ return true;
}
}
@@ -429,17 +432,17 @@ bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
// or create a new one.
std::pair<Defined *, bool>
Writer::getThunk(DenseMap<uint64_t, Defined *> &lastThunks, Defined *target,
- uint64_t p, uint16_t type, int margin) {
+ uint64_t p, uint16_t type, int margin, MachineTypes machine) {
Defined *&lastThunk = lastThunks[target->getRVA()];
- if (lastThunk && isInRange(type, lastThunk->getRVA(), p, margin))
+ if (lastThunk && isInRange(type, lastThunk->getRVA(), p, margin, machine))
return {lastThunk, false};
Chunk *c;
- switch (ctx.config.machine) {
- case ARMNT:
+ switch (getMachineArchType(machine)) {
+ case Triple::thumb:
c = make<RangeExtensionThunkARM>(ctx, target);
break;
- case ARM64:
- c = make<RangeExtensionThunkARM64>(ctx, target);
+ case Triple::aarch64:
+ c = make<RangeExtensionThunkARM64>(machine, target);
break;
default:
llvm_unreachable("Unexpected architecture");
@@ -471,6 +474,7 @@ bool Writer::createThunks(OutputSection *os, int margin) {
SectionChunk *sc = dyn_cast_or_null<SectionChunk>(os->chunks[i]);
if (!sc)
continue;
+ MachineTypes machine = sc->getMachine();
size_t thunkInsertionSpot = i + 1;
// Try to get a good enough estimate of where new thunks will be placed.
@@ -497,11 +501,12 @@ bool Writer::createThunks(OutputSection *os, int margin) {
uint64_t s = sym->getRVA();
- if (isInRange(rel.Type, s, p, margin))
+ if (isInRange(rel.Type, s, p, margin, machine))
continue;
// If the target isn't in range, hook it up to an existing or new thunk.
- auto [thunk, wasNew] = getThunk(lastThunks, sym, p, rel.Type, margin);
+ auto [thunk, wasNew] =
+ getThunk(lastThunks, sym, p, rel.Type, margin, machine);
if (wasNew) {
Chunk *thunkChunk = thunk->getChunk();
thunkChunk->setRVA(
@@ -603,6 +608,7 @@ bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
SectionChunk *sc = dyn_cast_or_null<SectionChunk>(c);
if (!sc)
continue;
+ MachineTypes machine = sc->getMachine();
ArrayRef<coff_relocation> relocs = sc->getRelocs();
for (const coff_relocation &rel : relocs) {
@@ -615,7 +621,7 @@ bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
uint64_t p = sc->getRVA() + rel.VirtualAddress;
uint64_t s = sym->getRVA();
- if (!isInRange(rel.Type, s, p, 0))
+ if (!isInRange(rel.Type, s, p, 0, machine))
return false;
}
}
@@ -625,7 +631,7 @@ bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
// Assign addresses and add thunks if necessary.
void Writer::finalizeAddresses() {
assignAddresses();
- if (ctx.config.machine != ARMNT && ctx.config.machine != ARM64)
+ if (ctx.config.machine != ARMNT && !isAnyArm64(ctx.config.machine))
return;
size_t origNumChunks = 0;
diff --git a/lld/test/COFF/arm64ec-range-thunks.s b/lld/test/COFF/arm64ec-range-thunks.s
new file mode 100644
index 00000000000000..a4705a5208bd01
--- /dev/null
+++ b/lld/test/COFF/arm64ec-range-thunks.s
@@ -0,0 +1,179 @@
+# REQUIRES: aarch64, x86
+# RUN: split-file %s %t.dir && cd %t.dir
+
+# RUN: llvm-mc -filetype=obj -triple=arm64ec-windows funcs.s -o funcs-arm64ec.obj
+# RUN: llvm-mc -filetype=obj -triple=aarch64-windows native-funcs.s -o funcs-aarch64.obj
+# RUN: llvm-mc -filetype=obj -triple=x86_64-windows space.s -o space-x86_64.obj
+# RUN: llvm-mc -filetype=obj -triple=aarch64-windows space.s -o space-aarch64.obj
+# RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj
+
+
+# Test generating range extension thunks for ARM64EC code. Place some x86_64 chunks in a middle
+# and make sure that thunks stay in ARM64EC code range.
+
+# RUN: lld-link -machine:arm64ec -noentry -dll funcs-arm64ec.obj space-x86_64.obj loadconfig-arm64ec.obj -out:test.dll \
+# RUN: -verbose 2>&1 | FileCheck -check-prefix=VERBOSE %s
+# VERBOSE: Added 3 thunks with margin {{.*}} in 1 passes
+
+# RUN: llvm-objdump -d test.dll | FileCheck --check-prefix=DISASM %s
+
+# DISASM: Disassembly of section .code1:
+# DISASM-EMPTY:
+# DISASM-NEXT: 0000000180003000 <.code1>:
+# DISASM-NEXT: 180003000: 36000040 tbz w0, #0x0, 0x180003008 <.code1+0x8>
+# DISASM-NEXT: 180003004: d65f03c0 ret
+# DISASM-NEXT: 180003008: b0000050 adrp x16, 0x18000c000
+# DISASM-NEXT: 18000300c: 91000210 add x16, x16, #0x0
+# DISASM-NEXT: 180003010: d61f0200 br x16
+# DISASM-EMPTY:
+# DISASM-NEXT: Disassembly of section .code2:
+# DISASM-EMPTY:
+# DISASM-NEXT: 0000000180004000 <.code2>:
+# DISASM-NEXT: ...
+# DISASM-EMPTY:
+# DISASM-NEXT: Disassembly of section .code3:
+# DISASM-EMPTY:
+# DISASM-NEXT: 0000000180005000 <.code3>:
+# DISASM-NEXT: ...
+# DISASM-NEXT: 18000c000: 36000060 tbz w0, #0x0, 0x18000c00c <.code3+0x700c>
+# DISASM-NEXT: 18000c004: d65f03c0 ret
+# DISASM-NEXT: 18000c008: 00000000 udf #0x0
+# DISASM-NEXT: 18000c00c: 90000050 adrp x16, 0x180014000 <.code3+0xf000>
+# DISASM-NEXT: 18000c010: 91006210 add x16, x16, #0x18
+# DISASM-NEXT: 18000c014: d61f0200 br x16
+# DISASM-NEXT: ...
+# DISASM-NEXT: 180014018: 36000040 tbz w0, #0x0, 0x180014020 <.code3+0xf020>
+# DISASM-NEXT: 18001401c: d65f03c0 ret
+# DISASM-NEXT: 180014020: f0ffff70 adrp x16, 0x180003000 <.code1>
+# DISASM-NEXT: 180014024: 91000210 add x16, x16, #0x0
+# DISASM-NEXT: 180014028: d61f0200 br x16
+
+# RUN: llvm-readobj --coff-load-config test.dll | FileCheck --check-prefix=LOADCFG %s
+
+# LOADCFG: CodeMap [
+# LOADCFG-NEXT: 0x3000 - 0x3014 ARM64EC
+# LOADCFG-NEXT: 0x4000 - 0x4300 X64
+# LOADCFG-NEXT: 0x5000 - 0x1402C ARM64EC
+# LOADCFG-NEXT: ]
+
+
+# A similar test using a hybrid binary and native placeholder chunks.
+
+# RUN: lld-link -machine:arm64x -noentry -dll funcs-arm64ec.obj space-aarch64.obj loadconfig-arm64ec.obj -out:testx.dll \
+# RUN: -verbose 2>&1 | FileCheck -check-prefix=VERBOSE %s
+# RUN: llvm-objdump -d testx.dll | FileCheck --check-prefix=DISASM %s
+
+# RUN: llvm-readobj --coff-load-config testx.dll | FileCheck --check-prefix=LOADCFGX %s
+
+# LOADCFGX: CodeMap [
+# LOADCFGX-NEXT: 0x3000 - 0x3014 ARM64EC
+# LOADCFGX-NEXT: 0x4000 - 0x4300 ARM64
+# LOADCFGX-NEXT: 0x5000 - 0x1402C ARM64EC
+# LOADCFGX-NEXT: ]
+
+
+# Test a hybrid ARM64X binary which requires range extension thunks for both native and EC relocations.
+
+# RUN: lld-link -machine:arm64x -noentry -dll funcs-arm64ec.obj funcs-aarch64.obj loadconfig-arm64ec.obj -out:testx2.dll \
+# RUN: -verbose 2>&1 | FileCheck -check-prefix=VERBOSEX %s
+# VERBOSEX: Added 5 thunks with margin {{.*}} in 1 passes
+
+# RUN: llvm-objdump -d testx2.dll | FileCheck --check-prefix=DISASMX %s
+
+# DISASMX: Disassembly of section .code1:
+# DISASMX-EMPTY:
+# DISASMX-NEXT: 0000000180003000 <.code1>:
+# DISASMX-NEXT: 180003000: 36000040 tbz w0, #0x0, 0x180003008 <.code1+0x8>
+# DISASMX-NEXT: 180003004: d65f03c0 ret
+# DISASMX-NEXT: 180003008: b0000050 adrp x16, 0x18000c000
+# DISASMX-NEXT: 18000300c: 91000210 add x16, x16, #0x0
+# DISASMX-NEXT: 180003010: d61f0200 br x16
+# DISASMX-EMPTY:
+# DISASMX-NEXT: Disassembly of section .code2:
+# DISASMX-EMPTY:
+# DISASMX-NEXT: 0000000180004000 <.code2>:
+# DISASMX-NEXT: 180004000: 36000040 tbz w0, #0x0, 0x180004008 <.code2+0x8>
+# DISASMX-NEXT: 180004004: d65f03c0 ret
+# DISASMX-NEXT: 180004008: b0000090 adrp x16, 0x180015000
+# DISASMX-NEXT: 18000400c: 91000210 add x16, x16, #0x0
+# DISASMX-NEXT: 180004010: d61f0200 br x16
+# DISASMX-EMPTY:
+# DISASMX-NEXT: Disassembly of section .code3:
+# DISASMX-EMPTY:
+# DISASMX-NEXT: 0000000180005000 <.code3>:
+# DISASMX-NEXT: ...
+# DISASMX-NEXT: 18000c000: 36000060 tbz w0, #0x0, 0x18000c00c <.code3+0x700c>
+# DISASMX-NEXT: 18000c004: d65f03c0 ret
+# DISASMX-NEXT: 18000c008: 00000000 udf #0x0
+# DISASMX-NEXT: 18000c00c: 90000050 adrp x16, 0x180014000 <.code3+0xf000>
+# DISASMX-NEXT: 18000c010: 91006210 add x16, x16, #0x18
+# DISASMX-NEXT: 18000c014: d61f0200 br x16
+# DISASMX-NEXT: ...
+# DISASMX-NEXT: 180014018: 36000040 tbz w0, #0x0, 0x180014020 <.code3+0xf020>
+# DISASMX-NEXT: 18001401c: d65f03c0 ret
+# DISASMX-NEXT: 180014020: f0ffff70 adrp x16, 0x180003000 <.code1>
+# DISASMX-NEXT: 180014024: 91000210 add x16, x16, #0x0
+# DISASMX-NEXT: 180014028: d61f0200 br x16
+# DISASMX-EMPTY:
+# DISASMX-NEXT: Disassembly of section .code4:
+# DISASMX-EMPTY:
+# DISASMX-NEXT: 0000000180015000 <.code4>:
+# DISASMX-NEXT: 180015000: 36000040 tbz w0, #0x0, 0x180015008 <.code4+0x8>
+# DISASMX-NEXT: 180015004: d65f03c0 ret
+# DISASMX-NEXT: 180015008: f0ffff70 adrp x16, 0x180004000 <.code2>
+# DISASMX-NEXT: 18001500c: 91000210 add x16, x16, #0x0
+# DISASMX-NEXT: 180015010: d61f0200 br x16
+
+# RUN: llvm-readobj --coff-load-config testx2.dll | FileCheck --check-prefix=LOADCFGX2 %s
+
+# LOADCFGX2: CodeMap [
+# LOADCFGX2-NEXT: 0x3000 - 0x3014 ARM64EC
+# LOADCFGX2-NEXT: 0x4000 - 0x4014 ARM64
+# LOADCFGX2-NEXT: 0x5000 - 0x1402C ARM64EC
+# LOADCFGX2-NEXT: 0x15000 - 0x15014 ARM64
+# LOADCFGX2-NEXT: ]
+
+
+#--- funcs.s
+ .globl main
+ .globl func1
+ .globl func2
+ .section .code1, "xr"
+main:
+ tbz w0, #0, func1
+ ret
+ .section .code3$a, "xr"
+ .space 0x7000
+ .section .code3$b, "xr"
+func1:
+ tbz w0, #0, func2
+ ret
+ .space 1
+ .section .code3$c, "xr"
+ .space 0x8000
+ .section .code3$d, "xr"
+ .align 2
+func2:
+ tbz w0, #0, main
+ ret
+
+#--- space.s
+ .section .code2$a, "xr"
+ .space 0x100
+ .section .code2$b, "xr"
+ .space 0x100
+ .section .code2$c, "xr"
+ .space 0x100
+
+#--- native-funcs.s
+ .globl nmain
+ .globl nfunc
+ .section .code2, "xr"
+nmain:
+ tbz w0, #0, nfunc
+ ret
+ .section .code4, "xr"
+ .align 2
+nfunc:
+ tbz w0, #0, nmain
+ ret
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM overall, thanks!
Nit: commit message/subject typo, s/extention/extension/.
explicit RangeExtensionThunkARM64(COFFLinkerContext &ctx, Defined *t) | ||
: target(t), ctx(ctx) { | ||
explicit RangeExtensionThunkARM64(MachineTypes machine, Defined *t) | ||
: target(t), machine(machine) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe add an explicit comment above the class, or somewhere here, saying that contrary to the other classes for other architectures, this can work with both arm64 and arm64ec machine types.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added a comment, but skipped formulating it as a 'contrary'. I plan to do a similar thing to import thunks and some *ARM64 chunks from DLL.cpp.
return {lastThunk, false}; | ||
Chunk *c; | ||
switch (ctx.config.machine) { | ||
case ARMNT: | ||
switch (getMachineArchType(machine)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I presume this is just for the purpose of collapsing the three aarch64 variants into one case? Or could we just keep checking machine values and add cases for ARM64EC and ARM64X?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, that or using if (isAnyArm64(...)). Such switch statements were common enough (with a somewhat confusing ARM64X case) that they led to #87370 and follow-ups.
main: | ||
tbz w0, #0, func1 | ||
ret | ||
.section .code3$a, "xr" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: I think I would find these test snippets easier to follow if there were an empty line before each .section.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added them, thanks for the review.
Thunks themselves are the same as regular ARM64 thunks; they just need to report the correct machine type. When processing the code, we also need to use the current chunk's machine type instead of the global one: we don't want to treat x86_64 thunks as ARM64EC, and we need to report the correct machine type in hybrid binaries.
bf1945e
to
a33c2ff
Compare
…f66167c79 Local branch amd-gfx 4b0f661 Merged main:8f96be921c1a97594ee94c2789cee9b131525f63 into amd-gfx:c7b3b9d8ffb4 Remote branch main efad561 [LLD][COFF] Add support for range extension thunks for ARM64EC targets. (llvm#106289)
Thunks themselves are the same as regular ARM64 thunks; they just need to report the correct machine type. When processing the code, we also need to use the current chunk's machine type instead of the global one: we don't want to treat x86_64 thunks as ARM64EC, and we need to report the correct machine type in hybrid binaries.