Skip to content

[lld][LoongArch] Relax TLS LE/GD/LD #123600

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 25 commits into from
Mar 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
ea9fea2
Relax PCHi20Lo12.
ylzsx Dec 27, 2024
95f4540
la.pcrel relax test modify.
ylzsx Dec 24, 2024
7b133c2
Add test for PCHi20Lo12
ylzsx Dec 27, 2024
abc1a45
Add test for got symbols relaxation.
ylzsx Dec 30, 2024
1b1804e
Modify test. NFC
ylzsx Jan 15, 2025
30cb382
Add check for register.
ylzsx Jan 20, 2025
f1f995b
Relax call36/tail36.
ylzsx Dec 26, 2024
f227ae5
modify test for call36/tail36.
ylzsx Dec 27, 2024
f2aae15
Modify test. Add the option --relax.
ylzsx Jan 16, 2025
7993434
Relax TLS LE/GD/LD.
ylzsx Dec 27, 2024
1e9aa52
Add test for TLSLD/TLSGD when relax enabled.
ylzsx Dec 29, 2024
91da25e
Modify test for TLSLE when relax enabled.
ylzsx Dec 31, 2024
2066c5f
Add test for loongarch-relax-tls-le.s and modify loongarch-relax-emit…
ylzsx Dec 31, 2024
b57c40e
Modify test. Add --relax option.
ylzsx Jan 16, 2025
924d511
Fixes for reviews.
ylzsx Feb 14, 2025
b9c2ea1
Revert "Modify test. Add the option --relax."
ylzsx Feb 14, 2025
1101829
Fixes for reviews.
ylzsx Feb 14, 2025
b3900f6
Merge branch 'users/ylzsx/r-pchi20lo12' into users/ylzsx/r-call36
ylzsx Feb 14, 2025
a77197a
Merge branch 'users/ylzsx/r-call36' into users/ylzsx/r-tls-noie
ylzsx Feb 14, 2025
1192441
Revert "Modify test. Add --relax option."
ylzsx Feb 14, 2025
e627784
Remove unnecessary spaces.
ylzsx Feb 14, 2025
26c1e0c
Fixes for reviews.
ylzsx Feb 17, 2025
7dc7264
Merge branch 'main' into users/ylzsx/r-tls-noie
ylzsx Mar 9, 2025
dcf857e
Merge branch 'main' into users/ylzsx/r-tls-noie
ylzsx Mar 10, 2025
1484f94
revise indentation.
ylzsx Mar 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 67 additions & 4 deletions lld/ELF/Arch/LoongArch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,10 @@ static uint32_t setJ20(uint32_t insn, uint32_t imm) {
return (insn & 0xfe00001f) | (extractBits(imm, 19, 0) << 5);
}

// Overwrite the 5-bit field at bits [9:5] of `insn` with the value produced by
// extractBits(imm, 4, 0) (presumably imm[4:0] -- confirm against extractBits).
// All bits of `insn` outside [9:5] are kept unchanged.
static uint32_t setJ5(uint32_t insn, uint32_t imm) {
  // 0xfffffc1f has bits [9:5] cleared and every other bit set.
  const uint32_t preserved = insn & 0xfffffc1f;
  const auto field = extractBits(imm, 4, 0) << 5;
  return preserved | field;
}

// Overwrite the 12-bit field at bits [21:10] of `insn` with the value produced
// by extractBits(imm, 11, 0) (presumably imm[11:0] -- confirm against
// extractBits). All bits of `insn` outside [21:10] are kept unchanged.
static uint32_t setK12(uint32_t insn, uint32_t imm) {
  // 0xffc003ff has bits [21:10] cleared and every other bit set.
  const uint32_t preserved = insn & 0xffc003ff;
  const auto field = extractBits(imm, 11, 0) << 10;
  return preserved | field;
}
Expand Down Expand Up @@ -761,10 +765,10 @@ static bool isPairRelaxable(ArrayRef<Relocation> relocs, size_t i) {

// Relax code sequence.
// From:
// pcalau12i $a0, %pc_hi20(sym)
// addi.w/d $a0, $a0, %pc_lo12(sym)
// pcalau12i $a0, %pc_hi20(sym) | %ld_pc_hi20(sym) | %gd_pc_hi20(sym)
// addi.w/d $a0, $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym)
// To:
// pcaddi $a0, %pc_lo12(sym)
// pcaddi $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym)
//
// From:
// pcalau12i $a0, %got_pc_hi20(sym_got)
Expand All @@ -778,6 +782,10 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
if (!((rHi20.type == R_LARCH_PCALA_HI20 &&
rLo12.type == R_LARCH_PCALA_LO12) ||
(rHi20.type == R_LARCH_GOT_PC_HI20 &&
rLo12.type == R_LARCH_GOT_PC_LO12) ||
(rHi20.type == R_LARCH_TLS_GD_PC_HI20 &&
rLo12.type == R_LARCH_GOT_PC_LO12) ||
(rHi20.type == R_LARCH_TLS_LD_PC_HI20 &&
rLo12.type == R_LARCH_GOT_PC_LO12)))
return;

Expand All @@ -798,6 +806,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
else if (rHi20.expr == RE_LOONGARCH_PAGE_PC ||
rHi20.expr == RE_LOONGARCH_GOT_PAGE_PC)
dest = rHi20.sym->getVA(ctx);
else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC)
dest = ctx.in.got->getGlobalDynAddr(*rHi20.sym);
else {
Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr ("
<< rHi20.expr << ") against symbol " << rHi20.sym
Expand Down Expand Up @@ -827,7 +837,12 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
return;

sec.relaxAux->relocTypes[i] = R_LARCH_RELAX;
sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2;
if (rHi20.type == R_LARCH_TLS_GD_PC_HI20)
sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_GD_PCREL20_S2;
else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20)
sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_LD_PCREL20_S2;
else
sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2;
sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0));
remove = 4;
}
Expand Down Expand Up @@ -863,6 +878,33 @@ static void relaxCall36(Ctx &ctx, const InputSection &sec, size_t i,
}
}

// TLS local-exec relaxation.
//
// Rewrites the three-instruction sequence
//   lu12i.w         $rd, %le_hi20_r(sym)
//   add.w/d         $rd, $rd, $tp, %le_add_r(sym)
//   addi/ld/st.w/d  $rd, $rd, %le_lo12_r(sym)
// into the single instruction
//   addi/ld/st.w/d  $rd, $tp, %le_lo12_r(sym)
//
// Each call handles one relocation `r` (index `i` in the section) of that
// sequence. Nothing is changed unless the symbol value plus addend fits in a
// signed 12-bit immediate. `loc` is not used by this function.
static void relaxTlsLe(Ctx &ctx, const InputSection &sec, size_t i,
                       uint64_t loc, Relocation &r, uint32_t &remove) {
  // Bail out when the value exceeds the range of addi/ld/st.
  const uint64_t symVal = r.sym->getVA(ctx, r.addend);
  if (!isInt<12>(symVal))
    return;

  const uint32_t origInsn = read32le(sec.content().data() + r.offset);
  auto &aux = *sec.relaxAux;

  const bool isHi20OrAdd =
      r.type == R_LARCH_TLS_LE_HI20_R || r.type == R_LARCH_TLS_LE_ADD_R;
  if (isHi20OrAdd) {
    // The lu12i.w / add instruction is no longer needed: downgrade its
    // relocation to R_LARCH_RELAX and report 4 bytes to remove.
    aux.relocTypes[i] = R_LARCH_RELAX;
    remove = 4;
    return;
  }

  if (r.type == R_LARCH_TLS_LE_LO12_R) {
    // Keep the instruction, but queue a copy whose bits [9:5] hold R_TP so
    // that the base register becomes $tp, as in the "to" form above.
    aux.writes.push_back(setJ5(origInsn, R_TP));
    aux.relocTypes[i] = R_LARCH_TLS_LE_LO12_R;
  }
}

static bool relax(Ctx &ctx, InputSection &sec) {
const uint64_t secAddr = sec.getVA();
const MutableArrayRef<Relocation> relocs = sec.relocs();
Expand Down Expand Up @@ -903,6 +945,8 @@ static bool relax(Ctx &ctx, InputSection &sec) {
}
case R_LARCH_PCALA_HI20:
case R_LARCH_GOT_PC_HI20:
case R_LARCH_TLS_GD_PC_HI20:
case R_LARCH_TLS_LD_PC_HI20:
// The overflow check for i+2 will be carried out in isPairRelaxable.
if (isPairRelaxable(relocs, i))
relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove);
Expand All @@ -911,6 +955,12 @@ static bool relax(Ctx &ctx, InputSection &sec) {
if (relaxable(relocs, i))
relaxCall36(ctx, sec, i, loc, r, remove);
break;
case R_LARCH_TLS_LE_HI20_R:
case R_LARCH_TLS_LE_ADD_R:
case R_LARCH_TLS_LE_LO12_R:
if (relaxable(relocs, i))
relaxTlsLe(ctx, sec, i, loc, r, remove);
break;
}

// For all anchors whose offsets are <= r.offset, they are preceded by
Expand Down Expand Up @@ -1015,8 +1065,21 @@ void LoongArch::finalizeRelax(int passes) const {
r.expr = r.sym->hasFlag(NEEDS_PLT) ? R_PLT_PC : R_PC;
break;
case R_LARCH_B26:
case R_LARCH_TLS_LE_LO12_R:
skip = 4;
write32le(p, aux.writes[writesIdx++]);
break;
case R_LARCH_TLS_GD_PCREL20_S2:
// Note: R_LARCH_TLS_LD_PCREL20_S2 must also use R_TLSGD_PC instead
// of R_TLSLD_PC due to historical reasons. In fact, right now TLSLD
// behaves exactly like TLSGD on LoongArch.
//
// This reason has also been mentioned in mold commit:
// https://github.com/rui314/mold/commit/5dfa1cf07c03bd57cb3d493b652ef22441bcd71c
case R_LARCH_TLS_LD_PCREL20_S2:
skip = 4;
write32le(p, aux.writes[writesIdx++]);
r.expr = R_TLSGD_PC;
break;
default:
llvm_unreachable("unsupported type");
Expand Down
121 changes: 115 additions & 6 deletions lld/test/ELF/loongarch-relax-emit-relocs.s
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,45 @@
# RELAX-NEXT: R_LARCH_RELAX *ABS*
# RELAX-NEXT: R_LARCH_PCREL20_S2 _start
# RELAX-NEXT: R_LARCH_RELAX *ABS*
# RELAX32-NEXT: nop
# RELAX32-NEXT: R_LARCH_ALIGN *ABS*+0xc
# RELAX32-NEXT: nop
# RELAX32-NEXT: ret

# RELAX64-NEXT: bl -8
# RELAX64-NEXT: R_LARCH_B26 _start
# RELAX64-NEXT: R_LARCH_RELAX *ABS*
# RELAX64-NEXT: b -12
# RELAX64-NEXT: R_LARCH_B26 _start
# RELAX64-NEXT: R_LARCH_RELAX *ABS*
# RELAX64-NEXT: ret

# RELAX-NEXT: lu12i.w $a0, 0
# RELAX-NEXT: R_LARCH_TLS_LE_HI20 a
# RELAX-NEXT: ori $a0, $a0, 0
# RELAX-NEXT: R_LARCH_TLS_LE_LO12 a
# RELAX-NEXT: pcaddi $a0, [[#]]
# RELAX-NEXT: R_LARCH_RELAX a
# RELAX-NEXT: R_LARCH_RELAX *ABS*
# RELAX-NEXT: R_LARCH_TLS_GD_PCREL20_S2 a
# RELAX-NEXT: R_LARCH_RELAX *ABS*
# RELAX-NEXT: pcaddi $a0, [[#]]
# RELAX-NEXT: R_LARCH_RELAX a
# RELAX-NEXT: R_LARCH_RELAX *ABS*
# RELAX-NEXT: R_LARCH_TLS_LD_PCREL20_S2 a
# RELAX-NEXT: R_LARCH_RELAX *ABS*
# RELAX-NEXT: addi.{{[dw]}} $a0, $tp, 0
# RELAX-NEXT: R_LARCH_RELAX a
# RELAX-NEXT: R_LARCH_RELAX *ABS*
# RELAX-NEXT: R_LARCH_RELAX a
# RELAX-NEXT: R_LARCH_RELAX *ABS*
# RELAX-NEXT: R_LARCH_TLS_LE_LO12_R a
# RELAX-NEXT: R_LARCH_RELAX *ABS*

# RELAX32-NEXT: nop
# RELAX32-NEXT: R_LARCH_ALIGN *ABS*+0xc
# RELAX32-NEXT: ret

# RELAX64-NEXT: nop
# RELAX64-NEXT: R_LARCH_ALIGN *ABS*+0xc
# RELAX64-NEXT: nop
# RELAX64-NEXT: nop
# RELAX64-NEXT: ret

# NORELAX: <_start>:
# NORELAX-NEXT: pcalau12i $a0, 0
Expand All @@ -62,8 +88,36 @@
# NORELAX-NEXT: R_LARCH_CALL36 _start
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
# NORELAX-NEXT: jirl $zero, $a0, -24
# NORELAX-NEXT: ret
# NORELAX-NEXT: lu12i.w $a0, 0
# NORELAX-NEXT: R_LARCH_TLS_LE_HI20 a
# NORELAX-NEXT: ori $a0, $a0, 0
# NORELAX-NEXT: R_LARCH_TLS_LE_LO12 a
# NORELAX-NEXT: pcalau12i $a0, 16
# NORELAX-NEXT: R_LARCH_TLS_GD_PC_HI20 a
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
# NORELAX-NEXT: addi.d $a0, $a0, 8
# NORELAX-NEXT: R_LARCH_GOT_PC_LO12 a
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
# NORELAX-NEXT: pcalau12i $a0, 16
# NORELAX-NEXT: R_LARCH_TLS_LD_PC_HI20 a
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
# NORELAX-NEXT: addi.d $a0, $a0, 8
# NORELAX-NEXT: R_LARCH_GOT_PC_LO12 a
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
# NORELAX-NEXT: lu12i.w $a0, 0
# NORELAX-NEXT: R_LARCH_TLS_LE_HI20_R a
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
# NORELAX-NEXT: add.d $a0, $a0, $tp
# NORELAX-NEXT: R_LARCH_TLS_LE_ADD_R a
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
# NORELAX-NEXT: addi.d $a0, $a0, 0
# NORELAX-NEXT: R_LARCH_TLS_LE_LO12_R a
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
# NORELAX-NEXT: nop
# NORELAX-NEXT: R_LARCH_ALIGN *ABS*+0xc
# NORELAX-NEXT: nop
# NORELAX-NEXT: nop
# NORELAX-NEXT: ret

# CHECKR: <_start>:
# CHECKR-NEXT: pcalau12i $a0, 0
Expand All @@ -86,12 +140,53 @@
# CHECKR-NEXT: R_LARCH_CALL36 _start
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
# CHECKR-NEXT: jr $a0
# CHECKR-NEXT: lu12i.w $a0, 0
# CHECKR-NEXT: R_LARCH_TLS_LE_HI20 a
# CHECKR-NEXT: ori $a0, $a0, 0
# CHECKR-NEXT: R_LARCH_TLS_LE_LO12 a
# CHECKR-NEXT: pcalau12i $a0, 0
# CHECKR-NEXT: R_LARCH_TLS_GD_PC_HI20 a
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
# CHECKR-NEXT: addi.d $a0, $a0, 0
# CHECKR-NEXT: R_LARCH_GOT_PC_LO12 a
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
# CHECKR-NEXT: pcalau12i $a0, 0
# CHECKR-NEXT: R_LARCH_TLS_LD_PC_HI20 a
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
# CHECKR-NEXT: addi.d $a0, $a0, 0
# CHECKR-NEXT: R_LARCH_GOT_PC_LO12 a
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
# CHECKR-NEXT: lu12i.w $a0, 0
# CHECKR-NEXT: R_LARCH_TLS_LE_HI20_R a
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
# CHECKR-NEXT: add.d $a0, $a0, $tp
# CHECKR-NEXT: R_LARCH_TLS_LE_ADD_R a
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
# CHECKR-NEXT: addi.d $a0, $a0, 0
# CHECKR-NEXT: R_LARCH_TLS_LE_LO12_R a
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
# CHECKR-NEXT: nop
# CHECKR-NEXT: R_LARCH_ALIGN *ABS*+0xc
# CHECKR-NEXT: nop
# CHECKR-NEXT: nop
# CHECKR-NEXT: ret

# Width-agnostic four-operand add: expands to add.d when the symbol ELF64 is
# defined and to add.w otherwise. All four operands are forwarded unchanged;
# the test passes a %le_add_r(...) relocation specifier as src3.
.macro add dst, src1, src2, src3
.ifdef ELF64
add.d \dst, \src1, \src2, \src3
.else
add.w \dst, \src1, \src2, \src3
.endif
.endm

# Width-agnostic add-immediate: expands to addi.d when the symbol ELF64 is
# defined and to addi.w otherwise. The three operands are forwarded unchanged.
.macro addi dst, src1, src2
.ifdef ELF64
addi.d \dst, \src1, \src2
.else
addi.w \dst, \src1, \src2
.endif
.endm

.global _start
_start:
la.pcrel $a0, _start
Expand All @@ -101,5 +196,19 @@ _start:
call36 _start
tail36 $a0, _start
.endif

la.tls.le $a0, a # without R_LARCH_RELAX relocation
la.tls.gd $a0, a
la.tls.ld $a0, a

lu12i.w $a0, %le_hi20_r(a)
add $a0, $a0, $tp, %le_add_r(a)
addi $a0, $a0, %le_lo12_r(a)

.p2align 4
ret

.section .tbss,"awT",@nobits
.globl a
a:
.zero 4
Loading