Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[lld][LoongArch] Convert TLS IE to LE in the normal or medium code model #123680

Open
wants to merge 5 commits into
base: users/ylzsx/r-tlsdesc-noconversion
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions lld/ELF/Arch/LoongArch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,11 @@ class LoongArch final : public TargetInfo {
void relocate(uint8_t *loc, const Relocation &rel,
uint64_t val) const override;
bool relaxOnce(int pass) const override;
void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
void finalizeRelax(int passes) const override;

private:
void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
};
} // end anonymous namespace

Expand All @@ -53,6 +57,8 @@ enum Op {
ADDI_W = 0x02800000,
ADDI_D = 0x02c00000,
ANDI = 0x03400000,
ORI = 0x03800000,
LU12I_W = 0x14000000,
PCADDI = 0x18000000,
PCADDU12I = 0x1c000000,
LD_W = 0x28800000,
Expand Down Expand Up @@ -1000,6 +1006,87 @@ static bool relax(Ctx &ctx, InputSection &sec) {
return changed;
}

// Rewrite one instruction of a TLS initial-exec (IE) access into its
// local-exec (LE) form. Only the normal and medium code models are handled.
//
// Sequence before the conversion:
//   * pcalau12i $a0, %ie_pc_hi20(sym)
//   * ld.d      $a0, $a0, %ie_pc_lo12(sym)
//
// Sequence after the conversion:
//   * lu12i.w   $a0, %le_hi20(sym)       # NOP if val fits in 12 unsigned bits
//   * ori       $a0, $a0, %le_lo12(sym)
//
// When relaxation is enabled, the redundant NOPs can be removed.
//
// `loc` points at the instruction to patch, `rel` selects which half of the
// pair is being rewritten (by relocation type), and `val` is the value to
// encode. Relocation types other than the two handled here leave the
// instruction untouched.
void LoongArch::tlsIeToLe(uint8_t *loc, const Relocation &rel,
                          uint64_t val) const {
  assert(isInt<32>(val) &&
         "val exceeds the range of medium code model in tlsIeToLe");

  const uint32_t origInsn = read32le(loc);
  const uint32_t destReg = getD5(origInsn);
  // A value that fits in an unsigned 12-bit immediate needs only the ORI.
  const bool fitsInOri = isUInt<12>(val);

  uint32_t newInsn;
  if (rel.type == R_LARCH_TLS_IE_PC_HI20) {
    if (fitsInOri) {
      // nop
      newInsn = insn(ANDI, R_ZERO, R_ZERO, 0);
    } else {
      // lu12i.w $a0, %le_hi20
      newInsn = insn(LU12I_W, destReg, extractBits(val, 31, 12), 0);
    }
  } else if (rel.type == R_LARCH_TLS_IE_PC_LO12) {
    if (fitsInOri) {
      // ori $a0, $zero, %le_lo12
      newInsn = insn(ORI, destReg, R_ZERO, val);
    } else {
      // ori $a0, $a0, %le_lo12
      newInsn = insn(ORI, destReg, getJ5(origInsn), lo12(val));
    }
  } else {
    // Not part of an IE pair; nothing to rewrite.
    return;
  }
  write32le(loc, newInsn);
}

// Apply every relocation recorded for `sec` to the section contents in `buf`.
//
// Relocations marked R_RELAX_HINT are skipped. Relocations marked
// R_RELAX_TLS_IE_TO_LE are rewritten from TLS IE to LE via tlsIeToLe(), except
// when they belong to an extreme code model sequence, in which case they are
// resolved as ordinary IE relocations. Everything else goes through
// relocate().
void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
  const unsigned bits = ctx.arg.is64 ? 64 : 32;

  // Address of this input section's data in the output.
  uint64_t secAddr = sec.getOutputSection()->addr;
  if (auto *s = dyn_cast<InputSection>(&sec))
    secAddr += s->outSecOff;
  else if (auto *ehIn = dyn_cast<EhInputSection>(&sec))
    secAddr += ehIn->getParent()->outSecOff;

  // Set when an R_LARCH_TLS_IE_PC_HI20 is seen and kept until the next one, so
  // the matching R_LARCH_TLS_IE_PC_LO12 is handled the same way.
  bool isExtreme = false;

  const MutableArrayRef<Relocation> relocs = sec.relocs();
  const size_t size = relocs.size();
  for (size_t i = 0; i != size; ++i) {
    Relocation &rel = relocs[i];
    uint8_t *loc = buf + rel.offset;
    const uint64_t place = secAddr + rel.offset;
    uint64_t val = SignExtend64(sec.getRelocTargetVA(ctx, rel, place), bits);

    if (rel.expr == R_RELAX_HINT)
      continue;

    if (rel.expr != R_RELAX_TLS_IE_TO_LE) {
      relocate(loc, rel, val);
      continue;
    }

    if (rel.type == R_LARCH_TLS_IE_PC_HI20) {
      // LoongArch does not support the IE to LE optimization in the extreme
      // code model. In that case the relocs are laid out as follows:
      //
      //   * i   -- R_LARCH_TLS_IE_PC_HI20
      //   * i+1 -- R_LARCH_TLS_IE_PC_LO12
      //   * i+2 -- R_LARCH_TLS_IE64_PC_LO20
      //   * i+3 -- R_LARCH_TLS_IE64_PC_HI12
      isExtreme =
          i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_IE64_PC_LO20;
    }

    if (!isExtreme) {
      tlsIeToLe(loc, rel, val);
      continue;
    }

    // Extreme code model: restore the original expression and resolve the
    // relocation without any IE to LE rewriting.
    rel.expr = getRelExpr(rel.type, *rel.sym, loc);
    val = SignExtend64(sec.getRelocTargetVA(ctx, rel, place), bits);
    relocateNoSym(loc, rel.type, val);
  }
}

// When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in
// the absence of a linker script. For call and load/store R_LARCH_RELAX, code
// shrinkage may reduce displacement and make more relocations eligible for
Expand Down
17 changes: 16 additions & 1 deletion lld/ELF/Relocations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1343,14 +1343,20 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
return 1;
}

// LoongArch supports IE to LE optimization in the non-extreme code models.
bool execOptimizeInLoongArch =
ctx.arg.emachine == EM_LOONGARCH &&
(type == R_LARCH_TLS_IE_PC_HI20 || type == R_LARCH_TLS_IE_PC_LO12);

// ARM, Hexagon, LoongArch and RISC-V do not support GD/LD to IE/LE
// optimizations.
// RISC-V supports TLSDESC to IE/LE optimizations.
// For PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable
// optimization as well.
bool execOptimize =
!ctx.arg.shared && ctx.arg.emachine != EM_ARM &&
ctx.arg.emachine != EM_HEXAGON && ctx.arg.emachine != EM_LOONGARCH &&
ctx.arg.emachine != EM_HEXAGON &&
(ctx.arg.emachine != EM_LOONGARCH || execOptimizeInLoongArch) &&
!(isRISCV && expr != R_TLSDESC_PC && expr != R_TLSDESC_CALL) &&
!sec->file->ppc64DisableTLSRelax;

Expand Down Expand Up @@ -1444,6 +1450,15 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
return 1;
}

// LoongArch TLS GD/LD relocs also reuse RE_LOONGARCH_GOT, for which
// NEEDS_TLSIE shouldn't be set. So we check this case independently.
if (ctx.arg.emachine == EM_LOONGARCH && expr == RE_LOONGARCH_GOT &&
execOptimize && isLocalInExecutable) {
ctx.hasTlsIe.store(true, std::memory_order_relaxed);
sec->addReloc({R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym});
return 1;
}

return 0;
}

Expand Down
70 changes: 70 additions & 0 deletions lld/test/ELF/loongarch-relax-tls-ie.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# REQUIRES: loongarch
## Test LA64 IE -> LE in various cases.

# RUN: llvm-mc --filetype=obj --triple=loongarch64 -mattr=+relax %s -o %t.o

## FIXME: IE relaxation has not yet been implemented.
## --relax/--no-relax has the same result. Also check --emit-relocs.
# RUN: ld.lld --emit-relocs %t.o -o %t
# RUN: llvm-readelf -x .got %t 2>&1 | FileCheck --check-prefix=LE-GOT %s
# RUN: llvm-objdump -dr --no-show-raw-insn %t | FileCheck --check-prefixes=LE %s

# RUN: ld.lld --emit-relocs --no-relax %t.o -o %t.norelax
# RUN: llvm-readelf -x .got %t.norelax 2>&1 | FileCheck --check-prefix=LE-GOT %s
# RUN: llvm-objdump -dr --no-show-raw-insn %t.norelax | FileCheck --check-prefixes=LE %s

# LE-GOT: could not find section '.got'

# a@tprel = st_value(a) = 0xfff
# b@tprel = st_value(b) = 0x1000
# LE: 20158: nop
# LE-NEXT: R_LARCH_TLS_IE_PC_HI20 a
# LE-NEXT: R_LARCH_RELAX *ABS*
# LE-NEXT: ori $a0, $zero, 4095
# LE-NEXT: R_LARCH_TLS_IE_PC_LO12 a
# LE-NEXT: R_LARCH_RELAX *ABS*
# LE-NEXT: add.d $a0, $a0, $tp
# LE-NEXT: 20164: lu12i.w $a1, 1
# LE-NEXT: R_LARCH_TLS_IE_PC_HI20 b
# LE-NEXT: ori $a1, $a1, 0
# LE-NEXT: R_LARCH_TLS_IE_PC_LO12 b
# LE-NEXT: add.d $a1, $a1, $tp
# LE-NEXT: 20170: nop
# LE-NEXT: R_LARCH_TLS_IE_PC_HI20 a
# LE-NEXT: R_LARCH_RELAX *ABS*
# LE-NEXT: lu12i.w $a3, 1
# LE-NEXT: R_LARCH_TLS_IE_PC_HI20 b
# LE-NEXT: R_LARCH_RELAX *ABS*
# LE-NEXT: ori $a2, $zero, 4095
# LE-NEXT: R_LARCH_TLS_IE_PC_LO12 a
# LE-NEXT: ori $a3, $a3, 0
# LE-NEXT: R_LARCH_TLS_IE_PC_LO12 b
# LE-NEXT: add.d $a2, $a2, $tp
# LE-NEXT: add.d $a3, $a3, $tp

la.tls.ie $a0, a # relax
add.d $a0, $a0, $tp

# PCALAU12I does not have R_LARCH_RELAX. No relaxation.
pcalau12i $a1, %ie_pc_hi20(b)
ld.d $a1, $a1, %ie_pc_lo12(b)
add.d $a1, $a1, $tp

# Test instructions are interleaved.
# PCALAU12I has an R_LARCH_RELAX. We perform relaxation.
pcalau12i $a2, %ie_pc_hi20(a)
.reloc .-4, R_LARCH_RELAX, 0
pcalau12i $a3, %ie_pc_hi20(b)
.reloc .-4, R_LARCH_RELAX, 0
ld.d $a2, $a2, %ie_pc_lo12(a)
ld.d $a3, $a3, %ie_pc_lo12(b)
add.d $a2, $a2, $tp
add.d $a3, $a3, $tp

.section .tbss,"awT",@nobits
.globl a
.zero 0xfff ## Place a at 0xfff, LE needs only one ins.
a:
.zero 1 ## Place b at 0x1000, LE needs two ins.
b:
.zero 4
30 changes: 12 additions & 18 deletions lld/test/ELF/loongarch-tls-ie.s
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
## LA32 IE -> LE
# RUN: ld.lld %t/32.o -o %t/32
# RUN: llvm-readelf -r %t/32 | FileCheck --check-prefix=NOREL %s
# RUN: llvm-readelf -x .got %t/32 | FileCheck --check-prefix=LE32-GOT %s
# RUN: llvm-readelf -x .got %t/32 2>&1 | FileCheck --check-prefix=LE32-GOT %s
# RUN: llvm-objdump -d --no-show-raw-insn %t/32 | FileCheck --check-prefixes=LE32 %s

## LA64 IE
Expand All @@ -23,7 +23,7 @@
## LA64 IE -> LE
# RUN: ld.lld %t/64.o -o %t/64
# RUN: llvm-readelf -r %t/64 | FileCheck --check-prefix=NOREL %s
# RUN: llvm-readelf -x .got %t/64 | FileCheck --check-prefix=LE64-GOT %s
# RUN: llvm-readelf -x .got %t/64 2>&1 | FileCheck --check-prefix=LE64-GOT %s
# RUN: llvm-objdump -d --no-show-raw-insn %t/64 | FileCheck --check-prefixes=LE64 %s

# IE32-REL: FLAGS STATIC_TLS
Expand Down Expand Up @@ -62,29 +62,23 @@

# a@tprel = st_value(a) = 0x8
# b@tprel = st_value(b) = 0xc
# LE32-GOT: section '.got':
# LE32-GOT-NEXT: 0x0003012c 08000000 0c000000
# LE64-GOT: section '.got':
# LE64-GOT-NEXT: 0x000301e0 08000000 00000000 0c000000 00000000
# LE32-GOT: could not find section '.got'
# LE64-GOT: could not find section '.got'

## LA32:
## &.got[0] - . = 0x3012c - 0x20114: 0x10 pages, page offset 0x12c
## &.got[1] - . = 0x30130 - 0x20120: 0x10 pages, page offset 0x130
# LE32: 20114: pcalau12i $a4, 16
# LE32-NEXT: ld.w $a4, $a4, 300
# LE32: 200d4: nop
# LE32-NEXT: ori $a4, $zero, 8
# LE32-NEXT: add.w $a4, $a4, $tp
# LE32-NEXT: 20120: pcalau12i $a5, 16
# LE32-NEXT: ld.w $a5, $a5, 304
# LE32-NEXT: 200e0: nop
# LE32-NEXT: ori $a5, $zero, 12
# LE32-NEXT: add.w $a5, $a5, $tp

## LA64:
## &.got[0] - . = 0x301e0 - 0x201c8: 0x10 pages, page offset 0x1e0
## &.got[1] - . = 0x301e8 - 0x201d4: 0x10 pages, page offset 0x1e8
# LE64: 201c8: pcalau12i $a4, 16
# LE64-NEXT: ld.d $a4, $a4, 480
# LE64: 20158: nop
# LE64-NEXT: ori $a4, $zero, 8
# LE64-NEXT: add.d $a4, $a4, $tp
# LE64-NEXT: 201d4: pcalau12i $a5, 16
# LE64-NEXT: ld.d $a5, $a5, 488
# LE64-NEXT: 20164: nop
# LE64-NEXT: ori $a5, $zero, 12
# LE64-NEXT: add.d $a5, $a5, $tp

#--- 32.s
Expand Down
Loading