Skip to content

[lld][LoongArch] Relax PCHi20Lo12: R_LARCH_{PCALA,GOT_PC}_{HI20,LO12} #122209

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 108 additions & 2 deletions lld/ELF/Arch/LoongArch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ enum Op {
ADDI_W = 0x02800000,
ADDI_D = 0x02c00000,
ANDI = 0x03400000,
PCADDI = 0x18000000,
PCADDU12I = 0x1c000000,
LD_W = 0x28800000,
LD_D = 0x28c00000,
Expand Down Expand Up @@ -131,6 +132,8 @@ static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
return begin == 63 ? v >> end : (v & ((1ULL << (begin + 1)) - 1)) >> end;
}

static uint32_t getD5(uint64_t v) { return extractBits(v, 4, 0); }

static uint32_t setD5k16(uint32_t insn, uint32_t imm) {
uint32_t immLo = extractBits(imm, 15, 0);
uint32_t immHi = extractBits(imm, 20, 16);
Expand Down Expand Up @@ -743,6 +746,84 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
}
}

static bool relaxable(ArrayRef<Relocation> relocs, size_t i) {
return i + 1 < relocs.size() && relocs[i + 1].type == R_LARCH_RELAX;
}

static bool isPairRelaxable(ArrayRef<Relocation> relocs, size_t i) {
return relaxable(relocs, i) && relaxable(relocs, i + 2) &&
relocs[i].offset + 4 == relocs[i + 2].offset;
}

// Relax code sequence.
// From:
// pcalau12i $a0, %pc_hi20(sym)
// addi.w/d $a0, $a0, %pc_lo12(sym)
// To:
// pcaddi $a0, %pc_lo12(sym)
//
// From:
// pcalau12i $a0, %got_pc_hi20(sym_got)
// ld.w/d $a0, $a0, %got_pc_lo12(sym_got)
// To:
// pcaddi $a0, %got_pc_hi20(sym_got)
static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
uint64_t loc, Relocation &rHi20, Relocation &rLo12,
uint32_t &remove) {
// check if the relocations are relaxable sequences.
if (!((rHi20.type == R_LARCH_PCALA_HI20 &&
rLo12.type == R_LARCH_PCALA_LO12) ||
(rHi20.type == R_LARCH_GOT_PC_HI20 &&
rLo12.type == R_LARCH_GOT_PC_LO12)))
return;

// GOT references to absolute symbols can't be relaxed to use pcaddi in
// position-independent code, because these instructions produce a relative
// address.
// Meanwhile skip undefined, preemptible and STT_GNU_IFUNC symbols, because
// these symbols may be resolve in runtime.
if (rHi20.type == R_LARCH_GOT_PC_HI20 &&
(!rHi20.sym->isDefined() || rHi20.sym->isPreemptible ||
rHi20.sym->isGnuIFunc() ||
(ctx.arg.isPic && !cast<Defined>(*rHi20.sym).section)))
return;

uint64_t symBase = 0;
if (rHi20.expr == RE_LOONGARCH_PLT_PAGE_PC)
symBase = rHi20.sym->getPltVA(ctx);
else if (rHi20.expr == RE_LOONGARCH_PAGE_PC ||
rHi20.expr == RE_LOONGARCH_GOT_PAGE_PC)
symBase = rHi20.sym->getVA(ctx);
else {
Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr ("
<< rHi20.expr << ") against symbol " << rHi20.sym
<< "in relaxPCHi20Lo12";
return;
}
const uint64_t symLocal = symBase + rHi20.addend;

const int64_t distance = symLocal - loc;
// Check if the distance aligns 4 bytes or exceeds the range of pcaddi.
if ((distance & 0x3) != 0 || !isInt<22>(distance))
return;

// Note: If we can ensure that the .o files generated by LLVM only contain
// relaxable instruction sequences with R_LARCH_RELAX, then we do not need to
// decode instructions. The relaxable instruction sequences imply the
// following constraints:
// * For relocation pairs related to got_pc, the opcodes of instructions
// must be pcalau12i + ld.w/d. In other cases, the opcodes must be pcalau12i +
// addi.w/d.
// * The destination register of pcalau12i is guaranteed to be used only by
// the immediately following instruction.
const uint32_t nextInsn = read32le(sec.content().data() + rLo12.offset);

sec.relaxAux->relocTypes[i] = R_LARCH_RELAX;
sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2;
sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0));
remove = 4;
}

static bool relax(Ctx &ctx, InputSection &sec) {
const uint64_t secAddr = sec.getVA();
const MutableArrayRef<Relocation> relocs = sec.relocs();
Expand Down Expand Up @@ -781,6 +862,12 @@ static bool relax(Ctx &ctx, InputSection &sec) {
}
break;
}
case R_LARCH_PCALA_HI20:
case R_LARCH_GOT_PC_HI20:
// The overflow check for i+2 will be carried out in isPairRelaxable.
if (isPairRelaxable(relocs, i))
relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove);
break;
}

// For all anchors whose offsets are <= r.offset, they are preceded by
Expand Down Expand Up @@ -851,6 +938,7 @@ void LoongArch::finalizeRelax(int passes) const {
MutableArrayRef<Relocation> rels = sec->relocs();
ArrayRef<uint8_t> old = sec->content();
size_t newSize = old.size() - aux.relocDeltas[rels.size() - 1];
size_t writesIdx = 0;
uint8_t *p = ctx.bAlloc.Allocate<uint8_t>(newSize);
uint64_t offset = 0;
int64_t delta = 0;
Expand All @@ -867,11 +955,29 @@ void LoongArch::finalizeRelax(int passes) const {
continue;

// Copy from last location to the current relocated location.
const Relocation &r = rels[i];
Relocation &r = rels[i];
uint64_t size = r.offset - offset;
memcpy(p, old.data() + offset, size);
p += size;
offset = r.offset + remove;

int64_t skip = 0;
if (RelType newType = aux.relocTypes[i]) {
switch (newType) {
case R_LARCH_RELAX:
break;
case R_LARCH_PCREL20_S2:
skip = 4;
write32le(p, aux.writes[writesIdx++]);
// RelExpr is needed for relocating.
r.expr = r.sym->hasFlag(NEEDS_PLT) ? R_PLT_PC : R_PC;
break;
default:
llvm_unreachable("unsupported type");
}
}

p += skip;
offset = r.offset + skip + remove;
}
memcpy(p, old.data() + offset, old.size() - offset);

Expand Down
129 changes: 82 additions & 47 deletions lld/test/ELF/loongarch-relax-align.s
Original file line number Diff line number Diff line change
Expand Up @@ -2,60 +2,95 @@

# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax %s -o %t.32.o
# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.64.o
# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.32.o -o %t.32
# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.64.o -o %t.64
# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.32.o --no-relax -o %t.32n
# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.64.o --no-relax -o %t.64n
# RUN: llvm-objdump -td --no-show-raw-insn %t.32 | FileCheck %s
# RUN: llvm-objdump -td --no-show-raw-insn %t.64 | FileCheck %s
# RUN: llvm-objdump -td --no-show-raw-insn %t.32n | FileCheck %s
# RUN: llvm-objdump -td --no-show-raw-insn %t.64n | FileCheck %s
# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 --relax %t.32.o -o %t.32
# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 --relax %t.64.o -o %t.64
# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 --no-relax %t.32.o -o %t.32n
# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 --no-relax %t.64.o -o %t.64n
# RUN: llvm-objdump -td --no-show-raw-insn %t.32 | FileCheck --check-prefixes=RELAX,NOOLD %s
# RUN: llvm-objdump -td --no-show-raw-insn %t.64 | FileCheck --check-prefixes=RELAX,NOOLD %s
# RUN: llvm-objdump -td --no-show-raw-insn %t.32n | FileCheck --check-prefixes=NORELAX,NOOLD %s
# RUN: llvm-objdump -td --no-show-raw-insn %t.64n | FileCheck --check-prefixes=NORELAX,NOOLD %s

## Test the R_LARCH_ALIGN without symbol index.
# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.o64.o --defsym=old=1
# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.o64.o -o %t.o64
# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.o64.o --no-relax -o %t.o64n
# RUN: llvm-objdump -td --no-show-raw-insn %t.o64 | FileCheck %s
# RUN: llvm-objdump -td --no-show-raw-insn %t.o64n | FileCheck %s
# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 --relax %t.o64.o -o %t.o64
# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 --no-relax %t.o64.o -o %t.o64n
# RUN: llvm-objdump -td --no-show-raw-insn %t.o64 | FileCheck --check-prefixes=RELAX,OLD %s
# RUN: llvm-objdump -td --no-show-raw-insn %t.o64n | FileCheck --check-prefixes=NORELAX,OLD %s

## -r keeps section contents unchanged.
# RUN: ld.lld -r %t.64.o -o %t.64.r
# RUN: ld.lld -r --relax %t.64.o -o %t.64.r
# RUN: llvm-objdump -dr --no-show-raw-insn %t.64.r | FileCheck %s --check-prefix=CHECKR

# CHECK-DAG: {{0*}}10000 l .text {{0*}}44 .Ltext_start
# CHECK-DAG: {{0*}}10038 l .text {{0*}}0c .L1
# CHECK-DAG: {{0*}}10040 l .text {{0*}}04 .L2
# CHECK-DAG: {{0*}}20000 l .text2 {{0*}}14 .Ltext2_start

# CHECK: <.Ltext_start>:
# CHECK-NEXT: break 1
# CHECK-NEXT: break 2
# CHECK-NEXT: nop
# CHECK-NEXT: nop
# CHECK-NEXT: break 3
# CHECK-NEXT: break 4
# CHECK-NEXT: nop
# CHECK-NEXT: nop
# CHECK-NEXT: pcalau12i $a0, 0
# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 0
# CHECK-NEXT: pcalau12i $a0, 0
# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 56
# CHECK-NEXT: pcalau12i $a0, 0
# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 64
# CHECK-EMPTY:
# CHECK-NEXT: <.L1>:
# CHECK-NEXT: nop
# CHECK-NEXT: nop
# CHECK-EMPTY:
# CHECK-NEXT: <.L2>:
# CHECK-NEXT: break 5

# CHECK: <.Ltext2_start>:
# CHECK-NEXT: pcalau12i $a0, 0
# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 0
# CHECK-NEXT: nop
# CHECK-NEXT: nop
# CHECK-NEXT: break 6
# NOOLD: {{0*}}10000 l .text {{0*}}00 .Lalign_symbol
# OLD: {{0*}}00001 l *ABS* {{0*}}00 old

# NORELAX-DAG: {{0*}}10000 l .text {{0*}}44 .Ltext_start
# NORELAX-DAG: {{0*}}10038 l .text {{0*}}0c .L1
# NORELAX-DAG: {{0*}}10040 l .text {{0*}}04 .L2
# NORELAX-DAG: {{0*}}20000 l .text2 {{0*}}14 .Ltext2_start

# NORELAX: <.Ltext_start>:
# NORELAX-NEXT: break 1
# NORELAX-NEXT: break 2
# NORELAX-NEXT: nop
# NORELAX-NEXT: nop
# NORELAX-NEXT: break 3
# NORELAX-NEXT: break 4
# NORELAX-NEXT: nop
# NORELAX-NEXT: nop
# NORELAX-NEXT: pcalau12i $a0, 0
# NORELAX-NEXT: addi.{{[dw]}} $a0, $a0, 0
# NORELAX-NEXT: pcalau12i $a0, 0
# NORELAX-NEXT: addi.{{[dw]}} $a0, $a0, 56
# NORELAX-NEXT: pcalau12i $a0, 0
# NORELAX-NEXT: addi.{{[dw]}} $a0, $a0, 64
# NORELAX-EMPTY:
# NORELAX-NEXT: <.L1>:
# NORELAX-NEXT: nop
# NORELAX-NEXT: nop
# NORELAX-EMPTY:
# NORELAX-NEXT: <.L2>:
# NORELAX-NEXT: break 5

# NORELAX: <.Ltext2_start>:
# NORELAX-NEXT: pcalau12i $a0, 0
# NORELAX-NEXT: addi.{{[dw]}} $a0, $a0, 0
# NORELAX-NEXT: nop
# NORELAX-NEXT: nop
# NORELAX-NEXT: break 6


# RELAX-DAG: {{0*}}10000 l .text {{0*}}34 .Ltext_start
# RELAX-DAG: {{0*}}1002c l .text {{0*}}08 .L1
# RELAX-DAG: {{0*}}10030 l .text {{0*}}04 .L2
# RELAX-DAG: {{0*}}20000 l .text2 {{0*}}14 .Ltext2_start

# RELAX: <.Ltext_start>:
# RELAX-NEXT: break 1
# RELAX-NEXT: break 2
# RELAX-NEXT: nop
# RELAX-NEXT: nop
# RELAX-NEXT: break 3
# RELAX-NEXT: break 4
# RELAX-NEXT: nop
# RELAX-NEXT: nop
# RELAX-NEXT: pcaddi $a0, -8
# RELAX-NEXT: pcaddi $a0, 2
# RELAX-NEXT: pcaddi $a0, 2
# RELAX-EMPTY:
# RELAX-NEXT: <.L1>:
# RELAX-NEXT: nop
# RELAX-EMPTY:
# RELAX-NEXT: <.L2>:
# RELAX-NEXT: break 5

# RELAX: <.Ltext2_start>:
# RELAX-NEXT: pcaddi $a0, 0
# RELAX-NEXT: nop
# RELAX-NEXT: nop
# RELAX-NEXT: nop
# RELAX-NEXT: break 6

# CHECKR: <.Ltext2_start>:
# CHECKR-NEXT: pcalau12i $a0, 0
Expand Down
66 changes: 46 additions & 20 deletions lld/test/ELF/loongarch-relax-emit-relocs.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,39 +3,64 @@

# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax %s -o %t.32.o
# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.64.o
# RUN: ld.lld -Ttext=0x10000 --emit-relocs %t.32.o -o %t.32
# RUN: ld.lld -Ttext=0x10000 --emit-relocs %t.64.o -o %t.64
# RUN: llvm-objdump -dr %t.32 | FileCheck %s
# RUN: llvm-objdump -dr %t.64 | FileCheck %s
# RUN: ld.lld -Ttext=0x10000 -section-start=.got=0x20000 --emit-relocs --relax %t.32.o -o %t.32
# RUN: ld.lld -Ttext=0x10000 -section-start=.got=0x20000 --emit-relocs --relax %t.64.o -o %t.64
# RUN: llvm-objdump -dr %t.32 | FileCheck %s --check-prefix=RELAX
# RUN: llvm-objdump -dr %t.64 | FileCheck %s --check-prefix=RELAX

## -r should keep original relocations.
# RUN: ld.lld -r %t.64.o -o %t.64.r
# RUN: ld.lld --relax -r %t.64.o -o %t.64.r
# RUN: llvm-objdump -dr %t.64.r | FileCheck %s --check-prefix=CHECKR

## --no-relax should keep original relocations.
## TODO Due to R_LARCH_RELAX is not relaxed, it plays same as --relax now.
# RUN: ld.lld -Ttext=0x10000 --emit-relocs --no-relax %t.64.o -o %t.64.norelax
# RUN: llvm-objdump -dr %t.64.norelax | FileCheck %s
# RUN: ld.lld -Ttext=0x10000 -section-start=.got=0x20000 --emit-relocs --no-relax %t.64.o -o %t.64.norelax
# RUN: llvm-objdump -dr %t.64.norelax | FileCheck %s --check-prefix=NORELAX

# CHECK: 00010000 <_start>:
# CHECK-NEXT: pcalau12i $a0, 0
# CHECK-NEXT: R_LARCH_PCALA_HI20 _start
# CHECK-NEXT: R_LARCH_RELAX *ABS*
# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 0
# CHECK-NEXT: R_LARCH_PCALA_LO12 _start
# CHECK-NEXT: R_LARCH_RELAX *ABS*
# CHECK-NEXT: nop
# CHECK-NEXT: R_LARCH_ALIGN *ABS*+0xc
# CHECK-NEXT: nop
# CHECK-NEXT: ret
# RELAX: 00010000 <_start>:
# RELAX-NEXT: pcaddi $a0, 0
# RELAX-NEXT: R_LARCH_RELAX _start
# RELAX-NEXT: R_LARCH_RELAX *ABS*
# RELAX-NEXT: R_LARCH_PCREL20_S2 _start
# RELAX-NEXT: R_LARCH_RELAX *ABS*
# RELAX-NEXT: pcaddi $a0, -1
# RELAX-NEXT: R_LARCH_RELAX _start
# RELAX-NEXT: R_LARCH_RELAX *ABS*
# RELAX-NEXT: R_LARCH_PCREL20_S2 _start
# RELAX-NEXT: R_LARCH_RELAX *ABS*
# RELAX-NEXT: nop
# RELAX-NEXT: R_LARCH_ALIGN *ABS*+0xc
# RELAX-NEXT: nop
# RELAX-NEXT: ret

# NORELAX: <_start>:
# NORELAX-NEXT: pcalau12i $a0, 0
# NORELAX-NEXT: R_LARCH_PCALA_HI20 _start
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
# NORELAX-NEXT: addi.d $a0, $a0, 0
# NORELAX-NEXT: R_LARCH_PCALA_LO12 _start
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
# NORELAX-NEXT: pcalau12i $a0, 16
# NORELAX-NEXT: R_LARCH_GOT_PC_HI20 _start
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
# NORELAX-NEXT: ld.d $a0, $a0, 0
# NORELAX-NEXT: R_LARCH_GOT_PC_LO12 _start
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
# NORELAX-NEXT: ret
# NORELAX-NEXT: R_LARCH_ALIGN *ABS*+0xc

# CHECKR: <_start>:
# CHECKR-NEXT: pcalau12i $a0, 0
# CHECKR-NEXT: R_LARCH_PCALA_HI20 _start
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
# CHECKR-NEXT: addi.d $a0, $a0, 0
# CHECKR-NEXT: addi.d $a0, $a0, 0
# CHECKR-NEXT: R_LARCH_PCALA_LO12 _start
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
# CHECKR-NEXT: pcalau12i $a0, 0
# CHECKR-NEXT: R_LARCH_GOT_PC_HI20 _start
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
# CHECKR-NEXT: ld.d $a0, $a0, 0
# CHECKR-NEXT: R_LARCH_GOT_PC_LO12 _start
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
# CHECKR-NEXT: nop
# CHECKR-NEXT: R_LARCH_ALIGN *ABS*+0xc
# CHECKR-NEXT: nop
Expand All @@ -45,5 +70,6 @@
.global _start
_start:
la.pcrel $a0, _start
la.got $a0, _start
.p2align 4
ret
Loading
Loading