Skip to content

Commit 7c0e9c1

Browse files
committed
[ELF] Add --compress-sections
--compress-sections <section-glob>=[zlib|zstd] is like a generalized --compress-debug-sections that applies to arbitrary sections, including SHF_ALLOC ones. This option has a number of candidate use cases for metadata sections, including: * code coverage sections llvm#48499 * `__asan_globals` section * -fexperimental-sanitize-metadata= sections For SHF_ALLOC use cases, a supporting runtime library can identify the section content with a pair of symbols `__start_<sectionname>` and `__stop_<sectionname>` and check the header to know whether it is compressed or not. There are some caveats: * We compute the section content/size once in finalizeAddressDependentContent before compression. If the content or size changes, the compressed content will be invalid, but we don't detect changed content (e.g., data commands). However, we detect size changes in assignOffsets. * If there are dynamic relocations, rtld does not skip these relocations, which will cause a runtime crash or writable data corruption. In general, label differences should be used (see `foo0` in the test) and the runtime library needs to adjust the differences. * Symbols defined relative to the output section designate the offsets to the uncompressed content. GNU ld feature request: https://sourceware.org/bugzilla/show_bug.cgi?id=27452 Link: https://discourse.llvm.org/t/rfc-compress-arbitrary-sections-with-ld-lld-compress-sections/71674 Link: https://groups.google.com/g/generic-abi/c/HUVhliUrTG0 ("Allow SHF_ALLOC | SHF_COMPRESSED sections") Differential Revision: https://reviews.llvm.org/D154641
1 parent a403124 commit 7c0e9c1

File tree

11 files changed

+213
-11
lines changed

11 files changed

+213
-11
lines changed

lld/ELF/Config.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,8 @@ struct Config {
212212
bool checkSections;
213213
bool checkDynamicRelocs;
214214
llvm::DebugCompressionType compressDebugSections;
215+
llvm::SmallVector<std::pair<llvm::GlobPattern, llvm::DebugCompressionType>, 0>
216+
compressSections;
215217
bool cref;
216218
llvm::SmallVector<std::pair<llvm::GlobPattern, uint64_t>, 0>
217219
deadRelocInNonAlloc;

lld/ELF/Driver.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1431,6 +1431,23 @@ static void readConfigs(opt::InputArgList &args) {
14311431
}
14321432
}
14331433

1434+
for (opt::Arg *arg : args.filtered(OPT_compress_sections)) {
1435+
SmallVector<StringRef, 0> fields;
1436+
StringRef(arg->getValue()).split(fields, '=');
1437+
if (fields.size() != 2 || fields[1].empty()) {
1438+
error(arg->getSpelling() +
1439+
": parse error, not 'section-glob=[zlib|zstd]'");
1440+
continue;
1441+
}
1442+
auto type = getCompressionType(fields[1], arg->getSpelling());
1443+
if (Expected<GlobPattern> pat = GlobPattern::create(fields[0])) {
1444+
config->compressSections.emplace_back(std::move(*pat), type);
1445+
} else {
1446+
error(arg->getSpelling() + ": " + toString(pat.takeError()));
1447+
continue;
1448+
}
1449+
}
1450+
14341451
for (opt::Arg *arg : args.filtered(OPT_z)) {
14351452
std::pair<StringRef, StringRef> option =
14361453
StringRef(arg->getValue()).split('=');

lld/ELF/LinkerScript.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1033,6 +1033,8 @@ void LinkerScript::assignOffsets(OutputSection *sec) {
10331033
if (sec == findFirstSection(l))
10341034
l->lmaOffset = state->lmaOffset;
10351035

1036+
const uint64_t savedDot2 = dot;
1037+
const size_t savedSize = sec->size;
10361038
// We can call this method multiple times during the creation of
10371039
// thunks and want to start over calculation each time.
10381040
sec->size = 0;
@@ -1074,6 +1076,15 @@ void LinkerScript::assignOffsets(OutputSection *sec) {
10741076
}
10751077
}
10761078

1079+
// See the comment in finalizeAddressDependentContent.
1080+
if (sec->compressed.shards) {
1081+
if (sec->size != sec->compressed.uncompressedSize)
1082+
fatal("uncompressed size of SHF_COMPRESSED section '" + sec->name +
1083+
"' is dependent on linker script commands");
1084+
sec->size = savedSize;
1085+
dot = savedDot2 + savedSize;
1086+
}
1087+
10771088
// Non-SHF_ALLOC sections do not affect the addresses of other OutputSections
10781089
// as they are not part of the process image.
10791090
if (!(sec->flags & SHF_ALLOC)) {

lld/ELF/Options.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ defm compress_debug_sections:
6464
Eq<"compress-debug-sections", "Compress DWARF debug sections">,
6565
MetaVarName<"[none,zlib,zstd]">;
6666

67+
defm compress_sections: EEq<"compress-sections", "Compress output sections matching <section-glob>">,
68+
MetaVarName<"<section-glob>=[zlib|zstd]">;
69+
6770
defm defsym: Eq<"defsym", "Define a symbol alias">, MetaVarName<"<symbol>=<value>">;
6871

6972
defm optimize_bb_jumps: BB<"optimize-bb-jumps",

lld/ELF/OutputSections.cpp

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -330,8 +330,14 @@ template <class ELFT> void OutputSection::maybeCompress() {
330330
(void)sizeof(Elf_Chdr);
331331

332332
// Compress only DWARF debug sections.
333-
if (config->compressDebugSections == DebugCompressionType::None ||
334-
(flags & SHF_ALLOC) || !name.starts_with(".debug_") || size == 0)
333+
DebugCompressionType type = DebugCompressionType::None;
334+
for (auto &[glob, t] : config->compressSections)
335+
if (glob.match(name))
336+
type = t;
337+
if (config->compressDebugSections != DebugCompressionType::None &&
338+
!(flags & SHF_ALLOC) && name.starts_with(".debug_") && size)
339+
type = config->compressDebugSections;
340+
if (type == DebugCompressionType::None)
335341
return;
336342

337343
llvm::TimeTraceScope timeScope("Compress debug sections");
@@ -347,9 +353,10 @@ template <class ELFT> void OutputSection::maybeCompress() {
347353
// Use ZSTD's streaming compression API which permits parallel workers working
348354
// on the stream. See http://facebook.github.io/zstd/zstd_manual.html
349355
// "Streaming compression - HowTo".
350-
if (config->compressDebugSections == DebugCompressionType::Zstd) {
356+
if (type == DebugCompressionType::Zstd) {
351357
// Allocate a buffer of half of the input size, and grow it by 1.5x if
352358
// insufficient.
359+
compressed.type = ELFCOMPRESS_ZSTD;
353360
compressed.shards = std::make_unique<SmallVector<uint8_t, 0>[]>(1);
354361
SmallVector<uint8_t, 0> &out = compressed.shards[0];
355362
out.resize_for_overwrite(std::max<size_t>(size / 2, 32));
@@ -422,6 +429,7 @@ template <class ELFT> void OutputSection::maybeCompress() {
422429
}
423430
size += 4; // checksum
424431

432+
compressed.type = ELFCOMPRESS_ZLIB;
425433
compressed.shards = std::move(shardsOut);
426434
compressed.numShards = numShards;
427435
compressed.checksum = checksum;
@@ -453,15 +461,14 @@ void OutputSection::writeTo(uint8_t *buf, parallel::TaskGroup &tg) {
453461
// just write it down.
454462
if (compressed.shards) {
455463
auto *chdr = reinterpret_cast<typename ELFT::Chdr *>(buf);
464+
chdr->ch_type = compressed.type;
456465
chdr->ch_size = compressed.uncompressedSize;
457466
chdr->ch_addralign = addralign;
458467
buf += sizeof(*chdr);
459-
if (config->compressDebugSections == DebugCompressionType::Zstd) {
460-
chdr->ch_type = ELFCOMPRESS_ZSTD;
468+
if (compressed.type == ELFCOMPRESS_ZSTD) {
461469
memcpy(buf, compressed.shards[0].data(), compressed.shards[0].size());
462470
return;
463471
}
464-
chdr->ch_type = ELFCOMPRESS_ZLIB;
465472

466473
// Compute shard offsets.
467474
auto offsets = std::make_unique<size_t[]>(compressed.numShards);

lld/ELF/OutputSections.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ struct PhdrEntry;
2323

2424
struct CompressedData {
2525
std::unique_ptr<SmallVector<uint8_t, 0>[]> shards;
26+
uint32_t type = 0;
2627
uint32_t numShards = 0;
2728
uint32_t checksum = 0;
2829
uint64_t uncompressedSize;
@@ -116,12 +117,13 @@ class OutputSection final : public SectionBase {
116117
void sortInitFini();
117118
void sortCtorsDtors();
118119

120+
// Used for implementation of --compress-debug-sections and
121+
// --compress-sections.
122+
CompressedData compressed;
123+
119124
private:
120125
SmallVector<InputSection *, 0> storage;
121126

122-
// Used for implementation of --compress-debug-sections option.
123-
CompressedData compressed;
124-
125127
std::array<uint8_t, 4> getFiller();
126128
};
127129

lld/ELF/Writer.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -539,8 +539,6 @@ template <class ELFT> void Writer<ELFT>::run() {
539539

540540
// If --compress-debug-sections is specified, compress .debug_* sections.
541541
// Do it right now because it changes the size of output sections.
542-
for (OutputSection *sec : outputSections)
543-
sec->maybeCompress<ELFT>();
544542

545543
if (script->hasSectionsCommand)
546544
script->allocateHeaders(mainPart->phdrs);
@@ -1620,6 +1618,14 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() {
16201618
if (config->emachine == EM_HEXAGON)
16211619
hexagonTLSSymbolUpdate(outputSections);
16221620

1621+
// Compress SHF_COMPRESSED sections using assignAddresses computed content and
1622+
// sizes. If there are data commands with changed values, the compressed
1623+
// content will be invalid, but we do not detect the case. If the section has
1624+
// changes size due to future assignAddresses calls, we will report an error
1625+
// in assignOffsets.
1626+
for (OutputSection *sec : outputSections)
1627+
sec->maybeCompress<ELFT>();
1628+
16231629
uint32_t pass = 0, assignPasses = 0;
16241630
for (;;) {
16251631
bool changed = target->needsThunks ? tc.createThunks(pass, outputSections)

lld/docs/ld.lld.1

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,8 @@ to set the compression level to 6.
148148
The compression level is 5.
149149
.El
150150
.Pp
151+
.It Fl -compress-sections Ns = Ns Ar section-glob=[zlib|zstd]
152+
Compress output sections matching the glob with zlib or zstd.
151153
.It Fl -cref
152154
Output cross reference table. If
153155
.Fl Map

lld/test/ELF/compress-sections-err.s

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,11 @@
55
# RUN: ld.lld %t.o --compress-debug-sections=zlib --compress-debug-sections=none -o /dev/null 2>&1 | count 0
66
# RUN: not ld.lld %t.o --compress-debug-sections=zlib -o /dev/null 2>&1 | \
77
# RUN: FileCheck %s --implicit-check-not=error:
8+
# RUN: not ld.lld %t.o --compress-sections=foo=zlib -o /dev/null 2>&1 | \
9+
# RUN: FileCheck %s --check-prefix=CHECK2 --implicit-check-not=error:
810

911
# CHECK: error: --compress-debug-sections: LLVM was not built with LLVM_ENABLE_ZLIB or did not find zlib at build time
12+
# CHECK2: error: --compress-sections: LLVM was not built with LLVM_ENABLE_ZLIB or did not find zlib at build time
1013

1114
.globl _start
1215
_start:

lld/test/ELF/compress-sections.s

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# REQUIRES: x86, zlib, zstd
2+
3+
# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
4+
# RUN: ld.lld -pie %t.o -o %t --compress-sections '*0=zlib' --compress-sections '*0=none'
5+
# RUN: llvm-readelf -Srs %t | FileCheck %s --check-prefix=CHECK1
6+
7+
# CHECK1: foo0 PROGBITS [[#%x,FOO0:]] [[#%x,]] [[#%x,]] 00 A 0 0 1
8+
# CHECK1-NEXT: foo1 PROGBITS [[#%x,FOO1:]] [[#%x,]] [[#%x,]] 00 A 0 0 1
9+
# CHECK1-NEXT: .text PROGBITS [[#%x,TEXT:]] [[#%x,]] [[#%x,]] 00 AX 0 0 4
10+
# CHECK1: write0 PROGBITS [[#%x,WRITE0:]] [[#%x,]] [[#%x,]] 00 WA 0 0 1
11+
# CHECK1-NEXT: nonalloc0 PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 0 0 1
12+
# CHECK1-NEXT: nonalloc1 PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 0 0 1
13+
# CHECK1-NEXT: .debug_str PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 01 MS 0 0 1
14+
15+
# CHECK1: Offset {{.*}} Type Symbol's Value Symbol's Name + Addend
16+
# CHECK1-NEXT: {{0*}}[[#WRITE0]] {{.*}} R_X86_64_RELATIVE [[#TEXT]]
17+
# CHECK1-NEXT: {{0*}}[[#WRITE0+8]] {{.*}} R_X86_64_RELATIVE [[#TEXT]]
18+
19+
# CHECK1: [[#FOO0]] 0 NOTYPE LOCAL DEFAULT [[#]] foo0_sym
20+
# CHECK1: [[#FOO1]] 0 NOTYPE LOCAL DEFAULT [[#]] foo1_sym
21+
# CHECK1: [[#FOO0]] 0 NOTYPE GLOBAL PROTECTED [[#]] __start_foo0
22+
# CHECK1: [[#FOO1]] 0 NOTYPE GLOBAL PROTECTED [[#]] __stop_foo0
23+
24+
# RUN: ld.lld -pie %t.o -o %t --compress-sections '*0=zlib' --compress-sections .debug_str=zstd
25+
# RUN: llvm-readelf -Srs -x foo0 -x write0 -x nonalloc0 -x .debug_str %t | FileCheck %s --check-prefix=CHECK2
26+
27+
# CHECK2: foo0 PROGBITS [[#%x,FOO0:]] [[#%x,]] [[#%x,]] 00 AC 0 0 1
28+
# CHECK2-NEXT: foo1 PROGBITS [[#%x,FOO1:]] [[#%x,]] [[#%x,]] 00 A 0 0 1
29+
# CHECK2-NEXT: .text PROGBITS [[#%x,TEXT:]] [[#%x,]] [[#%x,]] 00 AX 0 0 4
30+
# CHECK2: write0 PROGBITS [[#%x,WRITE0:]] [[#%x,]] [[#%x,]] 00 WAC 0 0 1
31+
# CHECK2-NEXT: nonalloc0 PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 C 0 0 1
32+
# CHECK2-NEXT: nonalloc1 PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 0 0 1
33+
# CHECK2-NEXT: .debug_str PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 01 MSC 0 0 1
34+
35+
# CHECK2: Offset {{.*}} Type Symbol's Value Symbol's Name + Addend
36+
# CHECK2-NEXT: {{0*}}[[#WRITE0]] {{.*}} R_X86_64_RELATIVE [[#TEXT]]
37+
# CHECK2-NEXT: {{0*}}[[#WRITE0+8]] {{.*}} R_X86_64_RELATIVE [[#TEXT]]
38+
39+
# CHECK2: Hex dump of section 'foo0':
40+
## zlib with ch_size=0x10
41+
# CHECK2-NEXT: 01000000 00000000 10000000 00000000
42+
# CHECK2-NEXT: 01000000 00000000 {{.*}}
43+
# CHECK2: Hex dump of section 'write0':
44+
## zlib with ch_size=0x10
45+
# CHECK2-NEXT: 01000000 00000000 10000000 00000000
46+
# CHECK2-NEXT: 01000000 00000000 {{.*}}
47+
# CHECK2: Hex dump of section 'nonalloc0':
48+
## zlib with ch_size=0x10
49+
# CHECK2-NEXT: 01000000 00000000 10000000 00000000
50+
# CHECK2-NEXT: 01000000 00000000 {{.*}}
51+
# CHECK2: Hex dump of section '.debug_str':
52+
## zstd with ch_size=0x38
53+
# CHECK2-NEXT: 02000000 00000000 38000000 00000000
54+
# CHECK2-NEXT: 01000000 00000000 {{.*}}
55+
56+
# RUN: not ld.lld --compress-sections=foo %t.o -o /dev/null 2>&1 | \
57+
# RUN: FileCheck %s --check-prefix=ERR1 --implicit-check-not=error:
58+
# ERR1: error: --compress-sections: parse error, not 'section-glob=[zlib|zstd]'
59+
60+
# RUN: not ld.lld --compress-sections 'a[=zlib' %t.o -o /dev/null 2>&1 | \
61+
# RUN: FileCheck %s --check-prefix=ERR2 --implicit-check-not=error:
62+
# ERR2: error: --compress-sections: invalid glob pattern: a[
63+
64+
# RUN: not ld.lld %t.o -o /dev/null --compress-sections='.debug*=zlib-gabi' --compress-sections='.debug*=' 2>&1 | \
65+
# RUN: FileCheck -check-prefix=ERR3 %s
66+
# ERR3: unknown --compress-sections value: zlib-gabi
67+
# ERR3-NEXT: --compress-sections: parse error, not 'section-glob=[zlib|zstd]'
68+
69+
.globl _start
70+
_start:
71+
leaq __start_foo0(%rip), %rax
72+
leaq __stop_foo0(%rip), %rax
73+
ret
74+
75+
.section foo0,"a"
76+
foo0_sym:
77+
.quad .text-.
78+
.quad .text-.
79+
.section foo1,"a"
80+
foo1_sym:
81+
.quad .text-.
82+
.quad .text-.
83+
.section write0,"aw"
84+
.quad .text
85+
.quad .text
86+
.section nonalloc0,""
87+
.quad .text
88+
.quad .text
89+
.section nonalloc1,""
90+
.quad 42
91+
92+
.section .debug_str,"MS",@progbits,1
93+
.Linfo_string0:
94+
.asciz "AAAAAAAAAAAAAAAAAAAAAAAAAAA"
95+
.Linfo_string1:
96+
.asciz "BBBBBBBBBBBBBBBBBBBBBBBBBBB"

0 commit comments

Comments
 (0)