Skip to content

Commit b00b2b8

Browse files
committed
Merging r351899:
------------------------------------------------------------------------
r351899 | pcc | 2019-01-23 00:54:49 +0100 (Wed, 23 Jan 2019) | 26 lines

COFF, ELF: ICF: Perform 2 rounds of relocation hash propagation.

LLD's performance on PGO instrumented Windows binaries was still not
great even with the fix in D56955; out of the 2m41s linker runtime,
around 2 minutes were still being spent in ICF. I looked into this more
closely and discovered that the vast majority of the runtime was being
spent segregating .pdata sections with the following relocation chain:

.pdata -> identical .text -> unique PGO counter (not eligible for ICF)

This patch causes us to perform 2 rounds of relocation hash
propagation, which allows the hash for the .pdata sections to
incorporate the identifier from the PGO counter. With that, the amount
of time spent in ICF was reduced to about 2 seconds. I also found that
the same change led to a significant ICF performance improvement in a
regular release build of Chromium's chrome_child.dll, where ICF time
was reduced from around 1s to around 700ms.

With the same change applied to the ELF linker, median of 100 runs
for lld-speed-test/chrome reduced from 4.53s to 4.45s on my machine.

I also experimented with increasing the number of propagation rounds
further, but I did not observe any further significant performance
improvements linking Chromium or Firefox.

Differential Revision: https://reviews.llvm.org/D56986
------------------------------------------------------------------------

llvm-svn: 351964
1 parent 40cbc00 commit b00b2b8

File tree

2 files changed

+25
-20
lines changed

2 files changed

+25
-20
lines changed

lld/COFF/ICF.cpp

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -263,19 +263,21 @@ void ICF::run(ArrayRef<Chunk *> Vec) {
263263

264264
// Initially, we use hash values to partition sections.
265265
parallelForEach(Chunks, [&](SectionChunk *SC) {
266-
SC->Class[1] = xxHash64(SC->getContents());
266+
SC->Class[0] = xxHash64(SC->getContents());
267267
});
268268

269269
// Combine the hashes of the sections referenced by each section into its
270270
// hash.
271-
parallelForEach(Chunks, [&](SectionChunk *SC) {
272-
uint32_t Hash = SC->Class[1];
273-
for (Symbol *B : SC->symbols())
274-
if (auto *Sym = dyn_cast_or_null<DefinedRegular>(B))
275-
Hash += Sym->getChunk()->Class[1];
276-
// Set MSB to 1 to avoid collisions with non-hash classes.
277-
SC->Class[0] = Hash | (1U << 31);
278-
});
271+
for (unsigned Cnt = 0; Cnt != 2; ++Cnt) {
272+
parallelForEach(Chunks, [&](SectionChunk *SC) {
273+
uint32_t Hash = SC->Class[Cnt % 2];
274+
for (Symbol *B : SC->symbols())
275+
if (auto *Sym = dyn_cast_or_null<DefinedRegular>(B))
276+
Hash += Sym->getChunk()->Class[Cnt % 2];
277+
// Set MSB to 1 to avoid collisions with non-hash classes.
278+
SC->Class[(Cnt + 1) % 2] = Hash | (1U << 31);
279+
});
280+
}
279281

280282
// From now on, sections in Chunks are ordered so that sections in
281283
// the same group are consecutive in the vector.

lld/ELF/ICF.cpp

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -426,16 +426,17 @@ void ICF<ELFT>::forEachClass(llvm::function_ref<void(size_t, size_t)> Fn) {
426426
// Combine the hashes of the sections referenced by the given section into its
427427
// hash.
428428
template <class ELFT, class RelTy>
429-
static void combineRelocHashes(InputSection *IS, ArrayRef<RelTy> Rels) {
430-
uint32_t Hash = IS->Class[1];
429+
static void combineRelocHashes(unsigned Cnt, InputSection *IS,
430+
ArrayRef<RelTy> Rels) {
431+
uint32_t Hash = IS->Class[Cnt % 2];
431432
for (RelTy Rel : Rels) {
432433
Symbol &S = IS->template getFile<ELFT>()->getRelocTargetSym(Rel);
433434
if (auto *D = dyn_cast<Defined>(&S))
434435
if (auto *RelSec = dyn_cast_or_null<InputSection>(D->Section))
435-
Hash += RelSec->Class[1];
436+
Hash += RelSec->Class[Cnt % 2];
436437
}
437438
// Set MSB to 1 to avoid collisions with non-hash IDs.
438-
IS->Class[0] = Hash | (1U << 31);
439+
IS->Class[(Cnt + 1) % 2] = Hash | (1U << 31);
439440
}
440441

441442
static void print(const Twine &S) {
@@ -453,15 +454,17 @@ template <class ELFT> void ICF<ELFT>::run() {
453454

454455
// Initially, we use hash values to partition sections.
455456
parallelForEach(Sections, [&](InputSection *S) {
456-
S->Class[1] = xxHash64(S->data());
457+
S->Class[0] = xxHash64(S->data());
457458
});
458459

459-
parallelForEach(Sections, [&](InputSection *S) {
460-
if (S->AreRelocsRela)
461-
combineRelocHashes<ELFT>(S, S->template relas<ELFT>());
462-
else
463-
combineRelocHashes<ELFT>(S, S->template rels<ELFT>());
464-
});
460+
for (unsigned Cnt = 0; Cnt != 2; ++Cnt) {
461+
parallelForEach(Sections, [&](InputSection *S) {
462+
if (S->AreRelocsRela)
463+
combineRelocHashes<ELFT>(Cnt, S, S->template relas<ELFT>());
464+
else
465+
combineRelocHashes<ELFT>(Cnt, S, S->template rels<ELFT>());
466+
});
467+
}
465468

466469
// From now on, sections in Sections vector are ordered so that sections
467470
// in the same equivalence class are consecutive in the vector.

0 commit comments

Comments
 (0)