Skip to content

Commit c53c205

Browse files
Cache unwind frame headers as they are found.
Summary: This improves unwind performance quite substantially, and follows a somewhat similar approach used in libgcc_s as described in the thread here: https://gcc.gnu.org/ml/gcc/2005-02/msg00625.html On certain extremely exception heavy internal tests, the time drops from about 80 minutes to about five minutes. Subscribers: libcxx-commits Tags: #libc Differential Revision: https://reviews.llvm.org/D75954
1 parent 30804d0 commit c53c205

File tree

3 files changed

+243
-2
lines changed

3 files changed

+243
-2
lines changed

libunwind/src/AddressSpace.hpp

+12-2
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,11 @@ struct _LIBUNWIND_HIDDEN dl_iterate_cb_data {
452452
#error "_LIBUNWIND_SUPPORT_DWARF_UNWIND requires _LIBUNWIND_SUPPORT_DWARF_INDEX on this platform."
453453
#endif
454454

455+
#include "FrameHeaderCache.hpp"
456+
457+
// There should be just one of these per process.
458+
static FrameHeaderCache ProcessFrameHeaderCache;
459+
455460
static bool checkAddrInSegment(const Elf_Phdr *phdr, size_t image_base,
456461
dl_iterate_cb_data *cbdata) {
457462
if (phdr->p_type == PT_LOAD) {
@@ -466,10 +471,13 @@ static bool checkAddrInSegment(const Elf_Phdr *phdr, size_t image_base,
466471
return false;
467472
}
468473

469-
int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t, void *data) {
474+
int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t pinfo_size,
475+
void *data) {
470476
auto cbdata = static_cast<dl_iterate_cb_data *>(data);
471477
if (pinfo->dlpi_phnum == 0 || cbdata->targetAddr < pinfo->dlpi_addr)
472478
return 0;
479+
if (ProcessFrameHeaderCache.find(pinfo, pinfo_size, data))
480+
return 1;
473481

474482
Elf_Addr image_base = calculateImageBase(pinfo);
475483
bool found_obj = false;
@@ -496,8 +504,10 @@ int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t, void *data) {
496504
} else if (!found_obj) {
497505
found_obj = checkAddrInSegment(phdr, image_base, cbdata);
498506
}
499-
if (found_obj && found_hdr)
507+
if (found_obj && found_hdr) {
508+
ProcessFrameHeaderCache.add(cbdata->sects);
500509
return 1;
510+
}
501511
}
502512
cbdata->sects->dwarf_section_length = 0;
503513
return 0;

libunwind/src/FrameHeaderCache.hpp

+149
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
//===-FrameHeaderCache.hpp ------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
// Cache the elf program headers necessary to unwind the stack more efficiently
8+
// in the presence of many dsos.
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
#ifndef __FRAMEHEADER_CACHE_HPP__
13+
#define __FRAMEHEADER_CACHE_HPP__
14+
15+
#include "config.h"
16+
#include <limits.h>
17+
18+
#ifdef _LIBUNWIND_DEBUG_FRAMEHEADER_CACHE
19+
#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE0(x) _LIBUNWIND_LOG0(x)
20+
#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE(msg, ...) \
21+
_LIBUNWIND_LOG(msg, __VA_ARGS__)
22+
#else
23+
#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE0(x)
24+
#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE(msg, ...)
25+
#endif
26+
27+
// This cache should only be used from within a dl_iterate_phdr callback.
28+
// dl_iterate_phdr does the necessary synchronization to prevent problems
29+
// with concurrent access via the libc load lock. Adding synchronization
30+
// for other uses is possible, but not currently done.
31+
32+
class _LIBUNWIND_HIDDEN FrameHeaderCache {
33+
struct CacheEntry {
34+
uintptr_t LowPC() { return Info.dso_base; };
35+
uintptr_t HighPC() { return Info.dso_base + Info.dwarf_section_length; };
36+
UnwindInfoSections Info;
37+
CacheEntry *Next;
38+
};
39+
40+
static const size_t kCacheEntryCount = 8;
41+
42+
// Can't depend on the C++ standard library in libunwind, so use an array to
43+
// allocate the entries, and two linked lists for ordering unused and recently
44+
// used entries. FIXME: Would the the extra memory for a doubly-linked list
45+
// be better than the runtime cost of traversing a very short singly-linked
46+
// list on a cache miss? The entries themselves are all small and consecutive,
47+
// so unlikely to cause page faults when following the pointers. The memory
48+
// spent on additional pointers could also be spent on more entries.
49+
50+
CacheEntry Entries[kCacheEntryCount];
51+
CacheEntry *MostRecentlyUsed;
52+
CacheEntry *Unused;
53+
54+
void resetCache() {
55+
_LIBUNWIND_FRAMEHEADERCACHE_TRACE0("FrameHeaderCache reset");
56+
MostRecentlyUsed = nullptr;
57+
Unused = &Entries[0];
58+
for (size_t i = 0; i < kCacheEntryCount - 1; i++) {
59+
Entries[i].Next = &Entries[i + 1];
60+
}
61+
Entries[kCacheEntryCount - 1].Next = nullptr;
62+
}
63+
64+
bool cacheNeedsReset(dl_phdr_info *PInfo) {
65+
// C libraries increment dl_phdr_info.adds and dl_phdr_info.subs when
66+
// loading and unloading shared libraries. If these values change between
67+
// iterations of dl_iterate_phdr, then invalidate the cache.
68+
69+
// These are static to avoid needing an initializer, and unsigned long long
70+
// because that is their type within the extended dl_phdr_info. Initialize
71+
// these to something extremely unlikely to be found upon the first call to
72+
// dl_iterate_phdr.
73+
static unsigned long long LastAdds = ULLONG_MAX;
74+
static unsigned long long LastSubs = ULLONG_MAX;
75+
if (PInfo->dlpi_adds != LastAdds || PInfo->dlpi_subs != LastSubs) {
76+
// Resetting the entire cache is a big hammer, but this path is rare--
77+
// usually just on the very first call, when the cache is empty anyway--so
78+
// added complexity doesn't buy much.
79+
LastAdds = PInfo->dlpi_adds;
80+
LastSubs = PInfo->dlpi_subs;
81+
resetCache();
82+
return true;
83+
}
84+
return false;
85+
}
86+
87+
public:
88+
bool find(dl_phdr_info *PInfo, size_t, void *data) {
89+
if (cacheNeedsReset(PInfo) || MostRecentlyUsed == nullptr)
90+
return false;
91+
92+
auto *CBData = static_cast<dl_iterate_cb_data *>(data);
93+
CacheEntry *Current = MostRecentlyUsed;
94+
CacheEntry *Previous = nullptr;
95+
while (Current != nullptr) {
96+
_LIBUNWIND_FRAMEHEADERCACHE_TRACE(
97+
"FrameHeaderCache check %lx in [%lx - %lx)", CBData->targetAddr,
98+
Current->LowPC(), Current->HighPC());
99+
if (Current->LowPC() <= CBData->targetAddr &&
100+
CBData->targetAddr < Current->HighPC()) {
101+
_LIBUNWIND_FRAMEHEADERCACHE_TRACE(
102+
"FrameHeaderCache hit %lx in [%lx - %lx)", CBData->targetAddr,
103+
Current->LowPC(), Current->HighPC());
104+
if (Previous) {
105+
// If there is no Previous, then Current is already the
106+
// MostRecentlyUsed, and no need to move it up.
107+
Previous->Next = Current->Next;
108+
Current->Next = MostRecentlyUsed;
109+
MostRecentlyUsed = Current;
110+
}
111+
*CBData->sects = Current->Info;
112+
return true;
113+
}
114+
Previous = Current;
115+
Current = Current->Next;
116+
}
117+
_LIBUNWIND_FRAMEHEADERCACHE_TRACE("FrameHeaderCache miss for address %lx",
118+
CBData->targetAddr);
119+
return false;
120+
}
121+
122+
void add(const UnwindInfoSections *UIS) {
123+
CacheEntry *Current = nullptr;
124+
125+
if (Unused != nullptr) {
126+
Current = Unused;
127+
Unused = Unused->Next;
128+
} else {
129+
Current = MostRecentlyUsed;
130+
CacheEntry *Previous = nullptr;
131+
while (Current->Next != nullptr) {
132+
Previous = Current;
133+
Current = Current->Next;
134+
}
135+
Previous->Next = nullptr;
136+
_LIBUNWIND_FRAMEHEADERCACHE_TRACE("FrameHeaderCache evict [%lx - %lx)",
137+
Current->LowPC(), Current->HighPC());
138+
}
139+
140+
Current->Info = *UIS;
141+
Current->Next = MostRecentlyUsed;
142+
MostRecentlyUsed = Current;
143+
_LIBUNWIND_FRAMEHEADERCACHE_TRACE("FrameHeaderCache add [%lx - %lx)",
144+
MostRecentlyUsed->LowPC(),
145+
MostRecentlyUsed->HighPC());
146+
}
147+
};
148+
149+
#endif // __FRAMEHEADER_CACHE_HPP__
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
// The other libunwind tests don't test internal interfaces, so the include path
2+
// is a little wonky.
3+
#include "../src/config.h"
4+
5+
// Only run this test under supported configurations.
6+
// This #if chain is ugly, but see the comments in AddressSpace.hpp for
7+
// the reasoning.
8+
9+
#ifdef __APPLE__
10+
int main() { return 0; }
11+
#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL)
12+
int main() { return 0; }
13+
#elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL)
14+
int main() { return 0; }
15+
#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_WIN32)
16+
int main() { return 0; }
17+
#elif defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32)
18+
int main() { return 0; }
19+
#elif defined(_LIBUNWIND_ARM_EHABI) && defined(__BIONIC__)
20+
int main() { return 0; }
21+
#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
22+
23+
#include <link.h>
24+
#include <stdio.h>
25+
26+
// This file defines several of the data structures needed here,
27+
// and includes FrameHeaderCache.hpp as well.
28+
#include "../src/AddressSpace.hpp"
29+
30+
#define kBaseAddr 0xFFF000
31+
#define kDwarfSectionLength 0xFF
32+
33+
using namespace libunwind;
34+
35+
int main() {
36+
FrameHeaderCache FHC;
37+
struct dl_phdr_info PInfo;
38+
memset(&PInfo, 0, sizeof(PInfo));
39+
// The cache itself should only care about these two fields--they
40+
// tell the cache to invalidate or not; everything else is handled
41+
// by AddressSpace.hpp.
42+
PInfo.dlpi_adds = 6;
43+
PInfo.dlpi_subs = 7;
44+
45+
UnwindInfoSections UIS;
46+
UIS.dso_base = kBaseAddr;
47+
UIS.dwarf_section_length = kDwarfSectionLength;
48+
dl_iterate_cb_data CBData;
49+
// Unused by the cache.
50+
CBData.addressSpace = nullptr;
51+
CBData.sects = &UIS;
52+
CBData.targetAddr = kBaseAddr + 1;
53+
54+
// Nothing present, shouldn't find.
55+
if (FHC.find(&PInfo, 0, &CBData))
56+
abort();
57+
FHC.add(&UIS);
58+
// Just added. Should find.
59+
if (!FHC.find(&PInfo, 0, &CBData))
60+
abort();
61+
// Cache is invalid. Shouldn't find.
62+
PInfo.dlpi_adds++;
63+
if (FHC.find(&PInfo, 0, &CBData))
64+
abort();
65+
66+
FHC.add(&UIS);
67+
CBData.targetAddr = kBaseAddr - 1;
68+
// Shouldn't find something outside of the addresses.
69+
if (FHC.find(&PInfo, 0, &CBData))
70+
abort();
71+
// Add enough things to the cache that the entry is evicted.
72+
for (int i = 0; i < 9; i++) {
73+
UIS.dso_base = kBaseAddr + (kDwarfSectionLength * i);
74+
FHC.add(&UIS);
75+
}
76+
CBData.targetAddr = kBaseAddr;
77+
// Should have been evicted.
78+
if (FHC.find(&PInfo, 0, &CBData))
79+
abort();
80+
return 0;
81+
}
82+
#endif

0 commit comments

Comments
 (0)