Skip to content

Commit 76a14bb

Browse files
committed
[PDB] Add public symbol lookup by address
1 parent 507ff08 commit 76a14bb

File tree

6 files changed

+218
-0
lines changed

6 files changed

+218
-0
lines changed

llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,13 @@ namespace llvm {
1818
namespace msf {
1919
class MappedBlockStream;
2020
}
21+
namespace codeview {
22+
class PublicSym32;
23+
}
2124
namespace pdb {
2225
struct PublicsStreamHeader;
2326
struct SectionOffset;
27+
class SymbolStream;
2428

2529
class PublicsStream {
2630
public:
@@ -42,6 +46,20 @@ class PublicsStream {
4246
return SectionOffsets;
4347
}
4448

49+
/// Find a public symbol by a segment and offset.
50+
///
51+
/// In case there is more than one symbol (for example due to ICF), the first
52+
/// one is returned.
53+
///
54+
/// \return If a symbol was found, the symbol at the provided address is
55+
/// returned as well as the index of this symbol in the address map. If
56+
/// the binary was linked with ICF, there might be more symbols with the
57+
/// same address after the returned one. If no symbol is found,
58+
/// `std::nullopt` is returned.
59+
LLVM_ABI std::optional<std::pair<codeview::PublicSym32, size_t>>
60+
findByAddress(const SymbolStream &Symbols, uint16_t Segment,
61+
uint32_t Offset) const;
62+
4563
private:
4664
std::unique_ptr<msf::MappedBlockStream> Stream;
4765
GSIHashTable PublicsTable;

llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,12 @@
2222
//===----------------------------------------------------------------------===//
2323

2424
#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
25+
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
26+
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
2527
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
2628
#include "llvm/DebugInfo/PDB/Native/RawError.h"
2729
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
30+
#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
2831
#include "llvm/Support/BinaryStreamReader.h"
2932
#include "llvm/Support/Error.h"
3033
#include <cstdint>
@@ -96,3 +99,91 @@ Error PublicsStream::reload() {
9699
"Corrupted publics stream.");
97100
return Error::success();
98101
}
102+
103+
static uint32_t compareSegmentOffset(uint16_t LhsSegment, uint32_t LhsOffset,
104+
uint16_t RhsSegment, uint32_t RhsOffset) {
105+
if (LhsSegment == RhsSegment)
106+
return LhsOffset - RhsOffset;
107+
return LhsSegment - RhsSegment;
108+
}
109+
110+
static uint32_t compareSegmentOffset(uint16_t LhsSegment, uint32_t LhsOffst,
111+
const codeview::PublicSym32 &Rhs) {
112+
return compareSegmentOffset(LhsSegment, LhsOffst, Rhs.Segment, Rhs.Offset);
113+
}
114+
115+
// This is a reimplementation of NearestSym:
116+
// https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/PDB/dbi/gsi.cpp#L1492-L1581
117+
std::optional<std::pair<codeview::PublicSym32, size_t>>
118+
PublicsStream::findByAddress(const SymbolStream &Symbols, uint16_t Segment,
119+
uint32_t Offset) const {
120+
// The address map is sorted by address, so we do binary search.
121+
// Each element is an offset into the symbols for a public symbol.
122+
auto Lo = AddressMap.begin();
123+
auto Hi = AddressMap.end();
124+
Hi -= 1;
125+
126+
while (Lo < Hi) {
127+
auto Cur = Lo + ((Hi - Lo + 1) / 2);
128+
auto Sym = Symbols.readRecord(Cur->value());
129+
if (Sym.kind() != codeview::S_PUB32)
130+
return std::nullopt; // this is most likely corrupted debug info
131+
132+
auto Psym =
133+
codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(Sym);
134+
if (!Psym) {
135+
consumeError(Psym.takeError());
136+
return std::nullopt;
137+
}
138+
139+
uint32_t Cmp = compareSegmentOffset(Segment, Offset, *Psym);
140+
if (Cmp < 0) {
141+
Cur -= 1;
142+
Hi = Cur;
143+
} else if (Cmp == 0)
144+
Lo = Hi = Cur;
145+
else
146+
Lo = Cur;
147+
}
148+
149+
auto Sym = Symbols.readRecord(Lo->value());
150+
if (Sym.kind() != codeview::S_PUB32)
151+
return std::nullopt; // this is most likely corrupted debug info
152+
153+
auto MaybePsym =
154+
codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(Sym);
155+
if (!MaybePsym) {
156+
consumeError(MaybePsym.takeError());
157+
return std::nullopt;
158+
}
159+
codeview::PublicSym32 Psym = std::move(*MaybePsym);
160+
161+
uint32_t Cmp = compareSegmentOffset(Segment, Offset, Psym);
162+
if (Cmp != 0)
163+
return std::nullopt;
164+
165+
// We found a symbol. Due to ICF, multiple symbols can have the same
166+
// address, so return the first one
167+
while (Lo != AddressMap.begin()) {
168+
--Lo;
169+
Sym = Symbols.readRecord(Lo->value());
170+
if (Sym.kind() != codeview::S_PUB32)
171+
return std::nullopt;
172+
MaybePsym =
173+
codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(Sym);
174+
if (!MaybePsym) {
175+
consumeError(MaybePsym.takeError());
176+
return std::nullopt;
177+
}
178+
179+
if (MaybePsym->Segment != Segment || MaybePsym->Offset != Offset) {
180+
++Lo;
181+
break;
182+
}
183+
184+
Psym = std::move(*MaybePsym);
185+
}
186+
187+
std::ptrdiff_t IterOffset = Lo - AddressMap.begin();
188+
return std::pair{Psym, static_cast<size_t>(IterOffset)};
189+
}

llvm/unittests/DebugInfo/PDB/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ add_llvm_unittest_with_input_files(DebugInfoPDBTests
1111
StringTableBuilderTest.cpp
1212
PDBApiTest.cpp
1313
PDBVariantTest.cpp
14+
PublicsStreamTest.cpp
1415
)
1516

1617
target_link_libraries(DebugInfoPDBTests PRIVATE LLVMTestingSupport)
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// clang-format off
2+
3+
// Compile with
4+
// cl /Z7 /GR- /GS- PublicSymbols.cpp -c /Gy
5+
// link .\PublicSymbols.obj /DEBUG /NODEFAULTLIB /out:PublicSymbols.exe /ENTRY:main /OPT:ICF
6+
// llvm-pdbutil pdb2yaml --publics-stream PublicSymbols.pdb > PublicSymbols.yaml
7+
// llvm-pdbutil yaml2pdb PublicSymbols.yaml
8+
//
9+
// rm PublicSymbols.exe && rm PublicSymbols.obj && rm PublicSymbols.yaml
10+
11+
int foobar(int i){ return i + 1; }
12+
// these should be merged with ICF
13+
int dup1(int i){ return i + 2; }
14+
int dup2(int i){ return i + 2; }
15+
int dup3(int i){ return i + 2; }
16+
17+
class AClass {
18+
public:
19+
void AMethod(int, char*) {}
20+
static bool Something(char c) {
21+
return c == ' ';
22+
}
23+
};
24+
25+
struct Base {
26+
virtual ~Base() = default;
27+
};
28+
struct Derived : public Base {};
29+
struct Derived2 : public Base {};
30+
struct Derived3 : public Derived2, public Derived {};
31+
32+
int AGlobal;
33+
34+
void operator delete(void *,unsigned __int64) {}
35+
36+
int main() {
37+
foobar(1);
38+
dup1(1);
39+
dup2(1);
40+
dup3(1);
41+
AClass a;
42+
a.AMethod(1, nullptr);
43+
AClass::Something(' ');
44+
Derived3 d3;
45+
return AGlobal;
46+
}
52 KB
Binary file not shown.
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
10+
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
11+
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
12+
#include "llvm/Support/BinaryByteStream.h"
13+
#include "llvm/Support/MemoryBuffer.h"
14+
15+
#include "llvm/Testing/Support/SupportHelpers.h"
16+
17+
#include "gtest/gtest.h"
18+
19+
using namespace llvm;
20+
using namespace llvm::pdb;
21+
22+
extern const char *TestMainArgv0;
23+
24+
static std::string getExePath() {
25+
SmallString<128> InputsDir = unittest::getInputFileDirectory(TestMainArgv0);
26+
llvm::sys::path::append(InputsDir, "PublicSymbols.pdb");
27+
return std::string(InputsDir);
28+
}
29+
30+
// Loads the PublicSymbols.pdb input and checks PublicsStream::findByAddress
// for exact hits (including an address shared by several symbols) and for
// addresses that no public symbol starts at.
TEST(PublicsStreamTest, FindByAddress) {
  std::string ExePath = getExePath();
  auto Buffer = MemoryBuffer::getFile(ExePath, /*IsText=*/false,
                                      /*RequiresNullTerminator=*/false);
  ASSERT_TRUE(bool(Buffer));
  auto Stream = std::make_unique<MemoryBufferByteStream>(
      std::move(*Buffer), llvm::endianness::little);

  BumpPtrAllocator Alloc;
  PDBFile File(ExePath, std::move(Stream), Alloc);
  ASSERT_FALSE(bool(File.parseFileHeaders()));
  ASSERT_FALSE(bool(File.parseStreamData()));

  auto Publics = File.getPDBPublicsStream();
  ASSERT_TRUE(bool(Publics));
  auto Symbols = File.getPDBSymbolStream();
  ASSERT_TRUE(bool(Symbols));

  auto VTableDerived = Publics->findByAddress(*Symbols, 2, 8);
  ASSERT_TRUE(VTableDerived.has_value());
  // Both Derived and Derived2 have their vftables at this address (due to
  // ICF); Derived2 comes first in the address map, so it must be returned.
  ASSERT_EQ(VTableDerived->first.Name, "??_7Derived2@@6B@");
  // The index is a size_t; use an unsigned literal to avoid a signed/unsigned
  // comparison inside the assertion.
  ASSERT_EQ(VTableDerived->second, 26u);

  // Neighbouring offsets that no public symbol starts at must not match.
  ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 7).has_value());
  ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 9).has_value());

  auto GlobalSym = Publics->findByAddress(*Symbols, 3, 0);
  ASSERT_TRUE(GlobalSym.has_value());
  ASSERT_EQ(GlobalSym->first.Name, "?AGlobal@@3HA");
  ASSERT_EQ(GlobalSym->second, 30u);
}

0 commit comments

Comments
 (0)