Skip to content

Commit c82d09c

Browse files
authored
[PDB] Add public symbol lookup by address (#157361)
This adds a method on the `PublicsStream` to look up symbols by their address (segment + offset). It's largely a reimplementation of [`NearestSym`](https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/PDB/dbi/gsi.cpp#L1492-L1581) from the reference implementation. However, instead of the nearest symbol, it returns only a symbol located exactly at the given address. When several symbols share that address (for example because of ICF), the one that comes first in the address map is returned. Callers can then use the returned address-map index to read the following records and check whether multiple symbols overlap, if desired. From #149701.
1 parent e92cbfb commit c82d09c

File tree

4 files changed

+295
-0
lines changed

4 files changed

+295
-0
lines changed

llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,13 @@ namespace llvm {
1818
namespace msf {
1919
class MappedBlockStream;
2020
}
21+
namespace codeview {
22+
class PublicSym32;
23+
}
2124
namespace pdb {
2225
struct PublicsStreamHeader;
2326
struct SectionOffset;
27+
class SymbolStream;
2428

2529
class PublicsStream {
2630
public:
@@ -42,6 +46,20 @@ class PublicsStream {
4246
return SectionOffsets;
4347
}
4448

49+
/// Find a public symbol by a segment and offset.
50+
///
51+
/// In case there is more than one symbol (for example due to ICF), the first
52+
/// one is returned.
53+
///
54+
/// \return If a symbol was found, the symbol at the provided address is
55+
/// returned as well as the index of this symbol in the address map. If
56+
/// the binary was linked with ICF, there might be more symbols with the
57+
/// same address after the returned one. If no symbol is found,
58+
/// `std::nullopt` is returned.
59+
LLVM_ABI std::optional<std::pair<codeview::PublicSym32, size_t>>
60+
findByAddress(const SymbolStream &Symbols, uint16_t Segment,
61+
uint32_t Offset) const;
62+
4563
private:
4664
std::unique_ptr<msf::MappedBlockStream> Stream;
4765
GSIHashTable PublicsTable;

llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,12 @@
2222
//===----------------------------------------------------------------------===//
2323

2424
#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
25+
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
26+
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
2527
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
2628
#include "llvm/DebugInfo/PDB/Native/RawError.h"
2729
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
30+
#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
2831
#include "llvm/Support/BinaryStreamReader.h"
2932
#include "llvm/Support/Error.h"
3033
#include <cstdint>
@@ -96,3 +99,50 @@ Error PublicsStream::reload() {
9699
"Corrupted publics stream.");
97100
return Error::success();
98101
}
102+
103+
// This is a reimplementation of NearestSym:
104+
// https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/PDB/dbi/gsi.cpp#L1492-L1581
105+
std::optional<std::pair<codeview::PublicSym32, size_t>>
106+
PublicsStream::findByAddress(const SymbolStream &Symbols, uint16_t Segment,
107+
uint32_t Offset) const {
108+
// The address map is sorted by address, so we can use lower_bound to find the
109+
// position. Each element is an offset into the symbols for a public symbol.
110+
auto It = llvm::lower_bound(
111+
AddressMap, std::tuple(Segment, Offset),
112+
[&](support::ulittle32_t Cur, auto Addr) {
113+
auto Sym = Symbols.readRecord(Cur.value());
114+
if (Sym.kind() != codeview::S_PUB32)
115+
return false; // stop here, this is most likely corrupted debug info
116+
117+
auto Psym =
118+
codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(
119+
Sym);
120+
if (!Psym) {
121+
consumeError(Psym.takeError());
122+
return false;
123+
}
124+
125+
return std::tie(Psym->Segment, Psym->Offset) < Addr;
126+
});
127+
128+
if (It == AddressMap.end())
129+
return std::nullopt;
130+
131+
auto Sym = Symbols.readRecord(It->value());
132+
if (Sym.kind() != codeview::S_PUB32)
133+
return std::nullopt; // this is most likely corrupted debug info
134+
135+
auto MaybePsym =
136+
codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(Sym);
137+
if (!MaybePsym) {
138+
consumeError(MaybePsym.takeError());
139+
return std::nullopt;
140+
}
141+
codeview::PublicSym32 Psym = std::move(*MaybePsym);
142+
143+
if (std::tuple(Segment, Offset) != std::tuple(Psym.Segment, Psym.Offset))
144+
return std::nullopt;
145+
146+
std::ptrdiff_t IterOffset = It - AddressMap.begin();
147+
return std::pair{Psym, static_cast<size_t>(IterOffset)};
148+
}

llvm/unittests/DebugInfo/PDB/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ add_llvm_unittest_with_input_files(DebugInfoPDBTests
1111
StringTableBuilderTest.cpp
1212
PDBApiTest.cpp
1313
PDBVariantTest.cpp
14+
PublicsStreamTest.cpp
1415
)
1516

1617
target_link_libraries(DebugInfoPDBTests PRIVATE LLVMTestingSupport)
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"
#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/Error.h"
#include "llvm/Testing/Support/Error.h"

#include "gtest/gtest.h"
21+
22+
using namespace llvm;
23+
using namespace llvm::pdb;
24+
25+
namespace {
26+
struct PublicSym {
27+
llvm::StringRef Name;
28+
uint16_t Segment;
29+
uint32_t Offset;
30+
};
31+
32+
class MockPublics {
33+
public:
34+
MockPublics(size_t StreamSize, BumpPtrAllocator &Alloc,
35+
msf::MSFBuilder Builder);
36+
static Expected<std::unique_ptr<MockPublics>>
37+
create(BumpPtrAllocator &Allocator, size_t StreamSize);
38+
39+
void addPublics(ArrayRef<PublicSym> Syms);
40+
Error finish();
41+
42+
PublicsStream *publicsStream();
43+
SymbolStream *symbolStream();
44+
45+
MutableBinaryByteStream &stream() { return Stream; }
46+
47+
private:
48+
MutableBinaryByteStream Stream;
49+
50+
msf::MSFBuilder MsfBuilder;
51+
std::optional<msf::MSFLayout> MsfLayout;
52+
53+
GSIStreamBuilder Gsi;
54+
55+
std::unique_ptr<PublicsStream> Publics;
56+
std::unique_ptr<SymbolStream> Symbols;
57+
};
58+
59+
/// Allocates a StreamSize-byte little-endian backing buffer from Allocator,
/// takes ownership of Builder, and binds the GSI builder to it.
MockPublics::MockPublics(size_t StreamSize, BumpPtrAllocator &Allocator,
                         msf::MSFBuilder Builder)
    : Stream(MutableArrayRef<uint8_t>(Allocator.Allocate<uint8_t>(StreamSize),
                                      StreamSize),
             llvm::endianness::little),
      MsfBuilder(std::move(Builder)),
      // Refers to the member MsfBuilder (already initialized above), not to
      // the moved-from constructor argument.
      Gsi(MsfBuilder) {}
64+
65+
/// Builds an MSF builder with a 4096-byte block size and wraps it, together
/// with a StreamSize-byte buffer, in a new MockPublics. Any error from creating
/// the MSF builder is forwarded to the caller.
Expected<std::unique_ptr<MockPublics>>
MockPublics::create(BumpPtrAllocator &Allocator, size_t StreamSize) {
  constexpr uint32_t BlockSize = 4096;

  Expected<msf::MSFBuilder> Msf = msf::MSFBuilder::create(Allocator, BlockSize);
  if (!Msf)
    return Msf.takeError();

  return std::make_unique<MockPublics>(StreamSize, Allocator, std::move(*Msf));
}
73+
74+
void MockPublics::addPublics(ArrayRef<PublicSym> Publics) {
75+
std::vector<BulkPublic> Bulks;
76+
for (const auto &Sym : Publics) {
77+
BulkPublic BP;
78+
BP.Name = Sym.Name.data();
79+
BP.NameLen = Sym.Name.size();
80+
BP.Offset = Sym.Offset;
81+
BP.Segment = Sym.Segment;
82+
Bulks.emplace_back(BP);
83+
}
84+
Gsi.addPublicSymbols(std::move(Bulks));
85+
}
86+
87+
/// Finalizes the GSI streams' MSF layout, generates the overall MSF layout
/// (stored in MsfLayout), and commits the GSI streams into the backing buffer.
///
/// \return The first error encountered, or success.
Error MockPublics::finish() {
  if (Error LayoutErr = Gsi.finalizeMsfLayout())
    return LayoutErr;

  Expected<msf::MSFLayout> Layout = MsfBuilder.generateLayout();
  if (!Layout)
    return Layout.takeError();

  // Keep the layout around: the stream readers are created from it later.
  MsfLayout = std::move(*Layout);
  return Gsi.commit(*MsfLayout, Stream);
}
99+
100+
/// Returns the reader for the publics stream, creating it on first use.
///
/// The reader is built over the backing buffer using the layout produced by
/// finish(). The caller still has to call reload() on it.
///
/// \return The cached reader, or nullptr if no layout is available because
/// finish() has not completed successfully.
PublicsStream *MockPublics::publicsStream() {
  if (Publics)
    return Publics.get();

  // MsfLayout is only populated by finish(); dereferencing it while empty
  // would be undefined behavior, so report the misuse as nullptr instead.
  if (!MsfLayout)
    return nullptr;

  Publics = std::make_unique<PublicsStream>(
      msf::MappedBlockStream::createIndexedStream(*MsfLayout, Stream,
                                                  Gsi.getPublicsStreamIndex(),
                                                  MsfBuilder.getAllocator()));
  return Publics.get();
}
109+
110+
/// Returns the reader for the symbol record stream, creating it on first use.
///
/// The reader is built over the backing buffer using the layout produced by
/// finish(). The caller still has to call reload() on it.
///
/// \return The cached reader, or nullptr if no layout is available because
/// finish() has not completed successfully.
SymbolStream *MockPublics::symbolStream() {
  if (Symbols)
    return Symbols.get();

  // MsfLayout is only populated by finish(); dereferencing it while empty
  // would be undefined behavior, so report the misuse as nullptr instead.
  if (!MsfLayout)
    return nullptr;

  Symbols = std::make_unique<SymbolStream>(
      msf::MappedBlockStream::createIndexedStream(*MsfLayout, Stream,
                                                  Gsi.getRecordStreamIndex(),
                                                  MsfBuilder.getAllocator()));
  return Symbols.get();
}
119+
120+
std::array GSymbols{
121+
PublicSym{"??0Base@@QEAA@XZ", /*Segment=*/1, /*Offset=*/0},
122+
PublicSym{"??0Derived@@QEAA@XZ", /*Segment=*/1, /*Offset=*/32},
123+
PublicSym{"??0Derived2@@QEAA@XZ", /*Segment=*/1, /*Offset=*/32},
124+
PublicSym{"??0Derived3@@QEAA@XZ", /*Segment=*/1, /*Offset=*/80},
125+
PublicSym{"??1Base@@UEAA@XZ", /*Segment=*/1, /*Offset=*/160},
126+
PublicSym{"??1Derived@@UEAA@XZ", /*Segment=*/1, /*Offset=*/176},
127+
PublicSym{"??1Derived2@@UEAA@XZ", /*Segment=*/1, /*Offset=*/176},
128+
PublicSym{"??1Derived3@@UEAA@XZ", /*Segment=*/1, /*Offset=*/208},
129+
PublicSym{"??3@YAXPEAX_K@Z", /*Segment=*/1, /*Offset=*/256},
130+
PublicSym{"??_EDerived3@@W7EAAPEAXI@Z", /*Segment=*/1, /*Offset=*/268},
131+
PublicSym{"??_GBase@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/288},
132+
PublicSym{"??_EBase@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/288},
133+
PublicSym{"??_EDerived2@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352},
134+
PublicSym{"??_EDerived@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352},
135+
PublicSym{"??_GDerived@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352},
136+
PublicSym{"??_GDerived2@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352},
137+
PublicSym{"??_EDerived3@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/416},
138+
PublicSym{"??_GDerived3@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/416},
139+
PublicSym{"?AMethod@AClass@@QEAAXHPEAD@Z", /*Segment=*/1, /*Offset=*/480},
140+
PublicSym{"?Something@AClass@@SA_ND@Z", /*Segment=*/1, /*Offset=*/496},
141+
PublicSym{"?dup1@@YAHH@Z", /*Segment=*/1, /*Offset=*/544},
142+
PublicSym{"?dup3@@YAHH@Z", /*Segment=*/1, /*Offset=*/544},
143+
PublicSym{"?dup2@@YAHH@Z", /*Segment=*/1, /*Offset=*/544},
144+
PublicSym{"?foobar@@YAHH@Z", /*Segment=*/1, /*Offset=*/560},
145+
PublicSym{"main", /*Segment=*/1, /*Offset=*/576},
146+
PublicSym{"??_7Base@@6B@", /*Segment=*/2, /*Offset=*/0},
147+
PublicSym{"??_7Derived@@6B@", /*Segment=*/2, /*Offset=*/8},
148+
PublicSym{"??_7Derived2@@6B@", /*Segment=*/2, /*Offset=*/8},
149+
PublicSym{"??_7Derived3@@6BDerived2@@@", /*Segment=*/2, /*Offset=*/16},
150+
PublicSym{"??_7Derived3@@6BDerived@@@", /*Segment=*/2, /*Offset=*/24},
151+
PublicSym{"?AGlobal@@3HA", /*Segment=*/3, /*Offset=*/0},
152+
};
153+
154+
} // namespace
155+
156+
static std::pair<uint32_t, uint32_t>
157+
nthSymbolAddress(PublicsStream *Publics, SymbolStream *Symbols, size_t N) {
158+
auto Index = Publics->getAddressMap()[N].value();
159+
codeview::CVSymbol Sym = Symbols->readRecord(Index);
160+
auto ExpectedPub =
161+
codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(Sym);
162+
if (!ExpectedPub)
163+
return std::pair(0, 0);
164+
return std::pair(ExpectedPub->Segment, ExpectedPub->Offset);
165+
}
166+
167+
// Builds a mock PDB from GSymbols and checks that findByAddress
//  - returns the first address map entry when several symbols share an
//    address,
//  - returns nothing for addresses without a symbol, and
//  - returns nothing when the record at the address is not an S_PUB32.
TEST(PublicsStreamTest, FindByAddress) {
  BumpPtrAllocator Allocator;
  auto ExpectedMock = MockPublics::create(Allocator, 1 << 20);
  // The ASSERT_THAT_* helpers consume the error on failure, so a failing step
  // is reported as a test failure instead of aborting on an unhandled Error.
  ASSERT_THAT_EXPECTED(ExpectedMock, Succeeded());
  std::unique_ptr<MockPublics> Mock = std::move(*ExpectedMock);

  Mock->addPublics(GSymbols);
  ASSERT_THAT_ERROR(Mock->finish(), Succeeded());

  PublicsStream *Publics = Mock->publicsStream();
  ASSERT_NE(Publics, nullptr);
  ASSERT_THAT_ERROR(Publics->reload(), Succeeded());

  SymbolStream *Symbols = Mock->symbolStream();
  ASSERT_NE(Symbols, nullptr);
  ASSERT_THAT_ERROR(Symbols->reload(), Succeeded());

  auto VTableDerived = Publics->findByAddress(*Symbols, 2, 8);
  ASSERT_TRUE(VTableDerived.has_value());
  // Both Derived and Derived2 have their vftables at this address (as with
  // ICF); Derived2 comes first in the address map.
  ASSERT_EQ(VTableDerived->first.Name, "??_7Derived2@@6B@");
  ASSERT_EQ(VTableDerived->second, 26u);

  // Again, make sure that we find the first symbol; the entries after it in
  // the address map share the same address.
  auto VectorDtorDerived = Publics->findByAddress(*Symbols, 1, 352);
  ASSERT_TRUE(VectorDtorDerived.has_value());
  ASSERT_EQ(VectorDtorDerived->first.Name, "??_EDerived2@@UEAAPEAXI@Z");
  ASSERT_EQ(VectorDtorDerived->second, 12u);
  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 13), std::pair(1u, 352u));
  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 14), std::pair(1u, 352u));
  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 15), std::pair(1u, 352u));
  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 16), std::pair(1u, 416u));

  // Addresses right next to a symbol must not match.
  ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 7).has_value());
  ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 9).has_value());

  auto GlobalSym = Publics->findByAddress(*Symbols, 3, 0);
  ASSERT_TRUE(GlobalSym.has_value());
  ASSERT_EQ(GlobalSym->first.Name, "?AGlobal@@3HA");
  ASSERT_EQ(GlobalSym->second, 30u);

  // Test corrupt debug info: turn the last public into a different record
  // kind.
  codeview::CVSymbol GlobalCVSym =
      Symbols->readRecord(Publics->getAddressMap()[30]);
  ASSERT_EQ(GlobalCVSym.kind(), codeview::S_PUB32);
  // CVSymbol::data returns a pointer to const data, so we modify the backing
  // data instead. That is only valid if the record's bytes actually live
  // inside the backing buffer, so check this before writing through the
  // computed offset.
  MutableArrayRef<uint8_t> Backing = Mock->stream().data();
  const uint8_t *BackingBegin = Backing.begin();
  const uint8_t *BackingEnd = Backing.end();
  const uint8_t *RecordBegin = GlobalCVSym.data().data();
  ASSERT_GE(RecordBegin, BackingBegin);
  ASSERT_LE(RecordBegin + sizeof(codeview::RecordPrefix), BackingEnd);

  uint8_t *PDBData = Backing.data();
  auto Offset = RecordBegin - BackingBegin;
  reinterpret_cast<codeview::RecordPrefix *>(PDBData + Offset)->RecordKind =
      codeview::S_GDATA32;
  ASSERT_EQ(GlobalCVSym.kind(), codeview::S_GDATA32);

  GlobalSym = Publics->findByAddress(*Symbols, 3, 0);
  ASSERT_FALSE(GlobalSym.has_value());
}

0 commit comments

Comments
 (0)