Skip to content

Commit 8636efe

Browse files
[DebugNames] Implement per-entry abbreviation support
Prior to this commit, the choice of forms to use in debug_names abbreviations was made independently of the entries themselves. For example, if we wanted entries to have an "IDX_compile_unit", all entries needed to use the same form (e.g. data2) for that IDX, even if a specific entry could fit its IDX in a single byte (e.g. data1). This commit changes that by creating an `AbbreviationContents` data structure to handle the encoding of abbreviations on a per-entry basis. We don't need the full generality of abbreviations from the debug_info section (abbreviations in debug_names are more limited), but this commit is desirable for two reasons: 1. Upcoming patches may want to use different forms for different entries (there is ongoing work for IDX_parent that will benefit from this). 2. This allows for space savings by using smaller forms. A few tests had to be updated, as this commit changes the abbreviation number (and therefore the order in which abbreviations are printed). Notably, this _decreases_ the abbreviation number used, which may lead to smaller ULEB encodings. A future patch should just renumber these to start with 1.
1 parent acdf40c commit 8636efe

File tree

6 files changed

+173
-83
lines changed

6 files changed

+173
-83
lines changed

llvm/include/llvm/CodeGen/AccelTable.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -305,9 +305,9 @@ class DWARF5AccelTable : public AccelTable<DWARF5AccelTableData> {
305305
TUVectorTy TUSymbolsOrHashes;
306306

307307
public:
308-
struct UnitIndexAndEncoding {
308+
struct UnitIndex {
309309
unsigned Index;
310-
DWARF5AccelTableData::AttributeEncoding Encoding;
310+
bool IsType;
311311
};
312312
/// Returns type units that were constructed.
313313
const TUVectorTy &getTypeUnitsSymbols() { return TUSymbolsOrHashes; }
@@ -366,7 +366,7 @@ void emitDWARF5AccelTable(AsmPrinter *Asm, DWARF5AccelTable &Contents,
366366
void emitDWARF5AccelTable(
367367
AsmPrinter *Asm, DWARF5AccelTable &Contents,
368368
ArrayRef<std::variant<MCSymbol *, uint64_t>> CUs,
369-
llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
369+
llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndex>(
370370
const DWARF5AccelTableData &)>
371371
getIndexForEntry);
372372

llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp

Lines changed: 145 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,120 @@ class AppleAccelTableWriter : public AccelTableWriter {
179179
#endif
180180
};
181181

182+
/// Compact tag recording which kind of unit, if any, something refers to.
/// The three enumerators take the values 0..2, so they fit in two bits.
enum class TypeOrCompileUnit : uint8_t {
  Compile = 0,
  Type = 1,
  None = 2,
};
185+
186+
/// Compact stand-in for DW_FORM_data{1,2,4,8}. The four enumerators take the
/// values 0..3, so they fit in two bits.
enum class FormDataLength : uint8_t {
  Data1 = 0, // DW_FORM_data1
  Data2 = 1, // DW_FORM_data2
  Data4 = 2, // DW_FORM_data4
  Data8 = 3, // DW_FORM_data8
};
194+
195+
/// Maps `Index` into the smallest DW_FORM_data{1,2,4,8} that can represent it,
196+
/// and returns the corresponding `FormDataLength`.
197+
static FormDataLength getFormForIndex(uint32_t Index) {
198+
dwarf::Form Form = DIEInteger::BestForm(false /*IsSigned*/, Index);
199+
switch (Form) {
200+
case dwarf::Form::DW_FORM_data1:
201+
return FormDataLength::Data1;
202+
case dwarf::Form::DW_FORM_data2:
203+
return FormDataLength::Data2;
204+
case dwarf::Form::DW_FORM_data4:
205+
return FormDataLength::Data4;
206+
case dwarf::Form::DW_FORM_data8:
207+
return FormDataLength::Data8;
208+
default:
209+
llvm_unreachable("invalid getFormForIndex");
210+
}
211+
}
212+
213+
/// Maps a `FormDataLength` back to the corresponding DW_FORM_data{1,2,4,8}
214+
static dwarf::Form toDwarfDataForm(FormDataLength DataLength) {
215+
switch (DataLength) {
216+
case FormDataLength::Data1:
217+
return dwarf::Form::DW_FORM_data1;
218+
case FormDataLength::Data2:
219+
return dwarf::Form::DW_FORM_data2;
220+
case FormDataLength::Data4:
221+
return dwarf::Form::DW_FORM_data4;
222+
case FormDataLength::Data8:
223+
return dwarf::Form::DW_FORM_data8;
224+
}
225+
llvm_unreachable("invalid toDwarfDataForm");
226+
}
227+
228+
/// Converts `UnitType` and `UnitForm` into an `AttributeEncoding` and push it
229+
/// into `Ans`, if UnitType != None.
230+
void pushTypeOrCompileUnitEncondings(
231+
SmallVectorImpl<DWARF5AccelTableData::AttributeEncoding> &Vec,
232+
TypeOrCompileUnit UnitType, FormDataLength UnitForm) {
233+
switch (UnitType) {
234+
case TypeOrCompileUnit::Compile:
235+
Vec.push_back({dwarf::DW_IDX_compile_unit, toDwarfDataForm(UnitForm)});
236+
break;
237+
case TypeOrCompileUnit::Type:
238+
Vec.push_back({dwarf::DW_IDX_type_unit, toDwarfDataForm((UnitForm))});
239+
break;
240+
case TypeOrCompileUnit::None:
241+
break;
242+
}
243+
return;
244+
}
245+
246+
/// Represent the contents of an Abbreviation Entry for a DWARF5AccelTable, so
247+
/// that a set of such entries may be created.
248+
/// All Abbreviations have some common content:
249+
/// 1. IDX_die_offset, with form ref4.
250+
/// 2. IDX_{type, compile}_unit with a form DW_FORM_data{1,2,4,8}.
251+
/// This class doesn't encode such common elements; instead, it only encodes
252+
/// contents that may change from one abbreviation to another, as those define
253+
/// the uniqueness of each abbreviation.
254+
/// Common elements still show up in accessor methods (e.g. `getAttrEncodings`).
255+
struct AbbreviationContents {
256+
uint16_t Tag : 16;
257+
TypeOrCompileUnit UnitType : 2;
258+
FormDataLength UnitForm : 2;
259+
260+
AbbreviationContents(uint16_t Tag, TypeOrCompileUnit UnitType,
261+
uint32_t UnitIndex)
262+
: Tag(Tag), UnitType(UnitType), UnitForm(getFormForIndex(UnitIndex)) {}
263+
AbbreviationContents(uint16_t Tag)
264+
: Tag(Tag), UnitType(TypeOrCompileUnit::None),
265+
UnitForm(getFormForIndex(0)) {}
266+
267+
uint32_t getUniqueCode() const {
268+
uint32_t Bitfield = static_cast<uint32_t>(UnitType);
269+
Bitfield |= static_cast<uint32_t>(UnitForm) << 2;
270+
Bitfield |= static_cast<uint32_t>(Tag) << 4;
271+
return Bitfield;
272+
}
273+
274+
SmallVector<DWARF5AccelTableData::AttributeEncoding, 2>
275+
getAttrEncodings() const {
276+
SmallVector<DWARF5AccelTableData::AttributeEncoding, 2> Ans;
277+
pushTypeOrCompileUnitEncondings(Ans, UnitType, UnitForm);
278+
Ans.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
279+
return Ans;
280+
}
281+
};
282+
283+
struct AbbreviationContentsInfo {
284+
static AbbreviationContents getEmptyKey() { return AbbreviationContents(0); };
285+
static AbbreviationContents getTombstoneKey() {
286+
return AbbreviationContents(~0);
287+
};
288+
static uint32_t getHashValue(AbbreviationContents Contents) {
289+
return Contents.getUniqueCode();
290+
}
291+
static bool isEqual(AbbreviationContents LHS, AbbreviationContents RHS) {
292+
return LHS.Tag == RHS.Tag && LHS.UnitType == RHS.UnitType;
293+
}
294+
};
295+
182296
/// Class responsible for emitting a DWARF v5 Accelerator Table. The only
183297
/// public function is emit(), which performs the actual emission.
184298
///
@@ -207,11 +321,10 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
207321
};
208322

209323
Header Header;
210-
DenseMap<uint32_t, SmallVector<DWARF5AccelTableData::AttributeEncoding, 2>>
211-
Abbreviations;
324+
DenseSet<AbbreviationContents, AbbreviationContentsInfo> Abbreviations;
212325
ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits;
213326
ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits;
214-
llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
327+
llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndex>(
215328
const DWARF5AccelTableData &)>
216329
getIndexForEntry;
217330
MCSymbol *ContributionEnd = nullptr;
@@ -236,7 +349,7 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
236349
AsmPrinter *Asm, const AccelTableBase &Contents,
237350
ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits,
238351
ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits,
239-
llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
352+
llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndex>(
240353
const DWARF5AccelTableData &)>
241354
getIndexForEntry,
242355
bool IsSplitDwarf);
@@ -395,39 +508,27 @@ void Dwarf5AccelTableWriter::Header::emit(Dwarf5AccelTableWriter &Ctx) {
395508
Asm->OutStreamer->emitBytes({AugmentationString, AugmentationStringSize});
396509
}
397510

398-
static uint32_t constexpr LowerBitSize = dwarf::DW_IDX_type_hash;
399-
static uint32_t getTagFromAbbreviationTag(const uint32_t AbbrvTag) {
400-
return AbbrvTag >> LowerBitSize;
401-
}
402-
403-
/// Constructs a unique AbbrevTag that captures what a DIE accesses.
404-
/// Using this tag we can emit a unique abbreviation for each DIE.
405-
static uint32_t constructAbbreviationTag(
511+
static AbbreviationContents constructAbbreviationContents(
406512
const unsigned Tag,
407-
const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> &EntryRet) {
408-
uint32_t AbbrvTag = 0;
409-
if (EntryRet)
410-
AbbrvTag |= 1 << EntryRet->Encoding.Index;
411-
AbbrvTag |= 1 << dwarf::DW_IDX_die_offset;
412-
AbbrvTag |= Tag << LowerBitSize;
413-
return AbbrvTag;
513+
const std::optional<DWARF5AccelTable::UnitIndex> &EntryRet) {
514+
if (!EntryRet)
515+
return AbbreviationContents(Tag);
516+
return AbbreviationContents(Tag,
517+
EntryRet->IsType ? TypeOrCompileUnit::Type
518+
: TypeOrCompileUnit::Compile,
519+
EntryRet->Index);
414520
}
521+
415522
void Dwarf5AccelTableWriter::populateAbbrevsMap() {
416523
for (auto &Bucket : Contents.getBuckets()) {
417524
for (auto *Hash : Bucket) {
418525
for (auto *Value : Hash->Values) {
419-
std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
526+
std::optional<DWARF5AccelTable::UnitIndex> EntryRet =
420527
getIndexForEntry(*static_cast<const DWARF5AccelTableData *>(Value));
421528
unsigned Tag =
422529
static_cast<const DWARF5AccelTableData *>(Value)->getDieTag();
423-
uint32_t AbbrvTag = constructAbbreviationTag(Tag, EntryRet);
424-
if (Abbreviations.count(AbbrvTag) == 0) {
425-
SmallVector<DWARF5AccelTableData::AttributeEncoding, 2> UA;
426-
if (EntryRet)
427-
UA.push_back(EntryRet->Encoding);
428-
UA.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
429-
Abbreviations.try_emplace(AbbrvTag, UA);
430-
}
530+
auto AbbrevContents = constructAbbreviationContents(Tag, EntryRet);
531+
Abbreviations.insert(AbbrevContents);
431532
}
432533
}
433534
}
@@ -479,12 +580,11 @@ void Dwarf5AccelTableWriter::emitAbbrevs() const {
479580
Asm->OutStreamer->emitLabel(AbbrevStart);
480581
for (const auto &Abbrev : Abbreviations) {
481582
Asm->OutStreamer->AddComment("Abbrev code");
482-
uint32_t Tag = getTagFromAbbreviationTag(Abbrev.first);
483-
assert(Tag != 0);
484-
Asm->emitULEB128(Abbrev.first);
485-
Asm->OutStreamer->AddComment(dwarf::TagString(Tag));
486-
Asm->emitULEB128(Tag);
487-
for (const auto &AttrEnc : Abbrev.second) {
583+
assert(Abbrev.Tag != 0);
584+
Asm->emitULEB128(Abbrev.getUniqueCode());
585+
Asm->OutStreamer->AddComment(dwarf::TagString(Abbrev.Tag));
586+
Asm->emitULEB128(Abbrev.Tag);
587+
for (const auto &AttrEnc : Abbrev.getAttrEncodings()) {
488588
Asm->emitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data());
489589
Asm->emitULEB128(AttrEnc.Form,
490590
dwarf::FormEncodingString(AttrEnc.Form).data());
@@ -498,17 +598,14 @@ void Dwarf5AccelTableWriter::emitAbbrevs() const {
498598

499599
void Dwarf5AccelTableWriter::emitEntry(
500600
const DWARF5AccelTableData &Entry) const {
501-
std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
502-
getIndexForEntry(Entry);
503-
uint32_t AbbrvTag = constructAbbreviationTag(Entry.getDieTag(), EntryRet);
504-
auto AbbrevIt = Abbreviations.find(AbbrvTag);
505-
assert(AbbrevIt != Abbreviations.end() &&
601+
std::optional<DWARF5AccelTable::UnitIndex> EntryRet = getIndexForEntry(Entry);
602+
auto AbbrevContents =
603+
constructAbbreviationContents(Entry.getDieTag(), EntryRet);
604+
assert(Abbreviations.contains(AbbrevContents) &&
506605
"Why wasn't this abbrev generated?");
507-
assert(getTagFromAbbreviationTag(AbbrevIt->first) == Entry.getDieTag() &&
508-
"Invalid Tag");
509-
Asm->emitULEB128(AbbrevIt->first, "Abbreviation code");
606+
Asm->emitULEB128(AbbrevContents.getUniqueCode(), "Abbreviation code");
510607

511-
for (const auto &AttrEnc : AbbrevIt->second) {
608+
for (const auto &AttrEnc : AbbrevContents.getAttrEncodings()) {
512609
Asm->OutStreamer->AddComment(dwarf::IndexString(AttrEnc.Index));
513610
switch (AttrEnc.Index) {
514611
case dwarf::DW_IDX_compile_unit:
@@ -545,7 +642,7 @@ Dwarf5AccelTableWriter::Dwarf5AccelTableWriter(
545642
AsmPrinter *Asm, const AccelTableBase &Contents,
546643
ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits,
547644
ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits,
548-
llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
645+
llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndex>(
549646
const DWARF5AccelTableData &)>
550647
getIndexForEntry,
551648
bool IsSplitDwarf)
@@ -620,20 +717,14 @@ void llvm::emitDWARF5AccelTable(
620717
Asm->getObjFileLowering().getDwarfDebugNamesSection());
621718

622719
Contents.finalize(Asm, "names");
623-
dwarf::Form CUIndexForm =
624-
DIEInteger::BestForm(/*IsSigned*/ false, CompUnits.size() - 1);
625-
dwarf::Form TUIndexForm =
626-
DIEInteger::BestForm(/*IsSigned*/ false, TypeUnits.size() - 1);
627720
Dwarf5AccelTableWriter(
628721
Asm, Contents, CompUnits, TypeUnits,
629722
[&](const DWARF5AccelTableData &Entry)
630-
-> std::optional<DWARF5AccelTable::UnitIndexAndEncoding> {
723+
-> std::optional<DWARF5AccelTable::UnitIndex> {
631724
if (Entry.isTU())
632-
return {{TUIndex[Entry.getUnitID()],
633-
{dwarf::DW_IDX_type_unit, TUIndexForm}}};
725+
return {{TUIndex[Entry.getUnitID()], true /*IsType*/}};
634726
if (CUIndex.size() > 1)
635-
return {{CUIndex[Entry.getUnitID()],
636-
{dwarf::DW_IDX_compile_unit, CUIndexForm}}};
727+
return {{CUIndex[Entry.getUnitID()], false /*IsType*/}};
637728
return std::nullopt;
638729
},
639730
DD.useSplitDwarf())
@@ -651,7 +742,7 @@ void DWARF5AccelTable::addTypeUnitSignature(DwarfTypeUnit &U) {
651742
void llvm::emitDWARF5AccelTable(
652743
AsmPrinter *Asm, DWARF5AccelTable &Contents,
653744
ArrayRef<std::variant<MCSymbol *, uint64_t>> CUs,
654-
llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
745+
llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndex>(
655746
const DWARF5AccelTableData &)>
656747
getIndexForEntry) {
657748
std::vector<std::variant<MCSymbol *, uint64_t>> TypeUnits;

llvm/lib/DWARFLinker/DWARFStreamer.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -306,18 +306,15 @@ void DwarfStreamer::emitDebugNames(DWARF5AccelTable &Table) {
306306
}
307307

308308
Asm->OutStreamer->switchSection(MOFI->getDwarfDebugNamesSection());
309-
dwarf::Form Form = DIEInteger::BestForm(/*IsSigned*/ false,
310-
(uint64_t)UniqueIdToCuMap.size() - 1);
311309
/// llvm-dwarfutil doesn't support type units + .debug_names right now.
312310
// FIXME: add support for type units + .debug_names. For now the behavior is
313311
// unsupported.
314312
emitDWARF5AccelTable(
315313
Asm.get(), Table, CompUnits,
316314
[&](const DWARF5AccelTableData &Entry)
317-
-> std::optional<DWARF5AccelTable::UnitIndexAndEncoding> {
315+
-> std::optional<DWARF5AccelTable::UnitIndex> {
318316
if (UniqueIdToCuMap.size() > 1)
319-
return {{UniqueIdToCuMap[Entry.getUnitID()],
320-
{dwarf::DW_IDX_compile_unit, Form}}};
317+
return {{UniqueIdToCuMap[Entry.getUnitID()], false /*IsType*/}};
321318
return std::nullopt;
322319
});
323320
}

llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -230,17 +230,14 @@ void DwarfEmitterImpl::emitDebugNames(DWARF5AccelTable &Table,
230230
return;
231231

232232
Asm->OutStreamer->switchSection(MOFI->getDwarfDebugNamesSection());
233-
dwarf::Form Form =
234-
DIEInteger::BestForm(/*IsSigned*/ false, (uint64_t)CUidToIdx.size() - 1);
235233
// FIXME: add support for type units + .debug_names. For now the behavior is
236234
// unsupported.
237235
emitDWARF5AccelTable(
238236
Asm.get(), Table, CUOffsets,
239237
[&](const DWARF5AccelTableData &Entry)
240-
-> std::optional<DWARF5AccelTable::UnitIndexAndEncoding> {
238+
-> std::optional<DWARF5AccelTable::UnitIndex> {
241239
if (CUidToIdx.size() > 1)
242-
return {{CUidToIdx[Entry.getUnitID()],
243-
{dwarf::DW_IDX_compile_unit, Form}}};
240+
return {{CUidToIdx[Entry.getUnitID()], false /*IsType*/}};
244241
return std::nullopt;
245242
});
246243
}

llvm/test/DebugInfo/Generic/debug-names-many-cu.ll

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,26 @@
1414
; ...
1515
; CHECK: CU[256]: 0x{{[0-9a-f]*}}
1616

17-
; CHECK: Abbreviation [[ABBREV:0x[0-9a-f]*]]
17+
; CHECK: Abbreviation [[ABBREV_1BYTE:0x[0-9a-f]*]]
18+
; CHECK-NEXT: Tag: DW_TAG_variable
19+
; CHECK-NEXT: DW_IDX_compile_unit: DW_FORM_data1
20+
; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
21+
22+
; CHECK: Abbreviation [[ABBREV_2BYTES:0x[0-9a-f]*]]
1823
; CHECK-NEXT: Tag: DW_TAG_variable
1924
; CHECK-NEXT: DW_IDX_compile_unit: DW_FORM_data2
2025
; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
2126

2227
; CHECK: String: 0x{{[0-9a-f]*}} "foobar1"
2328
; CHECK-NEXT: Entry
24-
; CHECK-NEXT: Abbrev: [[ABBREV]]
29+
; CHECK-NEXT: Abbrev: [[ABBREV_1BYTE]]
2530
; CHECK-NEXT: Tag: DW_TAG_variable
26-
; CHECK-NEXT: DW_IDX_compile_unit: 0x0000
31+
; CHECK-NEXT: DW_IDX_compile_unit: 0x00
2732
; CHECK-NEXT: DW_IDX_die_offset: 0x{{[0-9a-f]*}}
2833

2934
; CHECK: String: 0x{{[0-9a-f]*}} "foobar257"
3035
; CHECK-NEXT: Entry
31-
; CHECK-NEXT: Abbrev: [[ABBREV]]
36+
; CHECK-NEXT: Abbrev: [[ABBREV_2BYTES]]
3237
; CHECK-NEXT: Tag: DW_TAG_variable
3338
; CHECK-NEXT: DW_IDX_compile_unit: 0x0100
3439
; CHECK-NEXT: DW_IDX_die_offset: 0x{{[0-9a-f]*}}

0 commit comments

Comments
 (0)