-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[llvm-debuginfo-analyzer] Add support for parsing DWARF / CodeView SourceLanguage #137223
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[llvm-debuginfo-analyzer] Add support for parsing DWARF / CodeView SourceLanguage #137223
Conversation
@llvm/pr-subscribers-llvm-binary-utilities @llvm/pr-subscribers-debuginfo Author: Javier Lopez-Gomez (jalopezg-git) Changes: This pull request adds support for parsing the source language in both DWARF and CodeView. Specifically,
FYI, @CarlosAlbertoEnciso. I believe this patch is ready too; feel free to start reviewing it. Full diff: https://github.com/llvm/llvm-project/pull/137223.diff 8 Files Affected:
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h
index 17fa04040ad77..22c24d0c0592c 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h
@@ -14,10 +14,13 @@
#ifndef LLVM_DEBUGINFO_LOGICALVIEW_CORE_LVELEMENT_H
#define LLVM_DEBUGINFO_LOGICALVIEW_CORE_LVELEMENT_H
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/LogicalView/Core/LVObject.h"
#include "llvm/Support/Casting.h"
#include <map>
#include <set>
+#include <variant>
#include <vector>
namespace llvm {
@@ -64,6 +67,22 @@ using LVElementKindSet = std::set<LVElementKind>;
using LVElementDispatch = std::map<LVElementKind, LVElementGetFunction>;
using LVElementRequest = std::vector<LVElementGetFunction>;
+/// A source language supported by any of the debug info representations.
+struct LVSourceLanguage {
+ LVSourceLanguage() = default;
+ LVSourceLanguage(llvm::dwarf::SourceLanguage SL) : Language(SL) {}
+ LVSourceLanguage(llvm::codeview::SourceLanguage SL) : Language(SL) {}
+
+ bool isValid() const { return Language.index() != 0; }
+ template <typename T> T getAs() { return std::get<T>(Language); }
+ StringRef getName() const;
+
+private:
+ std::variant<std::monostate, llvm::dwarf::SourceLanguage,
+ llvm::codeview::SourceLanguage>
+ Language;
+};
+
class LVElement : public LVObject {
enum class Property {
IsLine, // A logical line.
@@ -214,6 +233,9 @@ class LVElement : public LVObject {
virtual StringRef getProducer() const { return StringRef(); }
virtual void setProducer(StringRef ProducerName) {}
+ virtual LVSourceLanguage getSourceLanguage() const { return {}; }
+ virtual void setSourceLanguage(LVSourceLanguage SL) {}
+
virtual bool isCompileUnit() const { return false; }
virtual bool isRoot() const { return false; }
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h
index 1b3c377cd7dbb..378f249029730 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h
@@ -407,6 +407,9 @@ class LVScopeCompileUnit final : public LVScope {
// Toolchain producer.
size_t ProducerIndex = 0;
+ // Source language.
+ LVSourceLanguage SourceLanguage{};
+
// Compilation directory name.
size_t CompilationDirectoryIndex = 0;
@@ -540,6 +543,9 @@ class LVScopeCompileUnit final : public LVScope {
ProducerIndex = getStringPool().getIndex(ProducerName);
}
+ LVSourceLanguage getSourceLanguage() const override { return SourceLanguage; }
+ void setSourceLanguage(LVSourceLanguage SL) override { SourceLanguage = SL; }
+
void setCPUType(codeview::CPUType Type) { CompilationCPUType = Type; }
codeview::CPUType getCPUType() { return CompilationCPUType; }
diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp
index c6fb405baed1d..47aca07b7327e 100644
--- a/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/LogicalView/Core/LVElement.h"
+#include "llvm/DebugInfo/CodeView/EnumTables.h"
#include "llvm/DebugInfo/LogicalView/Core/LVReader.h"
#include "llvm/DebugInfo/LogicalView/Core/LVScope.h"
#include "llvm/DebugInfo/LogicalView/Core/LVType.h"
@@ -19,6 +20,21 @@ using namespace llvm;
using namespace llvm::codeview;
using namespace llvm::logicalview;
+StringRef LVSourceLanguage::getName() const {
+ if (!isValid())
+ return {};
+ switch (Language.index()) {
+ case 1: // DWARF
+ return llvm::dwarf::LanguageString(
+ std::get<llvm::dwarf::SourceLanguage>(Language));
+ case 2: // CodeView
+ static auto LangNames = llvm::codeview::getSourceLanguageNames();
+ return LangNames[std::get<llvm::codeview::SourceLanguage>(Language)].Name;
+ default:
+ llvm_unreachable("Unsupported language");
+ }
+}
+
#define DEBUG_TYPE "Element"
LVElementDispatch LVElement::Dispatch = {
diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp
index 8bbaf93db0caa..ae585567c9de9 100644
--- a/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp
@@ -1707,11 +1707,17 @@ void LVScopeCompileUnit::print(raw_ostream &OS, bool Full) const {
void LVScopeCompileUnit::printExtra(raw_ostream &OS, bool Full) const {
OS << formattedKind(kind()) << " '" << getName() << "'\n";
- if (options().getPrintFormatting() && options().getAttributeProducer())
+ if (options().getPrintFormatting() && options().getAttributeProducer()) {
printAttributes(OS, Full, "{Producer} ",
const_cast<LVScopeCompileUnit *>(this), getProducer(),
/*UseQuotes=*/true,
/*PrintRef=*/false);
+ if (auto SL = getSourceLanguage(); SL.isValid())
+ printAttributes(OS, Full, "{Language} ",
+ const_cast<LVScopeCompileUnit *>(this), SL.getName(),
+ /*UseQuotes=*/true,
+ /*PrintRef=*/false);
+ }
// Reset file index, to allow its children to print the correct filename.
options().resetFilenameIndex();
diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp
index 97214948d014a..3359cb8751923 100644
--- a/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp
@@ -947,8 +947,11 @@ Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
// The name of the CU, was extracted from the 'BuildInfo' subsection.
Reader->setCompileUnitCPUType(Compile2.Machine);
Scope->setName(CurrentObjectName);
- if (options().getAttributeProducer())
+ if (options().getAttributeProducer()) {
Scope->setProducer(Compile2.Version);
+ Scope->setSourceLanguage(LVSourceLanguage{
+ static_cast<llvm::codeview::SourceLanguage>(Compile2.getLanguage())});
+ }
getReader().isSystemEntry(Scope, CurrentObjectName);
// The line records in CodeView are recorded per Module ID. Update
@@ -992,8 +995,11 @@ Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
// The name of the CU, was extracted from the 'BuildInfo' subsection.
Reader->setCompileUnitCPUType(Compile3.Machine);
Scope->setName(CurrentObjectName);
- if (options().getAttributeProducer())
+ if (options().getAttributeProducer()) {
Scope->setProducer(Compile3.Version);
+ Scope->setSourceLanguage(LVSourceLanguage{
+ static_cast<llvm::codeview::SourceLanguage>(Compile3.getLanguage())});
+ }
getReader().isSystemEntry(Scope, CurrentObjectName);
// The line records in CodeView are recorded per Module ID. Update
diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp
index 42da957233667..e5bdd1ba614d6 100644
--- a/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp
@@ -383,6 +383,11 @@ void LVDWARFReader::processOneAttribute(const DWARFDie &Die,
if (options().getAttributeProducer())
CurrentElement->setProducer(dwarf::toStringRef(FormValue));
break;
+ case dwarf::DW_AT_language:
+ if (options().getAttributeProducer())
+ CurrentElement->setSourceLanguage(LVSourceLanguage{
+ static_cast<llvm::dwarf::SourceLanguage>(GetAsUnsignedConstant())});
+ break;
case dwarf::DW_AT_upper_bound:
CurrentElement->setUpperBound(GetBoundValue(FormValue));
break;
diff --git a/llvm/unittests/DebugInfo/LogicalView/CodeViewReaderTest.cpp b/llvm/unittests/DebugInfo/LogicalView/CodeViewReaderTest.cpp
index c93a79094dce9..c6bdda782a17f 100644
--- a/llvm/unittests/DebugInfo/LogicalView/CodeViewReaderTest.cpp
+++ b/llvm/unittests/DebugInfo/LogicalView/CodeViewReaderTest.cpp
@@ -78,6 +78,11 @@ void checkElementPropertiesClangCodeview(LVReader *Reader) {
EXPECT_EQ(CompileUnit->getBaseAddress(), 0u);
EXPECT_TRUE(CompileUnit->getProducer().starts_with("clang"));
EXPECT_EQ(CompileUnit->getName(), "test.cpp");
+ LVSourceLanguage Language = CompileUnit->getSourceLanguage();
+ EXPECT_TRUE(Language.isValid());
+ ASSERT_EQ(Language.getAs<llvm::codeview::SourceLanguage>(),
+ llvm::codeview::SourceLanguage::Cpp);
+ ASSERT_EQ(Language.getName(), "Cpp");
EXPECT_EQ(Function->lineCount(), 16u);
EXPECT_EQ(Function->scopeCount(), 1u);
diff --git a/llvm/unittests/DebugInfo/LogicalView/DWARFReaderTest.cpp b/llvm/unittests/DebugInfo/LogicalView/DWARFReaderTest.cpp
index c062c15481da9..72d4bc1c7b7e5 100644
--- a/llvm/unittests/DebugInfo/LogicalView/DWARFReaderTest.cpp
+++ b/llvm/unittests/DebugInfo/LogicalView/DWARFReaderTest.cpp
@@ -72,6 +72,11 @@ void checkElementProperties(LVReader *Reader) {
EXPECT_EQ(CompileUnit->getBaseAddress(), 0u);
EXPECT_TRUE(CompileUnit->getProducer().starts_with("clang"));
EXPECT_EQ(CompileUnit->getName(), "test.cpp");
+ LVSourceLanguage Language = CompileUnit->getSourceLanguage();
+ EXPECT_TRUE(Language.isValid());
+ EXPECT_EQ(Language.getAs<llvm::dwarf::SourceLanguage>(),
+ llvm::dwarf::DW_LANG_C_plus_plus_14);
+ EXPECT_EQ(Language.getName(), "DW_LANG_C_plus_plus_14");
EXPECT_EQ(CompileUnit->lineCount(), 0u);
EXPECT_EQ(CompileUnit->scopeCount(), 1u);
|
af226a0
to
e75238a
Compare
5dfcb95
to
2e59c39
Compare
@jalopezg-git Thanks for doing this. What I would suggest is to include the command line option may be |
llvm::codeview::SourceLanguage> | ||
Language; | ||
}; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe explore the use of `StringPool` to store the `Language` string.
Basically, the `setSourceLanguage` function could use the logic from `StringRef LVSourceLanguage::getName()` to get the string and store it in the `StringPool`. Then, to get the string back in `getSourceLanguage`, query the `StringPool`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That sounds promising, and a bit simpler, given the existing infrastructure.
However, in the current state of this PR, we could write a `switch` statement on the returned value of `getSourceLanguage().getAs<...>()`; this particular use case would become a bit harder to support if using strings as the underlying representation.
Initially, I thought this could be supported by exposing the `LanguageIndex` data member (suggested below), but this is not reliable, as it depends on the interning order.
We could still go for `StringPool` + `LanguageIndex` at the cost of having to do string comparisons somewhere else (I would prefer to avoid this).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought about this a bit and came to the conclusion that we could do one of the following:
- (1) Leave as-is, using `std::variant<Ts...>`, perhaps with some additional improvement.
- (2) Make `LVSourceLanguage` essentially an `enum` which contains enumerators for every language supported by any of the readers. We could use the most-significant bits to store the format that defines such language (e.g. DWARF, CodeView, etc.). This may be simple, as we can do something similar to
static constexpr unsigned LVTagDwarf = (0x01 << 16);
static constexpr unsigned LVTagCodeView = (0x02 << 16);
enum LVSourceLanguage : uint32_t {
/* DWARF */
#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \
DW_LANG_##NAME = LVTagDwarf | ID,
#include "llvm/BinaryFormat/Dwarf.def"
/* Codeview */
...
};
Then, the SourceLanguage becomes just a `uint32_t`. Essentially, this tries to represent the same as above, but without using `std::variant<Ts...>`, and possibly being more space-efficient.
- (3) Rely on `StringPool`, as you suggested. Then, it may be a bit uncomfortable for `debuginfologicalview` library users to do something conditionally on the source language of a compile unit.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
BTW, I tend to like (2) more, so if you also agree, I could switch to this option for the next revision.
@@ -407,6 +407,9 @@ class LVScopeCompileUnit final : public LVScope { | |||
// Toolchain producer. | |||
size_t ProducerIndex = 0; | |||
|
|||
// Source language. | |||
LVSourceLanguage SourceLanguage{}; | |||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe just: `size_t LanguageIndex = 0;`
@@ -540,6 +543,9 @@ class LVScopeCompileUnit final : public LVScope { | |||
ProducerIndex = getStringPool().getIndex(ProducerName); | |||
} | |||
|
|||
LVSourceLanguage getSourceLanguage() const override { return SourceLanguage; } | |||
void setSourceLanguage(LVSourceLanguage SL) override { SourceLanguage = SL; } | |||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Get/Set the language string from/into the `StringPool`.
llvm_unreachable("Unsupported language"); | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This logic can be used to store the language string in the `StringPool`.
Thanks for the review, @CarlosAlbertoEnciso 👍! See my concern above about going for |
5e54a5c
to
b669e4f
Compare
Done too 👍; only missing discussion on whether to transition to |
✅ With the latest revision this PR passed the C/C++ code formatter. |
5ab70b8
to
ecbb30e
Compare
ecbb30e
to
975eb58
Compare
This pull request adds support for parsing the source language in both DWARF and CodeView. Specifically,
- The `LVSourceLanguage` class is introduced to represent any language supported by any of the debug info representations.
- Update `LVDWARFReader.cpp` and `LVCodeViewVisitor.cpp` to parse the source language where it applies. Similar to the producing compiler, `getAttributeProducer()` currently controls whether this information is being filled in / printed. An additional option could be registered for this, but I deemed it unneeded (at least for now).
FYI, @CarlosAlbertoEnciso. I believe this patch is ready too; feel free to start reviewing it.