-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[GOFF] Add writing of section symbols #133799
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-binary-utilities @llvm/pr-subscribers-mc Author: Kai Nacke (redstar) Changes: The GOFF format uses symbol definitions to represent sections and symbols. Introducing a section can require up to 3 symbol definitions. However, most of these details are not needed by the AsmPrinter. To map from a section (a MCSectionGOFF) to the symbol definitions, a new class called MCGOFFSymbolMapper is used. The same information can also be used by the assembly output, which justifies this centralized approach. Writing the mapped symbols is then straightforward. Patch is 35.69 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/133799.diff 8 Files Affected:
diff --git a/llvm/include/llvm/BinaryFormat/GOFF.h b/llvm/include/llvm/BinaryFormat/GOFF.h
index 443bcfc9479a8..43d80e0c247e9 100644
--- a/llvm/include/llvm/BinaryFormat/GOFF.h
+++ b/llvm/include/llvm/BinaryFormat/GOFF.h
@@ -169,6 +169,91 @@ enum SubsectionKind : uint8_t {
SK_PPA1 = 2,
SK_PPA2 = 4,
};
+
+// The standard System/390 convention is to name the high-order (leftmost) bit
+// in a byte as bit zero. The Flags type helps to set bits in a byte according
+// to this numeration order.
+class Flags {
+ uint8_t Val;
+
+ constexpr static uint8_t bits(uint8_t BitIndex, uint8_t Length, uint8_t Value,
+ uint8_t OldValue) {
+ uint8_t Pos = 8 - BitIndex - Length;
+ uint8_t Mask = ((1 << Length) - 1) << Pos;
+ Value = Value << Pos;
+ return (OldValue & ~Mask) | Value;
+ }
+
+public:
+ constexpr Flags() : Val(0) {}
+ constexpr Flags(uint8_t BitIndex, uint8_t Length, uint8_t Value)
+ : Val(bits(BitIndex, Length, Value, 0)) {}
+
+ template <typename T>
+ constexpr void set(uint8_t BitIndex, uint8_t Length, T NewValue) {
+ Val = bits(BitIndex, Length, static_cast<uint8_t>(NewValue), Val);
+ }
+
+ template <typename T>
+ constexpr T get(uint8_t BitIndex, uint8_t Length) const {
+ return static_cast<T>((Val >> (8 - BitIndex - Length)) &
+ ((1 << Length) - 1));
+ }
+
+ constexpr operator uint8_t() const { return Val; }
+};
+
+// Structure for the flag field of a symbol. See
+// https://www.ibm.com/docs/en/zos/3.1.0?topic=formats-external-symbol-definition-record,
+// offset 41, for the definition.
+struct SymbolFlags {
+ Flags SymFlags;
+
+#define GOFF_SYMBOL_FLAG(NAME, TYPE, BITINDEX, LENGTH) \
+ void set##NAME(TYPE Val) { SymFlags.set<TYPE>(BITINDEX, LENGTH, Val); } \
+ TYPE get##NAME() const { return SymFlags.get<TYPE>(BITINDEX, LENGTH); }
+
+ GOFF_SYMBOL_FLAG(FillBytePresence, bool, 0, 1)
+ GOFF_SYMBOL_FLAG(Mangled, bool, 1, 1)
+ GOFF_SYMBOL_FLAG(Renameable, bool, 2, 1)
+ GOFF_SYMBOL_FLAG(RemovableClass, bool, 3, 1)
+ GOFF_SYMBOL_FLAG(ReservedQwords, ESDReserveQwords, 5, 3)
+
+#undef GOFF_SYMBOL_FLAG
+
+constexpr operator uint8_t() const { return static_cast<uint8_t>(SymFlags); }
+};
+
+// Structure for the behavioral attributes. See
+// https://www.ibm.com/docs/en/zos/3.1.0?topic=record-external-symbol-definition-behavioral-attributes
+// for the definition.
+struct BehavioralAttributes {
+ Flags Attr[10];
+
+#define GOFF_BEHAVIORAL_ATTRIBUTE(NAME, TYPE, ATTRIDX, BITINDEX, LENGTH) \
+ void set##NAME(TYPE Val) { Attr[ATTRIDX].set<TYPE>(BITINDEX, LENGTH, Val); } \
+ TYPE get##NAME() const { return Attr[ATTRIDX].get<TYPE>(BITINDEX, LENGTH); }
+
+ GOFF_BEHAVIORAL_ATTRIBUTE(Amode, GOFF::ESDAmode, 0, 0, 8)
+ GOFF_BEHAVIORAL_ATTRIBUTE(Rmode, GOFF::ESDRmode, 1, 0, 8)
+ GOFF_BEHAVIORAL_ATTRIBUTE(TextStyle, GOFF::ESDTextStyle, 2, 0, 4)
+ GOFF_BEHAVIORAL_ATTRIBUTE(BindingAlgorithm, GOFF::ESDBindingAlgorithm, 2, 4,
+ 4)
+ GOFF_BEHAVIORAL_ATTRIBUTE(TaskingBehavior, GOFF::ESDTaskingBehavior, 3, 0, 3)
+ GOFF_BEHAVIORAL_ATTRIBUTE(ReadOnly, bool, 3, 4, 1)
+ GOFF_BEHAVIORAL_ATTRIBUTE(Executable, GOFF::ESDExecutable, 3, 5, 3)
+ GOFF_BEHAVIORAL_ATTRIBUTE(DuplicateSymbolSeverity,
+ GOFF::ESDDuplicateSymbolSeverity, 4, 2, 2)
+ GOFF_BEHAVIORAL_ATTRIBUTE(BindingStrength, GOFF::ESDBindingStrength, 4, 4, 4)
+ GOFF_BEHAVIORAL_ATTRIBUTE(LoadingBehavior, GOFF::ESDLoadingBehavior, 5, 0, 2)
+ GOFF_BEHAVIORAL_ATTRIBUTE(COMMON, bool, 5, 2, 1)
+ GOFF_BEHAVIORAL_ATTRIBUTE(IndirectReference, bool, 5, 3, 1)
+ GOFF_BEHAVIORAL_ATTRIBUTE(BindingScope, GOFF::ESDBindingScope, 5, 4, 4)
+ GOFF_BEHAVIORAL_ATTRIBUTE(LinkageType, GOFF::ESDLinkageType, 6, 2, 1)
+ GOFF_BEHAVIORAL_ATTRIBUTE(Alignment, GOFF::ESDAlignment, 6, 3, 5)
+
+#undef GOFF_BEHAVIORAL_ATTRIBUTE
+};
} // end namespace GOFF
} // end namespace llvm
diff --git a/llvm/include/llvm/MC/MCGOFFSymbolMapper.h b/llvm/include/llvm/MC/MCGOFFSymbolMapper.h
new file mode 100644
index 0000000000000..dbdc1408dab2f
--- /dev/null
+++ b/llvm/include/llvm/MC/MCGOFFSymbolMapper.h
@@ -0,0 +1,148 @@
+//===- MCGOFFSymbolMapper.h - Maps MC section/symbol to GOFF symbols ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Maps a section or a symbol to the GOFF symbols it is composed of, and their
+// attributes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCGOFFSYMBOLMAPPER_H
+#define LLVM_MC_MCGOFFSYMBOLMAPPER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/GOFF.h"
+#include "llvm/Support/Alignment.h"
+#include <string>
+#include <utility>
+
+namespace llvm {
+class MCAssembler;
+class MCContext;
+class MCSectionGOFF;
+
+// An "External Symbol Definition" in the GOFF file has a type, and depending on
+// the type a different subset of the fields is used.
+//
+// Unlike other formats, a 2 dimensional structure is used to define the
+// location of data. For example, the equivalent of the ELF .text section is
+// made up of a Section Definition (SD) and a class (Element Definition; ED).
+// The name of the SD symbol depends on the application, while the class has the
+// predefined name C_CODE64.
+//
+// Data can be placed into this structure in 2 ways. First, the data (in a text
+// record) can be associated with an ED symbol. To refer to data, a Label
+// Definition (LD) is used to give an offset into the data a name. When binding,
+// the whole data is pulled into the resulting executable, and the addresses
+// given by the LD symbols are resolved.
+//
+// The alternative is to use a Part Definition (PR). In this case, the data (in
+// a text record) is associated with the part. When binding, only the data of
+// referenced PRs is pulled into the resulting binary.
+//
+// Both approaches are used, which means that the equivalent of a section in ELF
+// results in 3 GOFF symbols, either SD/ED/LD or SD/ED/PR. Moreover, certain
+// sections are fine with just defining SD/ED symbols. The SymbolMapper takes
+// care of all those details.
+
+// Attributes for SD symbols.
+struct SDAttr {
+ GOFF::ESDTaskingBehavior TaskingBehavior = GOFF::ESD_TA_Unspecified;
+ GOFF::ESDBindingScope BindingScope = GOFF::ESD_BSC_Unspecified;
+};
+
+// Attributes for ED symbols.
+struct EDAttr {
+ bool IsReadOnly = false;
+ GOFF::ESDExecutable Executable = GOFF::ESD_EXE_Unspecified;
+ GOFF::ESDAmode Amode;
+ GOFF::ESDRmode Rmode;
+ GOFF::ESDNameSpaceId NameSpace = GOFF::ESD_NS_NormalName;
+ GOFF::ESDTextStyle TextStyle = GOFF::ESD_TS_ByteOriented;
+ GOFF::ESDBindingAlgorithm BindAlgorithm = GOFF::ESD_BA_Concatenate;
+ GOFF::ESDLoadingBehavior LoadBehavior = GOFF::ESD_LB_Initial;
+ GOFF::ESDReserveQwords ReservedQwords = GOFF::ESD_RQ_0;
+ GOFF::ESDAlignment Alignment = GOFF::ESD_ALIGN_Doubleword;
+};
+
+// Attributes for LD symbols.
+struct LDAttr {
+ bool IsRenamable = false;
+ GOFF::ESDExecutable Executable = GOFF::ESD_EXE_Unspecified;
+ GOFF::ESDNameSpaceId NameSpace = GOFF::ESD_NS_NormalName;
+ GOFF::ESDBindingStrength BindingStrength = GOFF::ESD_BST_Strong;
+ GOFF::ESDLinkageType Linkage = GOFF::ESD_LT_XPLink;
+ GOFF::ESDAmode Amode;
+ GOFF::ESDBindingScope BindingScope = GOFF::ESD_BSC_Unspecified;
+};
+
+// Attributes for PR symbols.
+struct PRAttr {
+ bool IsRenamable = false;
+ bool IsReadOnly = false; // ???? Not documented.
+ GOFF::ESDExecutable Executable = GOFF::ESD_EXE_Unspecified;
+ GOFF::ESDNameSpaceId NameSpace = GOFF::ESD_NS_NormalName;
+ GOFF::ESDLinkageType Linkage = GOFF::ESD_LT_XPLink;
+ GOFF::ESDAmode Amode;
+ GOFF::ESDBindingScope BindingScope = GOFF::ESD_BSC_Unspecified;
+ GOFF::ESDDuplicateSymbolSeverity DuplicateSymbolSeverity =
+ GOFF::ESD_DSS_NoWarning;
+ GOFF::ESDAlignment Alignment = GOFF::ESD_ALIGN_Byte;
+ uint32_t SortKey = 0;
+};
+
+struct GOFFSectionData {
+ // Name and attributes of SD symbol.
+ StringRef SDName;
+ SDAttr SDAttributes;
+
+ // Name and attributes of ED symbol.
+ StringRef EDName;
+ EDAttr EDAttributes;
+
+ // Name and attributes of LD or PR symbol.
+ StringRef LDorPRName;
+ LDAttr LDAttributes;
+ PRAttr PRAttributes;
+
+ // Indicates if there is a LD or PR symbol.
+ enum { None, LD, PR } Tag;
+
+ // Indicates if the SD symbol is the root symbol (aka the Csect Code).
+ bool IsSDRootSD;
+};
+
+class GOFFSymbolMapper {
+ MCContext &Ctx;
+
+ std::string RootSDName;
+ SDAttr RootSDAttributes;
+
+ std::string ADALDName;
+
+ StringRef BaseName;
+
+ bool IsCsectCodeNameEmpty;
+ bool Is64Bit;
+ bool UsesXPLINK;
+
+public:
+ GOFFSymbolMapper(MCContext &Ctx);
+ GOFFSymbolMapper(MCAssembler &Asm);
+
+ // Required order: .text first, then .ada.
+ std::pair<GOFFSectionData, bool> getSection(const MCSectionGOFF &Section);
+
+ void setBaseName();
+ void determineRootSD(StringRef CSectCodeName);
+ llvm::StringRef getRootSDName() const;
+ const SDAttr &getRootSD() const;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/MC/CMakeLists.txt b/llvm/lib/MC/CMakeLists.txt
index f49f14c848b90..967ec73a2be5b 100644
--- a/llvm/lib/MC/CMakeLists.txt
+++ b/llvm/lib/MC/CMakeLists.txt
@@ -26,6 +26,7 @@ add_llvm_component_library(LLVMMC
MCExpr.cpp
MCFragment.cpp
MCGOFFStreamer.cpp
+ MCGOFFSymbolMapper.cpp
MCInst.cpp
MCInstPrinter.cpp
MCInstrAnalysis.cpp
diff --git a/llvm/lib/MC/GOFFObjectWriter.cpp b/llvm/lib/MC/GOFFObjectWriter.cpp
index efaf5ff006ddc..92603c6fb1002 100644
--- a/llvm/lib/MC/GOFFObjectWriter.cpp
+++ b/llvm/lib/MC/GOFFObjectWriter.cpp
@@ -13,7 +13,11 @@
#include "llvm/BinaryFormat/GOFF.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCGOFFObjectWriter.h"
+#include "llvm/MC/MCGOFFSymbolMapper.h"
+#include "llvm/MC/MCSectionGOFF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ConvertEBCDIC.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/raw_ostream.h"
@@ -23,44 +27,13 @@ using namespace llvm;
#define DEBUG_TYPE "goff-writer"
namespace {
-
-// The standard System/390 convention is to name the high-order (leftmost) bit
-// in a byte as bit zero. The Flags type helps to set bits in a byte according
-// to this numeration order.
-class Flags {
- uint8_t Val;
-
- constexpr static uint8_t bits(uint8_t BitIndex, uint8_t Length, uint8_t Value,
- uint8_t OldValue) {
- assert(BitIndex < 8 && "Bit index out of bounds!");
- assert(Length + BitIndex <= 8 && "Bit length too long!");
-
- uint8_t Mask = ((1 << Length) - 1) << (8 - BitIndex - Length);
- Value = Value << (8 - BitIndex - Length);
- assert((Value & Mask) == Value && "Bits set outside of range!");
-
- return (OldValue & ~Mask) | Value;
- }
-
-public:
- constexpr Flags() : Val(0) {}
- constexpr Flags(uint8_t BitIndex, uint8_t Length, uint8_t Value)
- : Val(bits(BitIndex, Length, Value, 0)) {}
-
- void set(uint8_t BitIndex, uint8_t Length, uint8_t Value) {
- Val = bits(BitIndex, Length, Value, Val);
- }
-
- constexpr operator uint8_t() const { return Val; }
-};
-
// Common flag values on records.
// Flag: This record is continued.
-constexpr uint8_t RecContinued = Flags(7, 1, 1);
+constexpr uint8_t RecContinued = GOFF::Flags(7, 1, 1);
// Flag: This record is a continuation.
-constexpr uint8_t RecContinuation = Flags(6, 1, 1);
+constexpr uint8_t RecContinuation = GOFF::Flags(6, 1, 1);
// The GOFFOstream is responsible to write the data into the fixed physical
// records of the format. A user of this class announces the begin of a new
@@ -223,13 +196,113 @@ void GOFFOstream::finalizeRecord() {
}
namespace {
+// A GOFFSymbol holds all the data required for writing an ESD record.
+class GOFFSymbol {
+public:
+ std::string Name;
+ uint32_t EsdId;
+ uint32_t ParentEsdId;
+ uint64_t Offset = 0; // Offset of the symbol into the section. LD only.
+ // Offset is only 32 bit, the larger type is used to
+ // enable error checking.
+ GOFF::ESDSymbolType SymbolType;
+ GOFF::ESDNameSpaceId NameSpace = GOFF::ESD_NS_ProgramManagementBinder;
+
+ GOFF::BehavioralAttributes BehavAttrs;
+ GOFF::SymbolFlags SymbolFlags;
+ uint32_t SortKey = 0;
+ uint32_t SectionLength = 0;
+ uint32_t ADAEsdId = 0;
+ uint32_t EASectionEDEsdId = 0;
+ uint32_t EASectionOffset = 0;
+ uint8_t FillByteValue = 0;
+
+ GOFFSymbol() : EsdId(0), ParentEsdId(0) {}
+
+ GOFFSymbol(StringRef Name, uint32_t EsdID, const SDAttr &Attr)
+ : Name(Name.data(), Name.size()), EsdId(EsdID), ParentEsdId(0),
+ SymbolType(GOFF::ESD_ST_SectionDefinition) {
+ BehavAttrs.setTaskingBehavior(Attr.TaskingBehavior);
+ BehavAttrs.setBindingScope(Attr.BindingScope);
+ }
+
+ GOFFSymbol(StringRef Name, uint32_t EsdID, uint32_t ParentEsdID,
+ const EDAttr &Attr)
+ : Name(Name.data(), Name.size()), EsdId(EsdID), ParentEsdId(ParentEsdID),
+ SymbolType(GOFF::ESD_ST_ElementDefinition) {
+ this->NameSpace = Attr.NameSpace;
+ // TODO Do we need/should set the "mangled" flag?
+ SymbolFlags.setFillBytePresence(1);
+ SymbolFlags.setReservedQwords(Attr.ReservedQwords);
+ BehavAttrs.setReadOnly(Attr.IsReadOnly);
+ BehavAttrs.setExecutable(Attr.Executable);
+ BehavAttrs.setAmode(Attr.Amode);
+ BehavAttrs.setRmode(Attr.Rmode);
+ BehavAttrs.setTextStyle(Attr.TextStyle);
+ BehavAttrs.setBindingAlgorithm(Attr.BindAlgorithm);
+ BehavAttrs.setLoadingBehavior(Attr.LoadBehavior);
+ BehavAttrs.setAlignment(Attr.Alignment);
+ }
+
+ GOFFSymbol(StringRef Name, uint32_t EsdID, uint32_t ParentEsdID,
+ const LDAttr &Attr)
+ : Name(Name.data(), Name.size()), EsdId(EsdID), ParentEsdId(ParentEsdID),
+ SymbolType(GOFF::ESD_ST_LabelDefinition) {
+ this->NameSpace = Attr.NameSpace;
+ SymbolFlags.setRenameable(Attr.IsRenamable);
+ BehavAttrs.setExecutable(Attr.Executable);
+ BehavAttrs.setBindingStrength(Attr.BindingStrength);
+ BehavAttrs.setLinkageType(Attr.Linkage);
+ BehavAttrs.setAmode(Attr.Amode);
+ BehavAttrs.setBindingScope(Attr.BindingScope);
+ }
+
+ GOFFSymbol(StringRef Name, uint32_t EsdID, uint32_t ParentEsdID,
+ const PRAttr &Attr)
+ : Name(Name.data(), Name.size()), EsdId(EsdID), ParentEsdId(ParentEsdID),
+ SymbolType(GOFF::ESD_ST_PartReference) {
+ this->NameSpace = Attr.NameSpace;
+ SymbolFlags.setRenameable(Attr.IsRenamable);
+ BehavAttrs.setExecutable(Attr.Executable);
+ BehavAttrs.setAlignment(Attr.Alignment);
+ BehavAttrs.setAmode(Attr.Amode);
+ BehavAttrs.setLinkageType(Attr.Linkage);
+ BehavAttrs.setBindingScope(Attr.BindingScope);
+ BehavAttrs.setDuplicateSymbolSeverity(Attr.DuplicateSymbolSeverity);
+ BehavAttrs.setReadOnly(Attr.IsReadOnly);
+ }
+};
+
class GOFFWriter {
GOFFOstream OS;
[[maybe_unused]] MCAssembler &Asm;
+ /// Mapping from MCSectionGOFF/MCSymbolGOFF to GOFF symbols and attributes.
+ GOFFSymbolMapper SymbolMapper;
+
+ /// Counter for symbol id's.
+ uint32_t EsdIdCounter = 0;
+
+ /// Id's of some special symbols.
+ uint32_t RootSDEsdId = 0;
+ uint32_t ADAEsdId = 0;
+
void writeHeader();
+ void writeSymbol(const GOFFSymbol &Symbol);
void writeEnd();
+ GOFFSymbol createGOFFSymbol(StringRef Name, const SDAttr &Attr);
+ GOFFSymbol createGOFFSymbol(StringRef Name, const EDAttr &Attr,
+ uint32_t ParentEsdId);
+ GOFFSymbol createGOFFSymbol(StringRef Name, const LDAttr &Attr,
+ uint32_t ParentEsdId);
+ GOFFSymbol createGOFFSymbol(StringRef Name, const PRAttr &Attr,
+ uint32_t ParentEsdId);
+
+ void defineRootSymbol(const MCSectionGOFF *Text);
+ void defineSectionSymbols(const MCSectionGOFF &Section);
+ void defineSymbols();
+
public:
GOFFWriter(raw_pwrite_stream &OS, MCAssembler &Asm);
uint64_t writeObject();
@@ -237,7 +310,108 @@ class GOFFWriter {
} // namespace
GOFFWriter::GOFFWriter(raw_pwrite_stream &OS, MCAssembler &Asm)
- : OS(OS), Asm(Asm) {}
+ : OS(OS), Asm(Asm), SymbolMapper(Asm) {}
+
+GOFFSymbol GOFFWriter::createGOFFSymbol(StringRef Name, const SDAttr &Attr) {
+ return GOFFSymbol(Name, ++EsdIdCounter, Attr);
+}
+
+GOFFSymbol GOFFWriter::createGOFFSymbol(StringRef Name, const EDAttr &Attr,
+ uint32_t ParentEsdId) {
+ return GOFFSymbol(Name, ++EsdIdCounter, ParentEsdId, Attr);
+}
+
+GOFFSymbol GOFFWriter::createGOFFSymbol(StringRef Name, const LDAttr &Attr,
+ uint32_t ParentEsdId) {
+ return GOFFSymbol(Name, ++EsdIdCounter, ParentEsdId, Attr);
+}
+
+GOFFSymbol GOFFWriter::createGOFFSymbol(StringRef Name, const PRAttr &Attr,
+ uint32_t ParentEsdId) {
+ return GOFFSymbol(Name, ++EsdIdCounter, ParentEsdId, Attr);
+}
+
+void GOFFWriter::defineRootSymbol(const MCSectionGOFF *Text) {
+ // There is always a text section except for DWARF unit tests.
+ SymbolMapper.determineRootSD("");
+ GOFFSymbol RootSD =
+ createGOFFSymbol(SymbolMapper.getRootSDName(), SymbolMapper.getRootSD());
+ writeSymbol(RootSD);
+ RootSDEsdId = RootSD.EsdId;
+}
+
+void GOFFWriter::defineSectionSymbols(const MCSectionGOFF &Section) {
+ auto [GOFFSectionData, Found] = SymbolMapper.getSection(Section);
+ if (Found) {
+ uint32_t SDEsdId = RootSDEsdId;
+ if (!GOFFSectionData.IsSDRootSD) {
+ GOFFSymbol SD = createGOFFSymbol(GOFFSectionData.SDName,
+ GOFFSectionData.SDAttributes);
+ SDEsdId = SD.EsdId;
+ writeSymbol(SD);
+ }
+
+ GOFFSymbol ED = createGOFFSymbol(GOFFSectionData.EDName,
+ GOFFSectionData.EDAttributes, SDEsdId);
+ if (GOFFSectionData.Tag == GOFFSectionData::None ||
+ GOFFSectionData.Tag == GOFFSectionData::LD) {
+ ED.SectionLength = Asm.getSectionAddressSize(Section);
+ }
+ writeSymbol(ED);
+
+ if (GOFFSectionData.Tag == GOFFSectionData::LD) {
+ GOFFSymbol LD = createGOFFSymbol(GOFFSectionData.LDorPRName,
+ GOFFSectionData.LDAttributes, ED.EsdId);
+ if (Section.isText())
+ LD.ADAEsdId = ADAEsdId;
+ writeSymbol(LD);
+ }
+
+ if (GOFFSectionData.Tag == GOFFSectionData::PR) {
+ GOFFSymbol PR = createGOFFSymbol(GOFFSectionData.LDorPRName,
+ GOFFSectionData.PRAttributes, ED.EsdId);
+ PR.SectionLength = Asm.getSectionAddressSize(Section);
+ if (Section.getName() == ".ada") {
+ // We cannot have a zero-length section for data. If we do,
+ // artificially inflate it. Use 2 bytes to avoid odd alignments. Note:
+ // if this is ever changed, you will need to update the code in
+ // SystemZAsmPrinter::emitCEEMAIN and SystemZAsmPrinter::emitCELQMAIN to
+ // generate -1 if there is no ADA
+ if (!PR.SectionLength)
+ PR.SectionLength = 2;
+ ADAEsdId = PR.EsdId;
+ }
+ writeSymbol(PR);
+ }
+ return;
+ }
+ // TODO It is possible to get here. This will be handled later.
+}
+
+void GOFFWriter::defineSymbols() {
+ // Search for .text and .ada sections. These should be the first sections in
+ // the list, so the loop should be cheap.
+ MCSectionGOFF *Text = nullptr;
+ MCSectionGOFF *ADA = nullptr;
+ for (MCSection &S : Asm) {
+ if (S.getName() == ".text")
+ Text = &cast<MCSectionGOFF>(S);
+ if (S.getName() == ".ada")
+ ADA = &cast<MCSectionGOFF>(S);
+ }
+ defineRootSymbol(Text);
+ if (ADA)
+ defineSectionSymbols(*ADA);
+ if (Text)
+ defineSectionSymbols(*Text);
+
+ // Process the other sections.
+ for (MCSection &S : Asm) {
+ auto &Section = cast<MCSectionGOFF>(S);
+ if (Text != &Section && ADA != &Section)
+ defineSectionSymbols(Section);
+ }
+}
void GOFFWriter::writeHeader() {
OS.newRecord(GOFF::RT_HDR);
@@ -253,6 +427,45 @@ void GOFFWriter::writeHeader() {
OS.write_zeros(6); // Reserved
}
+void GOFFWriter::writeSymbol(const GOFFSymbol &Symbol) {
+ if (Symbol.Offset >= (((...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As a general comment, I'm not really sure why we need the whole SymbolMapper
indirection. The various flags for the sections seem basically hard-coded. Couldn't we just require callers of getGOFFSection
to pass in the necessary attributes, and then move the whole SymbolMapper
logic to initGOFFMCObjectFileInfo
and just have it use the appropriate attributes to begin with?
That was actually my first approach. There are a couple of challenges:
The name is either derived from the file name, or defined by a pragma or a command line option and passed as meta data in the module. When the sections are initialized in
The sections are stored in a hash map in the context. Of the 2 or 3 symbol names, there is not a single one which is unique, which means that a synthetic key like "SD name/ED name/PR name" needs to be constructed. Since the root SD is shared in many cases, and not available when the section is instantiated, it adds another special case.
If a section is only used once in the AsmPrinter, then there is no need to cache the reference in
However, the situation for As a result, I found it more convenient to have all GOFF specific attributes defined in a single class, and map the ELF-style sections to the actual GOFF symbols later. However, if you think it is better/more streamlined to move everything into
The root SD name and the binding scope would then be updated in |
I implemented the suggestion from @uweigand. The GOFF attributes are set directly at the |
Thanks, I actually like this better. Looking at the current state, I'm wondering if this could be simplified even further: couldn't we simply allocate separate MCSectionGOFF objects for each of the SD / ED / PR (and possibly LD, but that's maybe really rather a label instead of a section?) symbols? Then each of those could have its own name (there'd probably still have to be a synthesized name to avoid duplication?) and attributes, and then a "parent" link to identify the containing section. During emission, we could follow those links to ensure the proper numbering and ordering. Or even simpler, we might be able to assign the ID numbers already in the MCSectionGOFF, similar to how the UniqueID is handled for MCSectionELF. Finally, this would allow representing the ADA link explicitly, without having to impose a particular policy in the low-level writer. Does this make sense or am I missing something here? |
I think that splitting the SD/ED/LD into 3 "section"s implies that a MCSectionGOFF has a fundamentally different semantic than the other MCSectionXXX. This is something I would like to avoid. On the other hand, the SD/ED pair is almost the same as an ELF section, so just putting those 2 into a MCSectionGOFF instance and handling the LD/PR symbol differently makes sense. In HLASM, a section definition looks like
When I switch sections, and later want to continue the code section, then I need to repeat both statements in a short form:
I thought about this when moving the definitions from the SymbolMapper to the MCObjectFileInfo class, but I got no good idea how to handle the LD or PR symbol. A possible way could be to attach the LD or PR symbol as the begin symbol of a section. I need to try this. One possible downside is that it makes handling of relocations much more complicated. For the base of a relocation, I need the PR symbol (when I have the SD/ED/PR case) or the LD and ED (in the SD/ED/LD case). Currently, this information is available in the MCSectionGOFF, without the need to chase a list to the correct element. |
Thinking a bit more about this, it looks to me that we should treat SD/ED/PR on the one hand differently from LD (and ER) on the other. The former identify a range of address space and may hold contents of those ranges in the form of text records; the latter identify a single address (and hold no content of their own). From that perspective, the former correspond to the "section" concept, while the latter correspond to the "symbol" concept. Now, among the section types SD/ED/PR, GOFF is a bit special in that those are nested - this is somewhat similar to the subsection concept, but it is explicit in the object file format (as opposed to, say, ELF subsections). It seems to me that modelling that nested section concept explicitly by creating a separate MCSectionGOFF for each of SD, ED, and PR, and linking them as appropriate via a In fact, considering that at some point we want to be able to implement a general HLASM AsmParser, which would require handling any allowed combination of CSECT with multiple CATTR, we should not merge SD and ED into a single section. (Also, by having them separately, we no longer need special treatment of the "root" SD in the writer.) Finally, having separate MCSection structures for each ESD record may allow using the MCSection::Ordinal field as the ESD ID, which matches its purpose for other object file formats, and which would allow easy resolution of parent (and ADA) section pointers to ESD IDs in the writer. The LD record, on the other hand, clearly should not get a MCSectionGOFF. Rather, it would make sense for this to be represented as a MCSymbolGOFF. Specifically, this symbol really represents the implicit section start symbol (which ELF also has!); so it should probably best be emitted not from the section table but from the symbol table. (MCSection already has a Attributes associated with the LD record should likewise come from the MCSymbolGOFF.
This would include the ADA section, which means that association no longer needs to be hard-coded in the writer, but can instead set up by codegen as appropriate when defining symbols. (E.g. this would also allow handling arbitrary user-provided XATTR PSECT attributes in an HLASM AsmParser.) |
Yes, that is correct.
I try to implement this. Well, first I'll fix the failing test cases.... |
I refactored the code in the suggested way. Some notes:
|
Can't we just set the ordinal to the correct value to begin with? ELF sets the ordinals while writing out the sections; I think we might as well do the same. (The point being that if we have already ensured we write sections in the correct order, then we will only ever need to refer to ordinals of sections that have already been written.) Then we'd be back to a single counter used by the writer for all GOFF symbol records, those that come from sections as well as those that come from symbols. |
At first I was a bit confused about this, because the |
struct EDAttr { | ||
bool IsReadOnly = false; | ||
GOFF::ESDExecutable Executable = GOFF::ESD_EXE_Unspecified; | ||
GOFF::ESDAmode Amode; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The whole Amode/Rmode handling is a bit confusing to me. As I understand the GOFF docs, Amode is supposed to be a symbol property (i.e. set on LD and ER records) while Rmode is supposed to be an element property (i.e. set on ED records). So it is unclear what an Amode property on ED or PR is supposed to do, exactly. There also doesn't seem to be a way to specify those with any HLASM command I can see.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had to do a bit of research here. The Amode at the ED symbol acts as a default when no Amode at the LD/ER is present. The Amode at the PR seems to be not necessary. However, I need to check if this results in binder errors if I remove this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No binding problems, so it seems safe to make this change. That said, amblist shows the Amode on PR and ED symbols.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, I removed this. However, it feels like that there is an inconsistency in the documentation / implementations. The following HLASM code
stdin#C CSECT
C_WSA64 CATTR ALIGN(4),DEFLOAD,NOTEXECUTABLE,PART(a),RMODE(64)
DC 0X
END
results in RMODE(64)
set at the ED symbol, and AMODE(64)
set at the PR symbol.
Setting the Amode on a PR symbol makes sense to me because it is not possible to add a LD symbol to the part - this is causing binder errors. To reference the part from a different compilation unit, I have to use a
, thus the PR has also some symbol semantics.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just to double-check: your current code does not set Amode on the PR symbol. Is it necessary to do this or not?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To confirm, setting the Amode is not necessary on PR symbols.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, thanks for checking. Right now Amode is nowhere emitted to asm output at all anymore, but I guess that's because there is no asm output for LD symbols (function labels). I assume this will be added later?
bool IsRenamable = false; | ||
GOFF::ESDExecutable Executable = GOFF::ESD_EXE_Unspecified; | ||
GOFF::ESDNameSpaceId NameSpace = GOFF::ESD_NS_NormalName; | ||
GOFF::ESDBindingStrength BindingStrength = GOFF::ESD_BST_Strong; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For LD (as opposed to ER), it seems "strong" is the only allowed value here. Again, if this is true, it doesn't make much sense to specify it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is actually needed for C++ inline functions etc. which can end up in several object files. Also for weak definitions, e.g.:
// a.c
#include <stdio.h>
__attribute__((weak)) void fun() {
printf("Weak fun\n");
}
void feature() {
fun();
}
and
// b.c
#include <stdio.h>
extern void feature();
void fun() {
printf("Other fun\n");
}
int main(int argc, char *argv[]) {
feature();
return 0;
}
(example taken from a blog by @MaskRay)
Another case where the documentation needs an update.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see. This is not currently reflected in the HLASM output, however. How would one do this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I need to ask for this. I see only WXTRN for weak externals in the HLASM documentation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I got the hint that WXTRN may work for definitions, too. However, I still need to verify this. In any case, this needs to be handled when the function label is emitted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Like above, right now there doesn't appear to be any code to emit LD symbols to asm output.
Also updates the test to match ideas, to make it easier to move those blocks around.
- Add sorting key - Change generation of Rmode/Amode
Also fixes the alignment for data.
The value would serve as default if not specified on LD or PR. However, we can make sure that it is always specified on LD and PR, so there is no need to have it on ED.
There were too many zeroes in the address.
88d4051
to
c2d0449
Compare
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/18/builds/18070 Here is the relevant piece of the build log for the reference
|
Unlike other formats, the GOFF object file format uses a 2 dimensional structure to define the location of data. For example, the equivalent of the ELF .text section is made up of a Section Definition (SD) and a class (Element Definition; ED). The name of the SD symbol depends on the application, while the class has the predefined name C_CODE/C_CODE64 in AMODE31 and AMODE64 respectively. Data can be placed into this structure in 2 ways. First, the data (in a text record) can be associated with an ED symbol. To refer to data, a Label Definition (LD) is used to give an offset into the data a name. When binding, the whole data is pulled into the resulting executable, and the addresses given by the LD symbols are resolved. The alternative is to use a Part Definition (PR). In this case, the data (in a text record) is associated with the part. When binding, only the data of referenced PRs is pulled into the resulting binary. Both approaches are used. SD, ED, and PR elements are modeled by nested MCSectionGOFF instances, while LD elements are associated with MCSymbolGOFF instances. At the binary level, a record called "External Symbol Definition" (ESD) is used. The ESD has a type (SD, ED, PR, LD), and depending on the type a different subset of the fields is used.
Unlike other formats, the GOFF object file format uses a 2 dimensional structure to define the location of data. For example, the equivalent of the ELF .text section is made up of a Section Definition (SD) and a class (Element Definition; ED). The name of the SD symbol depends on the application, while the class has the predefined name C_CODE/C_CODE64 in AMODE31 and AMODE64 respectively. Data can be placed into this structure in 2 ways. First, the data (in a text record) can be associated with an ED symbol. To refer to data, a Label Definition (LD) is used to give an offset into the data a name. When binding, the whole data is pulled into the resulting executable, and the addresses given by the LD symbols are resolved. The alternative is to use a Part Definition (PR). In this case, the data (in a text record) is associated with the part. When binding, only the data of referenced PRs is pulled into the resulting binary. Both approaches are used. SD, ED, and PR elements are modeled by nested MCSectionGOFF instances, while LD elements are associated with MCSymbolGOFF instances. At the binary level, a record called "External Symbol Definition" (ESD) is used. The ESD has a type (SD, ED, PR, LD), and depending on the type a different subset of the fields is used.
The GOFF format uses symbol definitions to represent sections and symbols. Introducing a section can require up to 3 symbol definitions. However, most of these details are not needed by the AsmPrinter. To map from a section (a MCSectionGOFF) to the symbol definitions, a new class called MCGOFFSymbolMapper is used. The same information can also be used by the assembly output, which justifies this centralized approach. Writing the mapped symbols is then straightforward.