-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[StaticDataLayout][PGO]Implement reader and writer change for data access profiles #139997
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[StaticDataLayout][PGO]Implement reader and writer change for data access profiles #139997
Conversation
Co-authored-by: Kazu Hirata <kazu@google.com>
7587864
to
d15ae3e
Compare
d15ae3e
to
4045c94
Compare
@llvm/pr-subscribers-pgo Author: Mingming Liu (mingmingl-llvm) Changes#138170 introduces classes to operate on data access profiles. This change supports the read and write of For indexed format:
Patch is 26.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139997.diff 14 Files Affected:
diff --git a/llvm/include/llvm/ProfileData/DataAccessProf.h b/llvm/include/llvm/ProfileData/DataAccessProf.h
index e8504102238d1..f5f6abf0a2817 100644
--- a/llvm/include/llvm/ProfileData/DataAccessProf.h
+++ b/llvm/include/llvm/ProfileData/DataAccessProf.h
@@ -41,6 +41,8 @@ namespace data_access_prof {
struct SourceLocation {
SourceLocation(StringRef FileNameRef, uint32_t Line)
: FileName(FileNameRef.str()), Line(Line) {}
+
+ SourceLocation() {}
/// The filename where the data is located.
std::string FileName;
/// The line number in the source code.
@@ -53,6 +55,8 @@ namespace internal {
// which strings are owned by `DataAccessProfData`. Used by `DataAccessProfData`
// to represent data locations internally.
struct SourceLocationRef {
+ SourceLocationRef(StringRef FileNameRef, uint32_t Line)
+ : FileName(FileNameRef), Line(Line) {}
// The filename where the data is located.
StringRef FileName;
// The line number in the source code.
@@ -100,8 +104,9 @@ using SymbolHandle = std::variant<std::string, uint64_t>;
/// The data access profiles for a symbol.
struct DataAccessProfRecord {
public:
- DataAccessProfRecord(SymbolHandleRef SymHandleRef,
- ArrayRef<internal::SourceLocationRef> LocRefs) {
+ DataAccessProfRecord(SymbolHandleRef SymHandleRef, uint64_t AccessCount,
+ ArrayRef<internal::SourceLocationRef> LocRefs)
+ : AccessCount(AccessCount) {
if (std::holds_alternative<StringRef>(SymHandleRef)) {
SymHandle = std::get<StringRef>(SymHandleRef).str();
} else
@@ -110,8 +115,9 @@ struct DataAccessProfRecord {
for (auto Loc : LocRefs)
Locations.push_back(SourceLocation(Loc.FileName, Loc.Line));
}
+ DataAccessProfRecord() {}
SymbolHandle SymHandle;
-
+ uint64_t AccessCount;
// The locations of data in the source code. Optional.
SmallVector<SourceLocation> Locations;
};
diff --git a/llvm/include/llvm/ProfileData/IndexedMemProfData.h b/llvm/include/llvm/ProfileData/IndexedMemProfData.h
index 3c6c329d1c49d..66fa38472059b 100644
--- a/llvm/include/llvm/ProfileData/IndexedMemProfData.h
+++ b/llvm/include/llvm/ProfileData/IndexedMemProfData.h
@@ -10,14 +10,20 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
+#include <functional>
+#include <optional>
+
namespace llvm {
// Write the MemProf data to OS.
-Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
- memprof::IndexedVersion MemProfVersionRequested,
- bool MemProfFullSchema);
+Error writeMemProf(
+ ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+ memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
+ std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>>
+ DataAccessProfileData);
} // namespace llvm
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index c250a9ede39bc..a3436e1dfe711 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/Object/BuildID.h"
+#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfCorrelator.h"
#include "llvm/ProfileData/MemProf.h"
@@ -703,10 +704,13 @@ class IndexedMemProfReader {
const unsigned char *CallStackBase = nullptr;
// The number of elements in the radix tree array.
unsigned RadixTreeSize = 0;
+ /// The data access profiles, deserialized from binary data.
+ std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData;
Error deserializeV2(const unsigned char *Start, const unsigned char *Ptr);
Error deserializeRadixTreeBased(const unsigned char *Start,
- const unsigned char *Ptr);
+ const unsigned char *Ptr,
+ memprof::IndexedVersion Version);
public:
IndexedMemProfReader() = default;
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 67d85daa81623..cf1cec25c3cac 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Object/BuildID.h"
+#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Error.h"
@@ -81,6 +82,8 @@ class InstrProfWriter {
// Whether to generated random memprof hotness for testing.
bool MemprofGenerateRandomHotness;
+ std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData;
+
public:
// For memprof testing, random hotness can be assigned to the contexts if
// MemprofGenerateRandomHotness is enabled. The random seed can be either
@@ -122,6 +125,9 @@ class InstrProfWriter {
// Add a binary id to the binary ids list.
void addBinaryIds(ArrayRef<llvm::object::BuildID> BIs);
+ void addDataAccessProfData(
+ std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfile);
+
/// Merge existing function counts from the given writer.
void mergeRecordsFromWriter(InstrProfWriter &&IPW,
function_ref<void(Error)> Warn);
diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h
index 29d9e57cae3e3..02defa189ea7e 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -228,6 +228,21 @@ class YAMLMemProfReader final : public MemProfReader {
create(std::unique_ptr<MemoryBuffer> Buffer);
void parse(StringRef YAMLData);
+
+ std::unique_ptr<data_access_prof::DataAccessProfData>
+ takeDataAccessProfData() {
+ return std::move(DataAccessProfileData);
+ }
+
+private:
+ // Called by `parse` to set data access profiles after parsing them from Yaml
+ // files.
+ void setDataAccessProfileData(
+ std::unique_ptr<data_access_prof::DataAccessProfData> Data) {
+ DataAccessProfileData = std::move(Data);
+ }
+
+ std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData;
};
} // namespace memprof
} // namespace llvm
diff --git a/llvm/include/llvm/ProfileData/MemProfYAML.h b/llvm/include/llvm/ProfileData/MemProfYAML.h
index 08dee253f615a..634edc4aa0122 100644
--- a/llvm/include/llvm/ProfileData/MemProfYAML.h
+++ b/llvm/include/llvm/ProfileData/MemProfYAML.h
@@ -2,6 +2,7 @@
#define LLVM_PROFILEDATA_MEMPROFYAML_H_
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/YAMLTraits.h"
@@ -20,9 +21,24 @@ struct GUIDMemProfRecordPair {
MemProfRecord Record;
};
+// Helper struct to yamlify data_access_prof::DataAccessProfData. The struct
+// members use owned strings. This is for simplicity and assumes that most real
+// world use cases do look-ups and regression test scale is small.
+struct YamlDataAccessProfData {
+ std::vector<data_access_prof::DataAccessProfRecord> Records;
+ std::vector<uint64_t> KnownColdHashes;
+ std::vector<std::string> KnownColdSymbols;
+
+ bool isEmpty() const {
+ return Records.empty() && KnownColdHashes.empty() &&
+ KnownColdSymbols.empty();
+ }
+};
+
// The top-level data structure, only used with YAML for now.
struct AllMemProfData {
std::vector<GUIDMemProfRecordPair> HeapProfileRecords;
+ YamlDataAccessProfData YamlifiedDataAccessProfiles;
};
} // namespace memprof
@@ -206,9 +222,49 @@ template <> struct MappingTraits<memprof::GUIDMemProfRecordPair> {
}
};
+template <> struct MappingTraits<data_access_prof::SourceLocation> {
+ static void mapping(IO &Io, data_access_prof::SourceLocation &Loc) {
+ Io.mapOptional("FileName", Loc.FileName);
+ Io.mapOptional("Line", Loc.Line);
+ }
+};
+
+template <> struct MappingTraits<data_access_prof::DataAccessProfRecord> {
+ static void mapping(IO &Io, data_access_prof::DataAccessProfRecord &Rec) {
+ if (Io.outputting()) {
+ if (std::holds_alternative<std::string>(Rec.SymHandle)) {
+ Io.mapOptional("Symbol", std::get<std::string>(Rec.SymHandle));
+ } else {
+ Io.mapOptional("Hash", std::get<uint64_t>(Rec.SymHandle));
+ }
+ } else {
+ std::string SymName;
+ uint64_t Hash = 0;
+ Io.mapOptional("Symbol", SymName);
+ Io.mapOptional("Hash", Hash);
+ if (!SymName.empty()) {
+ Rec.SymHandle = SymName;
+ } else {
+ Rec.SymHandle = Hash;
+ }
+ }
+
+ Io.mapOptional("Locations", Rec.Locations);
+ }
+};
+
+template <> struct MappingTraits<memprof::YamlDataAccessProfData> {
+ static void mapping(IO &Io, memprof::YamlDataAccessProfData &Data) {
+ Io.mapOptional("SampledRecords", Data.Records);
+ Io.mapOptional("KnownColdSymbols", Data.KnownColdSymbols);
+ Io.mapOptional("KnownColdHashes", Data.KnownColdHashes);
+ }
+};
+
template <> struct MappingTraits<memprof::AllMemProfData> {
static void mapping(IO &Io, memprof::AllMemProfData &Data) {
Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords);
+ Io.mapOptional("DataAccessProfiles", Data.YamlifiedDataAccessProfiles);
}
};
@@ -234,5 +290,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::CallSiteInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDHex64) // Used for CalleeGuids
+LLVM_YAML_IS_SEQUENCE_VECTOR(data_access_prof::DataAccessProfRecord)
+LLVM_YAML_IS_SEQUENCE_VECTOR(data_access_prof::SourceLocation)
#endif // LLVM_PROFILEDATA_MEMPROFYAML_H_
diff --git a/llvm/lib/ProfileData/DataAccessProf.cpp b/llvm/lib/ProfileData/DataAccessProf.cpp
index c5d0099977cfa..61a73fab7269f 100644
--- a/llvm/lib/ProfileData/DataAccessProf.cpp
+++ b/llvm/lib/ProfileData/DataAccessProf.cpp
@@ -48,7 +48,8 @@ DataAccessProfData::getProfileRecord(const SymbolHandleRef SymbolID) const {
auto It = Records.find(Key);
if (It != Records.end()) {
- return DataAccessProfRecord(Key, It->second.Locations);
+ return DataAccessProfRecord(Key, It->second.AccessCount,
+ It->second.Locations);
}
return std::nullopt;
@@ -111,7 +112,8 @@ Error DataAccessProfData::addKnownSymbolWithoutSamples(
auto CanonicalName = getCanonicalName(std::get<StringRef>(SymbolID));
if (!CanonicalName)
return CanonicalName.takeError();
- KnownColdSymbols.insert(*CanonicalName);
+ KnownColdSymbols.insert(
+ saveStringToMap(StrToIndexMap, Saver, *CanonicalName).first);
return Error::success();
}
diff --git a/llvm/lib/ProfileData/IndexedMemProfData.cpp b/llvm/lib/ProfileData/IndexedMemProfData.cpp
index 3d20f7a7a5778..cc1b03101c880 100644
--- a/llvm/lib/ProfileData/IndexedMemProfData.cpp
+++ b/llvm/lib/ProfileData/IndexedMemProfData.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/MemProf.h"
@@ -216,7 +217,9 @@ static Error writeMemProfV2(ProfOStream &OS,
static Error writeMemProfRadixTreeBased(
ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
- memprof::IndexedVersion Version, bool MemProfFullSchema) {
+ memprof::IndexedVersion Version, bool MemProfFullSchema,
+ std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>>
+ DataAccessProfileData) {
assert((Version == memprof::Version3 || Version == memprof::Version4) &&
"Unsupported version for radix tree format");
@@ -225,6 +228,8 @@ static Error writeMemProfRadixTreeBased(
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
OS.write(0ULL); // Reserve space for the memprof record payload offset.
OS.write(0ULL); // Reserve space for the memprof record table offset.
+ if (Version == memprof::Version4)
+ OS.write(0ULL); // Reserve space for the data access profile offset.
auto Schema = memprof::getHotColdSchema();
if (MemProfFullSchema)
@@ -251,17 +256,26 @@ static Error writeMemProfRadixTreeBased(
uint64_t RecordTableOffset = writeMemProfRecords(
OS, MemProfData.Records, &Schema, Version, &MemProfCallStackIndexes);
+ uint64_t DataAccessProfOffset = 0;
+ if (DataAccessProfileData.has_value()) {
+ DataAccessProfOffset = OS.tell();
+ if (Error E = (*DataAccessProfileData).get().serialize(OS))
+ return E;
+ }
+
// Verify that the computation for the number of elements in the call stack
// array works.
assert(CallStackPayloadOffset +
NumElements * sizeof(memprof::LinearFrameId) ==
RecordPayloadOffset);
- uint64_t Header[] = {
+ SmallVector<uint64_t, 4> Header = {
CallStackPayloadOffset,
RecordPayloadOffset,
RecordTableOffset,
};
+ if (Version == memprof::Version4)
+ Header.push_back(DataAccessProfOffset);
OS.patch({{HeaderUpdatePos, Header}});
return Error::success();
@@ -272,28 +286,33 @@ static Error writeMemProfV3(ProfOStream &OS,
memprof::IndexedMemProfData &MemProfData,
bool MemProfFullSchema) {
return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version3,
- MemProfFullSchema);
+ MemProfFullSchema, std::nullopt);
}
// Write out MemProf Version4
-static Error writeMemProfV4(ProfOStream &OS,
- memprof::IndexedMemProfData &MemProfData,
- bool MemProfFullSchema) {
+static Error writeMemProfV4(
+ ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+ bool MemProfFullSchema,
+ std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>>
+ DataAccessProfileData) {
return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version4,
- MemProfFullSchema);
+ MemProfFullSchema, DataAccessProfileData);
}
// Write out the MemProf data in a requested version.
-Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
- memprof::IndexedVersion MemProfVersionRequested,
- bool MemProfFullSchema) {
+Error writeMemProf(
+ ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+ memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
+ std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>>
+ DataAccessProfileData) {
switch (MemProfVersionRequested) {
case memprof::Version2:
return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
case memprof::Version3:
return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
case memprof::Version4:
- return writeMemProfV4(OS, MemProfData, MemProfFullSchema);
+ return writeMemProfV4(OS, MemProfData, MemProfFullSchema,
+ DataAccessProfileData);
}
return make_error<InstrProfError>(
@@ -357,7 +376,10 @@ Error IndexedMemProfReader::deserializeV2(const unsigned char *Start,
}
Error IndexedMemProfReader::deserializeRadixTreeBased(
- const unsigned char *Start, const unsigned char *Ptr) {
+ const unsigned char *Start, const unsigned char *Ptr,
+ memprof::IndexedVersion Version) {
+ assert((Version == memprof::Version3 || Version == memprof::Version4) &&
+ "Unsupported version for radix tree format");
// The offset in the stream right before invoking
// CallStackTableGenerator.Emit.
const uint64_t CallStackPayloadOffset =
@@ -369,6 +391,11 @@ Error IndexedMemProfReader::deserializeRadixTreeBased(
const uint64_t RecordTableOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+ uint64_t DataAccessProfOffset = 0;
+ if (Version == memprof::Version4)
+ DataAccessProfOffset =
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+
// Read the schema.
auto SchemaOr = memprof::readMemProfSchema(Ptr);
if (!SchemaOr)
@@ -390,6 +417,14 @@ Error IndexedMemProfReader::deserializeRadixTreeBased(
/*Payload=*/Start + RecordPayloadOffset,
/*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
+ if (DataAccessProfOffset > RecordTableOffset) {
+ DataAccessProfileData =
+ std::make_unique<data_access_prof::DataAccessProfData>();
+ const unsigned char *DAPPtr = Start + DataAccessProfOffset;
+ if (Error E = DataAccessProfileData->deserialize(DAPPtr))
+ return E;
+ }
+
return Error::success();
}
@@ -423,7 +458,7 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start,
case memprof::Version3:
case memprof::Version4:
// V3 and V4 share the same high-level structure (radix tree, linear IDs).
- if (Error E = deserializeRadixTreeBased(Start, Ptr))
+ if (Error E = deserializeRadixTreeBased(Start, Ptr, Version))
return E;
break;
}
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index e6c83430cd8e9..78aba992fcd65 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1551,6 +1551,20 @@ memprof::AllMemProfData IndexedMemProfReader::getAllMemProfData() const {
Pair.Record = std::move(*Record);
AllMemProfData.HeapProfileRecords.push_back(std::move(Pair));
}
+ // Populate the data access profiles for yaml output.
+ if (DataAccessProfileData != nullptr) {
+ for (const auto &[SymHandleRef, RecordRef] :
+ DataAccessProfileData->getRecords())
+ AllMemProfData.YamlifiedDataAccessProfiles.Records.push_back(
+ data_access_prof::DataAccessProfRecord(
+ SymHandleRef, RecordRef.AccessCount, RecordRef.Locations));
+ for (StringRef ColdSymbol : DataAccessProfileData->getKnownColdSymbols())
+ AllMemProfData.YamlifiedDataAccessProfiles.KnownColdSymbols.push_back(
+ ColdSymbol.str());
+ for (uint64_t Hash : DataAccessProfileData->getKnownColdHashes())
+ AllMemProfData.YamlifiedDataAccessProfiles.KnownColdHashes.push_back(
+ Hash);
+ }
return AllMemProfData;
}
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 2759346935b14..b0012ccc4f4bf 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
+#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/IndexedMemProfData.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
@@ -29,6 +30,7 @@
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <ctime>
+#include <functional>
#include <memory>
#include <string>
#include <tuple>
@@ -152,9 +154,7 @@ void InstrProfWriter::setValueProfDataEndianness(llvm::endianness Endianness) {
InfoObj->ValueProfDataEndianness = Endianness;
}
-void InstrProfWriter::setOutputSparse(bool Sparse) {
- this->Sparse = Sparse;
-}
+void InstrProfWriter::setOutputSparse(bool Sparse) { this->Sparse = Sparse; }
void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight,
function_ref<void(Error)> Warn) {
@@ -329,6 +329,12 @@ void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) {
llvm::append_range(BinaryIds, BIs);
}
+void InstrProfWriter::addDataAccessProfData(
+ std::unique_ptr<data_access_prof::DataAccessProfData>
+ DataAccessProfDataIn) {
+ DataAccessProfileData = std::move(DataAccessProfDataIn);
+}
+
void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) {
assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength);
asser...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just skimmed for now, couple of quick questions regarding the versioning below.
@@ -35,4 +35,15 @@ HeapProfileRecords: | |||
- { Function: 0x7777777777777777, LineOffset: 77, Column: 70, IsInlineFrame: true } | |||
- { Function: 0x8888888888888888, LineOffset: 88, Column: 80, IsInlineFrame: false } | |||
CalleeGuids: [ 0x300 ] | |||
DataAccessProfiles: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This test only writes v4. Can there be a test that this is not written if the earlier output version is requested?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This makes sense. Added a test file memprof-in-no-dap.yaml
which catches a minor issue that yaml output shouldn't have DataAccessProfiles: {}
when the struct is empty.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Test the non-empty yaml file with --memprof-version=3, expect an error?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Similar to the text format instr profile or sample profiles, the text format doesn't support prior versions with or without this change.
Is the idea of the check backward compatibility or something else?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not really imporant. --memprof-version is used for backward compatibility. When version3 is used on yaml file with data profile section, some kind of error is expected.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The new test is good too, but what I meant was a test that if the version given to llvm-profdata merge is <4 that we don't get the DAP output.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ping on this comment. Can you test that llvm-profdata merge with version set to 3, where the input has the dap profiles, doesn't write out the dap records? That's important for compatibility with existing tooling. Otherwise the patch lgtm
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you test that llvm-profdata merge with version set to 3, where the input has the dap profiles, doesn't write out the dap records?
Sure, testing backward compatibility definitely makes sense. Done now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
thanks for reviews! PTAL.
@@ -35,4 +35,15 @@ HeapProfileRecords: | |||
- { Function: 0x7777777777777777, LineOffset: 77, Column: 70, IsInlineFrame: true } | |||
- { Function: 0x8888888888888888, LineOffset: 88, Column: 80, IsInlineFrame: false } | |||
CalleeGuids: [ 0x300 ] | |||
DataAccessProfiles: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This makes sense. Added a test file memprof-in-no-dap.yaml
which catches a minor issue that yaml output shouldn't have DataAccessProfiles: {}
when the struct is empty.
@@ -35,4 +35,15 @@ HeapProfileRecords: | |||
- { Function: 0x7777777777777777, LineOffset: 77, Column: 70, IsInlineFrame: true } | |||
- { Function: 0x8888888888888888, LineOffset: 88, Column: 80, IsInlineFrame: false } | |||
CalleeGuids: [ 0x300 ] | |||
DataAccessProfiles: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Test the non-empty yaml file with --memprof-version=3, expect an error?
@@ -390,6 +419,14 @@ Error IndexedMemProfReader::deserializeRadixTreeBased( | |||
/*Payload=*/Start + RecordPayloadOffset, | |||
/*Base=*/Start, memprof::RecordLookupTrait(Version, Schema))); | |||
|
|||
if (DataAccessProfOffset > RecordTableOffset) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is it possible to have an non-zero DataAccessProfOffset that is <= RecordTableOffset? Perhaps assert (!DataAccessProfOffset || ... > RecordTableOffset), and use if (DataAccessProfOffset !=0) as the guard.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The idea to tighten the check resonates with me.
Maybe I can record both DAP offset and DAP length in the memprof header, and handle empty input in the deserialize
method like the diff shows. How does this sound?
-Error DataAccessProfData::deserialize(const unsigned char *&Ptr) {
+Error DataAccessProfData::deserialize(const unsigned char *&Ptr, uint64_t Len) {
+ if (Len == 0)
+ return Error::success();
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I realized checking the length of data access profile payload before creating the class or calling its 'deserialize' method will do it, and there is no need to update the signature of DataAccessProfData::deserialize
. Since it's towards the end of the working week and the change is ~20 lines , I took the liberty to go ahead :)
I'm open to make the length field more general (e.g., recording the length of the total memprof profile will make it useful when there are new payloads after data access profiles) and would like to hear thoughts on it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't really follow the benefit of adding the length for this particular section? Not completely against it, but we don't seem to have the length for the other sections. And it isn't clear to me that guarding the deserialization by the length is inherently better than comparing the offsets. I do think that some assertion checking could be added in either case though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Un-do the length changes and added the assert((!DataAccessProfOffset || DataAccessProfOffset > RecordTableOffset) && "message")
for simplicity.
Recording and checking length makes the code a little more straightforward and readable, but the implementation is a little more complex. I don't feel strong about it.
@@ -35,4 +35,15 @@ HeapProfileRecords: | |||
- { Function: 0x7777777777777777, LineOffset: 77, Column: 70, IsInlineFrame: true } | |||
- { Function: 0x8888888888888888, LineOffset: 88, Column: 80, IsInlineFrame: false } | |||
CalleeGuids: [ 0x300 ] | |||
DataAccessProfiles: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not really imporant. --memprof-version is used for backward compatibility. When version3 is used on yaml file with data profile section, some kind of error is expected.
Tagging @SharonXSharon @ellishg . My understanding of memory profile guided data symbols ordering (https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744/4) is that iPGHO binaries produces the profiles in raw MemProf format. Could you share more about how the profile representation (and/or) iPGHO change look like in raw MemProf (e.g., a minimal draft PR or whatever makes the sharing easier), and how the profile is used by compiler? This PR adds data access profiles in indexed MemProf format, and compiler consumes it by reading indexed MemProf out of an indexed FDO profile. Raw memprof remains unchanged. How would it interact with the iPGHO-based data symbol ordering work on your side? |
Hi Mingming,
W.r.t usage of the profiles, as a first step, we translate (map back) the addresses in the raw profile to symbol names (we use 8 byte as the memblock granularity to have a better, more precise mapping) and then pass the symbol names as a order file to the linker, i.e. lld takes -order_file. Note that for cstrings, we pass the hashes of the cstring literal in the order file, I am upstreaming that part in #140307 . Our goal is mainly to improve mobile app startup performance by reducing page faults. In fact, for the purpose of upstreaming the binary access profile we implemented, do you and @teresajohnson have any suggestions on how to integrate the binary access profile above into the existing raw profile format? Or is it actually better to make it separate as we are doing? |
Thanks for the details and pointer to the lld patch @SharonXSharon. Some questions about the raw format changes --
I prefer evolving the raw format to support binary accesses instead of keeping them separate. I don't think the differences are significant enough to keep them separate. |
Not really, for our mobile apps, we improve the app startup performance by reducing the major page faults during the startup, so the access count doesn't really matter, we only need hot/cold split, so we can group the hot together in the binary layout to reduce unnecessary page faults.
The
I'd be happy to integrate in the existing raw format, do you have any suggestions how can we best integrate? |
Would you be able to share a draft patch of the current implementation? It would help me come up with concrete suggestions on how to proceed and reduce unneccessary churn. |
@@ -110,8 +115,9 @@ struct DataAccessProfRecord { | |||
for (auto Loc : LocRefs) | |||
Locations.push_back(SourceLocation(Loc.FileName, Loc.Line)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Emplace back to avoid a copy?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.
@@ -41,6 +41,8 @@ namespace data_access_prof { | |||
struct SourceLocation { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[Re: line +38]
Should we change this to memprof? Reasoning is that this is closely tied to the memprof profile data format and we don't intend to use it elsewhere. Wdyt?
See this comment inline on Graphite.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this makes sense. Renamed namespace in this patch and I'm open to split renames to a separate patch before this one.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm fine with keeping the rename in this patch.
It may take a bit of effort to construct a clean version of our patch, maybe I can just throw a raw draft despite of 100% correctness to help our discussion? does that work? |
Yes of course. It doesn't even have to compile, just enough for me to get an idea of the changes. Thanks! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
thanks for reviews! PTAL.
@@ -41,6 +41,8 @@ namespace data_access_prof { | |||
struct SourceLocation { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this makes sense. Renamed namespace in this patch and I'm open to split renames to a separate patch before this one.
@@ -110,8 +115,9 @@ struct DataAccessProfRecord { | |||
for (auto Loc : LocRefs) | |||
Locations.push_back(SourceLocation(Loc.FileName, Loc.Line)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm with some minor suggestions.
@@ -41,6 +41,8 @@ namespace data_access_prof { | |||
struct SourceLocation { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm fine with keeping the rename in this patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm
@@ -7,6 +7,12 @@ | |||
; RUN: llvm-profdata show --memory %t/memprof-out.indexed > %t/memprof-out.yaml | |||
; RUN: diff -b %t/memprof-in.yaml %t/memprof-out.yaml | |||
|
|||
; Merge text profile as v3 binary profile. Test that the merged v3 profile | |||
; are identical to memprof-in-v3.ymal, and doesn't have callee guids or dap. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
s/ymal/yaml/
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.
… data access profiles" (#141157) Reverts #139997 Sanitizer failures (https://lab.llvm.org/buildbot/#/builders/94/builds/7373) Will fix forward later.
… change for data access profiles" (#141157) Reverts llvm/llvm-project#139997 Sanitizer failures (https://lab.llvm.org/buildbot/#/builders/94/builds/7373) Will fix forward later.
#138170 introduces classes to operate on data access profiles. This change supports the read and write of
DataAccessProfData
in indexed format of MemProf (v4) as well as its the text (yaml) format.For indexed format:
std::unique_ptr<DataAccessProfData>
) the data access profiles, and gives a non-owned copy when it callswriteMemProf
.uint64_t
to record the byte offset of data access profiles. Thisuint64_t
field is zero if data access profile is not set (nullptr).DataAccessProfData
.For textual format:
099a0fa (by @snehasish) introduces v4 which contains CalleeGuids in CallSiteInfo, and this change changes the v4 format in place with data access profiles. The current plan is to bump the version and enable v4 profiles with both features, assuming waiting for this change won't delay the callsite change too long.