Skip to content

[StaticDataLayout][PGO]Implement reader and writer change for data access profiles #139997

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
May 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 14 additions & 8 deletions llvm/include/llvm/ProfileData/DataAccessProf.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,8 @@
#ifndef LLVM_PROFILEDATA_DATAACCESSPROF_H_
#define LLVM_PROFILEDATA_DATAACCESSPROF_H_

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfoVariant.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
Expand All @@ -35,12 +33,15 @@

namespace llvm {

namespace data_access_prof {
namespace memprof {

/// The location of data in the source code. Used by profile lookup API.
struct SourceLocation {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Re: line +38]

Should we change this to memprof? Reasoning is that this is closely tied to the memprof profile data format and we don't intend to use it elsewhere. Wdyt?

See this comment inline on Graphite.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this makes sense. Renamed namespace in this patch and I'm open to split renames to a separate patch before this one.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm fine with keeping the rename in this patch.

SourceLocation(StringRef FileNameRef, uint32_t Line)
: FileName(FileNameRef.str()), Line(Line) {}

// Empty constructor is used in yaml conversion.
SourceLocation() {}
/// The filename where the data is located.
std::string FileName;
/// The line number in the source code.
Expand All @@ -53,6 +54,8 @@ namespace internal {
// which strings are owned by `DataAccessProfData`. Used by `DataAccessProfData`
// to represent data locations internally.
struct SourceLocationRef {
SourceLocationRef(StringRef FileNameRef, uint32_t Line)
: FileName(FileNameRef), Line(Line) {}
// The filename where the data is located.
StringRef FileName;
// The line number in the source code.
Expand Down Expand Up @@ -100,18 +103,21 @@ using SymbolHandle = std::variant<std::string, uint64_t>;
/// The data access profiles for a symbol.
struct DataAccessProfRecord {
public:
DataAccessProfRecord(SymbolHandleRef SymHandleRef,
ArrayRef<internal::SourceLocationRef> LocRefs) {
DataAccessProfRecord(SymbolHandleRef SymHandleRef, uint64_t AccessCount,
ArrayRef<internal::SourceLocationRef> LocRefs)
: AccessCount(AccessCount) {
if (std::holds_alternative<StringRef>(SymHandleRef)) {
SymHandle = std::get<StringRef>(SymHandleRef).str();
} else
SymHandle = std::get<uint64_t>(SymHandleRef);

for (auto Loc : LocRefs)
Locations.push_back(SourceLocation(Loc.FileName, Loc.Line));
Locations.emplace_back(Loc.FileName, Loc.Line);
}
// Empty constructor is used in yaml conversion.
DataAccessProfRecord() {}
SymbolHandle SymHandle;

uint64_t AccessCount;
// The locations of data in the source code. Optional.
SmallVector<SourceLocation> Locations;
};
Expand Down Expand Up @@ -208,7 +214,7 @@ class DataAccessProfData {
llvm::SetVector<StringRef> KnownColdSymbols;
};

} // namespace data_access_prof
} // namespace memprof
} // namespace llvm

#endif // LLVM_PROFILEDATA_DATAACCESSPROF_H_
12 changes: 9 additions & 3 deletions llvm/include/llvm/ProfileData/IndexedMemProfData.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,13 @@
#ifndef LLVM_PROFILEDATA_INDEXEDMEMPROFDATA_H
#define LLVM_PROFILEDATA_INDEXEDMEMPROFDATA_H

#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"

#include <functional>
#include <optional>

namespace llvm {
namespace memprof {
struct IndexedMemProfData {
Expand Down Expand Up @@ -82,8 +86,10 @@ struct IndexedMemProfData {
} // namespace memprof

// Write the MemProf data to OS.
Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
memprof::IndexedVersion MemProfVersionRequested,
bool MemProfFullSchema);
Error writeMemProf(
ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData);

} // namespace llvm
#endif
6 changes: 5 additions & 1 deletion llvm/include/llvm/ProfileData/InstrProfReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/Object/BuildID.h"
#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfCorrelator.h"
#include "llvm/ProfileData/MemProf.h"
Expand Down Expand Up @@ -703,10 +704,13 @@ class IndexedMemProfReader {
const unsigned char *CallStackBase = nullptr;
// The number of elements in the radix tree array.
unsigned RadixTreeSize = 0;
/// The data access profiles, deserialized from binary data.
std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData;

Error deserializeV2(const unsigned char *Start, const unsigned char *Ptr);
Error deserializeRadixTreeBased(const unsigned char *Start,
const unsigned char *Ptr);
const unsigned char *Ptr,
memprof::IndexedVersion Version);

public:
IndexedMemProfReader() = default;
Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/ProfileData/InstrProfWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Object/BuildID.h"
#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/IndexedMemProfData.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Error.h"
Expand Down Expand Up @@ -81,6 +82,8 @@ class InstrProfWriter {
// Whether to generated random memprof hotness for testing.
bool MemprofGenerateRandomHotness;

std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData;

public:
// For memprof testing, random hotness can be assigned to the contexts if
// MemprofGenerateRandomHotness is enabled. The random seed can be either
Expand Down Expand Up @@ -122,6 +125,9 @@ class InstrProfWriter {
// Add a binary id to the binary ids list.
void addBinaryIds(ArrayRef<llvm::object::BuildID> BIs);

void addDataAccessProfData(
std::unique_ptr<memprof::DataAccessProfData> DataAccessProfile);

/// Merge existing function counts from the given writer.
void mergeRecordsFromWriter(InstrProfWriter &&IPW,
function_ref<void(Error)> Warn);
Expand Down
14 changes: 14 additions & 0 deletions llvm/include/llvm/ProfileData/MemProfReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,20 @@ class YAMLMemProfReader final : public MemProfReader {
create(std::unique_ptr<MemoryBuffer> Buffer);

void parse(StringRef YAMLData);

std::unique_ptr<memprof::DataAccessProfData> takeDataAccessProfData() {
return std::move(DataAccessProfileData);
}

private:
// Called by `parse` to set data access profiles after parsing them from Yaml
// files.
void
setDataAccessProfileData(std::unique_ptr<memprof::DataAccessProfData> Data) {
DataAccessProfileData = std::move(Data);
}

std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData;
};
} // namespace memprof
} // namespace llvm
Expand Down
61 changes: 61 additions & 0 deletions llvm/include/llvm/ProfileData/MemProfYAML.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define LLVM_PROFILEDATA_MEMPROFYAML_H_

#include "llvm/ADT/SmallVector.h"
#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/YAMLTraits.h"
Expand All @@ -20,9 +21,24 @@ struct GUIDMemProfRecordPair {
MemProfRecord Record;
};

// Helper struct to yamlify memprof::DataAccessProfData. The struct
// members use owned strings. This is for simplicity and assumes that most real
// world use cases do look-ups and regression test scale is small.
struct YamlDataAccessProfData {
std::vector<memprof::DataAccessProfRecord> Records;
std::vector<uint64_t> KnownColdStrHashes;
std::vector<std::string> KnownColdSymbols;

bool isEmpty() const {
return Records.empty() && KnownColdStrHashes.empty() &&
KnownColdSymbols.empty();
}
};

// The top-level data structure, only used with YAML for now.
struct AllMemProfData {
std::vector<GUIDMemProfRecordPair> HeapProfileRecords;
YamlDataAccessProfData YamlifiedDataAccessProfiles;
};
} // namespace memprof

Expand Down Expand Up @@ -206,9 +222,52 @@ template <> struct MappingTraits<memprof::GUIDMemProfRecordPair> {
}
};

template <> struct MappingTraits<memprof::SourceLocation> {
static void mapping(IO &Io, memprof::SourceLocation &Loc) {
Io.mapOptional("FileName", Loc.FileName);
Io.mapOptional("Line", Loc.Line);
}
};

template <> struct MappingTraits<memprof::DataAccessProfRecord> {
static void mapping(IO &Io, memprof::DataAccessProfRecord &Rec) {
if (Io.outputting()) {
if (std::holds_alternative<std::string>(Rec.SymHandle)) {
Io.mapOptional("Symbol", std::get<std::string>(Rec.SymHandle));
} else {
Io.mapOptional("Hash", std::get<uint64_t>(Rec.SymHandle));
}
} else {
std::string SymName;
uint64_t Hash = 0;
Io.mapOptional("Symbol", SymName);
Io.mapOptional("Hash", Hash);
if (!SymName.empty()) {
Rec.SymHandle = SymName;
} else {
Rec.SymHandle = Hash;
}
}

Io.mapOptional("Locations", Rec.Locations);
}
};

template <> struct MappingTraits<memprof::YamlDataAccessProfData> {
static void mapping(IO &Io, memprof::YamlDataAccessProfData &Data) {
Io.mapOptional("SampledRecords", Data.Records);
Io.mapOptional("KnownColdSymbols", Data.KnownColdSymbols);
Io.mapOptional("KnownColdStrHashes", Data.KnownColdStrHashes);
}
};

template <> struct MappingTraits<memprof::AllMemProfData> {
static void mapping(IO &Io, memprof::AllMemProfData &Data) {
Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords);
// Map data access profiles if reading input, or if writing output &&
// the struct is populated.
if (!Io.outputting() || !Data.YamlifiedDataAccessProfiles.isEmpty())
Io.mapOptional("DataAccessProfiles", Data.YamlifiedDataAccessProfiles);
}
};

Expand All @@ -234,5 +293,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::CallSiteInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDHex64) // Used for CalleeGuids
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::DataAccessProfRecord)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::SourceLocation)

#endif // LLVM_PROFILEDATA_MEMPROFYAML_H_
7 changes: 4 additions & 3 deletions llvm/lib/ProfileData/DataAccessProf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include <sys/types.h>

namespace llvm {
namespace data_access_prof {
namespace memprof {

// If `Map` has an entry keyed by `Str`, returns the entry iterator. Otherwise,
// creates an owned copy of `Str`, adds a map entry for it and returns the
Expand Down Expand Up @@ -48,7 +48,8 @@ DataAccessProfData::getProfileRecord(const SymbolHandleRef SymbolID) const {

auto It = Records.find(Key);
if (It != Records.end()) {
return DataAccessProfRecord(Key, It->second.Locations);
return DataAccessProfRecord(Key, It->second.AccessCount,
It->second.Locations);
}

return std::nullopt;
Expand Down Expand Up @@ -261,5 +262,5 @@ Error DataAccessProfData::deserializeRecords(const unsigned char *&Ptr) {
}
return Error::success();
}
} // namespace data_access_prof
} // namespace memprof
} // namespace llvm
Loading