Skip to content

Commit a53b306

Browse files
authored
[NFC][MemProf] Move Radix tree methods to their own header and cpp. (#140501)
Part of a larger refactoring with the following goals: (1) reduce the size of MemProf.h; (2) avoid including ModuleSummaryIndex just for a couple of types.
1 parent 7268c4e commit a53b306

File tree

11 files changed

+617
-573
lines changed

11 files changed

+617
-573
lines changed

llvm/include/llvm/ProfileData/MemProf.h

Lines changed: 0 additions & 336 deletions
Original file line numberDiff line numberDiff line change
@@ -818,133 +818,6 @@ class CallStackLookupTrait {
818818
}
819819
};
820820

821-
namespace detail {
822-
// "Dereference" the iterator from DenseMap or OnDiskChainedHashTable. We have
823-
// to do so in one of two different ways depending on the type of the hash
824-
// table.
825-
template <typename value_type, typename IterTy>
826-
value_type DerefIterator(IterTy Iter) {
827-
using deref_type = llvm::remove_cvref_t<decltype(*Iter)>;
828-
if constexpr (std::is_same_v<deref_type, value_type>)
829-
return *Iter;
830-
else
831-
return Iter->second;
832-
}
833-
} // namespace detail
834-
835-
// A function object that returns a frame for a given FrameId.
836-
template <typename MapTy> struct FrameIdConverter {
837-
std::optional<FrameId> LastUnmappedId;
838-
MapTy &Map;
839-
840-
FrameIdConverter() = delete;
841-
FrameIdConverter(MapTy &Map) : Map(Map) {}
842-
843-
// Delete the copy constructor and copy assignment operator to avoid a
844-
// situation where a copy of FrameIdConverter gets an error in LastUnmappedId
845-
// while the original instance doesn't.
846-
FrameIdConverter(const FrameIdConverter &) = delete;
847-
FrameIdConverter &operator=(const FrameIdConverter &) = delete;
848-
849-
Frame operator()(FrameId Id) {
850-
auto Iter = Map.find(Id);
851-
if (Iter == Map.end()) {
852-
LastUnmappedId = Id;
853-
return Frame();
854-
}
855-
return detail::DerefIterator<Frame>(Iter);
856-
}
857-
};
858-
859-
// A function object that returns a call stack for a given CallStackId.
860-
template <typename MapTy> struct CallStackIdConverter {
861-
std::optional<CallStackId> LastUnmappedId;
862-
MapTy &Map;
863-
llvm::function_ref<Frame(FrameId)> FrameIdToFrame;
864-
865-
CallStackIdConverter() = delete;
866-
CallStackIdConverter(MapTy &Map,
867-
llvm::function_ref<Frame(FrameId)> FrameIdToFrame)
868-
: Map(Map), FrameIdToFrame(FrameIdToFrame) {}
869-
870-
// Delete the copy constructor and copy assignment operator to avoid a
871-
// situation where a copy of CallStackIdConverter gets an error in
872-
// LastUnmappedId while the original instance doesn't.
873-
CallStackIdConverter(const CallStackIdConverter &) = delete;
874-
CallStackIdConverter &operator=(const CallStackIdConverter &) = delete;
875-
876-
std::vector<Frame> operator()(CallStackId CSId) {
877-
std::vector<Frame> Frames;
878-
auto CSIter = Map.find(CSId);
879-
if (CSIter == Map.end()) {
880-
LastUnmappedId = CSId;
881-
} else {
882-
llvm::SmallVector<FrameId> CS =
883-
detail::DerefIterator<llvm::SmallVector<FrameId>>(CSIter);
884-
Frames.reserve(CS.size());
885-
for (FrameId Id : CS)
886-
Frames.push_back(FrameIdToFrame(Id));
887-
}
888-
return Frames;
889-
}
890-
};
891-
892-
// A function object that returns a Frame stored at a given index into the Frame
893-
// array in the profile.
894-
struct LinearFrameIdConverter {
895-
const unsigned char *FrameBase;
896-
897-
LinearFrameIdConverter() = delete;
898-
LinearFrameIdConverter(const unsigned char *FrameBase)
899-
: FrameBase(FrameBase) {}
900-
901-
Frame operator()(LinearFrameId LinearId) {
902-
uint64_t Offset = static_cast<uint64_t>(LinearId) * Frame::serializedSize();
903-
return Frame::deserialize(FrameBase + Offset);
904-
}
905-
};
906-
907-
// A function object that returns a call stack stored at a given index into the
908-
// call stack array in the profile.
909-
struct LinearCallStackIdConverter {
910-
const unsigned char *CallStackBase;
911-
llvm::function_ref<Frame(LinearFrameId)> FrameIdToFrame;
912-
913-
LinearCallStackIdConverter() = delete;
914-
LinearCallStackIdConverter(
915-
const unsigned char *CallStackBase,
916-
llvm::function_ref<Frame(LinearFrameId)> FrameIdToFrame)
917-
: CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame) {}
918-
919-
std::vector<Frame> operator()(LinearCallStackId LinearCSId) {
920-
std::vector<Frame> Frames;
921-
922-
const unsigned char *Ptr =
923-
CallStackBase +
924-
static_cast<uint64_t>(LinearCSId) * sizeof(LinearFrameId);
925-
uint32_t NumFrames =
926-
support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
927-
Frames.reserve(NumFrames);
928-
for (; NumFrames; --NumFrames) {
929-
LinearFrameId Elem =
930-
support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
931-
// Follow a pointer to the parent, if any. See comments below on
932-
// CallStackRadixTreeBuilder for the description of the radix tree format.
933-
if (static_cast<std::make_signed_t<LinearFrameId>>(Elem) < 0) {
934-
Ptr += (-Elem) * sizeof(LinearFrameId);
935-
Elem =
936-
support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
937-
}
938-
// We shouldn't encounter another pointer.
939-
assert(static_cast<std::make_signed_t<LinearFrameId>>(Elem) >= 0);
940-
Frames.push_back(FrameIdToFrame(Elem));
941-
Ptr += sizeof(LinearFrameId);
942-
}
943-
944-
return Frames;
945-
}
946-
};
947-
948821
struct LineLocation {
949822
LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Column(D) {}
950823

@@ -970,73 +843,6 @@ struct LineLocation {
970843
// A pair of a call site location and its corresponding callee GUID.
971844
using CallEdgeTy = std::pair<LineLocation, uint64_t>;
972845

973-
// Used to extract caller-callee pairs from the call stack array. The leaf
974-
// frame is assumed to call a heap allocation function with GUID 0. The
975-
// resulting pairs are accumulated in CallerCalleePairs. Users can take it
976-
// with:
977-
//
978-
// auto Pairs = std::move(Extractor.CallerCalleePairs);
979-
struct CallerCalleePairExtractor {
980-
// The base address of the radix tree array.
981-
const unsigned char *CallStackBase;
982-
// A functor to convert a linear FrameId to a Frame.
983-
llvm::function_ref<Frame(LinearFrameId)> FrameIdToFrame;
984-
// A map from caller GUIDs to lists of call sites in respective callers.
985-
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> CallerCalleePairs;
986-
987-
// The set of linear call stack IDs that we've visited.
988-
BitVector Visited;
989-
990-
CallerCalleePairExtractor() = delete;
991-
CallerCalleePairExtractor(
992-
const unsigned char *CallStackBase,
993-
llvm::function_ref<Frame(LinearFrameId)> FrameIdToFrame,
994-
unsigned RadixTreeSize)
995-
: CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame),
996-
Visited(RadixTreeSize) {}
997-
998-
void operator()(LinearCallStackId LinearCSId) {
999-
const unsigned char *Ptr =
1000-
CallStackBase +
1001-
static_cast<uint64_t>(LinearCSId) * sizeof(LinearFrameId);
1002-
uint32_t NumFrames =
1003-
support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
1004-
// The leaf frame calls a function with GUID 0.
1005-
uint64_t CalleeGUID = 0;
1006-
for (; NumFrames; --NumFrames) {
1007-
LinearFrameId Elem =
1008-
support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
1009-
// Follow a pointer to the parent, if any. See comments below on
1010-
// CallStackRadixTreeBuilder for the description of the radix tree format.
1011-
if (static_cast<std::make_signed_t<LinearFrameId>>(Elem) < 0) {
1012-
Ptr += (-Elem) * sizeof(LinearFrameId);
1013-
Elem =
1014-
support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
1015-
}
1016-
// We shouldn't encounter another pointer.
1017-
assert(static_cast<std::make_signed_t<LinearFrameId>>(Elem) >= 0);
1018-
1019-
// Add a new caller-callee pair.
1020-
Frame F = FrameIdToFrame(Elem);
1021-
uint64_t CallerGUID = F.Function;
1022-
LineLocation Loc(F.LineOffset, F.Column);
1023-
CallerCalleePairs[CallerGUID].emplace_back(Loc, CalleeGUID);
1024-
1025-
// Keep track of the indices we've visited. If we've already visited the
1026-
// current one, terminate the traversal. We will not discover any new
1027-
// caller-callee pair by continuing the traversal.
1028-
unsigned Offset =
1029-
std::distance(CallStackBase, Ptr) / sizeof(LinearFrameId);
1030-
if (Visited.test(Offset))
1031-
break;
1032-
Visited.set(Offset);
1033-
1034-
Ptr += sizeof(LinearFrameId);
1035-
CalleeGUID = CallerGUID;
1036-
}
1037-
}
1038-
};
1039-
1040846
struct IndexedMemProfData {
1041847
// A map to hold memprof data per function. The lower 64 bits obtained from
1042848
// the md5 hash of the function name is used to index into the map.
@@ -1087,148 +893,6 @@ struct IndexedMemProfData {
1087893
// Compute a CallStackId for a given call stack.
1088894
CallStackId hashCallStack(ArrayRef<FrameId> CS) const;
1089895
};
1090-
1091-
// A convenience wrapper around FrameIdConverter and CallStackIdConverter for
1092-
// tests.
1093-
struct IndexedCallstackIdConverter {
1094-
IndexedCallstackIdConverter() = delete;
1095-
IndexedCallstackIdConverter(IndexedMemProfData &MemProfData)
1096-
: FrameIdConv(MemProfData.Frames),
1097-
CSIdConv(MemProfData.CallStacks, FrameIdConv) {}
1098-
1099-
// Delete the copy constructor and copy assignment operator to avoid a
1100-
// situation where a copy of IndexedCallstackIdConverter gets an error in
1101-
// LastUnmappedId while the original instance doesn't.
1102-
IndexedCallstackIdConverter(const IndexedCallstackIdConverter &) = delete;
1103-
IndexedCallstackIdConverter &
1104-
operator=(const IndexedCallstackIdConverter &) = delete;
1105-
1106-
std::vector<Frame> operator()(CallStackId CSId) { return CSIdConv(CSId); }
1107-
1108-
FrameIdConverter<decltype(IndexedMemProfData::Frames)> FrameIdConv;
1109-
CallStackIdConverter<decltype(IndexedMemProfData::CallStacks)> CSIdConv;
1110-
};
1111-
1112-
struct FrameStat {
1113-
// The number of occurrences of a given FrameId.
1114-
uint64_t Count = 0;
1115-
// The sum of indexes where a given FrameId shows up.
1116-
uint64_t PositionSum = 0;
1117-
};
1118-
1119-
// Compute a histogram of Frames in call stacks.
1120-
template <typename FrameIdTy>
1121-
llvm::DenseMap<FrameIdTy, FrameStat>
1122-
computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
1123-
&MemProfCallStackData);
1124-
1125-
// Construct a radix tree of call stacks.
1126-
//
1127-
// A set of call stacks might look like:
1128-
//
1129-
// CallStackId 1: f1 -> f2 -> f3
1130-
// CallStackId 2: f1 -> f2 -> f4 -> f5
1131-
// CallStackId 3: f1 -> f2 -> f4 -> f6
1132-
// CallStackId 4: f7 -> f8 -> f9
1133-
//
1134-
// where each fn refers to a stack frame.
1135-
//
1136-
// Since we expect a lot of common prefixes, we can compress the call stacks
1137-
// into a radix tree like:
1138-
//
1139-
// CallStackId 1: f1 -> f2 -> f3
1140-
// |
1141-
// CallStackId 2: +---> f4 -> f5
1142-
// |
1143-
// CallStackId 3: +---> f6
1144-
//
1145-
// CallStackId 4: f7 -> f8 -> f9
1146-
//
1147-
// Now, we are interested in retrieving call stacks for a given CallStackId, so
1148-
// we just need a pointer from a given call stack to its parent. For example,
1149-
// CallStackId 2 would point to CallStackId 1 as a parent.
1150-
//
1151-
// We serialize the radix tree above into a single array along with the length
1152-
// of each call stack and pointers to the parent call stacks.
1153-
//
1154-
// Index: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
1155-
// Array: L3 f9 f8 f7 L4 f6 J3 L4 f5 f4 J3 L3 f3 f2 f1
1156-
// ^ ^ ^ ^
1157-
// | | | |
1158-
// CallStackId 4: 0 --+ | | |
1159-
// CallStackId 3: 4 --------------+ | |
1160-
// CallStackId 2: 7 -----------------------+ |
1161-
// CallStackId 1: 11 -----------------------------------+
1162-
//
1163-
// - LN indicates the length of a call stack, encoded as ordinary integer N.
1164-
//
1165-
// - JN indicates a pointer to the parent, encoded as -N.
1166-
//
1167-
// The radix tree allows us to reconstruct call stacks in the leaf-to-root
1168-
// order as we scan the array from left to right while following pointers to
1169-
// parents along the way.
1170-
//
1171-
// For example, if we are decoding CallStackId 2, we start a forward traversal
1172-
// at Index 7, noting the call stack length of 4 and obtaining f5 and f4. When
1173-
// we see J3 at Index 10, we resume a forward traversal at Index 13 = 10 + 3,
1174-
// picking up f2 and f1. We are done after collecting 4 frames as indicated at
1175-
// the beginning of the traversal.
1176-
//
1177-
// On-disk IndexedMemProfRecord will refer to call stacks by their indexes into
1178-
// the radix tree array, so we do not explicitly encode mappings like:
1179-
// "CallStackId 1 -> 11".
1180-
template <typename FrameIdTy> class CallStackRadixTreeBuilder {
1181-
// The radix tree array.
1182-
std::vector<LinearFrameId> RadixArray;
1183-
1184-
// Mapping from CallStackIds to indexes into RadixArray.
1185-
llvm::DenseMap<CallStackId, LinearCallStackId> CallStackPos;
1186-
1187-
// In build, we partition a given call stack into two parts -- the prefix
1188-
// that's common with the previously encoded call stack and the frames beyond
1189-
// the common prefix -- the unique portion. Then we want to find out where
1190-
// the common prefix is stored in RadixArray so that we can link the unique
1191-
// portion to the common prefix. Indexes, declared below, helps with our
1192-
// needs. Intuitively, Indexes tells us where each frame of the previously encoded
1193-
// call stack is stored in RadixArray. More formally, Indexes satisfies:
1194-
//
1195-
// RadixArray[Indexes[I]] == Prev[I]
1196-
//
1197-
// for every I, where Prev is the call stack in the root-to-leaf order
1198-
// previously encoded by build. (Note that Prev, as passed to
1199-
// encodeCallStack, is in the leaf-to-root order.)
1200-
//
1201-
// For example, if the call stack being encoded shares 5 frames at the root of
1202-
// the call stack with the previously encoded call stack,
1203-
// RadixArray[Indexes[0]] is the root frame of the common prefix.
1204-
// RadixArray[Indexes[5 - 1]] is the last frame of the common prefix.
1205-
std::vector<LinearCallStackId> Indexes;
1206-
1207-
using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameIdTy>>;
1208-
1209-
// Encode a call stack into RadixArray. Return the starting index within
1210-
// RadixArray.
1211-
LinearCallStackId encodeCallStack(
1212-
const llvm::SmallVector<FrameIdTy> *CallStack,
1213-
const llvm::SmallVector<FrameIdTy> *Prev,
1214-
const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes);
1215-
1216-
public:
1217-
CallStackRadixTreeBuilder() = default;
1218-
1219-
// Build a radix tree array.
1220-
void
1221-
build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
1222-
&&MemProfCallStackData,
1223-
const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes,
1224-
llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram);
1225-
1226-
ArrayRef<LinearFrameId> getRadixArray() const { return RadixArray; }
1227-
1228-
llvm::DenseMap<CallStackId, LinearCallStackId> takeCallStackPos() {
1229-
return std::move(CallStackPos);
1230-
}
1231-
};
1232896
} // namespace memprof
1233897
} // namespace llvm
1234898

0 commit comments

Comments
 (0)