[scudo] Make block storage in TransferBatch trailing objects #144204

Open: wants to merge 1 commit into main
38 changes: 20 additions & 18 deletions compiler-rt/lib/scudo/standalone/allocator_common.h
@@ -14,58 +14,57 @@

namespace scudo {

template <class SizeClassAllocator> struct TransferBatch {
template <class SizeClassAllocator> struct Batch {
typedef typename SizeClassAllocator::SizeClassMap SizeClassMap;
typedef typename SizeClassAllocator::CompactPtrT CompactPtrT;

static const u16 MaxNumCached = SizeClassMap::MaxNumCachedHint;
void setFromArray(CompactPtrT *Array, u16 N) {
DCHECK_LE(N, MaxNumCached);
DCHECK_LE(N, SizeClassAllocator::MaxNumBlocksInBatch);
Count = N;
memcpy(Batch, Array, sizeof(Batch[0]) * Count);
memcpy(Blocks, Array, sizeof(Blocks[0]) * Count);
}
void appendFromArray(CompactPtrT *Array, u16 N) {
DCHECK_LE(N, MaxNumCached - Count);
memcpy(Batch + Count, Array, sizeof(Batch[0]) * N);
DCHECK_LE(N, SizeClassAllocator::MaxNumBlocksInBatch - Count);
memcpy(Blocks + Count, Array, sizeof(Blocks[0]) * N);
// u16 will be promoted to int by arithmetic type conversion.
Count = static_cast<u16>(Count + N);
}
void appendFromTransferBatch(TransferBatch *B, u16 N) {
DCHECK_LE(N, MaxNumCached - Count);
void appendFromBatch(Batch *B, u16 N) {
DCHECK_LE(N, SizeClassAllocator::MaxNumBlocksInBatch - Count);
DCHECK_GE(B->Count, N);
// Append from the back of `B`.
memcpy(Batch + Count, B->Batch + (B->Count - N), sizeof(Batch[0]) * N);
memcpy(Blocks + Count, B->Blocks + (B->Count - N), sizeof(Blocks[0]) * N);
// u16 will be promoted to int by arithmetic type conversion.
Count = static_cast<u16>(Count + N);
B->Count = static_cast<u16>(B->Count - N);
}
void clear() { Count = 0; }
bool empty() { return Count == 0; }
void add(CompactPtrT P) {
DCHECK_LT(Count, MaxNumCached);
Batch[Count++] = P;
DCHECK_LT(Count, SizeClassAllocator::MaxNumBlocksInBatch);
Blocks[Count++] = P;
}
void moveToArray(CompactPtrT *Array) {
memcpy(Array, Batch, sizeof(Batch[0]) * Count);
memcpy(Array, Blocks, sizeof(Blocks[0]) * Count);
clear();
}

void moveNToArray(CompactPtrT *Array, u16 N) {
DCHECK_LE(N, Count);
memcpy(Array, Batch + Count - N, sizeof(Batch[0]) * N);
memcpy(Array, Blocks + Count - N, sizeof(Blocks[0]) * N);
Count = static_cast<u16>(Count - N);
}
u16 getCount() const { return Count; }
bool isEmpty() const { return Count == 0U; }
CompactPtrT get(u16 I) const {
DCHECK_LE(I, Count);
return Batch[I];
return Blocks[I];
}
TransferBatch *Next;
Batch *Next;

private:
CompactPtrT Batch[MaxNumCached];
u16 Count;
CompactPtrT Blocks[];
};

// A BatchGroup is used to collect blocks. Each group has a group id to
@@ -78,9 +77,12 @@ template <class SizeClassAllocator> struct BatchGroup {
// This is used to track how many bytes are not in-use since last time we
// tried to release pages.
uptr BytesInBGAtLastCheckpoint;
// Blocks are managed by TransferBatch in a list.
SinglyLinkedList<TransferBatch<SizeClassAllocator>> Batches;
// Blocks are managed by Batch in a list.
SinglyLinkedList<Batch<SizeClassAllocator>> Batches;
// Cache value of SizeClassAllocatorLocalCache::getMaxCached()
// TODO(chiahungduan): Except BatchClass, every Batch stores the same number
// of blocks. As long as we make BatchClass follow this constraint, this
// field can be removed.
u16 MaxCachedPerBatch;
};
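Since the block array is now trailing storage rather than a fixed-size member, a `Batch` can no longer be sized with plain `sizeof`; the backing block has to reserve the header plus room for the block pointers. Below is a minimal standalone sketch of that layout, assuming placeholder types, a made-up capacity, and `malloc` in place of scudo's BatchClass region:

```cpp
// Illustration only: CompactPtrT, the capacity, and the use of malloc are
// placeholders, not scudo's real definitions.
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <new>

using CompactPtrT = std::uint32_t;
constexpr std::uint16_t MaxNumBlocksInBatch = 14; // hypothetical capacity

struct Batch {
  Batch *Next;
  std::uint16_t Count;
  // Trailing block storage (flexible array member, a GCC/Clang extension in
  // C++); it contributes nothing to sizeof(Batch).
  CompactPtrT Blocks[];
};

// Header plus room for the trailing array, mirroring getSizeOfBatchClass().
constexpr std::size_t BatchClassBlockSize =
    sizeof(Batch) + sizeof(CompactPtrT) * MaxNumBlocksInBatch;

int main() {
  // The Batch must live in a block of at least BatchClassBlockSize bytes;
  // scudo carves such blocks out of the BatchClass region, here malloc
  // stands in for that.
  void *Block = std::malloc(BatchClassBlockSize);
  Batch *B = new (Block) Batch;
  B->Count = 0;

  CompactPtrT Ptrs[3] = {1, 2, 3};
  std::memcpy(B->Blocks, Ptrs, sizeof(Ptrs)); // writes into the trailing storage
  B->Count = 3;

  std::free(Block);
  return 0;
}
```

This is the same computation that `getSizeOfBatchClass()` performs in `primary32.h` below, and it is why `getSizeByClassId(SizeClassMap::BatchClassId)` no longer returns `Max(sizeof(BatchGroupT), sizeof(TransferBatchT))`.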

105 changes: 56 additions & 49 deletions compiler-rt/lib/scudo/standalone/primary32.h
@@ -34,7 +34,7 @@ namespace scudo {
// predictable address pattern (the predictability increases with the block
// size).
//
// Regions for size class 0 are special and used to hold TransferBatches, which
// Regions for size class 0 are special and used to hold Batches, which
// allow to transfer arrays of pointers from the global size class freelist to
// the thread specific freelist for said class, and back.
//
@@ -56,12 +56,21 @@ template <typename Config> class SizeClassAllocator32 {
typename Conditional<Config::getEnableBlockCache(),
SizeClassAllocatorLocalCache<ThisT>,
SizeClassAllocatorNoCache<ThisT>>::type;
typedef TransferBatch<ThisT> TransferBatchT;
typedef Batch<ThisT> BatchT;
typedef BatchGroup<ThisT> BatchGroupT;
static const u16 MaxNumBlocksInBatch = SizeClassMap::MaxNumCachedHint;

static constexpr uptr getSizeOfBatchClass() {
const uptr HeaderSize = sizeof(BatchT);
return HeaderSize + sizeof(CompactPtrT) * MaxNumBlocksInBatch;
}

static_assert(sizeof(BatchGroupT) <= getSizeOfBatchClass(),
"BatchGroupT also uses BatchClass");

static uptr getSizeByClassId(uptr ClassId) {
return (ClassId == SizeClassMap::BatchClassId)
? Max(sizeof(BatchGroupT), sizeof(TransferBatchT))
? getSizeOfBatchClass()
: SizeClassMap::getSizeByClassId(ClassId);
}

@@ -124,7 +133,7 @@ template <typename Config> class SizeClassAllocator32 {

// When all blocks are freed, it has to be the same size as `AllocatedUser`.
void verifyAllBlocksAreReleasedTestOnly() {
// `BatchGroup` and `TransferBatch` also use the blocks from BatchClass.
// `BatchGroup` and `Batch` also use the blocks from BatchClass.
uptr BatchClassUsedInFreeLists = 0;
for (uptr I = 0; I < NumClasses; I++) {
// We have to count BatchClassUsedInFreeLists in other regions first.
@@ -134,7 +143,7 @@ template <typename Config> class SizeClassAllocator32 {
ScopedLock L1(Sci->Mutex);
uptr TotalBlocks = 0;
for (BatchGroupT &BG : Sci->FreeListInfo.BlockList) {
// `BG::Batches` are `TransferBatches`. +1 for `BatchGroup`.
// `BG::Batches` are `Batches`. +1 for `BatchGroup`.
BatchClassUsedInFreeLists += BG.Batches.size() + 1;
for (const auto &It : BG.Batches)
TotalBlocks += It.getCount();
@@ -153,7 +162,7 @@ template <typename Config> class SizeClassAllocator32 {
for (const auto &It : BG.Batches)
TotalBlocks += It.getCount();
} else {
// `BatchGroup` with empty freelist doesn't have `TransferBatch` record
// `BatchGroup` with empty freelist doesn't have `Batch` record
// itself.
++TotalBlocks;
}
@@ -487,39 +496,39 @@ template <typename Config> class SizeClassAllocator32 {
REQUIRES(Sci->Mutex) {
DCHECK_EQ(Sci, getSizeClassInfo(SizeClassMap::BatchClassId));

// Free blocks are recorded by TransferBatch in freelist for all
// size-classes. In addition, TransferBatch is allocated from BatchClassId.
// Free blocks are recorded by Batch in freelist for all
// size-classes. In addition, Batch is allocated from BatchClassId.
// In order not to use additional block to record the free blocks in
// BatchClassId, they are self-contained. I.e., A TransferBatch records the
// BatchClassId, they are self-contained. I.e., A Batch records the
// block address of itself. See the figure below:
//
// TransferBatch at 0xABCD
// Batch at 0xABCD
// +----------------------------+
// | Free blocks' addr |
// | +------+------+------+ |
// | |0xABCD|... |... | |
// | +------+------+------+ |
// +----------------------------+
//
// When we allocate all the free blocks in the TransferBatch, the block used
// by TransferBatch is also free for use. We don't need to recycle the
// TransferBatch. Note that the correctness is maintained by the invariant,
// When we allocate all the free blocks in the Batch, the block used
// by Batch is also free for use. We don't need to recycle the
// Batch. Note that the correctness is maintained by the invariant,
//
// Each popBlocks() request returns the entire TransferBatch. Returning
// part of the blocks in a TransferBatch is invalid.
// Each popBlocks() request returns the entire Batch. Returning
// part of the blocks in a Batch is invalid.
//
// This ensures that TransferBatch won't leak the address itself while it's
// This ensures that Batch won't leak the address itself while it's
// still holding other valid data.
//
// Besides, BatchGroup is also allocated from BatchClassId and has its
// address recorded in the TransferBatch too. To maintain the correctness,
// address recorded in the Batch too. To maintain the correctness,
//
// The address of BatchGroup is always recorded in the last TransferBatch
// The address of BatchGroup is always recorded in the last Batch
// in the freelist (also imply that the freelist should only be
// updated with push_front). Once the last TransferBatch is popped,
// updated with push_front). Once the last Batch is popped,
// the block used by BatchGroup is also free for use.
//
// With this approach, the blocks used by BatchGroup and TransferBatch are
// With this approach, the blocks used by BatchGroup and Batch are
// reusable and don't need additional space for them.

Sci->FreeListInfo.PushedBlocks += Size;
@@ -548,27 +557,27 @@ template <typename Config> class SizeClassAllocator32 {
// 1. just allocated a new `BatchGroup`.
// 2. Only 1 block is pushed when the freelist is empty.
if (BG->Batches.empty()) {
// Construct the `TransferBatch` on the last element.
TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(
// Construct the `Batch` on the last element.
BatchT *TB = reinterpret_cast<BatchT *>(
decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1]));
TB->clear();
// As mentioned above, addresses of `TransferBatch` and `BatchGroup` are
// recorded in the TransferBatch.
// As mentioned above, addresses of `Batch` and `BatchGroup` are
// recorded in the Batch.
TB->add(Array[Size - 1]);
TB->add(
compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(BG)));
--Size;
BG->Batches.push_front(TB);
}

TransferBatchT *CurBatch = BG->Batches.front();
BatchT *CurBatch = BG->Batches.front();
DCHECK_NE(CurBatch, nullptr);

for (u32 I = 0; I < Size;) {
u16 UnusedSlots =
static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
if (UnusedSlots == 0) {
CurBatch = reinterpret_cast<TransferBatchT *>(
CurBatch = reinterpret_cast<BatchT *>(
decompactPtr(SizeClassMap::BatchClassId, Array[I]));
CurBatch->clear();
// Self-contained
@@ -596,7 +605,7 @@ template <typename Config> class SizeClassAllocator32 {
// TB
//
// Each BlockGroup(BG) will associate with unique group id and the free blocks
// are managed by a list of TransferBatch(TB). To reduce the time of inserting
// are managed by a list of Batch(TB). To reduce the time of inserting
// blocks, BGs are sorted and the input `Array` are supposed to be sorted so
// that we can get better performance of maintaining sorted property.
// Use `SameGroup=true` to indicate that all blocks in the array are from the
@@ -613,29 +622,29 @@ template <typename Config> class SizeClassAllocator32 {
BatchGroupT *BG = reinterpret_cast<BatchGroupT *>(
SizeClassAllocator->getBatchClassBlock());
BG->Batches.clear();
TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(
SizeClassAllocator->getBatchClassBlock());
BatchT *TB =
reinterpret_cast<BatchT *>(SizeClassAllocator->getBatchClassBlock());
TB->clear();

BG->CompactPtrGroupBase = CompactPtrGroupBase;
BG->Batches.push_front(TB);
BG->BytesInBGAtLastCheckpoint = 0;
BG->MaxCachedPerBatch = TransferBatchT::MaxNumCached;
BG->MaxCachedPerBatch = MaxNumBlocksInBatch;

return BG;
};

auto InsertBlocks = [&](BatchGroupT *BG, CompactPtrT *Array, u32 Size) {
SinglyLinkedList<TransferBatchT> &Batches = BG->Batches;
TransferBatchT *CurBatch = Batches.front();
SinglyLinkedList<BatchT> &Batches = BG->Batches;
BatchT *CurBatch = Batches.front();
DCHECK_NE(CurBatch, nullptr);

for (u32 I = 0; I < Size;) {
DCHECK_GE(BG->MaxCachedPerBatch, CurBatch->getCount());
u16 UnusedSlots =
static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
if (UnusedSlots == 0) {
CurBatch = reinterpret_cast<TransferBatchT *>(
CurBatch = reinterpret_cast<BatchT *>(
SizeClassAllocator->getBatchClassBlock());
CurBatch->clear();
Batches.push_front(CurBatch);
@@ -716,7 +725,7 @@ template <typename Config> class SizeClassAllocator32 {
if (Sci->FreeListInfo.BlockList.empty())
return 0U;

SinglyLinkedList<TransferBatchT> &Batches =
SinglyLinkedList<BatchT> &Batches =
Sci->FreeListInfo.BlockList.front()->Batches;

if (Batches.empty()) {
@@ -725,27 +734,26 @@ template <typename Config> class SizeClassAllocator32 {
Sci->FreeListInfo.BlockList.pop_front();

// Block used by `BatchGroup` is from BatchClassId. Turn the block into
// `TransferBatch` with single block.
TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(BG);
// `Batch` with single block.
BatchT *TB = reinterpret_cast<BatchT *>(BG);
ToArray[0] =
compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB));
Sci->FreeListInfo.PoppedBlocks += 1;
return 1U;
}

// So far, instead of always filling the blocks to `MaxBlockCount`, we only
// examine single `TransferBatch` to minimize the time spent on the primary
// allocator. Besides, the sizes of `TransferBatch` and
// `SizeClassAllocatorT::getMaxCached()` may also impact the time spent on
// accessing the primary allocator.
// examine single `Batch` to minimize the time spent on the primary
// allocator. Besides, the sizes of `Batch` and `CacheT::getMaxCached()` may
// also impact the time spent on accessing the primary allocator.
// TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount`
// blocks and/or adjust the size of `TransferBatch` according to
// `SizeClassAllocatorT::getMaxCached()`.
TransferBatchT *B = Batches.front();
// blocks and/or adjust the size of `Batch` according to
// `CacheT::getMaxCached()`.
BatchT *B = Batches.front();
DCHECK_NE(B, nullptr);
DCHECK_GT(B->getCount(), 0U);

// BatchClassId should always take all blocks in the TransferBatch. Read the
// BatchClassId should always take all blocks in the Batch. Read the
// comment in `pushBatchClassBlocks()` for more details.
const u16 PopCount = ClassId == SizeClassMap::BatchClassId
? B->getCount()
Expand All @@ -756,7 +764,7 @@ template <typename Config> class SizeClassAllocator32 {
// done without holding `Mutex`.
if (B->empty()) {
Batches.pop_front();
// `TransferBatch` of BatchClassId is self-contained, no need to
// `Batch` of BatchClassId is self-contained, no need to
// deallocate. Read the comment in `pushBatchClassBlocks()` for more
// details.
if (ClassId != SizeClassMap::BatchClassId)
Expand All @@ -769,7 +777,7 @@ template <typename Config> class SizeClassAllocator32 {
// We don't keep BatchGroup with zero blocks to avoid empty-checking
// while allocating. Note that block used for constructing BatchGroup is
// recorded as free blocks in the last element of BatchGroup::Batches.
// Which means, once we pop the last TransferBatch, the block is
// Which means, once we pop the last Batch, the block is
// implicitly deallocated.
if (ClassId != SizeClassMap::BatchClassId)
SizeClassAllocator->deallocate(SizeClassMap::BatchClassId, BG);
@@ -816,8 +824,7 @@ template <typename Config> class SizeClassAllocator32 {
static_cast<u32>((RegionSize - Offset) / Size));
DCHECK_GT(NumberOfBlocks, 0U);

constexpr u32 ShuffleArraySize =
MaxNumBatches * TransferBatchT::MaxNumCached;
constexpr u32 ShuffleArraySize = MaxNumBatches * MaxNumBlocksInBatch;
// Fill the transfer batches and put them in the size-class freelist. We
// need to randomize the blocks for security purposes, so we first fill a
// local array that we then shuffle before populating the batches.
@@ -1102,7 +1109,7 @@ template <typename Config> class SizeClassAllocator32 {
if (AllocatedGroupSize == 0)
continue;

// TransferBatches are pushed in front of BG.Batches. The first one may
// Batches are pushed in front of BG.Batches. The first one may
// not have all caches used.
const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch +
BG.Batches.front()->getCount();
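A closing note on the sizing hunk near the top of `primary32.h`: `BatchGroup` objects are carved out of the same BatchClass blocks as `Batch` objects, so a single block size must cover both, which is what the new `static_assert` encodes. A rough sketch of that relationship, with hypothetical member types and capacity:

```cpp
// Sketch only: the member types, the list stand-in, and the capacity are
// hypothetical; the real MaxNumBlocksInBatch is SizeClassMap::MaxNumCachedHint.
#include <cstddef>
#include <cstdint>

using CompactPtrT = std::uint32_t;
constexpr std::uint16_t MaxNumBlocksInBatch = 14;

struct BatchT {
  BatchT *Next;
  std::uint16_t Count;
  CompactPtrT Blocks[]; // trailing storage, not counted by sizeof(BatchT)
};

struct BatchGroupT {
  BatchGroupT *Next;
  std::uintptr_t CompactPtrGroupBase;
  std::uintptr_t BytesInBGAtLastCheckpoint;
  void *Batches; // stand-in for SinglyLinkedList<BatchT>
  std::uint16_t MaxCachedPerBatch;
};

// Every BatchClass block is this large...
constexpr std::size_t BatchClassBlockSize =
    sizeof(BatchT) + sizeof(CompactPtrT) * MaxNumBlocksInBatch;

// ...and BatchGroup objects are allocated from those same blocks, so they
// must fit too; this mirrors the static_assert added in primary32.h.
static_assert(sizeof(BatchGroupT) <= BatchClassBlockSize,
              "BatchGroupT also uses BatchClass");

int main() { return 0; }
```

With both objects guaranteed to fit, `getSizeByClassId(SizeClassMap::BatchClassId)` can simply return `getSizeOfBatchClass()`.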