-
Notifications
You must be signed in to change notification settings - Fork 787
[SYCL] Support for pooled small allocations. #5438
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
bb924d4
72261b3
c4b86a1
26b6815
e9203d0
113ebd6
19f625a
23ead73
36c3df4
d7786a3
b0f1d02
1353433
5b024de
6aff382
e96952c
d3a2cc0
eec7dd1
7a3ebf1
e818313
0b1da36
a2a0f08
1e61af2
3d43396
7e9796a
21026d3
1a2bf5e
901ad9c
e0fdffa
a1c8b7d
05c1621
59cc820
cb66ec3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -414,28 +414,50 @@ class Bucket { | |
// routines, slab map and etc. | ||
USMAllocContext::USMAllocImpl &OwnAllocCtx; | ||
|
||
// For buckets used in chunked mode, a counter of slabs in the pool. | ||
// For allocations that use an entire slab each, the entries in the Available | ||
// list are entries in the pool. Each slab is available for a new | ||
// allocation. The size of the Available list is the size of the pool. | ||
// For allocations that use slabs in chunked mode, slabs will be in the | ||
// Available list if any one or more of their chunks is free. The entire slab | ||
// is not necessarily free, just some chunks in the slab are free. To | ||
// implement pooling we will allow one slab in the Available list to be | ||
// entirely empty. Normally such a slab would have been freed from USM. But | ||
// now we don't, and treat this slab as "in the pool". | ||
// When a slab becomes entirely free we have to decide whether to return it to | ||
// USM or keep it allocated. A simple check for size of the Available list is | ||
// not sufficient to check whether any slab has been pooled yet. We would have | ||
// to traverse the entire Available list and check if any of them is entirely | ||
// free. Instead we keep a counter of entirely empty slabs within the | ||
// Available list to speed up the process of checking if a slab in this bucket | ||
// is already pooled. | ||
size_t chunkedSlabsInPool; | ||
|
||
// Statistics | ||
size_t allocCount; | ||
size_t allocPoolCount; | ||
size_t freeCount; | ||
size_t currSlabsInUse; | ||
size_t currSlabsInPool; | ||
size_t maxSlabsInUse; | ||
size_t maxSlabsInPool; | ||
|
||
public: | ||
// Statistics | ||
size_t allocCount; | ||
size_t maxSlabsInUse; | ||
|
||
Bucket(size_t Sz, USMAllocContext::USMAllocImpl &AllocCtx) | ||
: Size{Sz}, OwnAllocCtx{AllocCtx}, allocCount(0), allocPoolCount(0), | ||
freeCount(0), currSlabsInUse(0), currSlabsInPool(0), maxSlabsInUse(0), | ||
maxSlabsInPool(0) {} | ||
: Size{Sz}, OwnAllocCtx{AllocCtx}, chunkedSlabsInPool(0), | ||
allocPoolCount(0), freeCount(0), currSlabsInUse(0), currSlabsInPool(0), | ||
maxSlabsInPool(0), allocCount(0), maxSlabsInUse(0) {} | ||
|
||
// Get pointer to allocation that is one piece of an available slab in this | ||
// bucket. | ||
void *getChunk(bool &FromAllocatedSlab); | ||
void *getChunk(bool &FromPool); | ||
|
||
// Get pointer to allocation that is a full slab in this bucket. | ||
void *getSlab(bool &FromPool); | ||
|
||
// Return the allocation size of this bucket. | ||
size_t getSize() const { return Size; } | ||
|
||
// Free an allocation that is one piece of a slab in this bucket. | ||
|
@@ -451,11 +473,14 @@ class Bucket { | |
USMAllocContext::USMAllocImpl &getUsmAllocCtx() { return OwnAllocCtx; } | ||
|
||
// Check whether an allocation to be freed can be placed in the pool. | ||
bool CanPool(); | ||
bool CanPool(bool &ToPool); | ||
|
||
// The minimum allocation size for a slab in this bucket. | ||
// The minimum allocation size for any slab. | ||
size_t SlabMinSize(); | ||
|
||
// The allocation size for a slab in this bucket. | ||
size_t SlabAllocSize(); | ||
|
||
// The minimum size of a chunk from this bucket's slabs. | ||
size_t ChunkCutOff(); | ||
|
||
|
@@ -475,17 +500,19 @@ class Bucket { | |
void updateStats(int InUse, int InPool); | ||
|
||
// Print bucket statistics | ||
void printStats(); | ||
void printStats(bool &TitlePrinted, SystemMemory::MemType MT); | ||
|
||
private: | ||
void onFreeChunk(Slab &, bool &ToPool); | ||
|
||
// Update statistics of pool usage, and indicate that an allocation was made | ||
// from the pool. | ||
void decrementPool(bool &FromPool); | ||
|
||
// Get a slab to be used for chunked allocations. | ||
// These slabs are used for allocations <= ChunkCutOff and not pooled. | ||
decltype(AvailableSlabs.begin()) getAvailSlab(bool &FromAllocatedSlab); | ||
decltype(AvailableSlabs.begin()) getAvailSlab(bool &FromPool); | ||
|
||
// Get a slab that will be used as a whole for a single allocation. | ||
// These slabs are > ChunkCutOff in size and pooled. | ||
decltype(AvailableSlabs.begin()) getAvailFullSlab(bool &FromPool); | ||
}; | ||
|
||
|
@@ -531,7 +558,8 @@ class USMAllocContext::USMAllocImpl { | |
return USMSettings.SlabMinSize[(*MemHandle).getMemType()]; | ||
}; | ||
|
||
void printStats(); | ||
void printStats(bool &TitlePrinted, size_t &HighBucketSize, | ||
size_t &HighPeakSlabsInUse, SystemMemory::MemType MT); | ||
|
||
private: | ||
Bucket &findBucket(size_t Size); | ||
|
@@ -552,10 +580,8 @@ Slab::Slab(Bucket &Bkt) | |
// some padding at the end of the slab. | ||
Chunks(Bkt.SlabMinSize() / Bkt.getSize()), NumAllocated{0}, | ||
bucket(Bkt), SlabListIter{}, FirstFreeChunkIdx{0} { | ||
size_t SlabAllocSize = Bkt.getSize(); | ||
if (SlabAllocSize < Bkt.SlabMinSize()) | ||
SlabAllocSize = Bkt.SlabMinSize(); | ||
MemPtr = Bkt.getMemHandle().allocate(SlabAllocSize); | ||
auto SlabSize = Bkt.SlabAllocSize(); | ||
MemPtr = Bkt.getMemHandle().allocate(SlabSize); | ||
regSlab(*this); | ||
} | ||
|
||
|
@@ -673,6 +699,14 @@ void *Slab::getEnd() const { | |
|
||
bool Slab::hasAvail() { return NumAllocated != getNumChunks(); } | ||
|
||
// If a slab was available in the pool then note that the current pooled | ||
// size has reduced by the size of a slab in this bucket. | ||
void Bucket::decrementPool(bool &FromPool) { | ||
FromPool = true; | ||
smaslov-intel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
updateStats(1, -1); | ||
USMSettings.CurPoolSize -= SlabAllocSize(); | ||
} | ||
|
||
auto Bucket::getAvailFullSlab(bool &FromPool) | ||
-> decltype(AvailableSlabs.begin()) { | ||
// Return a slab that will be used for a single allocation. | ||
|
@@ -681,17 +715,9 @@ auto Bucket::getAvailFullSlab(bool &FromPool) | |
std::make_unique<Slab>(*this)); | ||
(*It)->setIterator(It); | ||
FromPool = false; | ||
if (USMSettings.PoolTrace > 1) | ||
updateStats(1, 0); | ||
updateStats(1, 0); | ||
} else { | ||
// If a slab was available in the pool then note that the current pooled | ||
// size has reduced by the size of this slab. | ||
FromPool = true; | ||
if (USMSettings.PoolTrace > 1) { | ||
updateStats(1, -1); | ||
USMSettings.CurPoolSizes[getMemType()] -= Size; | ||
} | ||
USMSettings.CurPoolSize -= Size; | ||
decrementPool(FromPool); | ||
} | ||
|
||
return AvailableSlabs.begin(); | ||
|
@@ -713,47 +739,44 @@ void Bucket::freeSlab(Slab &Slab, bool &ToPool) { | |
std::lock_guard<std::mutex> Lg(BucketLock); | ||
auto SlabIter = Slab.getIterator(); | ||
assert(SlabIter != UnavailableSlabs.end()); | ||
if (CanPool()) { | ||
if (CanPool(ToPool)) { | ||
auto It = | ||
AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter)); | ||
UnavailableSlabs.erase(SlabIter); | ||
(*It)->setIterator(It); | ||
|
||
if (USMSettings.PoolTrace > 1) { | ||
updateStats(-1, 1); | ||
ToPool = true; | ||
} | ||
} else { | ||
UnavailableSlabs.erase(SlabIter); | ||
|
||
if (USMSettings.PoolTrace > 1) { | ||
updateStats(-1, 0); | ||
ToPool = false; | ||
} | ||
} | ||
} | ||
|
||
auto Bucket::getAvailSlab(bool &FromAllocatedSlab) | ||
-> decltype(AvailableSlabs.begin()) { | ||
auto Bucket::getAvailSlab(bool &FromPool) -> decltype(AvailableSlabs.begin()) { | ||
|
||
FromAllocatedSlab = true; | ||
if (AvailableSlabs.size() == 0) { | ||
auto It = AvailableSlabs.insert(AvailableSlabs.begin(), | ||
std::make_unique<Slab>(*this)); | ||
(*It)->setIterator(It); | ||
|
||
if (USMSettings.PoolTrace > 1) | ||
updateStats(1, 0); | ||
FromAllocatedSlab = false; | ||
updateStats(1, 0); | ||
FromPool = false; | ||
} else { | ||
if ((*(AvailableSlabs.begin()))->getNumAllocated() == 0) { | ||
// If this was an empty slab, it was in the pool. | ||
// Now it is no longer in the pool, so update count. | ||
--chunkedSlabsInPool; | ||
decrementPool(FromPool); | ||
} else { | ||
// Allocation from existing slab is treated as from pool for statistics. | ||
FromPool = true; | ||
} | ||
} | ||
|
||
return AvailableSlabs.begin(); | ||
} | ||
|
||
void *Bucket::getChunk(bool &FromAllocatedSlab) { | ||
void *Bucket::getChunk(bool &FromPool) { | ||
std::lock_guard<std::mutex> Lg(BucketLock); | ||
|
||
auto SlabIt = getAvailSlab(FromAllocatedSlab); | ||
auto SlabIt = getAvailSlab(FromPool); | ||
auto *FreeChunk = (*SlabIt)->getChunk(); | ||
|
||
// If the slab is full, move it to unavailable slabs and update its iterator | ||
|
@@ -792,33 +815,45 @@ void Bucket::onFreeChunk(Slab &Slab, bool &ToPool) { | |
(*It)->setIterator(It); | ||
} | ||
|
||
// Remove the slab when all the chunks from it are deallocated | ||
// Note: since the slab is stored as unique_ptr, just remove it from | ||
// the list to destroy the object | ||
// Check if slab is empty, and pool it if we can. | ||
if (Slab.getNumAllocated() == 0) { | ||
auto It = Slab.getIterator(); | ||
assert(It != AvailableSlabs.end()); | ||
|
||
AvailableSlabs.erase(It); | ||
|
||
if (USMSettings.PoolTrace > 1) | ||
updateStats(-1, 0); | ||
|
||
ToPool = false; | ||
// The slab is now empty. | ||
// If pool has capacity then put the slab in the pool. | ||
// The ToPool parameter indicates whether the Slab will be put in the pool | ||
// or freed from USM. | ||
if (!CanPool(ToPool)) { | ||
// Note: since the slab is stored as unique_ptr, just remove it from | ||
// the list to destroy the object. | ||
auto It = Slab.getIterator(); | ||
assert(It != AvailableSlabs.end()); | ||
AvailableSlabs.erase(It); | ||
} | ||
} | ||
} | ||
|
||
bool Bucket::CanPool() { | ||
bool Bucket::CanPool(bool &ToPool) { | ||
rdeodhar marked this conversation as resolved.
Show resolved
Hide resolved
|
||
std::lock_guard<sycl::detail::SpinLock> Lock{PoolLock}; | ||
size_t NewFreeSlabsInBucket = AvailableSlabs.size() + 1; | ||
size_t NewFreeSlabsInBucket; | ||
// Check if this bucket is used in chunked form or as full slabs. | ||
bool chunkedBucket = getSize() <= ChunkCutOff(); | ||
if (chunkedBucket) | ||
NewFreeSlabsInBucket = chunkedSlabsInPool + 1; | ||
else | ||
NewFreeSlabsInBucket = AvailableSlabs.size() + 1; | ||
Comment on lines
+839
to
+842
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be simpler to maintain "chunked" slabs in a separate list, i.e. break the AvailableSlabs into 2? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know if it would be simpler, but there could be alternative approaches such as adding a third type of list, in effect breaking the AvailableSlabs list into two, with different meaning. However, the approach implemented here required the least change. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, let's go with what we have ready, and only consider a re-design when we need to changes something again. |
||
if (Capacity() >= NewFreeSlabsInBucket) { | ||
size_t NewPoolSize = USMSettings.CurPoolSize + Size; | ||
size_t NewPoolSize = USMSettings.CurPoolSize + SlabAllocSize(); | ||
if (USMSettings.MaxPoolSize >= NewPoolSize) { | ||
USMSettings.CurPoolSize = NewPoolSize; | ||
USMSettings.CurPoolSizes[getMemType()] += Size; | ||
if (chunkedBucket) | ||
++chunkedSlabsInPool; | ||
|
||
updateStats(-1, 1); | ||
ToPool = true; | ||
return true; | ||
} | ||
} | ||
updateStats(-1, 0); | ||
ToPool = false; | ||
return false; | ||
} | ||
|
||
|
@@ -830,7 +865,16 @@ SystemMemory::MemType Bucket::getMemType() { | |
|
||
size_t Bucket::SlabMinSize() { return USMSettings.SlabMinSize[getMemType()]; } | ||
|
||
size_t Bucket::Capacity() { return USMSettings.Capacity[getMemType()]; } | ||
size_t Bucket::SlabAllocSize() { return std::max(getSize(), SlabMinSize()); } | ||
|
||
size_t Bucket::Capacity() { | ||
// For buckets used in chunked mode, just one slab in pool is sufficient. | ||
// For larger buckets, the capacity could be more and is adjustable. | ||
if (getSize() <= ChunkCutOff()) | ||
return 1; | ||
else | ||
return USMSettings.Capacity[getMemType()]; | ||
} | ||
|
||
size_t Bucket::MaxPoolableSize() { | ||
return USMSettings.MaxPoolableSize[getMemType()]; | ||
|
@@ -847,14 +891,28 @@ void Bucket::countAlloc(bool FromPool) { | |
void Bucket::countFree() { ++freeCount; } | ||
|
||
void Bucket::updateStats(int InUse, int InPool) { | ||
if (USMSettings.PoolTrace == 0) | ||
return; | ||
currSlabsInUse += InUse; | ||
maxSlabsInUse = std::max(currSlabsInUse, maxSlabsInUse); | ||
currSlabsInPool += InPool; | ||
maxSlabsInPool = std::max(currSlabsInPool, maxSlabsInPool); | ||
// Increment or decrement current pool sizes based on whether | ||
// slab was added to or removed from pool. | ||
USMSettings.CurPoolSizes[getMemType()] += InPool * SlabAllocSize(); | ||
} | ||
|
||
void Bucket::printStats() { | ||
void Bucket::printStats(bool &TitlePrinted, SystemMemory::MemType MT) { | ||
if (allocCount) { | ||
if (!TitlePrinted) { | ||
auto Label = MemTypeNames[MT]; | ||
std::cout << Label << " memory statistics\n"; | ||
std::cout << std::setw(14) << "Bucket Size" << std::setw(12) << "Allocs" | ||
<< std::setw(12) << "Frees" << std::setw(18) | ||
<< "Allocs from Pool" << std::setw(20) << "Peak Slabs in Use" | ||
<< std::setw(21) << "Peak Slabs in Pool" << std::endl; | ||
TitlePrinted = true; | ||
} | ||
std::cout << std::setw(14) << getSize() << std::setw(12) << allocCount | ||
<< std::setw(12) << freeCount << std::setw(18) << allocPoolCount | ||
<< std::setw(20) << maxSlabsInUse << std::setw(21) | ||
|
@@ -982,6 +1040,7 @@ USMAllocContext::USMAllocContext(std::unique_ptr<SystemMemory> MemHandle) | |
: pImpl(std::make_unique<USMAllocImpl>(std::move(MemHandle))) {} | ||
|
||
void *USMAllocContext::allocate(size_t size) { | ||
// For full-slab allocations indicates whether slab is from Pool. | ||
bool FromPool; | ||
auto Ptr = pImpl->allocate(size, FromPool); | ||
|
||
|
@@ -1023,27 +1082,36 @@ void USMAllocContext::deallocate(void *ptr, bool OwnZeMemHandle) { | |
return; | ||
} | ||
|
||
// Define destructor for its usage with unique_ptr | ||
// Define destructor for use with unique_ptr | ||
USMAllocContext::~USMAllocContext() { | ||
bool TitlePrinted = false; | ||
size_t HighBucketSize; | ||
size_t HighPeakSlabsInUse; | ||
if (USMSettings.PoolTrace > 1) { | ||
auto Label = "Shared"; | ||
if (pImpl->getMemHandle().getMemType() == SystemMemory::Host) | ||
Label = "Host"; | ||
if (pImpl->getMemHandle().getMemType() == SystemMemory::Device) | ||
Label = "Device"; | ||
std::cout << Label << " memory statistics\n"; | ||
pImpl->printStats(); | ||
std::cout << "Current Pool Size " << USMSettings.CurPoolSize << std::endl; | ||
SystemMemory::MemType MT = pImpl->getMemHandle().getMemType(); | ||
pImpl->printStats(TitlePrinted, HighBucketSize, HighPeakSlabsInUse, MT); | ||
if (TitlePrinted) { | ||
std::cout << "Current Pool Size " << USMSettings.CurPoolSize << std::endl; | ||
const char *Label = MemTypeNames[MT]; | ||
std::cout << "Suggested Setting: SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR=;" | ||
<< std::string(1, tolower(*Label)) << std::string(Label + 1) | ||
<< ":" << HighBucketSize << "," << HighPeakSlabsInUse << ",64K" | ||
<< std::endl; | ||
} | ||
} | ||
} | ||
|
||
void USMAllocContext::USMAllocImpl::printStats() { | ||
std::cout << std::setw(14) << "Bucket Size" << std::setw(12) << "Allocs" | ||
<< std::setw(12) << "Frees" << std::setw(18) << "Allocs From Pool" | ||
<< std::setw(20) << "Peak Slabs In Use" << std::setw(21) | ||
<< "Peak Slabs in Pool" << std::endl; | ||
void USMAllocContext::USMAllocImpl::printStats(bool &TitlePrinted, | ||
size_t &HighBucketSize, | ||
size_t &HighPeakSlabsInUse, | ||
SystemMemory::MemType MT) { | ||
HighBucketSize = 0; | ||
HighPeakSlabsInUse = 0; | ||
for (auto &B : Buckets) { | ||
(*B).printStats(); | ||
(*B).printStats(TitlePrinted, MT); | ||
HighPeakSlabsInUse = std::max((*B).maxSlabsInUse, HighPeakSlabsInUse); | ||
if ((*B).allocCount) | ||
HighBucketSize = std::max((*B).SlabAllocSize(), HighBucketSize); | ||
} | ||
} | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.