Skip to content

Commit 4e944f2

Browse files
committed
HubSpot Backport: HBASE-27225 Add BucketAllocator bucket size statistic logging (apache#4637) (addendum)
Signed-off-by: Wellington Chevreuil <wchevreuil@apache.org>
1 parent 96e7283 commit 4e944f2

File tree

3 files changed

+130
-38
lines changed

3 files changed

+130
-38
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketAllocator.java

Lines changed: 113 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -171,12 +171,15 @@ final class BucketSizeInfo {
171171
// Free bucket means it has space to allocate a block;
172172
// Completely free bucket means it has no block.
173173
private LinkedMap bucketList, freeBuckets, completelyFreeBuckets;
174+
// only modified under synchronization, but also read outside it.
175+
private volatile long fragmentationBytes;
174176
private int sizeIndex;
175177

176178
BucketSizeInfo(int sizeIndex) {
177179
bucketList = new LinkedMap();
178180
freeBuckets = new LinkedMap();
179181
completelyFreeBuckets = new LinkedMap();
182+
fragmentationBytes = 0;
180183
this.sizeIndex = sizeIndex;
181184
}
182185

@@ -196,7 +199,7 @@ public int sizeIndex() {
196199
* Find a bucket to allocate a block
197200
* @return the offset in the IOEngine
198201
*/
199-
public long allocateBlock() {
202+
public long allocateBlock(int blockSize) {
200203
Bucket b = null;
201204
if (freeBuckets.size() > 0) {
202205
// Use up an existing one first...
@@ -209,6 +212,9 @@ public long allocateBlock() {
209212
if (b == null) return -1;
210213
long result = b.allocate();
211214
blockAllocated(b);
215+
if (blockSize < b.getItemAllocationSize()) {
216+
fragmentationBytes += b.getItemAllocationSize() - blockSize;
217+
}
212218
return result;
213219
}
214220

@@ -239,11 +245,14 @@ private synchronized void removeBucket(Bucket b) {
239245
completelyFreeBuckets.remove(b);
240246
}
241247

242-
public void freeBlock(Bucket b, long offset) {
248+
public void freeBlock(Bucket b, long offset, int length) {
243249
assert bucketList.containsKey(b);
244250
// else we shouldn't have anything to free...
245251
assert (!completelyFreeBuckets.containsKey(b));
246252
b.free(offset);
253+
if (length < b.getItemAllocationSize()) {
254+
fragmentationBytes -= b.getItemAllocationSize() - length;
255+
}
247256
if (!freeBuckets.containsKey(b)) freeBuckets.put(b, b);
248257
if (b.isCompletelyFree()) completelyFreeBuckets.put(b, b);
249258
}
@@ -265,9 +274,9 @@ public synchronized IndexStatistics statistics() {
265274
// if bucket capacity is not perfectly divisible by a bucket's object size, there will
266275
// be some left over per bucket. for some object sizes this may be large enough to be
267276
// non-trivial and worth tuning by choosing a more divisible object size.
268-
long waistedBytes = (bucketCapacity % bucketObjectSize) * (full + fillingBuckets);
269-
return new IndexStatistics(free, used, bucketObjectSize, full,
270-
completelyFreeBuckets.size(), waistedBytes);
277+
long wastedBytes = (bucketCapacity % bucketObjectSize) * (full + fillingBuckets);
278+
return new IndexStatistics(free, used, bucketObjectSize, full, completelyFreeBuckets.size(),
279+
wastedBytes, fragmentationBytes);
271280
}
272281

273282
@Override
@@ -459,7 +468,7 @@ public synchronized long allocateBlock(int blockSize) throws CacheFullException,
459468
"; adjust BucketCache sizes " + BlockCacheFactory.BUCKET_CACHE_BUCKETS_KEY +
460469
" to accomodate if size seems reasonable and you want it cached.");
461470
}
462-
long offset = bsi.allocateBlock();
471+
long offset = bsi.allocateBlock(blockSize);
463472

464473
// Ask caller to free up space and try again!
465474
if (offset < 0)
@@ -481,11 +490,11 @@ private Bucket grabGlobalCompletelyFreeBucket() {
481490
* @param offset block's offset
482491
* @return size freed
483492
*/
484-
public synchronized int freeBlock(long offset) {
493+
public synchronized int freeBlock(long offset, int length) {
485494
int bucketNo = (int) (offset / bucketCapacity);
486495
assert bucketNo >= 0 && bucketNo < buckets.length;
487496
Bucket targetBucket = buckets[bucketNo];
488-
bucketSizeInfos[targetBucket.sizeIndex()].freeBlock(targetBucket, offset);
497+
bucketSizeInfos[targetBucket.sizeIndex()].freeBlock(targetBucket, offset, length);
489498
usedSize -= targetBucket.getItemAllocationSize();
490499
return targetBucket.getItemAllocationSize();
491500
}
@@ -504,68 +513,141 @@ public int sizeOfAllocation(long offset) {
504513
return targetBucket.getItemAllocationSize();
505514
}
506515

516+
/**
517+
* Statistics to give a glimpse into the distribution of BucketCache objects. Each configured
518+
* bucket size, denoted by {@link BucketSizeInfo}, gets an IndexStatistic. A BucketSizeInfo
519+
* allocates blocks of a configured size from claimed buckets. If you have a bucket size of 512k,
520+
* the corresponding BucketSizeInfo will always allocate chunks of 512k at a time regardless of
521+
* actual request.
522+
* <p>
523+
* Over time, as a BucketSizeInfo gets more allocations, it will claim more buckets from the total
524+
* pool of completelyFreeBuckets. As blocks are freed from a BucketSizeInfo, those buckets may be
525+
* returned to the completelyFreeBuckets pool.
526+
* <p>
527+
* The IndexStatistics help visualize how these buckets are currently distributed, through counts
528+
* of items, bytes, and fullBuckets. Additionally, mismatches between block sizes and bucket sizes
529+
* can manifest in inefficient cache usage. These typically manifest in three ways:
530+
* <p>
531+
* 1. Allocation failures, because block size is larger than max bucket size. These show up in
532+
* logs and can be alleviated by adding larger bucket sizes if appropriate.<br>
533+
* 2. Memory fragmentation, because blocks are typically smaller than the bucket size. See
534+
* {@link #fragmentationBytes()} for details.<br>
535+
* 3. Memory waste, because a bucket's itemSize is not a perfect divisor of bucketCapacity. see
536+
* {@link #wastedBytes()} for details.<br>
537+
*/
507538
static class IndexStatistics {
508-
private long freeCount, usedCount, itemSize, totalCount, waistedBytes;
539+
private long freeCount, usedCount, itemSize, totalCount, wastedBytes, fragmentationBytes;
509540
private int fullBuckets, completelyFreeBuckets;
510541

542+
/**
543+
* How many more items can be allocated from the currently claimed blocks of this bucket size
544+
*/
511545
public long freeCount() {
512546
return freeCount;
513547
}
514548

549+
/**
550+
* How many items are currently taking up space in this bucket size's buckets
551+
*/
515552
public long usedCount() {
516553
return usedCount;
517554
}
518555

556+
/**
557+
* Combined {@link #freeCount()} + {@link #usedCount()}
558+
*/
519559
public long totalCount() {
520560
return totalCount;
521561
}
522562

563+
/**
564+
* How many more bytes can be allocated from the currently claimed blocks of this bucket size
565+
*/
523566
public long freeBytes() {
524567
return freeCount * itemSize;
525568
}
526569

570+
/**
571+
* How many bytes are currently taking up space in this bucket size's buckets. Note: If your
572+
* items are less than the bucket size of this bucket, the actual used bytes by items will be
573+
* lower than this value. But since a bucket size can only allocate items of a single size, this
574+
* value is the true number of used bytes. The difference will be counted in
575+
* {@link #fragmentationBytes()}.
576+
*/
527577
public long usedBytes() {
528578
return usedCount * itemSize;
529579
}
530580

581+
/**
582+
* Combined {@link #totalCount()} * {@link #itemSize()}
583+
*/
531584
public long totalBytes() {
532585
return totalCount * itemSize;
533586
}
534587

588+
/**
589+
* This bucket size can only allocate items of this size, even if the requested allocation size
590+
* is smaller. The rest goes towards {@link #fragmentationBytes()}.
591+
*/
535592
public long itemSize() {
536593
return itemSize;
537594
}
538595

596+
/**
597+
* How many buckets have been completely filled by blocks for this bucket size. These buckets
598+
* can't accept any more blocks unless some existing are freed.
599+
*/
539600
public int fullBuckets() {
540601
return fullBuckets;
541602
}
542603

604+
/**
605+
* How many buckets are currently claimed by this bucket size but as yet totally unused. These
606+
* buckets are available for reallocation to other bucket sizes if those fill up.
607+
*/
543608
public int completelyFreeBuckets() {
544609
return completelyFreeBuckets;
545610
}
546611

547-
public long waistedBytes() {
548-
return waistedBytes;
612+
/**
613+
* If {@link #bucketCapacity} is not perfectly divisible by this {@link #itemSize()}, the
614+
* remainder will be unusable in buckets of this size. A high value here may be optimized by
615+
* trying to choose bucket sizes which can better divide {@link #bucketCapacity}.
616+
*/
617+
public long wastedBytes() {
618+
return wastedBytes;
619+
}
620+
621+
/**
622+
* Every time you allocate blocks in these buckets where the block size is less than the bucket
623+
* size, fragmentation increases by that difference. You can reduce fragmentation by lowering
624+
* the bucket size so that it is closer to the typical block size. This may have the consequence
625+
* of bumping some blocks to the next larger bucket size, so experimentation may be needed.
626+
*/
627+
public long fragmentationBytes() {
628+
return fragmentationBytes;
549629
}
550630

551631
public IndexStatistics(long free, long used, long itemSize, int fullBuckets,
552-
int completelyFreeBuckets, long waistedBytes) {
553-
setTo(free, used, itemSize, fullBuckets, completelyFreeBuckets, waistedBytes);
632+
int completelyFreeBuckets, long wastedBytes, long fragmentationBytes) {
633+
setTo(free, used, itemSize, fullBuckets, completelyFreeBuckets, wastedBytes,
634+
fragmentationBytes);
554635
}
555636

556637
public IndexStatistics() {
557-
setTo(-1, -1, 0, 0, 0, 0);
638+
setTo(-1, -1, 0, 0, 0, 0, 0);
558639
}
559640

560641
public void setTo(long free, long used, long itemSize, int fullBuckets,
561-
int completelyFreeBuckets, long waistedBytes) {
642+
int completelyFreeBuckets, long wastedBytes, long fragmentationBytes) {
562643
this.itemSize = itemSize;
563644
this.freeCount = free;
564645
this.usedCount = used;
565646
this.totalCount = free + used;
566647
this.fullBuckets = fullBuckets;
567648
this.completelyFreeBuckets = completelyFreeBuckets;
568-
this.waistedBytes = waistedBytes;
649+
this.wastedBytes = wastedBytes;
650+
this.fragmentationBytes = fragmentationBytes;
569651
}
570652
}
571653

@@ -581,27 +663,35 @@ void logDebugStatistics() {
581663
IndexStatistics total = new IndexStatistics();
582664
IndexStatistics[] stats = getIndexStatistics(total);
583665
LOG.debug("Bucket allocator statistics follow:");
584-
LOG.debug(" Free bytes={}; used bytes={}; total bytes={}; waisted bytes={}; completelyFreeBuckets={}",
585-
total.freeBytes(), total.usedBytes(), total.totalBytes(), total.waistedBytes(), total.completelyFreeBuckets());
666+
LOG.debug(
667+
" Free bytes={}; used bytes={}; total bytes={}; wasted bytes={}; fragmentation bytes={}; "
668+
+ "completelyFreeBuckets={}",
669+
total.freeBytes(), total.usedBytes(), total.totalBytes(), total.wastedBytes(),
670+
total.fragmentationBytes(), total.completelyFreeBuckets());
586671
for (IndexStatistics s : stats) {
587-
LOG.debug(" Object size {}; used={}; free={}; total={}; waisted bytes={}; full buckets={}",
588-
s.itemSize(), s.usedCount(), s.freeCount(), s.totalCount(), s.waistedBytes(), s.fullBuckets());
672+
LOG.debug(
673+
" Object size {}; used={}; free={}; total={}; wasted bytes={}; fragmentation bytes={}, "
674+
+ "full buckets={}",
675+
s.itemSize(), s.usedCount(), s.freeCount(), s.totalCount(), s.wastedBytes(),
676+
s.fragmentationBytes(), s.fullBuckets());
589677
}
590678
}
591679

592680
IndexStatistics[] getIndexStatistics(IndexStatistics grandTotal) {
593681
IndexStatistics[] stats = getIndexStatistics();
594-
long totalfree = 0, totalused = 0, totalWaisted = 0;
682+
long totalfree = 0, totalused = 0, totalWasted = 0, totalFragmented = 0;
595683
int fullBuckets = 0, completelyFreeBuckets = 0;
596684

597685
for (IndexStatistics stat : stats) {
598686
totalfree += stat.freeBytes();
599687
totalused += stat.usedBytes();
600-
totalWaisted += stat.waistedBytes();
688+
totalWasted += stat.wastedBytes();
689+
totalFragmented += stat.fragmentationBytes();
601690
fullBuckets += stat.fullBuckets();
602691
completelyFreeBuckets += stat.completelyFreeBuckets();
603692
}
604-
grandTotal.setTo(totalfree, totalused, 1, fullBuckets, completelyFreeBuckets, totalWaisted);
693+
grandTotal.setTo(totalfree, totalused, 1, fullBuckets, completelyFreeBuckets, totalWasted,
694+
totalFragmented);
605695
return stats;
606696
}
607697

@@ -612,13 +702,6 @@ IndexStatistics[] getIndexStatistics() {
612702
return stats;
613703
}
614704

615-
public long freeBlock(long freeList[]) {
616-
long sz = 0;
617-
for (int i = 0; i < freeList.length; ++i)
618-
sz += freeBlock(freeList[i]);
619-
return sz;
620-
}
621-
622705
public int getBucketIndex(long offset) {
623706
return (int) (offset / bucketCapacity);
624707
}

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,7 @@ void blockEvicted(BlockCacheKey cacheKey, BucketEntry bucketEntry, boolean decre
561561
* {@link BucketEntry#refCnt} becoming 0.
562562
*/
563563
void freeBucketEntry(BucketEntry bucketEntry) {
564-
bucketAllocator.freeBlock(bucketEntry.offset());
564+
bucketAllocator.freeBlock(bucketEntry.offset(), bucketEntry.getLength());
565565
realCacheSize.add(-1 * bucketEntry.getLength());
566566
}
567567

@@ -1053,8 +1053,9 @@ void doDrain(final List<RAMQueueEntry> entries) throws InterruptedException {
10531053
checkIOErrorIsTolerated();
10541054
// Since we failed sync, free the blocks in bucket allocator
10551055
for (int i = 0; i < entries.size(); ++i) {
1056-
if (bucketEntries[i] != null) {
1057-
bucketAllocator.freeBlock(bucketEntries[i].offset());
1056+
BucketEntry bucketEntry = bucketEntries[i];
1057+
if (bucketEntry != null) {
1058+
bucketAllocator.freeBlock(bucketEntry.offset(), bucketEntry.getLength());
10581059
bucketEntries[i] = null;
10591060
}
10601061
}
@@ -1467,7 +1468,7 @@ public BucketEntry writeToCache(final IOEngine ioEngine, final BucketAllocator a
14671468
succ = true;
14681469
} finally {
14691470
if (!succ) {
1470-
alloc.freeBlock(offset);
1471+
alloc.freeBlock(offset, len);
14711472
}
14721473
}
14731474
realCacheSize.add(len);

hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
import org.apache.hadoop.hbase.nio.ByteBuff;
5858
import org.apache.hadoop.hbase.testclassification.IOTests;
5959
import org.apache.hadoop.hbase.testclassification.LargeTests;
60+
import org.apache.hadoop.hbase.util.Pair;
6061
import org.junit.After;
6162
import org.junit.Assert;
6263
import org.junit.Before;
@@ -161,15 +162,15 @@ public void testBucketAllocator() throws BucketAllocatorException {
161162
final List<Integer> BLOCKSIZES = Arrays.asList(4 * 1024, 8 * 1024, 64 * 1024, 96 * 1024);
162163

163164
boolean full = false;
164-
ArrayList<Long> allocations = new ArrayList<>();
165+
ArrayList<Pair<Long, Integer>> allocations = new ArrayList<>();
165166
// Fill the allocated extents by choosing a random blocksize. Continues selecting blocks until
166167
// the cache is completely filled.
167168
List<Integer> tmp = new ArrayList<>(BLOCKSIZES);
168169
while (!full) {
169170
Integer blockSize = null;
170171
try {
171172
blockSize = randFrom(tmp);
172-
allocations.add(mAllocator.allocateBlock(blockSize));
173+
allocations.add(new Pair<>(mAllocator.allocateBlock(blockSize), blockSize));
173174
} catch (CacheFullException cfe) {
174175
tmp.remove(blockSize);
175176
if (tmp.isEmpty()) full = true;
@@ -180,12 +181,19 @@ public void testBucketAllocator() throws BucketAllocatorException {
180181
BucketSizeInfo bucketSizeInfo = mAllocator.roundUpToBucketSizeInfo(blockSize);
181182
IndexStatistics indexStatistics = bucketSizeInfo.statistics();
182183
assertEquals("unexpected freeCount for " + bucketSizeInfo, 0, indexStatistics.freeCount());
184+
185+
// we know the block sizes above are multiples of 1024, but default bucket sizes give an
186+
// additional 1024 on top of that so this counts towards fragmentation in our test
187+
// real life may have worse fragmentation because blocks may not be perfectly sized to block
188+
// size, given encoding/compression and large rows
189+
assertEquals(1024 * indexStatistics.totalCount(), indexStatistics.fragmentationBytes());
183190
}
184191

185192
mAllocator.logDebugStatistics();
186193

187-
for (long offset : allocations) {
188-
assertEquals(mAllocator.sizeOfAllocation(offset), mAllocator.freeBlock(offset));
194+
for (Pair<Long, Integer> allocation : allocations) {
195+
assertEquals(mAllocator.sizeOfAllocation(allocation.getFirst()),
196+
mAllocator.freeBlock(allocation.getFirst(), allocation.getSecond()));
189197
}
190198
assertEquals(0, mAllocator.getUsedSize());
191199
}

0 commit comments

Comments
 (0)