Commit c039e44

HBASE-29398: Server side scan metrics for bytes read from FS vs Block cache vs memstore (apache#7136)
Signed-off-by: Viraj Jasani <vjasani@apache.org>
Signed-off-by: Hari Krishna Dara <haridara@gmail.com>
1 parent 3500d6f commit c039e44

File tree

19 files changed: +1867, -68 lines

hbase-client/src/main/java/org/apache/hadoop/hbase/client/metrics/ServerSideScanMetrics.java

Lines changed: 19 additions & 0 deletions

```diff
@@ -52,6 +52,10 @@ public void moveToNextRegion() {
     currentRegionScanMetricsData.createCounter(COUNT_OF_ROWS_FILTERED_KEY_METRIC_NAME);
     currentRegionScanMetricsData.createCounter(BLOCK_BYTES_SCANNED_KEY_METRIC_NAME);
     currentRegionScanMetricsData.createCounter(FS_READ_TIME_METRIC_NAME);
+    currentRegionScanMetricsData.createCounter(BYTES_READ_FROM_FS_METRIC_NAME);
+    currentRegionScanMetricsData.createCounter(BYTES_READ_FROM_BLOCK_CACHE_METRIC_NAME);
+    currentRegionScanMetricsData.createCounter(BYTES_READ_FROM_MEMSTORE_METRIC_NAME);
+    currentRegionScanMetricsData.createCounter(BLOCK_READ_OPS_COUNT_METRIC_NAME);
   }
 
   /**
@@ -68,6 +72,11 @@ protected AtomicLong createCounter(String counterName) {
   public static final String BLOCK_BYTES_SCANNED_KEY_METRIC_NAME = "BLOCK_BYTES_SCANNED";
 
   public static final String FS_READ_TIME_METRIC_NAME = "FS_READ_TIME";
+  public static final String BYTES_READ_FROM_FS_METRIC_NAME = "BYTES_READ_FROM_FS";
+  public static final String BYTES_READ_FROM_BLOCK_CACHE_METRIC_NAME =
+    "BYTES_READ_FROM_BLOCK_CACHE";
+  public static final String BYTES_READ_FROM_MEMSTORE_METRIC_NAME = "BYTES_READ_FROM_MEMSTORE";
+  public static final String BLOCK_READ_OPS_COUNT_METRIC_NAME = "BLOCK_READ_OPS_COUNT";
 
   /**
    * number of rows filtered during scan RPC
@@ -86,6 +95,16 @@ protected AtomicLong createCounter(String counterName) {
 
   public final AtomicLong fsReadTime = createCounter(FS_READ_TIME_METRIC_NAME);
 
+  public final AtomicLong bytesReadFromFs = createCounter(BYTES_READ_FROM_FS_METRIC_NAME);
+
+  public final AtomicLong bytesReadFromBlockCache =
+    createCounter(BYTES_READ_FROM_BLOCK_CACHE_METRIC_NAME);
+
+  public final AtomicLong bytesReadFromMemstore =
+    createCounter(BYTES_READ_FROM_MEMSTORE_METRIC_NAME);
+
+  public final AtomicLong blockReadOpsCount = createCounter(BLOCK_READ_OPS_COUNT_METRIC_NAME);
+
   /**
    * Sets counter with counterName to passed in value, does nothing if counter does not exist. If
    * region level scan metrics are enabled then sets the value of counter for the current region
```
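For orientation, here is a minimal client-side sketch of how the new counters could be read once this change is in place, assuming they surface through the existing `ScanMetrics#getMetricsMap()` map under the names defined above; the table name and connection setup are illustrative only:

```java
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.metrics.ScanMetrics;

public class ScanBytesReadExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Scan scan = new Scan();
    scan.setScanMetricsEnabled(true); // opt in; scan metrics are off by default
    try (Connection connection = ConnectionFactory.createConnection(conf);
        Table table = connection.getTable(TableName.valueOf("test_table"));
        ResultScanner scanner = table.getScanner(scan)) {
      for (Result result : scanner) {
        // drain the scanner so all scan RPCs complete
      }
      ScanMetrics metrics = scanner.getScanMetrics();
      Map<String, Long> metricsMap = metrics.getMetricsMap();
      System.out.println("FS:          " + metricsMap.get("BYTES_READ_FROM_FS"));
      System.out.println("Block cache: " + metricsMap.get("BYTES_READ_FROM_BLOCK_CACHE"));
      System.out.println("Memstore:    " + metricsMap.get("BYTES_READ_FROM_MEMSTORE"));
      System.out.println("Read ops:    " + metricsMap.get("BLOCK_READ_OPS_COUNT"));
    }
  }
}
```

The `moveToNextRegion()` hook above suggests the same counters are also tracked per region when region-level scan metrics are enabled.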

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilter.java

Lines changed: 17 additions & 1 deletion

```diff
@@ -20,6 +20,7 @@
 import java.io.DataInput;
 import java.io.IOException;
 import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.regionserver.BloomType;
 import org.apache.hadoop.hbase.util.BloomFilter;
@@ -120,7 +121,8 @@ private boolean containsInternal(byte[] key, int keyOffset, int keyLength, ByteB
     return result;
   }
 
-  private HFileBlock getBloomBlock(int block) {
+  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST)
+  public HFileBlock getBloomBlock(int block) {
     HFileBlock bloomBlock;
     try {
       // We cache the block and use a positional read.
@@ -218,4 +220,18 @@ public String toString() {
     return sb.toString();
   }
 
+  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST)
+  public HFileBlockIndex.BlockIndexReader getBloomIndex() {
+    return index;
+  }
+
+  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST)
+  public int getHashCount() {
+    return hashCount;
+  }
+
+  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST)
+  public Hash getHash() {
+    return hash;
+  }
 }
```

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java

Lines changed: 9 additions & 1 deletion

```diff
@@ -26,9 +26,11 @@
 import java.nio.ByteBuffer;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
 import org.apache.hadoop.hbase.InnerStoreCellComparator;
 import org.apache.hadoop.hbase.MetaCellComparator;
 import org.apache.hadoop.hbase.io.compress.Compression;
+import org.apache.hadoop.hbase.monitoring.ThreadLocalServerSideScanMetrics;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
@@ -406,6 +408,11 @@ public static FixedFileTrailer readFromStream(FSDataInputStream istream, long fi
     FixedFileTrailer fft = new FixedFileTrailer(majorVersion, minorVersion);
     fft.deserialize(new DataInputStream(new ByteArrayInputStream(buf.array(),
       buf.arrayOffset() + bufferSize - trailerSize, trailerSize)));
+    boolean isScanMetricsEnabled = ThreadLocalServerSideScanMetrics.isScanMetricsEnabled();
+    if (isScanMetricsEnabled) {
+      ThreadLocalServerSideScanMetrics.addBytesReadFromFs(trailerSize);
+      ThreadLocalServerSideScanMetrics.addBlockReadOpsCount(1);
+    }
     return fft;
   }
 
@@ -612,7 +619,8 @@ static CellComparator createComparator(String comparatorClassName) throws IOExce
     }
   }
 
-  CellComparator createComparator() throws IOException {
+  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST)
+  public CellComparator createComparator() throws IOException {
     expectAtLeastMajorVersion(2);
     return createComparator(comparatorClassName);
   }
```
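The `ThreadLocalServerSideScanMetrics` class referenced here is added elsewhere in this commit and does not appear in this excerpt. As a rough, hypothetical sketch of the pattern its call sites imply — per-thread counters that a server-side read path can bump cheaply and drain into the scan metrics at the end of a request — consider the following; all names and structure are illustrative, not the actual implementation:

```java
// Hypothetical sketch only: illustrates the thread-local counter pattern
// implied by the ThreadLocalServerSideScanMetrics call sites above.
public final class ThreadLocalBytesReadCounter {

  // One mutable holder per handler thread; no atomics needed because each
  // thread only ever reads and writes its own copy.
  private static final ThreadLocal<long[]> BYTES_READ_FROM_FS =
    ThreadLocal.withInitial(() -> new long[1]);

  private ThreadLocalBytesReadCounter() {
  }

  public static void add(long bytes) {
    BYTES_READ_FROM_FS.get()[0] += bytes;
  }

  // Drained at the end of the request into the per-scan metrics object.
  public static long getAndReset() {
    long[] holder = BYTES_READ_FROM_FS.get();
    long value = holder[0];
    holder[0] = 0;
    return value;
  }
}
```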

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java

Lines changed: 20 additions & 4 deletions

```diff
@@ -41,6 +41,7 @@
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.hbase.ExtendedCell;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.fs.HFileSystem;
 import org.apache.hadoop.hbase.io.ByteArrayOutputStream;
@@ -56,6 +57,7 @@
 import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
 import org.apache.hadoop.hbase.io.hfile.trace.HFileContextAttributesBuilderConsumer;
 import org.apache.hadoop.hbase.io.util.BlockIOUtils;
+import org.apache.hadoop.hbase.monitoring.ThreadLocalServerSideScanMetrics;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.nio.MultiByteBuff;
 import org.apache.hadoop.hbase.nio.SingleByteBuff;
@@ -405,7 +407,8 @@ private static int getOnDiskSizeWithHeader(final ByteBuff headerBuf, boolean che
    * present) read by peeking into the next block's header; use as a hint when doing a read
    * of the next block when scanning or running over a file.
    */
-  int getNextBlockOnDiskSize() {
+  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST)
+  public int getNextBlockOnDiskSize() {
     return nextBlockOnDiskSize;
   }
 
@@ -626,7 +629,8 @@ public String toString() {
    * Retrieves the decompressed/decrypted view of this block. An encoded block remains in its
    * encoded structure. Internal structures are shared between instances where applicable.
    */
-  HFileBlock unpack(HFileContext fileContext, FSReader reader) throws IOException {
+  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST)
+  public HFileBlock unpack(HFileContext fileContext, FSReader reader) throws IOException {
     if (!fileContext.isCompressedOrEncrypted()) {
       // TODO: cannot use our own fileContext here because HFileBlock(ByteBuffer, boolean),
       // which is used for block serialization to L2 cache, does not preserve encoding and
@@ -1241,7 +1245,8 @@ interface BlockWritable {
    * Iterator for reading {@link HFileBlock}s in load-on-open-section, such as root data index
    * block, meta index block, file info block etc.
    */
-  interface BlockIterator {
+  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST)
+  public interface BlockIterator {
     /**
      * Get the next block, or null if there are no more blocks to iterate.
      */
@@ -1265,7 +1270,8 @@ interface BlockIterator {
   }
 
   /** An HFile block reader with iteration ability. */
-  interface FSReader {
+  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST)
+  public interface FSReader {
     /**
      * Reads the block at the given offset in the file with the given on-disk size and uncompressed
      * size.
@@ -1738,6 +1744,7 @@ protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
       // checksums. Can change with circumstances. The below flag is whether the
       // file has support for checksums (version 2+).
       boolean checksumSupport = this.fileContext.isUseHBaseChecksum();
+      boolean isScanMetricsEnabled = ThreadLocalServerSideScanMetrics.isScanMetricsEnabled();
       long startTime = EnvironmentEdgeManager.currentTime();
       if (onDiskSizeWithHeader == -1) {
         // The caller does not know the block size. Need to get it from the header. If header was
@@ -1754,6 +1761,9 @@ protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
           headerBuf = HEAP.allocate(hdrSize);
           readAtOffset(is, headerBuf, hdrSize, false, offset, pread);
           headerBuf.rewind();
+          if (isScanMetricsEnabled) {
+            ThreadLocalServerSideScanMetrics.addBytesReadFromFs(hdrSize);
+          }
         }
         onDiskSizeWithHeader = getOnDiskSizeWithHeader(headerBuf, checksumSupport);
       }
@@ -1801,6 +1811,12 @@ protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
       boolean readNextHeader = readAtOffset(is, onDiskBlock,
         onDiskSizeWithHeader - preReadHeaderSize, true, offset + preReadHeaderSize, pread);
       onDiskBlock.rewind(); // in case of moving position when copying a cached header
+      if (isScanMetricsEnabled) {
+        long bytesRead =
+          (onDiskSizeWithHeader - preReadHeaderSize) + (readNextHeader ? hdrSize : 0);
+        ThreadLocalServerSideScanMetrics.addBytesReadFromFs(bytesRead);
+        ThreadLocalServerSideScanMetrics.addBlockReadOpsCount(1);
+      }
 
       // the call to validateChecksum for this block excludes the next block header over-read, so
       // no reason to delay extracting this value.
```
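A small worked example of the byte accounting added to `readBlockDataInternal` above; the sizes are illustrative (33 bytes is the HFile v2+ block header size with HBase checksums enabled):

```java
// Illustrative arithmetic mirroring the accounting in readBlockDataInternal.
public class FsReadAccountingExample {
  public static void main(String[] args) {
    long onDiskSizeWithHeader = 65_536 + 33; // a 64 KiB block plus its 33-byte header
    int preReadHeaderSize = 33;              // header already fetched while peeking ahead
    int hdrSize = 33;
    boolean readNextHeader = true;           // the read also over-fetched the next header

    long bytesRead =
      (onDiskSizeWithHeader - preReadHeaderSize) + (readNextHeader ? hdrSize : 0);
    System.out.println(bytesRead); // 65569 bytes charged to BYTES_READ_FROM_FS
    // ...and BLOCK_READ_OPS_COUNT is incremented once for the positional read.
  }
}
```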

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java

Lines changed: 5 additions & 2 deletions

```diff
@@ -35,6 +35,7 @@
 import org.apache.hadoop.hbase.CellComparator;
 import org.apache.hadoop.hbase.CellUtil;
 import org.apache.hadoop.hbase.ExtendedCell;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.KeyValue.KeyOnlyKeyValue;
 import org.apache.hadoop.hbase.PrivateCellUtil;
@@ -564,7 +565,8 @@ public String toString() {
    * array of offsets to the entries within the block. This allows us to do binary search for the
    * entry corresponding to the given key without having to deserialize the block.
    */
-  static abstract class BlockIndexReader implements HeapSize {
+  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST)
+  public static abstract class BlockIndexReader implements HeapSize {
 
     protected long[] blockOffsets;
     protected int[] blockDataSizes;
@@ -812,7 +814,8 @@ static int binarySearchNonRootIndex(Cell key, ByteBuff nonRootIndex,
    * @return the index position where the given key was found, otherwise return -1 in the case the
    *         given key is before the first key.
    */
-  static int locateNonRootIndexEntry(ByteBuff nonRootBlock, Cell key, CellComparator comparator) {
+  public static int locateNonRootIndexEntry(ByteBuff nonRootBlock, Cell key,
+    CellComparator comparator) {
     int entryIndex = binarySearchNonRootIndex(key, nonRootBlock, comparator);
 
     if (entryIndex != -1) {
```

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java

Lines changed: 76 additions & 54 deletions

```diff
@@ -35,6 +35,7 @@
 import org.apache.hadoop.hbase.CellComparator;
 import org.apache.hadoop.hbase.CellUtil;
 import org.apache.hadoop.hbase.ExtendedCell;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.PrivateCellUtil;
@@ -46,6 +47,7 @@
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
 import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
+import org.apache.hadoop.hbase.monitoring.ThreadLocalServerSideScanMetrics;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
 import org.apache.hadoop.hbase.util.ByteBufferUtils;
@@ -1097,71 +1099,91 @@ public void setConf(Configuration conf) {
    * Retrieve block from cache. Validates the retrieved block's type vs {@code expectedBlockType}
    * and its encoding vs. {@code expectedDataBlockEncoding}. Unpacks the block as necessary.
    */
-  private HFileBlock getCachedBlock(BlockCacheKey cacheKey, boolean cacheBlock, boolean useLock,
+  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.UNITTEST)
+  public HFileBlock getCachedBlock(BlockCacheKey cacheKey, boolean cacheBlock, boolean useLock,
     boolean updateCacheMetrics, BlockType expectedBlockType,
     DataBlockEncoding expectedDataBlockEncoding) throws IOException {
     // Check cache for block. If found return.
     BlockCache cache = cacheConf.getBlockCache().orElse(null);
+    long cachedBlockBytesRead = 0;
     if (cache != null) {
-      HFileBlock cachedBlock = (HFileBlock) cache.getBlock(cacheKey, cacheBlock, useLock,
-        updateCacheMetrics, expectedBlockType);
-      if (cachedBlock != null) {
-        if (cacheConf.shouldCacheCompressed(cachedBlock.getBlockType().getCategory())) {
-          HFileBlock compressedBlock = cachedBlock;
-          cachedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
-          // In case of compressed block after unpacking we can release the compressed block
-          if (compressedBlock != cachedBlock) {
-            compressedBlock.release();
+      HFileBlock cachedBlock = null;
+      boolean isScanMetricsEnabled = ThreadLocalServerSideScanMetrics.isScanMetricsEnabled();
+      try {
+        cachedBlock = (HFileBlock) cache.getBlock(cacheKey, cacheBlock, useLock, updateCacheMetrics,
+          expectedBlockType);
+        if (cachedBlock != null) {
+          if (cacheConf.shouldCacheCompressed(cachedBlock.getBlockType().getCategory())) {
+            HFileBlock compressedBlock = cachedBlock;
+            cachedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
+            // In case of compressed block after unpacking we can release the compressed block
+            if (compressedBlock != cachedBlock) {
+              compressedBlock.release();
+            }
+          }
+          try {
+            validateBlockType(cachedBlock, expectedBlockType);
+          } catch (IOException e) {
+            returnAndEvictBlock(cache, cacheKey, cachedBlock);
+            cachedBlock = null;
+            throw e;
           }
-        }
-        try {
-          validateBlockType(cachedBlock, expectedBlockType);
-        } catch (IOException e) {
-          returnAndEvictBlock(cache, cacheKey, cachedBlock);
-          throw e;
-        }
 
-        if (expectedDataBlockEncoding == null) {
-          return cachedBlock;
-        }
-        DataBlockEncoding actualDataBlockEncoding = cachedBlock.getDataBlockEncoding();
-        // Block types other than data blocks always have
-        // DataBlockEncoding.NONE. To avoid false negative cache misses, only
-        // perform this check if cached block is a data block.
-        if (
-          cachedBlock.getBlockType().isData()
-            && !actualDataBlockEncoding.equals(expectedDataBlockEncoding)
-        ) {
-          // This mismatch may happen if a Scanner, which is used for say a
-          // compaction, tries to read an encoded block from the block cache.
-          // The reverse might happen when an EncodedScanner tries to read
-          // un-encoded blocks which were cached earlier.
-          //
-          // Because returning a data block with an implicit BlockType mismatch
-          // will cause the requesting scanner to throw a disk read should be
-          // forced here. This will potentially cause a significant number of
-          // cache misses, so update so we should keep track of this as it might
-          // justify the work on a CompoundScanner.
+          if (expectedDataBlockEncoding == null) {
+            return cachedBlock;
+          }
+          DataBlockEncoding actualDataBlockEncoding = cachedBlock.getDataBlockEncoding();
+          // Block types other than data blocks always have
+          // DataBlockEncoding.NONE. To avoid false negative cache misses, only
+          // perform this check if cached block is a data block.
           if (
-            !expectedDataBlockEncoding.equals(DataBlockEncoding.NONE)
-              && !actualDataBlockEncoding.equals(DataBlockEncoding.NONE)
+            cachedBlock.getBlockType().isData()
+              && !actualDataBlockEncoding.equals(expectedDataBlockEncoding)
           ) {
-            // If the block is encoded but the encoding does not match the
-            // expected encoding it is likely the encoding was changed but the
-            // block was not yet evicted. Evictions on file close happen async
-            // so blocks with the old encoding still linger in cache for some
-            // period of time. This event should be rare as it only happens on
-            // schema definition change.
-            LOG.info(
-              "Evicting cached block with key {} because data block encoding mismatch; "
-                + "expected {}, actual {}, path={}",
-              cacheKey, actualDataBlockEncoding, expectedDataBlockEncoding, path);
-            // This is an error scenario. so here we need to release the block.
-            returnAndEvictBlock(cache, cacheKey, cachedBlock);
+            // This mismatch may happen if a Scanner, which is used for say a
+            // compaction, tries to read an encoded block from the block cache.
+            // The reverse might happen when an EncodedScanner tries to read
+            // un-encoded blocks which were cached earlier.
+            //
+            // Because returning a data block with an implicit BlockType mismatch
+            // will cause the requesting scanner to throw a disk read should be
+            // forced here. This will potentially cause a significant number of
+            // cache misses, so update so we should keep track of this as it might
+            // justify the work on a CompoundScanner.
+            if (
+              !expectedDataBlockEncoding.equals(DataBlockEncoding.NONE)
+                && !actualDataBlockEncoding.equals(DataBlockEncoding.NONE)
+            ) {
+              // If the block is encoded but the encoding does not match the
+              // expected encoding it is likely the encoding was changed but the
+              // block was not yet evicted. Evictions on file close happen async
+              // so blocks with the old encoding still linger in cache for some
+              // period of time. This event should be rare as it only happens on
+              // schema definition change.
+              LOG.info(
+                "Evicting cached block with key {} because data block encoding mismatch; "
+                  + "expected {}, actual {}, path={}",
+                cacheKey, actualDataBlockEncoding, expectedDataBlockEncoding, path);
+              // This is an error scenario. so here we need to release the block.
+              returnAndEvictBlock(cache, cacheKey, cachedBlock);
+            }
+            cachedBlock = null;
+            return null;
           }
-          return null;
+          return cachedBlock;
+        }
+      } finally {
+        // Count bytes read as cached block is being returned
+        if (isScanMetricsEnabled && cachedBlock != null) {
+          cachedBlockBytesRead = cachedBlock.getOnDiskSizeWithHeader();
+          // Account for the header size of the next block if it exists
+          if (cachedBlock.getNextBlockOnDiskSize() > 0) {
+            cachedBlockBytesRead += cachedBlock.headerSize();
+          }
+        }
+        if (cachedBlockBytesRead > 0) {
+          ThreadLocalServerSideScanMetrics.addBytesReadFromBlockCache(cachedBlockBytesRead);
         }
-        return cachedBlock;
       }
     }
     return null;
```
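The restructuring above wraps the whole cache lookup in try/finally so the bytes-read accounting runs on every exit path, while the eviction and encoding-mismatch paths null out `cachedBlock` to avoid charging blocks that are never actually handed back. A self-contained sketch of that shape, with hypothetical names standing in for the HBase types:

```java
// Self-contained sketch (hypothetical names) of the try/finally accounting shape.
public final class CacheHitAccountingSketch {

  static final class Block {
    final long onDiskSizeWithHeader;
    final boolean usable;

    Block(long onDiskSizeWithHeader, boolean usable) {
      this.onDiskSizeWithHeader = onDiskSizeWithHeader;
      this.usable = usable;
    }
  }

  static long bytesReadFromCache = 0;

  static Block getCachedBlock(Block fromCache) {
    Block block = null;
    try {
      block = fromCache;
      if (block != null && !block.usable) {
        // mismatch/eviction path: drop the reference so the finally clause skips it
        block = null;
        return null;
      }
      return block; // hit path: the finally clause charges this block
    } finally {
      if (block != null) {
        bytesReadFromCache += block.onDiskSizeWithHeader;
      }
    }
  }

  public static void main(String[] args) {
    getCachedBlock(new Block(65_569, true));  // counted
    getCachedBlock(new Block(4_096, false));  // evicted path: not counted
    getCachedBlock(null);                     // miss: not counted
    System.out.println(bytesReadFromCache);   // 65569
  }
}
```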
