
Commit 4e60a26

WIP HBASE-28065 Corrupt HFile data is mishandled in several cases
* when no block size is provided and there's not a preread headerBuf, treat the value with caution.
* verify HBase checksums before making use of the block header.
* inline verifyOnDiskSizeMatchesHeader to keep throw/return logic in the method body.
* whenever a read is determined to be corrupt and fallback to HDFS checksum is necessary, also invalidate the cached value of headerBuf.
1 parent c1e5a55 commit 4e60a26
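
The first two bullets of the commit message boil down to one rule: never trust an on-disk size that has not been checksum-verified. The patch encodes that rule in two checkOnDiskSizeWithHeader overloads (see the HFileBlock.java diff below): the long overload narrows to int with the same check Math.toIntExact(long) performs, and the int overload range-checks the result. A minimal self-contained sketch of the combined logic, with simplified names (plausibleOnDiskSizeWithHeader is illustrative, not part of the patch):

    // Sketch: is this on-disk block size plausible? Mirrors the patch's
    // two-step validation without the HFileBlock plumbing or trace logging.
    static boolean plausibleOnDiskSizeWithHeader(long value, int hdrSize) {
      int intValue = (int) value; // same narrowing check as Math.toIntExact(long)
      if (intValue != value) {
        return false; // exceeds int limits; cannot be a block size
      }
      if (intValue == -1) {
        return true; // sentinel: the caller does not know the size yet
      }
      // a real size is non-negative and leaves room for the block header
      return intValue >= 0 && intValue - hdrSize >= 0;
    }

A value that fails this check is handled differently depending on its origin: a bad caller-provided size is a programming error and throws, while a bad size read from disk is treated as possible corruption and triggers the HDFS-checksum fallback.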

2 files changed (+98, -32 lines changed)


hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java

Lines changed: 97 additions & 31 deletions
@@ -390,12 +390,12 @@ static HFileBlock createFromBuff(ByteBuff buf, boolean usesHBaseChecksum, final
 
   /**
    * Parse total on disk size including header and checksum.
-   * @param headerBuf      Header ByteBuffer. Presumed exact size of header.
-   * @param verifyChecksum true if checksum verification is in use.
+   * @param headerBuf       Header ByteBuffer. Presumed exact size of header.
+   * @param checksumSupport true if checksum verification is in use.
    * @return Size of the block with header included.
    */
-  private static int getOnDiskSizeWithHeader(final ByteBuff headerBuf, boolean verifyChecksum) {
-    return headerBuf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX) + headerSize(verifyChecksum);
+  private static int getOnDiskSizeWithHeader(final ByteBuff headerBuf, boolean checksumSupport) {
+    return headerBuf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX) + headerSize(checksumSupport);
   }
 
   /**
@@ -1568,33 +1568,47 @@ public HFileBlock readBlockData(long offset, long onDiskSizeWithHeaderL, boolean
   }
 
   /**
-   * Returns Check <code>onDiskSizeWithHeaderL</code> size is healthy and then return it as an int
+   * Check that {@code value} seems reasonable, within a large margin of error.
+   * @return {@code true} if the value is safe to proceed, {@code false} otherwise.
    */
-  private static int checkAndGetSizeAsInt(final long onDiskSizeWithHeaderL, final int hdrSize)
-    throws IOException {
-    if (
-      (onDiskSizeWithHeaderL < hdrSize && onDiskSizeWithHeaderL != -1)
-        || onDiskSizeWithHeaderL >= Integer.MAX_VALUE
-    ) {
-      throw new IOException(
-        "Invalid onDisksize=" + onDiskSizeWithHeaderL + ": expected to be at least " + hdrSize
-          + " and at most " + Integer.MAX_VALUE + ", or -1");
+  private boolean checkOnDiskSizeWithHeader(int value) {
+    if (value == -1) {
+      // a magic value we expect to see.
+      return true;
+    }
+    if (value < 0) {
+      if (LOG.isTraceEnabled()) {
+        LOG.trace(
+          "onDiskSizeWithHeader={}; value represents a size, so it should never be negative.",
+          value);
+      }
+      return false;
+    }
+    if (value - hdrSize < 0) {
+      if (LOG.isTraceEnabled()) {
+        LOG.trace(
+          "onDiskSizeWithHeader={}, hdrSize={}; don't accept a value that is negative after the header size is excluded.",
+          value, hdrSize);
+      }
+      return false;
     }
-    return (int) onDiskSizeWithHeaderL;
+    return true;
   }
 
   /**
-   * Verify the passed in onDiskSizeWithHeader aligns with what is in the header else something is
-   * not right.
+   * Check that {@code value} seems reasonable, within a large margin of error.
+   * @return {@code true} if the value is safe to proceed, {@code false} otherwise.
    */
-  private void verifyOnDiskSizeMatchesHeader(final int passedIn, final ByteBuff headerBuf,
-    final long offset, boolean verifyChecksum) throws IOException {
-    // Assert size provided aligns with what is in the header
-    int fromHeader = getOnDiskSizeWithHeader(headerBuf, verifyChecksum);
-    if (passedIn != fromHeader) {
-      throw new IOException("Passed in onDiskSizeWithHeader=" + passedIn + " != " + fromHeader
-        + ", offset=" + offset + ", fileContext=" + this.fileContext);
+  private boolean checkOnDiskSizeWithHeader(long value) {
+    // same validation logic as is used by Math.toIntExact(long)
+    int intValue = (int) value;
+    if (intValue != value) {
+      if (LOG.isTraceEnabled()) {
+        LOG.trace("onDiskSizeWithHeader={}; value exceeds int size limits.", value);
+      }
+      return false;
     }
+    return checkOnDiskSizeWithHeader(intValue);
   }
 
   /**
@@ -1625,6 +1639,13 @@ private void cacheNextBlockHeader(final long offset, ByteBuff onDiskBlock,
     this.prefetchedHeader.set(ph);
   }
 
+  /**
+   * Clear the cached value when its integrity is suspect.
+   */
+  private void invalidateNextBlockHeader() {
+    prefetchedHeader.set(new PrefetchedHeader());
+  }
+
   private int getNextBlockOnDiskSize(boolean readNextHeader, ByteBuff onDiskBlock,
     int onDiskSizeWithHeader) {
     int nextBlockOnDiskSize = -1;
@@ -1667,7 +1688,11 @@ protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
     final AttributesBuilder attributesBuilder = Attributes.builder();
     Optional.of(Context.current()).map(val -> val.get(CONTEXT_KEY))
       .ifPresent(c -> c.accept(attributesBuilder));
-    int onDiskSizeWithHeader = checkAndGetSizeAsInt(onDiskSizeWithHeaderL, hdrSize);
+    if (!checkOnDiskSizeWithHeader(onDiskSizeWithHeaderL)) {
+      throw new IOException(
+        "Caller provided invalid onDiskSizeWithHeaderL=" + onDiskSizeWithHeaderL);
+    }
+    int onDiskSizeWithHeader = (int) onDiskSizeWithHeaderL;
     // Try and get cached header. Will serve us in rare case where onDiskSizeWithHeaderL is -1
     // and will save us having to seek the stream backwards to reread the header we
     // read the last time through here.
@@ -1682,8 +1707,8 @@ protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
     // file has support for checksums (version 2+).
     boolean checksumSupport = this.fileContext.isUseHBaseChecksum();
     long startTime = EnvironmentEdgeManager.currentTime();
-    if (onDiskSizeWithHeader <= 0) {
-      // We were not passed the block size. Need to get it from the header. If header was
+    if (onDiskSizeWithHeader == -1) {
+      // The caller does not know the block size. Need to get it from the header. If header was
      // not cached (see getCachedHeader above), need to seek to pull it in. This is costly
      // and should happen very rarely. Currently happens on open of a hfile reader where we
      // read the trailer blocks to pull in the indices. Otherwise, we are reading block sizes
@@ -1697,8 +1722,27 @@ protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
         headerBuf = HEAP.allocate(hdrSize);
         readAtOffset(is, headerBuf, hdrSize, false, offset, pread);
         headerBuf.rewind();
+
+        // The caller didn't provide an anticipated block size and headerBuf was null, so this is
+        // probably the first time this HDFS block has been read. The value we just read has not
+        // had HBase checksum validation; assume it was not protected by HDFS checksum either.
+        // Sanity check the value. If it doesn't seem right, either trigger fall-back to HDFS
+        // checksum or abort the read.
+        //
+        // TODO: Should we also check the value vs. some multiple of fileContext.getBlocksize() ?
+        onDiskSizeWithHeader = getOnDiskSizeWithHeader(headerBuf, checksumSupport);
+        if (!checkOnDiskSizeWithHeader(onDiskSizeWithHeader)) {
+          if (verifyChecksum) {
+            invalidateNextBlockHeader();
+            span.addEvent("Falling back to HDFS checksumming.", attributesBuilder.build());
+            return null;
+          } else {
+            throw new IOException("Read invalid onDiskSizeWithHeader=" + onDiskSizeWithHeader);
+          }
+        }
+      } else {
+        onDiskSizeWithHeader = getOnDiskSizeWithHeader(headerBuf, checksumSupport);
       }
-      onDiskSizeWithHeader = getOnDiskSizeWithHeader(headerBuf, checksumSupport);
     }
     int preReadHeaderSize = headerBuf == null ? 0 : hdrSize;
     // Allocate enough space to fit the next block's header too; saves a seek next time through.
@@ -1721,14 +1765,36 @@ protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
     if (headerBuf == null) {
       headerBuf = onDiskBlock.duplicate().position(0).limit(hdrSize);
     }
-    // Do a few checks before we go instantiate HFileBlock.
-    assert onDiskSizeWithHeader > this.hdrSize;
-    verifyOnDiskSizeMatchesHeader(onDiskSizeWithHeader, headerBuf, offset, checksumSupport);
+
     ByteBuff curBlock = onDiskBlock.duplicate().position(0).limit(onDiskSizeWithHeader);
     // Verify checksum of the data before using it for building HFileBlock.
     if (verifyChecksum && !validateChecksum(offset, curBlock, hdrSize)) {
+      invalidateNextBlockHeader();
+      span.addEvent("Falling back to HDFS checksumming.", attributesBuilder.build());
       return null;
     }
+
+    // TODO: is this check necessary or can we proceed with a provided value regardless of
+    // what is in the header?
+    int fromHeader = getOnDiskSizeWithHeader(headerBuf, checksumSupport);
+    if (onDiskSizeWithHeader != fromHeader) {
+      if (LOG.isTraceEnabled()) {
+        LOG.trace("Passed in onDiskSizeWithHeader={} != {}, offset={}, fileContext={}",
+          onDiskSizeWithHeader, fromHeader, offset, this.fileContext);
+      }
+      if (checksumSupport && verifyChecksum) {
+        // This file supports HBase checksums and verification of those checksums was
+        // requested. The block size provided by the caller (presumably from the block index)
+        // does not match the block size written to the block header. Treat this as an
+        // HBase-checksum failure.
+        span.addEvent("Falling back to HDFS checksumming.", attributesBuilder.build());
+        invalidateNextBlockHeader();
+        return null;
+      }
+      throw new IOException("Passed in onDiskSizeWithHeader=" + onDiskSizeWithHeader + " != "
+        + fromHeader + ", offset=" + offset + ", fileContext=" + this.fileContext);
+    }
+
     // remove checksum from buffer now that it's verified
     int sizeWithoutChecksum = curBlock.getInt(Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
     curBlock.limit(sizeWithoutChecksum);
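
The two new "Falling back to HDFS checksumming." paths above both return null rather than throwing. Returning null is the signal readBlockData uses to retry the same read with HBase checksum verification turned off, relying on HDFS checksums instead; invalidateNextBlockHeader() ensures the retry does not reuse a header cached from the corrupt read. A simplified, hypothetical sketch of that caller-side retry (the real readBlockDataInternal takes additional arguments; readWithFallback and the two stream parameters are illustrative only):

    // Hypothetical sketch: the first attempt verifies HBase checksums; if it
    // reports corruption by returning null, reread through a stream that is
    // protected by HDFS checksums, with HBase verification disabled.
    HFileBlock readWithFallback(FSDataInputStream hbaseChecksumStream,
      FSDataInputStream hdfsChecksumStream, long offset, long sizeL, boolean pread)
      throws IOException {
      HFileBlock blk = readBlockDataInternal(hbaseChecksumStream, offset, sizeL, pread,
        /* verifyChecksum */ true);
      if (blk == null) {
        // The first attempt saw a checksum failure or a corrupt-looking header;
        // the cached next-block header was invalidated, so this reread is clean.
        blk = readBlockDataInternal(hdfsChecksumStream, offset, sizeL, pread,
          /* verifyChecksum */ false);
      }
      return blk;
    }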

hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java

Lines changed: 1 addition & 1 deletion
@@ -61,7 +61,7 @@ public class TestChecksum {
   public static final HBaseClassTestRule CLASS_RULE =
     HBaseClassTestRule.forClass(TestChecksum.class);
 
-  private static final Logger LOG = LoggerFactory.getLogger(TestHFileBlock.class);
+  private static final Logger LOG = LoggerFactory.getLogger(TestChecksum.class);
 
   static final Compression.Algorithm[] COMPRESSION_ALGORITHMS = { NONE, GZ };