Skip to content

Commit 7b0ac64

Browse files
authored
HBASE-27464 In-memory compaction 'COMPACT' may cause data corruption when adding cells larger than maxAlloc (default 256k) size (#4881)
Co-authored-by: comnetwork <comnetwork@163.com> Signed-off-by: Duo Zhang <zhangduo@apache.org>
1 parent a2075f5 commit 7b0ac64

File tree

2 files changed

+107
-2
lines changed

2 files changed

+107
-2
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellChunkImmutableSegment.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,11 @@ private void initializeCellSet(int numOfCells, MemStoreSegmentsIterator iterator
159159
offsetInCurentChunk = ChunkCreator.SIZEOF_CHUNK_HEADER;
160160
}
161161
if (action == MemStoreCompactionStrategy.Action.COMPACT && !alreadyCopied) {
162-
// for compaction copy cell to the new segment (MSLAB copy)
163-
c = maybeCloneWithAllocator(c, false);
162+
163+
// For compaction, copy the cell to the new segment (MSLAB copy). Here we set
164+
// forceCloneOfBigCell to true, because the chunk in which the cell is allocated may be freed
165+
// after the compaction is completed; see HBASE-27464.
166+
c = maybeCloneWithAllocator(c, true);
164167
}
165168
offsetInCurentChunk = // add the Cell reference to the index chunk
166169
createCellReference((ByteBufferKeyValue) c, chunks[currentChunkIdx].getData(),

hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStore.java

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
import org.apache.hadoop.hbase.CellComparator;
7171
import org.apache.hadoop.hbase.CellComparatorImpl;
7272
import org.apache.hadoop.hbase.CellUtil;
73+
import org.apache.hadoop.hbase.ExtendedCell;
7374
import org.apache.hadoop.hbase.HBaseClassTestRule;
7475
import org.apache.hadoop.hbase.HBaseConfiguration;
7576
import org.apache.hadoop.hbase.HBaseTestingUtil;
@@ -99,6 +100,7 @@
99100
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
100101
import org.apache.hadoop.hbase.nio.RefCnt;
101102
import org.apache.hadoop.hbase.quotas.RegionSizeStoreImpl;
103+
import org.apache.hadoop.hbase.regionserver.ChunkCreator.ChunkType;
102104
import org.apache.hadoop.hbase.regionserver.MemStoreCompactionStrategy.Action;
103105
import org.apache.hadoop.hbase.regionserver.compactions.CompactionConfiguration;
104106
import org.apache.hadoop.hbase.regionserver.compactions.DefaultCompactor;
@@ -1882,6 +1884,106 @@ public void testCompactingMemStoreCellExceedInmemoryFlushSize() throws Exception
18821884
myCompactingMemStore.inMemoryCompactionEndCyclicBarrier.await();
18831885
}
18841886

1887+
/**
1888+
* This test is for HBASE-27464. Before this JIRA, initializing a {@link CellChunkImmutableSegment}
1889+
* for the 'COMPACT' action did not force a copy to the current MSLAB. A cell bigger than
1890+
* {@link MemStoreLABImpl#maxAlloc} stayed in the previous chunk, which is recycled after the
1891+
* segment is replaced, so we could read wrong data once that chunk was reused by others.
1892+
*/
1893+
@Test
1894+
public void testForceCloneOfBigCellForCellChunkImmutableSegment() throws Exception {
1895+
Configuration conf = HBaseConfiguration.create();
1896+
int maxAllocByteSize = conf.getInt(MemStoreLAB.MAX_ALLOC_KEY, MemStoreLAB.MAX_ALLOC_DEFAULT);
1897+
1898+
// Construct a big cell, which is larger than {@link MemStoreLABImpl#maxAlloc}.
1899+
byte[] cellValue = new byte[maxAllocByteSize + 1];
1900+
final long timestamp = EnvironmentEdgeManager.currentTime();
1901+
final long seqId = 100;
1902+
final byte[] rowKey1 = Bytes.toBytes("rowKey1");
1903+
final Cell originalCell1 = createCell(rowKey1, qf1, timestamp, seqId, cellValue);
1904+
final byte[] rowKey2 = Bytes.toBytes("rowKey2");
1905+
final Cell originalCell2 = createCell(rowKey2, qf1, timestamp, seqId, cellValue);
1906+
TreeSet<byte[]> quals = new TreeSet<>(Bytes.BYTES_COMPARATOR);
1907+
quals.add(qf1);
1908+
1909+
int cellByteSize = MutableSegment.getCellLength(originalCell1);
1910+
int inMemoryFlushByteSize = cellByteSize - 1;
1911+
1912+
// Set CompactingMemStore.inmemoryFlushSize to inMemoryFlushByteSize.
1913+
conf.set(HStore.MEMSTORE_CLASS_NAME, MyCompactingMemStore6.class.getName());
1914+
conf.setDouble(CompactingMemStore.IN_MEMORY_FLUSH_THRESHOLD_FACTOR_KEY, 0.005);
1915+
conf.set(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, String.valueOf(inMemoryFlushByteSize * 200));
1916+
conf.setBoolean(WALFactory.WAL_ENABLED, false);
1917+
1918+
// Use {@link MemoryCompactionPolicy#EAGER} so that we always compact.
1919+
init(name.getMethodName(), conf, ColumnFamilyDescriptorBuilder.newBuilder(family)
1920+
.setInMemoryCompaction(MemoryCompactionPolicy.EAGER).build());
1921+
1922+
MyCompactingMemStore6 myCompactingMemStore = ((MyCompactingMemStore6) store.memstore);
1923+
assertTrue((int) (myCompactingMemStore.getInmemoryFlushSize()) == inMemoryFlushByteSize);
1924+
1925+
// Data chunk Pool is disabled.
1926+
assertTrue(ChunkCreator.getInstance().getMaxCount(ChunkType.DATA_CHUNK) == 0);
1927+
1928+
MemStoreSizing memStoreSizing = new NonThreadSafeMemStoreSizing();
1929+
1930+
// First compact
1931+
store.add(originalCell1, memStoreSizing);
1932+
// Wait for the first in-memory compaction to finish.
1933+
myCompactingMemStore.inMemoryCompactionEndCyclicBarrier.await();
1934+
1935+
StoreScanner storeScanner =
1936+
(StoreScanner) store.getScanner(new Scan(new Get(rowKey1)), quals, seqId + 1);
1937+
SegmentScanner segmentScanner = getTypeKeyValueScanner(storeScanner, SegmentScanner.class);
1938+
Cell resultCell1 = segmentScanner.next();
1939+
assertTrue(CellUtil.equals(resultCell1, originalCell1));
1940+
int cell1ChunkId = ((ExtendedCell) resultCell1).getChunkId();
1941+
assertTrue(cell1ChunkId != ExtendedCell.CELL_NOT_BASED_ON_CHUNK);
1942+
assertNull(segmentScanner.next());
1943+
segmentScanner.close();
1944+
storeScanner.close();
1945+
Segment segment = segmentScanner.segment;
1946+
assertTrue(segment instanceof CellChunkImmutableSegment);
1947+
MemStoreLABImpl memStoreLAB1 = (MemStoreLABImpl) (segmentScanner.segment.getMemStoreLAB());
1948+
assertTrue(!memStoreLAB1.isClosed());
1949+
assertTrue(!memStoreLAB1.chunks.isEmpty());
1950+
assertTrue(!memStoreLAB1.isReclaimed());
1951+
1952+
// Second compact
1953+
store.add(originalCell2, memStoreSizing);
1954+
// Wait for the second in-memory compaction to finish.
1955+
myCompactingMemStore.inMemoryCompactionEndCyclicBarrier.await();
1956+
1957+
// Before HBASE-27464, this may throw java.lang.IllegalArgumentException: In CellChunkMap, cell
1958+
// must be associated with chunk.. We were looking for a cell at index 0.
1959+
// The cause of this exception is that the data chunk pool is disabled: when the data chunks
1960+
// are recycled after the second in-memory compaction finishes, the
1961+
// {@link ChunkCreator#putbackChunks} method does not put the chunks back into the data chunk
1962+
// pool, it just removes them from {@link ChunkCreator#chunkIdMap}, so in
1963+
// {@link CellChunkMap#getCell} we could not get the data chunk by chunkId.
1964+
storeScanner = (StoreScanner) store.getScanner(new Scan(new Get(rowKey1)), quals, seqId + 1);
1965+
segmentScanner = getTypeKeyValueScanner(storeScanner, SegmentScanner.class);
1966+
Cell newResultCell1 = segmentScanner.next();
1967+
assertTrue(newResultCell1 != resultCell1);
1968+
assertTrue(CellUtil.equals(newResultCell1, originalCell1));
1969+
1970+
Cell resultCell2 = segmentScanner.next();
1971+
assertTrue(CellUtil.equals(resultCell2, originalCell2));
1972+
assertNull(segmentScanner.next());
1973+
segmentScanner.close();
1974+
storeScanner.close();
1975+
1976+
segment = segmentScanner.segment;
1977+
assertTrue(segment instanceof CellChunkImmutableSegment);
1978+
MemStoreLABImpl memStoreLAB2 = (MemStoreLABImpl) (segmentScanner.segment.getMemStoreLAB());
1979+
assertTrue(!memStoreLAB2.isClosed());
1980+
assertTrue(!memStoreLAB2.chunks.isEmpty());
1981+
assertTrue(!memStoreLAB2.isReclaimed());
1982+
assertTrue(memStoreLAB1.isClosed());
1983+
assertTrue(memStoreLAB1.chunks.isEmpty());
1984+
assertTrue(memStoreLAB1.isReclaimed());
1985+
}
1986+
18851987
// This test is also for HBASE-26210: test writing a large cell and a small cell concurrently
18861988
// when InmemoryFlushSize is smaller than, equal to, and larger than the cell size.
18871989
@Test

0 commit comments

Comments
 (0)