Skip to content

Commit 70cb9b0

Browse files
authored
HBASE-26567 Remove IndexType from ChunkCreator (#3947)
Signed-off-by: Duo Zhang <zhangduo@apache.org>
1 parent 39a8e3b commit 70cb9b0

File tree

5 files changed

+114
-88
lines changed

5 files changed

+114
-88
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ChunkCreator.java

Lines changed: 44 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import java.util.concurrent.atomic.AtomicInteger;
2929
import java.util.concurrent.atomic.AtomicLong;
3030
import java.util.concurrent.atomic.LongAdder;
31+
3132
import org.apache.hadoop.hbase.regionserver.HeapMemoryManager.HeapMemoryTuneObserver;
3233
import org.apache.hadoop.hbase.util.Bytes;
3334
import org.apache.hadoop.util.StringUtils;
@@ -134,51 +135,36 @@ public static ChunkCreator getInstance() {
134135
return instance;
135136
}
136137

137-
/**
138-
* Creates and inits a chunk. The default implementation for a specific chunk size.
139-
* @return the chunk that was initialized
140-
*/
141-
Chunk getChunk(ChunkType chunkType) {
142-
return getChunk(CompactingMemStore.IndexType.ARRAY_MAP, chunkType);
143-
}
144138

145139
/**
146-
* Creates and inits a chunk. The default implementation.
140+
* Creates and inits a data chunk. The default implementation.
147141
* @return the chunk that was initialized
148142
*/
149143
Chunk getChunk() {
150-
return getChunk(CompactingMemStore.IndexType.ARRAY_MAP, ChunkType.DATA_CHUNK);
144+
return getChunk(ChunkType.DATA_CHUNK);
151145
}
152146

153147
/**
154-
* Creates and inits a chunk. The default implementation for a specific index type.
148+
* Creates and inits a chunk with specific type.
155149
* @return the chunk that was initialized
156150
*/
157-
Chunk getChunk(CompactingMemStore.IndexType chunkIndexType) {
158-
return getChunk(chunkIndexType, ChunkType.DATA_CHUNK);
159-
}
160-
161-
/**
162-
* Creates and inits a chunk with specific index type and type.
163-
* @return the chunk that was initialized
164-
*/
165-
Chunk getChunk(CompactingMemStore.IndexType chunkIndexType, ChunkType chunkType) {
151+
Chunk getChunk(ChunkType chunkType) {
166152
switch (chunkType) {
167153
case INDEX_CHUNK:
168154
if (indexChunksPool == null) {
169155
if (indexChunkSize <= 0) {
170156
throw new IllegalArgumentException(
171157
"chunkType is INDEX_CHUNK but indexChunkSize is:[" + this.indexChunkSize + "]");
172158
}
173-
return getChunk(chunkIndexType, chunkType, indexChunkSize);
159+
return getChunk(chunkType, indexChunkSize);
174160
} else {
175-
return getChunk(chunkIndexType, chunkType, indexChunksPool.getChunkSize());
161+
return getChunk(chunkType, indexChunksPool.getChunkSize());
176162
}
177163
case DATA_CHUNK:
178164
if (dataChunksPool == null) {
179-
return getChunk(chunkIndexType, chunkType, chunkSize);
165+
return getChunk(chunkType, chunkSize);
180166
} else {
181-
return getChunk(chunkIndexType, chunkType, dataChunksPool.getChunkSize());
167+
return getChunk(chunkType, dataChunksPool.getChunkSize());
182168
}
183169
default:
184170
throw new IllegalArgumentException(
@@ -189,10 +175,9 @@ Chunk getChunk(CompactingMemStore.IndexType chunkIndexType, ChunkType chunkType)
189175
/**
190176
* Creates and inits a chunk.
191177
* @return the chunk that was initialized
192-
* @param chunkIndexType whether the requested chunk is going to be used with CellChunkMap index
193178
* @param size the size of the chunk to be allocated, in bytes
194179
*/
195-
Chunk getChunk(CompactingMemStore.IndexType chunkIndexType, ChunkType chunkType, int size) {
180+
Chunk getChunk(ChunkType chunkType, int size) {
196181
Chunk chunk = null;
197182
MemStoreChunkPool pool = null;
198183

@@ -217,9 +202,7 @@ Chunk getChunk(CompactingMemStore.IndexType chunkIndexType, ChunkType chunkType,
217202
}
218203

219204
if (chunk == null) {
220-
// the second parameter explains whether CellChunkMap index is requested,
221-
// in that case, put allocated on demand chunk mapping into chunkIdMap
222-
chunk = createChunk(false, chunkIndexType, chunkType, size);
205+
chunk = createChunk(false, chunkType, size);
223206
}
224207

225208
// now we need to actually do the expensive memory allocation step in case of a new chunk,
@@ -240,22 +223,21 @@ Chunk getJumboChunk(int jumboSize) {
240223
if (allocSize <= this.getChunkSize(ChunkType.DATA_CHUNK)) {
241224
LOG.warn("Jumbo chunk size " + jumboSize + " must be more than regular chunk size "
242225
+ this.getChunkSize(ChunkType.DATA_CHUNK) + ". Converting to regular chunk.");
243-
return getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
226+
return getChunk();
244227
}
245228
// the new chunk is going to hold the jumbo cell data and needs to be referenced by
246-
// a strong map. Therefore the CCM index type
247-
return getChunk(CompactingMemStore.IndexType.CHUNK_MAP, ChunkType.JUMBO_CHUNK, allocSize);
229+
// a strong map.
230+
return getChunk(ChunkType.JUMBO_CHUNK, allocSize);
248231
}
249232

250233
/**
251234
* Creates the chunk either onheap or offheap
252235
* @param pool indicates if the chunks have to be created which will be used by the Pool
253-
* @param chunkIndexType whether the requested chunk is going to be used with CellChunkMap index
236+
* @param chunkType whether the requested chunk is data chunk or index chunk.
254237
* @param size the size of the chunk to be allocated, in bytes
255238
* @return the chunk
256239
*/
257-
private Chunk createChunk(boolean pool, CompactingMemStore.IndexType chunkIndexType,
258-
ChunkType chunkType, int size) {
240+
private Chunk createChunk(boolean pool, ChunkType chunkType, int size) {
259241
Chunk chunk = null;
260242
int id = chunkID.getAndIncrement();
261243
assert id > 0;
@@ -265,22 +247,39 @@ private Chunk createChunk(boolean pool, CompactingMemStore.IndexType chunkIndexT
265247
} else {
266248
chunk = new OnheapChunk(size, id, chunkType, pool);
267249
}
268-
if (pool || (chunkIndexType == CompactingMemStore.IndexType.CHUNK_MAP)) {
269-
// put the pool chunk into the chunkIdMap so it is not GC-ed
270-
this.chunkIdMap.put(chunk.getId(), chunk);
271-
}
250+
251+
/**
252+
* Here we always put the chunk into the {@link ChunkCreator#chunkIdMap} no matter whether the
253+
* chunk is pooled or not. <br/>
254+
* For {@link CompactingMemStore},because the chunk could only be acquired from
255+
* {@link ChunkCreator} through {@link MemStoreLABImpl}, and
256+
* {@link CompactingMemStore#indexType} could only be {@link IndexType.CHUNK_MAP} when using
257+
* {@link MemStoreLABImpl}, so we must put chunk into this {@link ChunkCreator#chunkIdMap} to
258+
* make sure the chunk could be got by chunkId.
259+
* <p>
260+
* For {@link DefaultMemStore},it is also reasonable to put the chunk in
261+
* {@link ChunkCreator#chunkIdMap} because: <br/>
262+
* 1.When the {@link MemStoreLAB} which created the chunk is not closed, this chunk is used by
263+
* the {@link Segment} which references this {@link MemStoreLAB}, so this chunk certainly should
264+
* not be GC-ed, putting the chunk in {@link ChunkCreator#chunkIdMap} does not prevent useless
265+
* chunk to be GC-ed. <br/>
266+
* 2.When the {@link MemStoreLAB} which created the chunk is closed, and if the chunk is not
267+
* pooled, {@link ChunkCreator#removeChunk} is invoked to remove the chunk from this
268+
* {@link ChunkCreator#chunkIdMap}, so there is no memory leak.
269+
*/
270+
this.chunkIdMap.put(chunk.getId(), chunk);
271+
272272
return chunk;
273273
}
274274

275-
// Chunks from pool are created covered with strong references anyway
276-
// TODO: change to CHUNK_MAP if it is generally defined
277-
private Chunk createChunkForPool(CompactingMemStore.IndexType chunkIndexType, ChunkType chunkType,
275+
// Chunks from pool are created covered with strong references anyway.
276+
private Chunk createChunkForPool(ChunkType chunkType,
278277
int chunkSize) {
279278
if (chunkSize != dataChunksPool.getChunkSize() &&
280279
chunkSize != indexChunksPool.getChunkSize()) {
281280
return null;
282281
}
283-
return createChunk(true, chunkIndexType, chunkType, chunkSize);
282+
return createChunk(true, chunkType, chunkSize);
284283
}
285284

286285
// Used to translate the ChunkID into a chunk ref
@@ -346,7 +345,7 @@ private class MemStoreChunkPool implements HeapMemoryTuneObserver {
346345
this.reclaimedChunks = new LinkedBlockingQueue<>();
347346
for (int i = 0; i < initialCount; i++) {
348347
Chunk chunk =
349-
createChunk(true, CompactingMemStore.IndexType.ARRAY_MAP, chunkType, chunkSize);
348+
createChunk(true, chunkType, chunkSize);
350349
chunk.init();
351350
reclaimedChunks.add(chunk);
352351
}
@@ -368,10 +367,6 @@ private class MemStoreChunkPool implements HeapMemoryTuneObserver {
368367
* @see #putbackChunks(Chunk)
369368
*/
370369
Chunk getChunk() {
371-
return getChunk(CompactingMemStore.IndexType.ARRAY_MAP);
372-
}
373-
374-
Chunk getChunk(CompactingMemStore.IndexType chunkIndexType) {
375370
Chunk chunk = reclaimedChunks.poll();
376371
if (chunk != null) {
377372
chunk.reset();
@@ -382,7 +377,7 @@ Chunk getChunk(CompactingMemStore.IndexType chunkIndexType) {
382377
long created = this.chunkCount.get();
383378
if (created < this.maxCount) {
384379
if (this.chunkCount.compareAndSet(created, created + 1)) {
385-
chunk = createChunkForPool(chunkIndexType, chunkType, chunkSize);
380+
chunk = createChunkForPool(chunkType, chunkSize);
386381
break;
387382
}
388383
} else {
@@ -559,7 +554,7 @@ int getPoolSize(ChunkType chunkType) {
559554

560555
boolean isChunkInPool(int chunkId) {
561556
Chunk c = getChunk(chunkId);
562-
if (c==null) {
557+
if (c == null) {
563558
return false;
564559
}
565560

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreLABImpl.java

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import org.apache.hadoop.hbase.ExtendedCell;
3434
import org.apache.hadoop.hbase.KeyValueUtil;
3535
import org.apache.hadoop.hbase.nio.RefCnt;
36+
import org.apache.hadoop.hbase.regionserver.CompactingMemStore.IndexType;
3637
import org.apache.yetus.audience.InterfaceAudience;
3738
import org.slf4j.Logger;
3839
import org.slf4j.LoggerFactory;
@@ -42,27 +43,28 @@
4243
/**
4344
* A memstore-local allocation buffer.
4445
* <p>
45-
* The MemStoreLAB is basically a bump-the-pointer allocator that allocates
46-
* big (2MB) byte[] chunks from and then doles it out to threads that request
47-
* slices into the array.
46+
* The MemStoreLAB is basically a bump-the-pointer allocator that allocates big (2MB) byte[] chunks
47+
* from and then doles it out to threads that request slices into the array.
4848
* <p>
49-
* The purpose of this class is to combat heap fragmentation in the
50-
* regionserver. By ensuring that all Cells in a given memstore refer
51-
* only to large chunks of contiguous memory, we ensure that large blocks
52-
* get freed up when the memstore is flushed.
49+
* The purpose of this class is to combat heap fragmentation in the regionserver. By ensuring that
50+
* all Cells in a given memstore refer only to large chunks of contiguous memory, we ensure that
51+
* large blocks get freed up when the memstore is flushed.
5352
* <p>
54-
* Without the MSLAB, the byte array allocated during insertion end up
55-
* interleaved throughout the heap, and the old generation gets progressively
56-
* more fragmented until a stop-the-world compacting collection occurs.
53+
* Without the MSLAB, the byte array allocated during insertion end up interleaved throughout the
54+
* heap, and the old generation gets progressively more fragmented until a stop-the-world compacting
55+
* collection occurs.
5756
* <p>
58-
* TODO: we should probably benchmark whether word-aligning the allocations
59-
* would provide a performance improvement - probably would speed up the
60-
* Bytes.toLong/Bytes.toInt calls in KeyValue, but some of those are cached
61-
* anyway.
62-
* The chunks created by this MemStoreLAB can get pooled at {@link ChunkCreator}.
63-
* When the Chunk comes from pool, it can be either an on heap or an off heap backed chunk. The chunks,
64-
* which this MemStoreLAB creates on its own (when no chunk available from pool), those will be
65-
* always on heap backed.
57+
* TODO: we should probably benchmark whether word-aligning the allocations would provide a
58+
* performance improvement - probably would speed up the Bytes.toLong/Bytes.toInt calls in KeyValue,
59+
* but some of those are cached anyway. The chunks created by this MemStoreLAB can get pooled at
60+
* {@link ChunkCreator}. When the Chunk comes from pool, it can be either an on heap or an off heap
61+
* backed chunk. The chunks, which this MemStoreLAB creates on its own (when no chunk available from
62+
* pool), those will be always on heap backed.
63+
* <p>
64+
* NOTE:if user requested to work with MSLABs (whether on- or off-heap), in
65+
* {@link CompactingMemStore} ctor, the {@link CompactingMemStore#indexType} could only be
66+
* {@link IndexType#CHUNK_MAP},that is to say the immutable segments using MSLABs are going to use
67+
* {@link CellChunkMap} as their index.
6668
*/
6769
@InterfaceAudience.Private
6870
public class MemStoreLABImpl implements MemStoreLAB {
@@ -78,7 +80,6 @@ public class MemStoreLABImpl implements MemStoreLAB {
7880
private final int dataChunkSize;
7981
private final int maxAlloc;
8082
private final ChunkCreator chunkCreator;
81-
private final CompactingMemStore.IndexType idxType; // what index is used for corresponding segment
8283

8384
// This flag is for closing this instance, its set when clearing snapshot of
8485
// memstore
@@ -104,13 +105,11 @@ public MemStoreLABImpl(Configuration conf) {
104105
// if we don't exclude allocations >CHUNK_SIZE, we'd infiniteloop on one!
105106
Preconditions.checkArgument(maxAlloc <= dataChunkSize,
106107
MAX_ALLOC_KEY + " must be less than " + CHUNK_SIZE_KEY);
108+
107109
this.refCnt = RefCnt.create(() -> {
108110
recycleChunks();
109111
});
110112

111-
// if user requested to work with MSLABs (whether on- or off-heap), then the
112-
// immutable segments are going to use CellChunkMap as their index
113-
idxType = CompactingMemStore.IndexType.CHUNK_MAP;
114113
}
115114

116115
@Override
@@ -339,7 +338,7 @@ private Chunk getOrMakeChunk() {
339338
if (c != null) {
340339
return c;
341340
}
342-
c = this.chunkCreator.getChunk(idxType);
341+
c = this.chunkCreator.getChunk();
343342
if (c != null) {
344343
// set the curChunk. No need of CAS as only one thread will be here
345344
currChunk.set(c);

hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCellFlatSet.java

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -282,8 +282,8 @@ private CellChunkMap setUpCellChunkMap(boolean asc) {
282282

283283
// allocate new chunks and use the data chunk to hold the full data of the cells
284284
// and the index chunk to hold the cell-representations
285-
Chunk dataChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
286-
Chunk idxChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
285+
Chunk dataChunk = chunkCreator.getChunk();
286+
Chunk idxChunk = chunkCreator.getChunk();
287287
// the array of index chunks to be used as a basis for CellChunkMap
288288
Chunk chunkArray[] = new Chunk[8]; // according to test currently written 8 is way enough
289289
int chunkArrayIdx = 0;
@@ -300,7 +300,7 @@ private CellChunkMap setUpCellChunkMap(boolean asc) {
300300
// do we have enough space to write the cell data on the data chunk?
301301
if (dataOffset + kv.getSerializedSize() > chunkCreator.getChunkSize()) {
302302
// allocate more data chunks if needed
303-
dataChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
303+
dataChunk = chunkCreator.getChunk();
304304
dataBuffer = dataChunk.getData();
305305
dataOffset = ChunkCreator.SIZEOF_CHUNK_HEADER;
306306
}
@@ -310,7 +310,7 @@ private CellChunkMap setUpCellChunkMap(boolean asc) {
310310
// do we have enough space to write the cell-representation on the index chunk?
311311
if (idxOffset + ClassSize.CELL_CHUNK_MAP_ENTRY > chunkCreator.getChunkSize()) {
312312
// allocate more index chunks if needed
313-
idxChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
313+
idxChunk = chunkCreator.getChunk();
314314
idxBuffer = idxChunk.getData();
315315
idxOffset = ChunkCreator.SIZEOF_CHUNK_HEADER;
316316
chunkArray[chunkArrayIdx++] = idxChunk;
@@ -331,10 +331,10 @@ private CellChunkMap setUpJumboCellChunkMap(boolean asc) {
331331
// allocate new chunks and use the data JUMBO chunk to hold the full data of the cells
332332
// and the normal index chunk to hold the cell-representations
333333
Chunk dataJumboChunk =
334-
chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP, ChunkType.JUMBO_CHUNK,
334+
chunkCreator.getChunk(ChunkType.JUMBO_CHUNK,
335335
smallChunkSize);
336336
assertTrue(dataJumboChunk.isJumbo());
337-
Chunk idxChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
337+
Chunk idxChunk = chunkCreator.getChunk();
338338
// the array of index chunks to be used as a basis for CellChunkMap
339339
Chunk[] chunkArray = new Chunk[8]; // according to test currently written 8 is way enough
340340
int chunkArrayIdx = 0;
@@ -354,7 +354,7 @@ private CellChunkMap setUpJumboCellChunkMap(boolean asc) {
354354
// do we have enough space to write the cell-representation on the index chunk?
355355
if (idxOffset + ClassSize.CELL_CHUNK_MAP_ENTRY > chunkCreator.getChunkSize()) {
356356
// allocate more index chunks if needed
357-
idxChunk = chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP);
357+
idxChunk = chunkCreator.getChunk();
358358
idxBuffer = idxChunk.getData();
359359
idxOffset = ChunkCreator.SIZEOF_CHUNK_HEADER;
360360
chunkArray[chunkArrayIdx++] = idxChunk;
@@ -368,7 +368,7 @@ private CellChunkMap setUpJumboCellChunkMap(boolean asc) {
368368
// Jumbo chunks are working only with one cell per chunk, thus always allocate a new jumbo
369369
// data chunk for next cell
370370
dataJumboChunk =
371-
chunkCreator.getChunk(CompactingMemStore.IndexType.CHUNK_MAP, ChunkType.JUMBO_CHUNK,
371+
chunkCreator.getChunk(ChunkType.JUMBO_CHUNK,
372372
smallChunkSize);
373373
assertTrue(dataJumboChunk.isJumbo());
374374
dataBuffer = dataJumboChunk.getData();

0 commit comments

Comments
 (0)