Skip to content

Commit 3a8c0dc

Browse files
committed
HBASE-28468: Integrate the data-tiering logic into cache evictions.
As part of cache-block eviction when the cache is full, the data-tiering logic is integrated into the freeSpace code path to identify cold data files and evict the blocks associated with them. The list of files is traversed to identify cold files based on the hot-data-age configuration and the maximum timestamp associated with each file. Blocks belonging to those cold files are evicted first; the existing LFU-based eviction logic then runs to free further space if needed. Change-Id: Id09780357ef07549be63b32ed724259729f55563
1 parent b7bb8b9 commit 3a8c0dc

File tree

3 files changed

+129
-2
lines changed

3 files changed

+129
-2
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import java.util.ArrayList;
2828
import java.util.Collections;
2929
import java.util.Comparator;
30+
import java.util.HashMap;
3031
import java.util.HashSet;
3132
import java.util.Iterator;
3233
import java.util.List;
@@ -76,6 +77,7 @@
7677
import org.apache.hadoop.hbase.nio.ByteBuff;
7778
import org.apache.hadoop.hbase.nio.RefCnt;
7879
import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
80+
import org.apache.hadoop.hbase.regionserver.DataTieringManager;
7981
import org.apache.hadoop.hbase.util.Bytes;
8082
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
8183
import org.apache.hadoop.hbase.util.IdReadWriteLock;
@@ -935,6 +937,24 @@ void freeSpace(final String why) {
935937
}
936938
try {
937939
freeInProgress = true;
940+
// Check the list of files to determine the cold files which can be readily evicted.
941+
Map<String, String> coldFiles = null;
942+
try {
943+
DataTieringManager dataTieringManager = DataTieringManager.getInstance();
944+
coldFiles = dataTieringManager.getColdFilesList();
945+
} catch (IllegalStateException e) {
946+
LOG.warn("Data Tiering Manager is not set. Ignore time-based block evictions.");
947+
}
948+
949+
long bytesFreed = 0;
950+
for (Map.Entry<BlockCacheKey, BucketEntry> bucketEntryWithKey : backingMap.entrySet()) {
951+
if (coldFiles != null && coldFiles.containsKey(bucketEntryWithKey.getKey().getHfileName())) {
952+
int freedBlockSize = bucketEntryWithKey.getValue().getLength();
953+
evictBlockIfNoRpcReferenced(bucketEntryWithKey.getKey());
954+
bytesFreed += freedBlockSize;
955+
continue;
956+
}
957+
}
938958
long bytesToFreeWithoutExtra = 0;
939959
// Calculate free byte for each bucketSizeinfo
940960
StringBuilder msgBuffer = LOG.isDebugEnabled() ? new StringBuilder() : null;
@@ -1007,8 +1027,6 @@ void freeSpace(final String why) {
10071027
bucketQueue.add(bucketMemory);
10081028

10091029
int remainingBuckets = bucketQueue.size();
1010-
long bytesFreed = 0;
1011-
10121030
BucketEntryGroup bucketGroup;
10131031
while ((bucketGroup = bucketQueue.poll()) != null) {
10141032
long overflow = bucketGroup.overflow();

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,21 @@
1717
*/
1818
package org.apache.hadoop.hbase.regionserver;
1919

20+
import java.util.ArrayList;
21+
import java.util.HashMap;
2022
import java.util.HashSet;
23+
import java.util.Iterator;
24+
import java.util.List;
2125
import java.util.Map;
2226
import java.util.OptionalLong;
2327
import java.util.Set;
28+
import java.util.concurrent.ConcurrentHashMap;
2429
import org.apache.hadoop.conf.Configuration;
2530
import org.apache.hadoop.fs.Path;
2631
import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
2732
import org.apache.hadoop.hbase.util.Bytes;
2833
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
34+
import org.apache.hadoop.hbase.util.Pair;
2935
import org.apache.yetus.audience.InterfaceAudience;
3036
import org.slf4j.Logger;
3137
import org.slf4j.LoggerFactory;
@@ -219,4 +225,41 @@ private long getDataTieringHotDataAge(Configuration conf) {
219225
return Long.parseLong(
220226
conf.get(DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(DEFAULT_DATATIERING_HOT_DATA_AGE)));
221227
}
228+
229+
/*
230+
* This API traverses through the list of online regions and returns a
231+
* subset of these files-names that are cold.
232+
* @return List of names of files with cold data as per data-tiering logic.
233+
*/
234+
public Map<String, String> getColdFilesList() {
235+
Map<String, String> coldFiles = new HashMap<>();
236+
for (HRegion r : this.onlineRegions.values()) {
237+
for (HStore hStore : r.getStores()) {
238+
Configuration conf = hStore.getReadOnlyConfiguration();
239+
if (getDataTieringType(conf) != DataTieringType.TIME_RANGE) {
240+
// Data-Tiering not enabled for the store. Just skip it.
241+
continue;
242+
}
243+
Long hotDataAge = getDataTieringHotDataAge(conf);
244+
245+
for (HStoreFile hStoreFile : hStore.getStorefiles()) {
246+
String hFileName =
247+
hStoreFile.getFileInfo().getHFileInfo().getHFileContext().getHFileName();
248+
OptionalLong maxTimestamp = hStoreFile.getMaximumTimestamp();
249+
if (!maxTimestamp.isPresent()) {
250+
LOG.warn("maxTimestamp missing for file: "
251+
+ hStoreFile.getFileInfo().getActiveFileName());
252+
continue;
253+
}
254+
long currentTimestamp = EnvironmentEdgeManager.getDelegate().currentTime();
255+
long fileAge = currentTimestamp - maxTimestamp.getAsLong();
256+
if (fileAge > hotDataAge) {
257+
// Values do not matter.
258+
coldFiles.put(hFileName, null);
259+
}
260+
}
261+
}
262+
}
263+
return coldFiles;
264+
}
222265
}

hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
*/
1818
package org.apache.hadoop.hbase.regionserver;
1919

20+
import static org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.DEFAULT_ERROR_TOLERATION_DURATION;
2021
import static org.junit.Assert.assertEquals;
2122
import static org.junit.Assert.fail;
2223

@@ -46,7 +47,9 @@
4647
import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
4748
import org.apache.hadoop.hbase.io.hfile.BlockType;
4849
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
50+
import org.apache.hadoop.hbase.io.hfile.CacheTestUtils;
4951
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
52+
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
5053
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
5154
import org.apache.hadoop.hbase.testclassification.SmallTests;
5255
import org.apache.hadoop.hbase.util.Bytes;
@@ -55,6 +58,8 @@
5558
import org.junit.ClassRule;
5659
import org.junit.Test;
5760
import org.junit.experimental.categories.Category;
61+
import org.slf4j.Logger;
62+
import org.slf4j.LoggerFactory;
5863

5964
/**
6065
* This class is used to test the functionality of the DataTieringManager.
@@ -82,6 +87,8 @@ public class TestDataTieringManager {
8287
public static final HBaseClassTestRule CLASS_RULE =
8388
HBaseClassTestRule.forClass(TestDataTieringManager.class);
8489

90+
private static final Logger LOG = LoggerFactory.getLogger(TestDataTieringManager.class);
91+
8592
private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
8693
private static Configuration defaultConf;
8794
private static FileSystem fs;
@@ -218,6 +225,65 @@ public void testColdDataFiles() {
218225
}
219226
}
220227

228+
@Test
229+
public void testPickColdDataFiles() {
230+
Map<String, String> coldDataFiles = dataTieringManager.getColdFilesList();
231+
assertEquals(1, coldDataFiles.size());
232+
// hStoreFiles[3] is the cold file.
233+
assert(coldDataFiles.containsKey(hStoreFiles.get(3).getFileInfo().getActiveFileName()));
234+
}
235+
236+
@Test
237+
public void testBlockEvictions() throws Exception {
238+
long capacitySize = 64 * 1024;
239+
int writeThreads = 3;
240+
int writerQLen = 64;
241+
int[] bucketSizes = new int[] { 8 * 1024 + 1024 };
242+
243+
// Setup: Create a bucket cache with lower capacity
244+
BucketCache bucketCache = new BucketCache("file:" + testDir + "/bucket.cache", capacitySize,
245+
8192, bucketSizes, writeThreads, writerQLen, testDir + "/bucket.persistence",
246+
DEFAULT_ERROR_TOLERATION_DURATION, defaultConf);
247+
248+
// Create three Cache keys with cold data files and a block with hot data.
249+
// hStoreFiles.get(3) is a cold data file, while hStoreFiles.get(0) is a hot file.
250+
List<BlockCacheKey> cacheKeys = new ArrayList<>();
251+
cacheKeys.add(new BlockCacheKey(hStoreFiles.get(3).getPath(), 0, true, BlockType.DATA));
252+
cacheKeys.add(new BlockCacheKey(hStoreFiles.get(3).getPath(), 8192, true, BlockType.DATA));
253+
cacheKeys.add(new BlockCacheKey(hStoreFiles.get(0).getPath(), 0, true, BlockType.DATA));
254+
255+
// Create dummy data to be cached and fill the cache completely.
256+
CacheTestUtils.HFileBlockPair[] blocks = CacheTestUtils.generateHFileBlocks(8192, 3);
257+
258+
int blocksIter = 0;
259+
for(BlockCacheKey key: cacheKeys) {
260+
bucketCache.cacheBlock(key, blocks[blocksIter++].getBlock());
261+
// Ensure that the block is persisted to the file.
262+
while (!(bucketCache.getBackingMap().containsKey(key))) {
263+
Thread.sleep(100);
264+
}
265+
}
266+
267+
// Verify that the bucket cache contains 4 blocks.
268+
assertEquals(3, bucketCache.getBackingMap().keySet().size());
269+
270+
// Add an additional block into cache with hot data which should trigger the eviction
271+
BlockCacheKey newKey = new BlockCacheKey(hStoreFiles.get(2).getPath(), 0, true, BlockType.DATA);
272+
CacheTestUtils.HFileBlockPair[] newBlock = CacheTestUtils.generateHFileBlocks(8192, 1);
273+
274+
bucketCache.cacheBlock(newKey, newBlock[0].getBlock());
275+
while (!(bucketCache.getBackingMap().containsKey(newKey))) {
276+
Thread.sleep(100);
277+
}
278+
279+
// Verify that the bucket cache now contains 2 hot blocks blocks only.
280+
Set<BlockCacheKey> newKeys = bucketCache.getBackingMap().keySet();
281+
assertEquals(2, newKeys.size());
282+
for(BlockCacheKey key: newKeys){
283+
assert(dataTieringManager.isHotData(key));
284+
}
285+
}
286+
221287
private void testDataTieringMethodWithPath(DataTieringMethodCallerWithPath caller, Path path,
222288
boolean expectedResult, DataTieringException exception) {
223289
try {

0 commit comments

Comments
 (0)