Commit 47ec22d
HBASE-28468: Integrate the data-tiering logic into cache evictions.

When the cache is full and blocks must be evicted, the data-tiering logic is now invoked from the freeSpace code path to identify cold data files and evict the blocks associated with them. The list of store files is traversed, and a file is classified as cold based on the hot-data-age configuration and the maximum timestamp recorded in the file. Blocks belonging to cold files are evicted first; the existing LFU logic then runs to evict further blocks as needed.

Change-Id: I4b0a9fbffb274fb80dcc7e1303985901e4da22f2
Change-Id: Id09780357ef07549be63b32ed724259729f55563
1 parent b7bb8b9 commit 47ec22d
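
The eviction order described in the message reduces to one per-file test: a file is cold when the age of its newest cell exceeds the configured hot-data age. A minimal standalone sketch of that test with worked numbers (names simplified for illustration; the real check is DataTieringManager.getColdFilesList() in the diff below):

```java
// Minimal sketch of the per-file cold test described in the commit message.
// Names are simplified; the actual check lives in getColdFilesList() below.
public class ColdFileCheck {
  static boolean isCold(long maxTimestampMs, long hotDataAgeMs, long nowMs) {
    // Cold when the newest cell in the file is older than the hot-data-age threshold.
    return (nowMs - maxTimestampMs) > hotDataAgeMs;
  }

  public static void main(String[] args) {
    long now = System.currentTimeMillis();
    long sevenDaysMs = 7L * 24 * 60 * 60 * 1000;          // hot-data-age of one week
    long maxTimestamp = now - 10L * 24 * 60 * 60 * 1000;  // newest cell is ten days old
    System.out.println(isCold(maxTimestamp, sevenDaysMs, now)); // true: evict its blocks first
  }
}
```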

File tree: 3 files changed, +73 -1 lines

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java

Lines changed: 20 additions & 1 deletion
```diff
@@ -27,6 +27,7 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
@@ -76,6 +77,7 @@
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.nio.RefCnt;
 import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
+import org.apache.hadoop.hbase.regionserver.DataTieringManager;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.IdReadWriteLock;
@@ -935,6 +937,7 @@ void freeSpace(final String why) {
     }
     try {
       freeInProgress = true;
+
       long bytesToFreeWithoutExtra = 0;
       // Calculate free byte for each bucketSizeinfo
       StringBuilder msgBuffer = LOG.isDebugEnabled() ? new StringBuilder() : null;
@@ -960,6 +963,7 @@ void freeSpace(final String why) {
       if (bytesToFreeWithoutExtra <= 0) {
         return;
       }
+
       long currentSize = bucketAllocator.getUsedSize();
       long totalSize = bucketAllocator.getTotalSize();
       if (LOG.isDebugEnabled() && msgBuffer != null) {
@@ -972,6 +976,13 @@ void freeSpace(final String why) {
       long bytesToFreeWithExtra =
         (long) Math.floor(bytesToFreeWithoutExtra * (1 + extraFreeFactor));
 
+      // Check the list of files to determine the cold files which can be readily evicted.
+      Map<String, String> coldFiles =
+        DataTieringManager.getInstance().getColdFilesList();
+      // Can we evict the blocks inline during backingMap traversal?
+      // If not, we accumulate the keys and evict them later.
+      // List<BlockCacheKey> coldBlocks = new ArrayList<>();
+
       // Instantiate priority buckets
       BucketEntryGroup bucketSingle =
         new BucketEntryGroup(bytesToFreeWithExtra, blockSize, getPartitionSize(singleFactor));
@@ -980,9 +991,18 @@ void freeSpace(final String why) {
       BucketEntryGroup bucketMemory =
         new BucketEntryGroup(bytesToFreeWithExtra, blockSize, getPartitionSize(memoryFactor));
 
+      long bytesFreed = 0;
+
       // Scan entire map putting bucket entry into appropriate bucket entry
       // group
       for (Map.Entry<BlockCacheKey, BucketEntry> bucketEntryWithKey : backingMap.entrySet()) {
+        // Evict blocks belonging to cold files up front, before the LFU bucket logic.
+        if (coldFiles.containsKey(bucketEntryWithKey.getKey().getHfileName())) {
+          // coldBlocks.add(bucketEntryWithKey.getKey());
+          bytesFreed += bucketEntryWithKey.getValue().getLength();
+          evictBlock(bucketEntryWithKey.getKey());
+          continue;
+        }
         switch (bucketEntryWithKey.getValue().getPriority()) {
           case SINGLE: {
             bucketSingle.add(bucketEntryWithKey);
@@ -1007,7 +1027,6 @@ void freeSpace(final String why) {
       bucketQueue.add(bucketMemory);
 
       int remainingBuckets = bucketQueue.size();
-      long bytesFreed = 0;
 
       BucketEntryGroup bucketGroup;
       while ((bucketGroup = bucketQueue.poll()) != null) {
```

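The hunk above leaves a question in comments: evict cold blocks inline during the backingMap traversal, or accumulate the keys and evict afterwards. A standalone sketch (invented names, not HBase code) of the accumulate-then-evict alternative; ConcurrentHashMap iterators are weakly consistent, so the inline eviction the commit actually performs is safe too, and the choice is left open there:

```java
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Standalone illustration of deferring eviction: collect the cold keys during
// the scan, then evict after the traversal, keeping the scan side-effect free.
public class EvictionSketch {
  public static void main(String[] args) {
    Map<String, Integer> cache = new ConcurrentHashMap<>();
    cache.put("hot-file-block", 64);
    cache.put("cold-file-block", 128);

    // Pass 1: collect keys of blocks belonging to cold files.
    List<String> coldKeys = new ArrayList<>();
    for (Map.Entry<String, Integer> entry : cache.entrySet()) {
      if (entry.getKey().startsWith("cold")) {
        coldKeys.add(entry.getKey());
      }
    }

    // Pass 2: evict them, tallying the freed bytes.
    long bytesFreed = 0;
    for (String key : coldKeys) {
      Integer length = cache.remove(key); // stands in for evictBlock(key)
      if (length != null) {
        bytesFreed += length;
      }
    }
    System.out.println("freed " + bytesFreed + " bytes"); // freed 128 bytes
  }
}
```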
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java

Lines changed: 45 additions & 0 deletions
```diff
@@ -17,15 +17,21 @@
  */
 package org.apache.hadoop.hbase.regionserver;
 
+import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 import java.util.OptionalLong;
 import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.Pair;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -219,4 +225,43 @@ private long getDataTieringHotDataAge(Configuration conf) {
     return Long.parseLong(
       conf.get(DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(DEFAULT_DATATIERING_HOT_DATA_AGE)));
   }
+
+  /*
+   * Returns the subset of store files that are cold according to the
+   * data-tiering logic, keyed by HFile name.
+   * The map values are unused; only key membership matters.
+   * @return Map whose keys are the names of cold files.
+   */
+  public Map<String, String> getColdFilesList() {
+    Map<String, String> coldFiles = new HashMap<>();
+    for (HRegion r : this.onlineRegions.values()) {
+      for (HStore hStore : r.getStores()) {
+        Configuration conf = hStore.getReadOnlyConfiguration();
+        if (getDataTieringType(conf) != DataTieringType.TIME_RANGE) {
+          // Data-tiering is not enabled for this store; skip it.
+          continue;
+        }
+        long hotDataAge = getDataTieringHotDataAge(conf);
+
+        for (HStoreFile hStoreFile : hStore.getStorefiles()) {
+          String hFileName =
+            hStoreFile.getFileInfo().getHFileInfo().getHFileContext().getHFileName();
+          OptionalLong maxTimestamp = hStoreFile.getMaximumTimestamp();
+          if (!maxTimestamp.isPresent()) {
+            // We could throw from here, but we are already on the critical
+            // code path of freeing space, so we ignore this file for now.
+            // Or do we want to include it?
+            continue;
+          }
+          long currentTimestamp = EnvironmentEdgeManager.getDelegate().currentTime();
+          long fileAge = currentTimestamp - maxTimestamp.getAsLong();
+          if (fileAge > hotDataAge) {
+            // Values do not matter.
+            coldFiles.put(hFileName, null);
+          }
+        }
+      }
+    }
+    return coldFiles;
+  }
 }
```

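A design note on the return type above: the "Values do not matter" comment means the Map is consulted only for key membership, so a Set<String> would state the same contract more directly. A purely illustrative variant, with a hypothetical file name:

```java
import java.util.HashSet;
import java.util.Set;

// Illustrative variant only: getColdFilesList() never reads the map values,
// so a Set captures the same "is this file cold?" membership test.
public class ColdFilesAsSet {
  public static void main(String[] args) {
    Set<String> coldFiles = new HashSet<>();
    coldFiles.add("example-hfile-name"); // hypothetical HFile name
    System.out.println(coldFiles.contains("example-hfile-name")); // true
  }
}
```
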
hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java

Lines changed: 8 additions & 0 deletions
```diff
@@ -218,6 +218,14 @@ public void testColdDataFiles() {
     }
   }
 
+  @Test
+  public void testPickColdDataFiles() {
+    Map<String, String> coldDataFiles = dataTieringManager.getColdFilesList();
+    assertEquals(1, coldDataFiles.size());
+    // hStoreFiles[3] is the cold file.
+    assert (coldDataFiles.containsKey(hStoreFiles.get(3).getFileInfo().getActiveFileName()));
+  }
+
   private void testDataTieringMethodWithPath(DataTieringMethodCallerWithPath caller, Path path,
     boolean expectedResult, DataTieringException exception) {
     try {
```

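One caveat on the new test: the final check uses Java's assert statement rather than JUnit's assertTrue, and assert is a no-op unless the JVM enables assertions (e.g. with -ea). A minimal demonstration of the difference:

```java
// Run with and without the -ea flag: the assert below only fires under -ea,
// which is why JUnit tests normally use Assert.assertTrue instead.
public class AssertDemo {
  public static void main(String[] args) {
    assert 1 == 2 : "only thrown when assertions are enabled (-ea)";
    System.out.println("reached: assertions are disabled by default");
  }
}
```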