
Commit 2f59db2

Option 2: Data Tiering integration into cache evictions.
This code change is only for the purpose of sharing the implementation idea. The idea here is to rely only on the file names to handle the data tiering.

DO NOT SUBMIT. Still TODO:
1. Finalise design.
2. Unit tests.

Change-Id: I4b0a9fbffb274fb80dcc7e1303985901e4da22f2
1 parent b7bb8b9 commit 2f59db2
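At a glance, the patch makes BucketCache's free-space pass consult the data-tiering layer using file names alone. Below is a minimal sketch of that flow, condensed from the BucketCache diff further down; backingMap, BlockCacheKey, BucketEntry, and evictBlocksByHfileName are existing BucketCache members, the rest mirrors this patch. It is a fragment of freeSpace(), not a standalone program.

    // 1. Collect the distinct HFile names currently held in the bucket cache.
    Map<String, String> fileNames = new HashMap<>();
    for (Map.Entry<BlockCacheKey, BucketEntry> e : backingMap.entrySet()) {
      fileNames.putIfAbsent(e.getKey().getHfileName(), null); // values unused
    }
    // 2. Ask the tiering layer which of those files are cold.
    List<String> coldFiles = DataTieringManager.getInstance().getColdFileList(fileNames);
    // 3. Evict all blocks of every cold file before the normal free-space accounting runs.
    if (coldFiles != null) {
      coldFiles.forEach(this::evictBlocksByHfileName);
    }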

File tree: 3 files changed, +79 -0 lines

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
Lines changed: 23 additions & 0 deletions

@@ -27,6 +27,7 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
@@ -76,6 +77,7 @@
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.nio.RefCnt;
 import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
+import org.apache.hadoop.hbase.regionserver.DataTieringManager;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.IdReadWriteLock;
@@ -935,6 +937,27 @@ void freeSpace(final String why) {
     }
     try {
       freeInProgress = true;
+
+      // Fetch the file names from the backing map.
+      // Use the hash-map for efficient lookups.
+      Map<String, String> fileNames = new HashMap<>();
+      for (Map.Entry<BlockCacheKey, BucketEntry> bucketEntryWithKey : backingMap.entrySet()) {
+        String fileName = bucketEntryWithKey.getKey().getHfileName();
+        if (!fileNames.containsKey(fileName)) {
+          // We are only interested in the keys.
+          fileNames.put(fileName, null);
+        }
+      }
+
+      // Check the list of files to determine the cold files which can be readily evicted.
+      List<String> coldFiles =
+        DataTieringManager.getInstance().getColdFileList(fileNames);
+      if (coldFiles != null) {
+        for (String fileName : coldFiles) {
+          evictBlocksByHfileName(fileName);
+        }
+      }
+
       long bytesToFreeWithoutExtra = 0;
       // Calculate free byte for each bucketSizeinfo
       StringBuilder msgBuffer = LOG.isDebugEnabled() ? new StringBuilder() : null;
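A small design note on the collection choice above: the patch uses a Map<String, String> with null values purely as a set of names. If membership lookup is the only requirement, a HashSet would express that intent directly. A hedged alternative sketch, not part of the patch (getColdFileList's signature would have to change accordingly):

    // Possible simplification (assumption, not in the patch): collect names in a Set.
    Set<String> fileNames = new HashSet<>();
    for (BlockCacheKey key : backingMap.keySet()) {
      fileNames.add(key.getHfileName()); // duplicates collapse automatically
    }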

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
Lines changed: 45 additions & 0 deletions

@@ -17,15 +17,20 @@
  */
 package org.apache.hadoop.hbase.regionserver;
 
+import java.util.ArrayList;
 import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 import java.util.OptionalLong;
 import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.Pair;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -219,4 +224,44 @@ private long getDataTieringHotDataAge(Configuration conf) {
     return Long.parseLong(
       conf.get(DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(DEFAULT_DATATIERING_HOT_DATA_AGE)));
   }
+
+  /**
+   * This API takes the names of files as input and returns the subset of these file names
+   * that are cold.
+   * @param inputFileNames input map whose keys are the candidate file names
+   * @return list of names of files that are cold as per the data-tiering logic
+   */
+  public List<String> getColdFileList(Map<String, String> inputFileNames) {
+    List<String> coldFileList = new ArrayList<>();
+    for (HRegion r : this.onlineRegions.values()) {
+      for (HStore hStore : r.getStores()) {
+        Configuration conf = hStore.getReadOnlyConfiguration();
+        if (getDataTieringType(conf) != DataTieringType.TIME_RANGE) {
+          // Data tiering is not enabled for the store. Just skip it.
+          continue;
+        }
+        long hotDataAge = getDataTieringHotDataAge(conf);
+
+        for (HStoreFile hStoreFile : hStore.getStorefiles()) {
+          String hFileName =
+            hStoreFile.getFileInfo().getHFileInfo().getHFileContext().getHFileName();
+          if (inputFileNames.containsKey(hFileName)) {
+            OptionalLong maxTimestamp = hStoreFile.getMaximumTimestamp();
+            if (!maxTimestamp.isPresent()) {
+              // We could throw from here, but we are already in the critical code path
+              // of freeing space. Hence, we ignore that file for now.
+              // Or do we want to include it?
+              continue;
+            }
+            long currentTimestamp = EnvironmentEdgeManager.getDelegate().currentTime();
+            long fileAge = currentTimestamp - maxTimestamp.getAsLong();
+            if (fileAge > hotDataAge) {
+              coldFileList.add(hFileName);
+            }
+          }
+        }
+      }
+    }
+    return coldFileList;
+  }
 }
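The hot/cold decision above reduces to one comparison: a file is cold when now - maxTimestamp exceeds the configured hot-data age. A small worked illustration with assumed numbers (the seven-day threshold is hypothetical, not the HBase default):

    // Hypothetical numbers to illustrate the fileAge > hotDataAge check.
    long hotDataAge = 7L * 24 * 60 * 60 * 1000;           // assumed config: 7 days in ms
    long now = 1_700_000_000_000L;                        // some current time (ms)
    long maxTimestamp = now - 10L * 24 * 60 * 60 * 1000;  // newest cell is 10 days old
    long fileAge = now - maxTimestamp;                    // 10 days in ms
    boolean cold = fileAge > hotDataAge;                  // true: file qualifies for eviction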

hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java
Lines changed: 11 additions & 0 deletions

@@ -218,6 +218,17 @@ public void testColdDataFiles() {
     }
   }
 
+  @Test
+  public void testPickColdDataFiles() {
+    Map<String, String> fileList = new HashMap<>();
+    for (HStoreFile file : hStoreFiles) {
+      fileList.put(file.getFileInfo().getActiveFileName(), null);
+    }
+    List<String> coldDataFiles = dataTieringManager.getColdFileList(fileList);
+    assertEquals(1, coldDataFiles.size());
+    assertTrue(coldDataFiles.get(0).equalsIgnoreCase(hStoreFiles.get(3).getFileInfo().getActiveFileName()));
+  }
+
   private void testDataTieringMethodWithPath(DataTieringMethodCallerWithPath caller, Path path,
     boolean expectedResult, DataTieringException exception) {
     try {
