apache · jhungund · Apr 8, 2024 · Apr 12, 2024 · Apr 9, 2024 · wchevreuil
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
@@ -39,24 +39,16 @@ public class BlockCacheKey implements HeapSize, java.io.Serializable {
-   * @param hfileName The name of the HFile this block belongs to.
+   * @param hfilePath Path of the HFile this block belongs to.
    * @param offset    Offset of the block into the file
    */
-  public BlockCacheKey(String hfileName, long offset) {
-    this(hfileName, offset, true, BlockType.DATA);
-  }
-
-  public BlockCacheKey(String hfileName, long offset, boolean isPrimaryReplica,
-    BlockType blockType) {
-    this.isPrimaryReplicaBlock = isPrimaryReplica;
-    this.hfileName = hfileName;
-    this.offset = offset;
-    this.blockType = blockType;
+  public BlockCacheKey(Path hfilePath, long offset) {
+    this(hfilePath, offset, true, BlockType.DATA);
   }
 
   public BlockCacheKey(Path hfilePath, long offset, boolean isPrimaryReplica, BlockType blockType) {
-    this.filePath = hfilePath;
     this.isPrimaryReplicaBlock = isPrimaryReplica;
     this.hfileName = hfilePath.getName();
     this.offset = offset;
     this.blockType = blockType;
+    this.filePath = hfilePath;
   }
 
   @Override

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
@@ -30,6 +30,7 @@
 import java.util.concurrent.atomic.AtomicReference;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.ByteBufferKeyOnlyKeyValue;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellComparator;
@@ -978,7 +979,7 @@ public static class BlockIndexWriter implements InlineBlockWriter {
     private CacheConfig cacheConf;
 
-    /** Name to use for computing cache keys */
+    /** Path to use for computing cache keys */
-    private String nameForCaching;
+    private Path pathForCaching;
 
     /** Type of encoding used for index blocks in HFile */
     private HFileIndexBlockEncoder indexBlockEncoder;
@@ -995,15 +996,15 @@ public BlockIndexWriter() {
      * @param cacheConf   used to determine when and how a block should be cached-on-write.
      */
     public BlockIndexWriter(HFileBlock.Writer blockWriter, CacheConfig cacheConf,
-      String nameForCaching, HFileIndexBlockEncoder indexBlockEncoder) {
-      if ((cacheConf == null) != (nameForCaching == null)) {
+      Path pathForCaching, HFileIndexBlockEncoder indexBlockEncoder) {
+      if ((cacheConf == null) != (pathForCaching == null)) {
         throw new IllegalArgumentException(
           "Block cache and file name for " + "caching must be both specified or both null");
       }
 
       this.blockWriter = blockWriter;
       this.cacheConf = cacheConf;
-      this.nameForCaching = nameForCaching;
+      this.pathForCaching = pathForCaching;
       this.maxChunkSize = HFileBlockIndex.DEFAULT_MAX_CHUNK_SIZE;
       this.minIndexNumEntries = HFileBlockIndex.DEFAULT_MIN_INDEX_NUM_ENTRIES;
       this.indexBlockEncoder =
@@ -1070,7 +1071,7 @@ public long writeIndexBlocks(FSDataOutputStream out) throws IOException {
         if (cacheConf != null) {
           cacheConf.getBlockCache().ifPresent(cache -> {
             HFileBlock blockForCaching = blockWriter.getBlockForCaching(cacheConf);
-            cache.cacheBlock(new BlockCacheKey(nameForCaching, rootLevelIndexPos, true,
+            cache.cacheBlock(new BlockCacheKey(pathForCaching, rootLevelIndexPos, true,
               blockForCaching.getBlockType()), blockForCaching);
           });
         }
@@ -1162,7 +1163,7 @@ private void writeIntermediateBlock(FSDataOutputStream out, BlockIndexChunk pare
         cacheConf.getBlockCache().ifPresent(cache -> {
           HFileBlock blockForCaching = blockWriter.getBlockForCaching(cacheConf);
           cache.cacheBlock(
-            new BlockCacheKey(nameForCaching, beginOffset, true, blockForCaching.getBlockType()),
+            new BlockCacheKey(pathForCaching, beginOffset, true, blockForCaching.getBlockType()),
             blockForCaching);
         });
       }

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePreadReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePreadReader.java
@@ -79,7 +79,7 @@ public void run() {
               // so we check first if the block exists on its in-memory index, if so, we just
               // update the offset and move on to the next block without actually going read all
               // the way to the cache.
-              BlockCacheKey cacheKey = new BlockCacheKey(name, offset);
+              BlockCacheKey cacheKey = new BlockCacheKey(path, offset);
               if (cache.isAlreadyCached(cacheKey).orElse(false)) {
                 // Right now, isAlreadyCached is only supported by BucketCache, which should
                 // always cache data blocks.

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
@@ -1201,7 +1201,7 @@ public HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws
       // Check cache for block. If found return.
       long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block);
       BlockCacheKey cacheKey =
-        new BlockCacheKey(name, metaBlockOffset, this.isPrimaryReplicaReader(), BlockType.META);
+        new BlockCacheKey(path, metaBlockOffset, this.isPrimaryReplicaReader(), BlockType.META);
 
       cacheBlock &= cacheConf.shouldCacheBlockOnRead(BlockType.META.getCategory());
       HFileBlock cachedBlock =

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
@@ -315,7 +315,7 @@ protected void finishInit(final Configuration conf) {
     // Data block index writer
     boolean cacheIndexesOnWrite = cacheConf.shouldCacheIndexesOnWrite();
     dataBlockIndexWriter = new HFileBlockIndex.BlockIndexWriter(blockWriter,
-      cacheIndexesOnWrite ? cacheConf : null, cacheIndexesOnWrite ? name : null, indexBlockEncoder);
+      cacheIndexesOnWrite ? cacheConf : null, cacheIndexesOnWrite ? path : null, indexBlockEncoder);
     dataBlockIndexWriter.setMaxChunkSize(HFileBlockIndex.getMaxChunkSize(conf));
     dataBlockIndexWriter.setMinIndexNumEntries(HFileBlockIndex.getMinIndexNumEntries(conf));
     inlineBlockWriters.add(dataBlockIndexWriter);
@@ -556,7 +556,7 @@ private void doCacheOnWrite(long offset) {
     cacheConf.getBlockCache().ifPresent(cache -> {
       HFileBlock cacheFormatBlock = blockWriter.getBlockForCaching(cacheConf);
       try {
-        cache.cacheBlock(new BlockCacheKey(name, offset, true, cacheFormatBlock.getBlockType()),
+        cache.cacheBlock(new BlockCacheKey(path, offset, true, cacheFormatBlock.getBlockType()),
           cacheFormatBlock, cacheConf.isInMemory(), true);
       } finally {
         // refCnt will auto increase when block add to Cache, see RAMCache#putIfAbsent

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -27,6 +27,7 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
@@ -141,8 +142,8 @@ public class BucketCache implements BlockCache, HeapSize {
   /** Statistics thread */
   private static final int statThreadPeriod = 5 * 60;
 
-  final static int DEFAULT_WRITER_THREADS = 3;
-  final static int DEFAULT_WRITER_QUEUE_ITEMS = 64;
+  public final static int DEFAULT_WRITER_THREADS = 3;
+  public final static int DEFAULT_WRITER_QUEUE_ITEMS = 64;
 
   // Store/read block data
   transient final IOEngine ioEngine;
@@ -682,7 +683,7 @@ public void fileCacheCompleted(Path filePath, long size) {
   }
 
   private void updateRegionCachedSize(Path filePath, long cachedSize) {
-    if (filePath != null) {
+    if (filePath != null && filePath.getParent() != null && filePath.getParent().getParent() != null) {
       String regionName = filePath.getParent().getParent().getName();
       regionCachedSize.merge(regionName, cachedSize,
         (previousSize, newBlockSize) -> previousSize + newBlockSize);
@@ -1670,8 +1671,8 @@ public int evictBlocksByHfileName(String hfileName) {
   }
 
   private Set<BlockCacheKey> getAllCacheKeysForFile(String hfileName) {
-    return blocksByHFile.subSet(new BlockCacheKey(hfileName, Long.MIN_VALUE), true,
-      new BlockCacheKey(hfileName, Long.MAX_VALUE), true);
+    return blocksByHFile.subSet(new BlockCacheKey(new Path(hfileName), Long.MIN_VALUE), true,
+      new BlockCacheKey(new Path(hfileName), Long.MAX_VALUE), true);
   }
 
   /**

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java
@@ -25,22 +25,28 @@
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentSkipListSet;
 import java.util.function.Function;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.ByteBuffAllocator.Recycler;
 import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
 import org.apache.hadoop.hbase.io.hfile.BlockPriority;
 import org.apache.hadoop.hbase.io.hfile.BlockType;
 import org.apache.hadoop.hbase.io.hfile.CacheableDeserializerIdManager;
 import org.apache.hadoop.hbase.io.hfile.HFileBlock;
+import org.apache.hadoop.hbase.regionserver.DataTieringManager;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.yetus.audience.InterfaceAudience;
 
 import org.apache.hbase.thirdparty.com.google.protobuf.ByteString;
 
 import org.apache.hadoop.hbase.shaded.protobuf.generated.BucketCacheProtos;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 @InterfaceAudience.Private
 final class BucketProtoUtils {
+  private static final Logger LOG = LoggerFactory.getLogger(BucketProtoUtils.class);
+
   private BucketProtoUtils() {
 
   }
@@ -130,10 +136,30 @@ static Pair<ConcurrentHashMap<BlockCacheKey, BucketEntry>, NavigableSet<BlockCac
     ConcurrentHashMap<BlockCacheKey, BucketEntry> result = new ConcurrentHashMap<>();
     NavigableSet<BlockCacheKey> resultSet = new ConcurrentSkipListSet<>(Comparator
       .comparing(BlockCacheKey::getHfileName).thenComparingLong(BlockCacheKey::getOffset));
+
+    Map<String, Path> allFilePaths = null;
+    DataTieringManager dataTieringManager;
+    try {
+      dataTieringManager = DataTieringManager.getInstance();
+      allFilePaths = dataTieringManager.getAllFilesList();
+    } catch (IllegalStateException e) {
+      // Data-Tiering manager has not been set up.
+      // Ignore the error and proceed with the normal flow.
+      LOG.warn("Error while getting DataTieringManager instance: {}", e.getMessage());
+    }
+
     for (BucketCacheProtos.BackingMapEntry entry : backingMap.getEntryList()) {
       BucketCacheProtos.BlockCacheKey protoKey = entry.getKey();
-      BlockCacheKey key = new BlockCacheKey(protoKey.getHfilename(), protoKey.getOffset(),
-        protoKey.getPrimaryReplicaBlock(), fromPb(protoKey.getBlockType()));
+
+      // allFilePaths may have no entry for this file name; fall back to a name-only Path
+      // instead of passing null, which the BlockCacheKey constructor would dereference.
+      String hfileName = protoKey.getHfilename();
+      Path hfilePath = allFilePaths == null ? null : allFilePaths.get(hfileName);
+      BlockCacheKey key =
+        new BlockCacheKey(hfilePath != null ? hfilePath : new Path(hfileName),
+          protoKey.getOffset(), protoKey.getPrimaryReplicaBlock(),
+          fromPb(protoKey.getBlockType()));
+
       BucketCacheProtos.BucketEntry protoValue = entry.getValue();
       // TODO:We use ByteBuffAllocator.HEAP here, because we could not get the ByteBuffAllocator
       // which created by RpcServer elegantly.

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringException.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringException.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * Checked exception for data-tiering errors. The constructor is package-private, so instances
+ * can only be created from within {@code org.apache.hadoop.hbase.regionserver}.
+ */
+@InterfaceAudience.Private
+public class DataTieringException extends Exception {
+  private static final long serialVersionUID = 1L;
+
+  /**
+   * @param reason detail message passed to {@link Exception#Exception(String)}
+   */
+  DataTieringException(String reason) {
+    super(reason);
+  }
+}