-
Notifications
You must be signed in to change notification settings - Fork 3.4k
HBASE-28527: Adjust BlockCacheKey to use the file path instead of file name. #5832
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
fe285bc
b7bb8b9
6969f60
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,24 +39,16 @@ public class BlockCacheKey implements HeapSize, java.io.Serializable { | |
* @param hfileName The name of the HFile this block belongs to. | ||
* @param offset Offset of the block into the file | ||
*/ | ||
public BlockCacheKey(String hfileName, long offset) { | ||
this(hfileName, offset, true, BlockType.DATA); | ||
} | ||
|
||
public BlockCacheKey(String hfileName, long offset, boolean isPrimaryReplica, | ||
BlockType blockType) { | ||
this.isPrimaryReplicaBlock = isPrimaryReplica; | ||
this.hfileName = hfileName; | ||
this.offset = offset; | ||
this.blockType = blockType; | ||
public BlockCacheKey(Path hfilePath, long offset) { | ||
this(hfilePath, offset, true, BlockType.DATA); | ||
} | ||
|
||
public BlockCacheKey(Path hfilePath, long offset, boolean isPrimaryReplica, BlockType blockType) { | ||
this.filePath = hfilePath; | ||
this.isPrimaryReplicaBlock = isPrimaryReplica; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: unneeded change. |
||
this.hfileName = hfilePath.getName(); | ||
this.offset = offset; | ||
this.blockType = blockType; | ||
this.filePath = hfilePath; | ||
} | ||
|
||
@Override | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ | |
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.Comparator; | ||
import java.util.HashMap; | ||
import java.util.HashSet; | ||
import java.util.Iterator; | ||
import java.util.List; | ||
|
@@ -141,8 +142,8 @@ public class BucketCache implements BlockCache, HeapSize { | |
/** Statistics thread */ | ||
private static final int statThreadPeriod = 5 * 60; | ||
|
||
final static int DEFAULT_WRITER_THREADS = 3; | ||
final static int DEFAULT_WRITER_QUEUE_ITEMS = 64; | ||
public final static int DEFAULT_WRITER_THREADS = 3; | ||
public final static int DEFAULT_WRITER_QUEUE_ITEMS = 64; | ||
|
||
// Store/read block data | ||
transient final IOEngine ioEngine; | ||
|
@@ -682,7 +683,7 @@ public void fileCacheCompleted(Path filePath, long size) { | |
} | ||
|
||
private void updateRegionCachedSize(Path filePath, long cachedSize) { | ||
if (filePath != null) { | ||
if (filePath != null && filePath.getParent() != null && filePath.getParent().getParent() != null) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this really possible? |
||
String regionName = filePath.getParent().getParent().getName(); | ||
regionCachedSize.merge(regionName, cachedSize, | ||
(previousSize, newBlockSize) -> previousSize + newBlockSize); | ||
|
@@ -1670,8 +1671,8 @@ public int evictBlocksByHfileName(String hfileName) { | |
} | ||
|
||
private Set<BlockCacheKey> getAllCacheKeysForFile(String hfileName) { | ||
return blocksByHFile.subSet(new BlockCacheKey(hfileName, Long.MIN_VALUE), true, | ||
new BlockCacheKey(hfileName, Long.MAX_VALUE), true); | ||
return blocksByHFile.subSet(new BlockCacheKey(new Path(hfileName), Long.MIN_VALUE, true, BlockType.DATA), true, | ||
new BlockCacheKey(new Path(hfileName), Long.MAX_VALUE, true, BlockType.DATA), true); | ||
} | ||
|
||
/** | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,22 +25,28 @@ | |
import java.util.concurrent.ConcurrentHashMap; | ||
import java.util.concurrent.ConcurrentSkipListSet; | ||
import java.util.function.Function; | ||
import org.apache.hadoop.fs.Path; | ||
import org.apache.hadoop.hbase.io.ByteBuffAllocator; | ||
import org.apache.hadoop.hbase.io.ByteBuffAllocator.Recycler; | ||
import org.apache.hadoop.hbase.io.hfile.BlockCacheKey; | ||
import org.apache.hadoop.hbase.io.hfile.BlockPriority; | ||
import org.apache.hadoop.hbase.io.hfile.BlockType; | ||
import org.apache.hadoop.hbase.io.hfile.CacheableDeserializerIdManager; | ||
import org.apache.hadoop.hbase.io.hfile.HFileBlock; | ||
import org.apache.hadoop.hbase.regionserver.DataTieringManager; | ||
import org.apache.hadoop.hbase.util.Pair; | ||
import org.apache.yetus.audience.InterfaceAudience; | ||
|
||
import org.apache.hbase.thirdparty.com.google.protobuf.ByteString; | ||
|
||
import org.apache.hadoop.hbase.shaded.protobuf.generated.BucketCacheProtos; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
@InterfaceAudience.Private | ||
final class BucketProtoUtils { | ||
private static final Logger LOG = LoggerFactory.getLogger(BucketProtoUtils.class); | ||
|
||
private BucketProtoUtils() { | ||
|
||
} | ||
|
@@ -130,10 +136,30 @@ static Pair<ConcurrentHashMap<BlockCacheKey, BucketEntry>, NavigableSet<BlockCac | |
ConcurrentHashMap<BlockCacheKey, BucketEntry> result = new ConcurrentHashMap<>(); | ||
NavigableSet<BlockCacheKey> resultSet = new ConcurrentSkipListSet<>(Comparator | ||
.comparing(BlockCacheKey::getHfileName).thenComparingLong(BlockCacheKey::getOffset)); | ||
|
||
Map<String, Path> allFilePaths = null; | ||
DataTieringManager dataTieringManager; | ||
try { | ||
dataTieringManager = DataTieringManager.getInstance(); | ||
allFilePaths = dataTieringManager.getAllFilesList(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't need this for non TIME_RANGE tiering type, right? Can we avoid it, then? |
||
} catch (IllegalStateException e) { | ||
// Data-Tiering manager has not been set up. | ||
// Ignore the error and proceed with the normal flow. | ||
LOG.warn("Error while getting DataTieringManager instance: {}", e.getMessage()); | ||
Comment on lines
+146
to
+148
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this going to be thrown whenever data tiering type isn't set to TIME_RANGE? |
||
} | ||
|
||
for (BucketCacheProtos.BackingMapEntry entry : backingMap.getEntryList()) { | ||
BucketCacheProtos.BlockCacheKey protoKey = entry.getKey(); | ||
BlockCacheKey key = new BlockCacheKey(protoKey.getHfilename(), protoKey.getOffset(), | ||
protoKey.getPrimaryReplicaBlock(), fromPb(protoKey.getBlockType())); | ||
|
||
BlockCacheKey key = null; | ||
if (allFilePaths != null) { | ||
key = new BlockCacheKey(allFilePaths.get(protoKey.getHfilename()), protoKey.getOffset(), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
protoKey.getPrimaryReplicaBlock(), fromPb(protoKey.getBlockType())); | ||
} else { | ||
key = new BlockCacheKey(new Path(protoKey.getHfilename()), protoKey.getOffset(), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This call will set the file name appropriately to protoKey.getHfilename(). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The path will be incorrect, right? Anyone who accesses it will find the incorrect path. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One way I can think of avoiding incorrect path to be set is by adding the following check in the constructor of BlockCacheKey: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In which cases will And when it isn't null, what do we do with keys that weren't found in |
||
protoKey.getPrimaryReplicaBlock(), fromPb(protoKey.getBlockType())); | ||
} | ||
|
||
BucketCacheProtos.BucketEntry protoValue = entry.getValue(); | ||
// TODO:We use ByteBuffAllocator.HEAP here, because we could not get the ByteBuffAllocator | ||
// which created by RpcServer elegantly. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.hadoop.hbase.regionserver; | ||
|
||
import org.apache.yetus.audience.InterfaceAudience; | ||
|
||
@InterfaceAudience.Private | ||
public class DataTieringException extends Exception { | ||
DataTieringException(String reason) { | ||
super(reason); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we just leave these and avoid having to touch every single existing test? As having a complete path is only relevant for the TIME_RANGE data tiering, I think it's fine to allow non complete paths when TIME_RANGE data tiering is not in use.