-
Notifications
You must be signed in to change notification settings - Fork 3.4k
HBASE-23066 Allow cache on write during compactions when prefetching … #707
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -81,6 +81,13 @@ public class CacheConfig { | |
| */ | ||
| public static final String PREFETCH_BLOCKS_ON_OPEN_KEY = "hbase.rs.prefetchblocksonopen"; | ||
|
|
||
| /** | ||
| * Configuration key to cache blocks when a compacted file is written, predicated on prefetching | ||
| * being enabled for the column family. | ||
| */ | ||
| public static final String PREFETCH_COMPACTED_BLOCKS_ON_WRITE_KEY = | ||
| "hbase.rs.prefetchcompactedblocksonwrite"; | ||
|
|
||
| public static final String DROP_BEHIND_CACHE_COMPACTION_KEY = | ||
| "hbase.hfile.drop.behind.compaction"; | ||
|
|
||
|
|
@@ -93,6 +100,7 @@ public class CacheConfig { | |
| public static final boolean DEFAULT_EVICT_ON_CLOSE = false; | ||
| public static final boolean DEFAULT_CACHE_DATA_COMPRESSED = false; | ||
| public static final boolean DEFAULT_PREFETCH_ON_OPEN = false; | ||
| public static final boolean DEFAULT_PREFETCH_COMPACTED_BLOCKS_ON_WRITE = false; | ||
| public static final boolean DROP_BEHIND_CACHE_COMPACTION_DEFAULT = true; | ||
|
|
||
| /** | ||
|
|
@@ -124,6 +132,12 @@ public class CacheConfig { | |
| /** Whether data blocks should be prefetched into the cache */ | ||
| private final boolean prefetchOnOpen; | ||
|
|
||
| /** | ||
| * Whether data blocks should be cached when compacted file is written for column families with | ||
| * prefetching | ||
| */ | ||
| private final boolean prefetchCompactedDataOnWrite; | ||
|
|
||
| private final boolean dropBehindCompaction; | ||
|
|
||
| // Local reference to the block cache | ||
|
|
@@ -174,6 +188,8 @@ public CacheConfig(Configuration conf, ColumnFamilyDescriptor family, BlockCache | |
| (family == null ? false : family.isEvictBlocksOnClose()); | ||
| this.prefetchOnOpen = conf.getBoolean(PREFETCH_BLOCKS_ON_OPEN_KEY, DEFAULT_PREFETCH_ON_OPEN) || | ||
| (family == null ? false : family.isPrefetchBlocksOnOpen()); | ||
| this.prefetchCompactedDataOnWrite = conf.getBoolean(PREFETCH_COMPACTED_BLOCKS_ON_WRITE_KEY, | ||
| DEFAULT_PREFETCH_COMPACTED_BLOCKS_ON_WRITE); | ||
| this.blockCache = blockCache; | ||
| this.byteBuffAllocator = byteBuffAllocator; | ||
| LOG.info("Created cacheConfig: " + this + (family == null ? "" : " for family " + family) + | ||
|
|
@@ -193,6 +209,7 @@ public CacheConfig(CacheConfig cacheConf) { | |
| this.evictOnClose = cacheConf.evictOnClose; | ||
| this.cacheDataCompressed = cacheConf.cacheDataCompressed; | ||
| this.prefetchOnOpen = cacheConf.prefetchOnOpen; | ||
| this.prefetchCompactedDataOnWrite = cacheConf.prefetchCompactedDataOnWrite; | ||
| this.dropBehindCompaction = cacheConf.dropBehindCompaction; | ||
| this.blockCache = cacheConf.blockCache; | ||
| this.byteBuffAllocator = cacheConf.byteBuffAllocator; | ||
|
|
@@ -207,6 +224,7 @@ private CacheConfig() { | |
| this.evictOnClose = false; | ||
| this.cacheDataCompressed = false; | ||
| this.prefetchOnOpen = false; | ||
| this.prefetchCompactedDataOnWrite = false; | ||
| this.dropBehindCompaction = false; | ||
| this.blockCache = null; | ||
| this.byteBuffAllocator = ByteBuffAllocator.HEAP; | ||
|
|
@@ -319,6 +337,13 @@ public boolean shouldPrefetchOnOpen() { | |
| return this.prefetchOnOpen; | ||
| } | ||
|
|
||
| /** | ||
| * @return true if blocks should be cached while writing during compaction, false if not | ||
| */ | ||
| public boolean shouldCacheCompactedBlocksOnWrite() { | ||
| return this.prefetchCompactedDataOnWrite && this.prefetchOnOpen; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh, so the cache-on-write (at compaction) happens iff the prefetch config is ON! Anyway, in your case the prefetch, which is another config, is ON, right? I think this is the reason why you have named the new config that way. But somehow I feel that the config name is a bit misleading.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Actually, the cache size should be much bigger than the hot data set size if you want to do cache-on-compact, because the compacted-away data might already be in the cache (those are flushed files or the result of another compaction). Those have also been recently accessed (by the compaction thread). This feature should be used very carefully.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for looking at this. My understanding is that in cases where prefetch is enabled, the new file is going to be read into the cache after compaction completes anyway. So the cache size requirements are the same when this new setting is enabled. This is why I wanted to limit the scope of the cache on write to only apply where prefetching is enabled: it simply is a way to do the cache loading more efficiently while we are writing the data out rather than having to read it back after compaction is done which I've found is very expensive when data is in S3. As far as the name goes, I struggled to come up with something intuitive - how do I explain in the name alone that this only applies when prefetching is on? I tried to convey "when prefetching, do the prefetch of compacted data on write." I'm not in love with the name and I'm open to suggestions. I didn't want to give the false impression that all compacted data is going to be cached on write. Maybe "cacheCompactedDataOnWriteIfPrefetching"? Is that too wordy? |
||
| } | ||
|
|
||
| /** | ||
| * Return true if we may find this type of block in block cache. | ||
| * <p> | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A bit confusing. Are we doing the prefetch of the new compacted file once it is written?
I don't think so. When we write the file, the caching happens at that time itself. So it is cache-on-write. Why is it called prefetch then? There is no extra fetch op happening, right?