-
Notifications
You must be signed in to change notification settings - Fork 3.4k
HBASE-26122: Implement an optional maximum size for Gets, after which a partial result is returned #3532
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
saintstack
merged 3 commits into
apache:branch-2
from
bbeaudreault:max_get_size_upstream
Aug 11, 2021
Merged
HBASE-26122: Implement an optional maximum size for Gets, after which a partial result is returned #3532
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -146,6 +146,7 @@ | |
import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry; | ||
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; | ||
import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker; | ||
import org.apache.hadoop.hbase.regionserver.ScannerContext.LimitScope; | ||
import org.apache.hadoop.hbase.regionserver.throttle.CompactionThroughputControllerFactory; | ||
import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController; | ||
import org.apache.hadoop.hbase.regionserver.throttle.StoreHotnessProtector; | ||
|
@@ -3864,8 +3865,7 @@ public void prepareMiniBatchOperations(MiniBatchOperationInProgress<Mutation> mi | |
Result result; | ||
if (returnResults) { | ||
// convert duplicate increment/append to get | ||
List<Cell> results = region.get(toGet(mutation), false, nonceGroup, nonce); | ||
result = Result.create(results); | ||
result = region.get(toGet(mutation), false, nonceGroup, nonce); | ||
bbeaudreault marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} else { | ||
result = Result.EMPTY_RESULT; | ||
} | ||
|
@@ -7497,9 +7497,7 @@ public static boolean rowIsInRange(RegionInfo info, final byte [] row, final int | |
@Override | ||
public Result get(final Get get) throws IOException { | ||
prepareGet(get); | ||
List<Cell> results = get(get, true); | ||
boolean stale = this.getRegionInfo().getReplicaId() != 0; | ||
return Result.create(results, get.isCheckExistenceOnly() ? !results.isEmpty() : null, stale); | ||
return get(get, true, HConstants.NO_NONCE, HConstants.NO_NONCE); | ||
} | ||
|
||
void prepareGet(final Get get) throws IOException { | ||
|
@@ -7518,11 +7516,31 @@ void prepareGet(final Get get) throws IOException { | |
|
||
@Override | ||
public List<Cell> get(Get get, boolean withCoprocessor) throws IOException { | ||
return get(get, withCoprocessor, HConstants.NO_NONCE, HConstants.NO_NONCE); | ||
return getInternal(get, null, withCoprocessor, HConstants.NO_NONCE, HConstants.NO_NONCE); | ||
} | ||
|
||
private List<Cell> get(Get get, boolean withCoprocessor, long nonceGroup, long nonce) | ||
throws IOException { | ||
private Result get(Get get, boolean withCoprocessor, long nonceGroup, long nonce) | ||
throws IOException { | ||
ScannerContext scannerContext = get.getMaxResultSize() > 0 | ||
? ScannerContext.newBuilder() | ||
.setSizeLimit(LimitScope.BETWEEN_CELLS, get.getMaxResultSize(), get.getMaxResultSize()) | ||
.build() | ||
: null; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks. |
||
|
||
List<Cell> result = getInternal(get, scannerContext, withCoprocessor, nonceGroup, nonce); | ||
boolean stale = this.getRegionInfo().getReplicaId() != 0; | ||
boolean mayHaveMoreCellsInRow = | ||
scannerContext != null && scannerContext.mayHaveMoreCellsInRow(); | ||
|
||
return Result.create( | ||
result, | ||
get.isCheckExistenceOnly() ? !result.isEmpty() : null, | ||
stale, | ||
mayHaveMoreCellsInRow); | ||
} | ||
|
||
private List<Cell> getInternal(Get get, ScannerContext scannerContext, boolean withCoprocessor, | ||
long nonceGroup, long nonce) throws IOException { | ||
List<Cell> results = new ArrayList<>(); | ||
long before = EnvironmentEdgeManager.currentTime(); | ||
|
||
|
@@ -7539,7 +7557,7 @@ private List<Cell> get(Get get, boolean withCoprocessor, long nonceGroup, long n | |
} | ||
try (RegionScanner scanner = getScanner(scan, null, nonceGroup, nonce)) { | ||
List<Cell> tmp = new ArrayList<>(); | ||
scanner.next(tmp); | ||
scanner.next(tmp, scannerContext); | ||
// Copy EC to heap, then close the scanner. | ||
// This can be an EXPENSIVE call. It may make an extra copy from offheap to onheap buffers. | ||
// See more details in HBASE-26036. | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What do you do when mayHaveMoreCellsInRow is true, @bbeaudreault? How do you use this boolean in prod (if you don't mind me asking)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At HubSpot we have a wrapper implementation of Table which all downstream users go through. This wrapper table enforces that `setMaxResultSize` is set to a standard value that we've deemed safe. If a result comes back and `mayHaveMoreCellsInRow` is true, we throw an exception. If a team gets such an exception they can request a temporary allowance which disables the check. In the meantime they are expected to add a filter to paginate so they don't hit the max limit.
This is a little draconian, but we used to have lots of OOM issues due to large gets/puts/scans. Another possible solution is to iterate with PageFilter, like I did in `testGetPartialResults`. We planned to do something like that eventually, but in the end we had rolled this out in such a way that the number of exceptions was so small that we never did the work.
Would you be open to an automatic stitching in the future, like we do with Scans? I can't do that now, but it might be a reasonable follow-up JIRA.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In terms of that last sentence, maybe it's better to not support stitching for Gets. Instead, people should rewrite these large Gets as Scans or add filters like above. Stitching obviously increases the latency, and that could be very misleading for Gets. Multigets would be even worse (and harder to implement).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I want a Cell Streaming API.