Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

save a key comparison in block seeks #6646

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
address comments
  • Loading branch information
ajkr committed Jun 10, 2020
commit 463f2743a9b0e1da06301384e3793ea130f187ea
3 changes: 3 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@
* Generate file checksum in SstFileWriter if Options.file_checksum_gen_factory is set. The checksum and checksum function name are stored in ExternalSstFileInfo after the sst file write is finished.
* Add a value_size_soft_limit in read options which limits the cumulative value size of keys read in batches in MultiGet. Once the cumulative value size of found keys exceeds read_options.value_size_soft_limit, all the remaining keys are returned with status Abort without further finding their values. By default the value_size_soft_limit is std::numeric_limits<uint64_t>::max().

### Performance Improvements
* Eliminate redundant key comparisons during random access in block-based tables.

## 6.10 (5/2/2020)
### Bug Fixes
* Fix wrong result being read from ingested file. May happen when a key in the file happen to be prefix of another key also in the file. The issue can further cause more data corruption. The issue exists with rocksdb >= 5.0.0 since DB::IngestExternalFile() was introduced.
Expand Down
54 changes: 29 additions & 25 deletions table/block_based/block.cc
Original file line number Diff line number Diff line change
Expand Up @@ -245,14 +245,14 @@ void DataBlockIter::Seek(const Slice& target) {
return;
}
uint32_t index = 0;
bool is_index_key_result = false;
bool skip_linear_scan = false;
bool ok = BinarySeek<DecodeKey>(seek_key, 0, num_restarts_ - 1, &index,
&is_index_key_result, comparator_);
&skip_linear_scan, comparator_);

if (!ok) {
return;
}
ScanAfterBinarySeek(seek_key, index, is_index_key_result, comparator_);
FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan, comparator_);
}

// Optimized Seek for point lookup for an internal key `target`
Expand Down Expand Up @@ -386,7 +386,7 @@ void IndexBlockIter::Seek(const Slice& target) {
}
status_ = Status::OK();
uint32_t index = 0;
bool is_index_key_result = false;
bool skip_linear_scan = false;
bool ok = false;
if (prefix_index_) {
bool prefix_may_exist = true;
Expand All @@ -400,19 +400,19 @@ void IndexBlockIter::Seek(const Slice& target) {
}
// restart interval must be one when hash search is enabled so the binary
// search simply lands at the right place.
is_index_key_result = true;
skip_linear_scan = true;
} else if (value_delta_encoded_) {
ok = BinarySeek<DecodeKeyV4>(seek_key, 0, num_restarts_ - 1, &index,
&is_index_key_result, comparator_);
&skip_linear_scan, comparator_);
} else {
ok = BinarySeek<DecodeKey>(seek_key, 0, num_restarts_ - 1, &index,
&is_index_key_result, comparator_);
&skip_linear_scan, comparator_);
}

if (!ok) {
return;
}
ScanAfterBinarySeek(seek_key, index, is_index_key_result, comparator_);
FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan, comparator_);
}

void DataBlockIter::SeekForPrev(const Slice& target) {
Expand All @@ -422,14 +422,14 @@ void DataBlockIter::SeekForPrev(const Slice& target) {
return;
}
uint32_t index = 0;
bool is_index_key_result = false;
bool skip_linear_scan = false;
bool ok = BinarySeek<DecodeKey>(seek_key, 0, num_restarts_ - 1, &index,
&is_index_key_result, comparator_);
&skip_linear_scan, comparator_);

if (!ok) {
return;
}
ScanAfterBinarySeek(seek_key, index, is_index_key_result, comparator_);
FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan, comparator_);

if (!Valid()) {
SeekToLast();
Expand Down Expand Up @@ -649,13 +649,17 @@ void IndexBlockIter::DecodeCurrentValue(uint32_t shared) {
}

template <class TValue>
void BlockIter<TValue>::ScanAfterBinarySeek(const Slice& target, uint32_t index,
bool is_index_key_result,
const Comparator* comp) {
// Linear search (within restart block) for first key >= target
void BlockIter<TValue>::FindKeyAfterBinarySeek(const Slice& target,
uint32_t index,
bool skip_linear_scan,
const Comparator* comp) {
// SeekToRestartPoint() only does the lookup in the restart block. We need
// to follow it up with Next() to position the iterator at the restart key.
SeekToRestartPoint(index);
Next();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A comment here might help. It took me a little while to figure out that this Next() actually doesn't advance to the next key, but just ends up decoding the key at the restart point we seek'd to.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree, done.

if (!is_index_key_result) {

if (!skip_linear_scan) {
// Linear search (within restart block) for first key >= target
uint32_t max_offset;
if (index + 1 < num_restarts_) {
// We are in a non-last restart interval. Since `BinarySeek()` guarantees
Expand Down Expand Up @@ -687,14 +691,14 @@ void BlockIter<TValue>::ScanAfterBinarySeek(const Slice& target, uint32_t index,
// contain duplicate keys. It is guaranteed that the restart key at `*index + 1`
// is strictly greater than `target` or does not exist (this can be used to
// elide a comparison when linear scan reaches all the way to the next restart
// key). Furthermore, `*is_index_key_result` is set to indicate whether the
// `*index`th restart key is the final result so we can elide a comparison at
// the start of the linear scan.
// key). Furthermore, `*skip_linear_scan` is set to indicate whether the
// `*index`th restart key is the final result so that key does not need to be
// compared again later.
template <class TValue>
template <typename DecodeKeyFunc>
bool BlockIter<TValue>::BinarySeek(const Slice& target, uint32_t left,
uint32_t right, uint32_t* index,
bool* is_index_key_result,
bool* skip_linear_scan,
const Comparator* comp) {
assert(left <= right);
if (restarts_ == 0) {
Expand All @@ -707,7 +711,7 @@ bool BlockIter<TValue>::BinarySeek(const Slice& target, uint32_t left,
return false;
}

*is_index_key_result = false;
*skip_linear_scan = false;
while (left < right) {
uint32_t mid = (left + right + 1) / 2;
uint32_t region_offset = GetRestartPoint(mid);
Expand All @@ -730,17 +734,17 @@ bool BlockIter<TValue>::BinarySeek(const Slice& target, uint32_t left,
// after "mid" are uninteresting.
right = mid - 1;
} else {
*is_index_key_result = true;
*skip_linear_scan = true;
left = right = mid;
}
}

assert(left == right);
*index = left;
if (*index == 0) {
// Special case as we land at zero as long as restart key at index 1 is >=
// Special case as we land at zero as long as restart key at index 1 is >
// "target". We need to compare the restart key at index 0 so we can set
// `*is_index_key_result` when the 0th restart key is >= "target".
// `*skip_linear_scan` when the 0th restart key is >= "target".
//
// GetRestartPoint() is always zero for restart key zero; skip the restart
// block access.
Expand All @@ -754,7 +758,7 @@ bool BlockIter<TValue>::BinarySeek(const Slice& target, uint32_t left,
Slice first_key(key_ptr, non_shared);
raw_key_.SetKey(first_key, false /* copy */);
int cmp = comp->Compare(applied_key_.UpdateAndGetKey(), target);
*is_index_key_result = cmp >= 0;
*skip_linear_scan = cmp >= 0;
}
return true;
}
Expand Down
4 changes: 2 additions & 2 deletions table/block_based/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -411,8 +411,8 @@ class BlockIter : public InternalIteratorBase<TValue> {
uint32_t* index, bool* is_index_key_result,
const Comparator* comp);

void ScanAfterBinarySeek(const Slice& target, uint32_t index,
bool is_index_key_result, const Comparator* comp);
void FindKeyAfterBinarySeek(const Slice& target, uint32_t index,
bool is_index_key_result, const Comparator* comp);
};

class DataBlockIter final : public BlockIter<Slice> {
Expand Down