Skip to content

Commit

Permalink
save key comparisons in BlockIter::BinarySeek (facebook#7068)
Browse files Browse the repository at this point in the history
Summary:
This is a followup to facebook#6646. In that PR, for simplicity I just appended a comparison against the 0th restart key in case `BinarySeek()`'s binary search landed at index 0. As a result there were `2/(N+1) + log_2(N)` key comparisons. This PR does it differently. Now we expand the binary search range by one so it also covers the case where target is at or before the restart key at index 0. As a result, it involves `log_2(N+1)` key comparisons.

Pull Request resolved: facebook#7068

Test Plan:
ran readrandom with mostly default settings and counted key comparisons
using `PerfContext`.

before: `user_key_comparison_count = 28881965`
after: `user_key_comparison_count = 27823245`

setup command:

```
$ TEST_TMPDIR=/dev/shm/dbbench ./db_bench -benchmarks=fillrandom,compact -write_buffer_size=1048576 -target_file_size_base=1048576 -max_bytes_for_level_base=4194304 -max_background_jobs=12 -level_compaction_dynamic_level_bytes=true -num=10000000
```

benchmark command:

```
$ TEST_TMPDIR=/dev/shm/dbbench/ ./db_bench -use_existing_db=true -benchmarks=readrandom -disable_auto_compactions=true -num=10000000 -compression_type=none -reads=1000000 -perf_level=3
```

Reviewed By: anand1976

Differential Revision: D22357032

Pulled By: ajkr

fbshipit-source-id: 8b01e9c1c2a4e9d02fc9dfe16c1cc0327f8bdf24
  • Loading branch information
ajkr authored and wenh committed Jul 9, 2020
1 parent f4b53e3 commit 5aac89e
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 36 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

### Performance Improvements
* Eliminate key copies for internal comparisons while accessing ingested block-based tables.
* Reduce key comparisons during random access in all block-based tables.

## 6.11 (6/12/2020)
### Bug Fixes
Expand Down
57 changes: 23 additions & 34 deletions table/block_based/block.cc
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,7 @@ void DataBlockIter::SeekImpl(const Slice& target) {
}
uint32_t index = 0;
bool skip_linear_scan = false;
bool ok = BinarySeek<DecodeKey>(seek_key, 0, num_restarts_ - 1, &index,
&skip_linear_scan);
bool ok = BinarySeek<DecodeKey>(seek_key, &index, &skip_linear_scan);

if (!ok) {
return;
Expand Down Expand Up @@ -399,11 +398,9 @@ void IndexBlockIter::SeekImpl(const Slice& target) {
// search simply lands at the right place.
skip_linear_scan = true;
} else if (value_delta_encoded_) {
ok = BinarySeek<DecodeKeyV4>(seek_key, 0, num_restarts_ - 1, &index,
&skip_linear_scan);
ok = BinarySeek<DecodeKeyV4>(seek_key, &index, &skip_linear_scan);
} else {
ok = BinarySeek<DecodeKey>(seek_key, 0, num_restarts_ - 1, &index,
&skip_linear_scan);
ok = BinarySeek<DecodeKey>(seek_key, &index, &skip_linear_scan);
}

if (!ok) {
Expand All @@ -420,8 +417,7 @@ void DataBlockIter::SeekForPrevImpl(const Slice& target) {
}
uint32_t index = 0;
bool skip_linear_scan = false;
bool ok = BinarySeek<DecodeKey>(seek_key, 0, num_restarts_ - 1, &index,
&skip_linear_scan);
bool ok = BinarySeek<DecodeKey>(seek_key, &index, &skip_linear_scan);

if (!ok) {
return;
Expand Down Expand Up @@ -688,10 +684,8 @@ void BlockIter<TValue>::FindKeyAfterBinarySeek(const Slice& target,
// compared again later.
template <class TValue>
template <typename DecodeKeyFunc>
bool BlockIter<TValue>::BinarySeek(const Slice& target, uint32_t left,
uint32_t right, uint32_t* index,
bool BlockIter<TValue>::BinarySeek(const Slice& target, uint32_t* index,
bool* skip_linear_scan) {
assert(left <= right);
if (restarts_ == 0) {
// SST files dedicated to range tombstones are written with index blocks
// that have no keys while also having `num_restarts_ == 1`. This would
Expand All @@ -703,9 +697,17 @@ bool BlockIter<TValue>::BinarySeek(const Slice& target, uint32_t left,
}

*skip_linear_scan = false;
while (left < right) {
uint32_t mid = (left + right + 1) / 2;
uint32_t region_offset = GetRestartPoint(mid);
// Loop invariants:
// - Restart key at index `left` is less than or equal to the target key. The
// sentinel index `-1` is considered to have a key that is less than all
// keys.
// - Any restart keys after index `right` are strictly greater than the target
// key.
int64_t left = -1, right = num_restarts_ - 1;
while (left != right) {
// The `mid` is computed by rounding up so it lands in (`left`, `right`].
int64_t mid = left + (right - left + 1) / 2;
uint32_t region_offset = GetRestartPoint(static_cast<uint32_t>(mid));
uint32_t shared, non_shared;
const char* key_ptr = DecodeKeyFunc()(
data_ + region_offset, data_ + restarts_, &shared, &non_shared);
Expand All @@ -730,26 +732,13 @@ bool BlockIter<TValue>::BinarySeek(const Slice& target, uint32_t left,
}
}

assert(left == right);
*index = left;
if (*index == 0) {
// Special case as we land at zero as long as restart key at index 1 is >
// "target". We need to compare the restart key at index 0 so we can set
// `*skip_linear_scan` when the 0th restart key is >= "target".
//
// GetRestartPoint() is always zero for restart key zero; skip the restart
// block access.
uint32_t shared, non_shared;
const char* key_ptr =
DecodeKeyFunc()(data_, data_ + restarts_, &shared, &non_shared);
if (key_ptr == nullptr || (shared != 0)) {
CorruptionError();
return false;
}
Slice first_key(key_ptr, non_shared);
raw_key_.SetKey(first_key, false /* copy */);
int cmp = CompareCurrentKey(target);
*skip_linear_scan = cmp >= 0;
if (left == -1) {
// All keys in the block were strictly greater than `target`. So the very
// first key in the block is the final seek result.
*skip_linear_scan = true;
*index = 0;
} else {
*index = static_cast<uint32_t>(left);
}
return true;
}
Expand Down
4 changes: 2 additions & 2 deletions table/block_based/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -461,8 +461,8 @@ class BlockIter : public InternalIteratorBase<TValue> {

protected:
template <typename DecodeKeyFunc>
inline bool BinarySeek(const Slice& target, uint32_t left, uint32_t right,
uint32_t* index, bool* is_index_key_result);
inline bool BinarySeek(const Slice& target, uint32_t* index,
bool* is_index_key_result);

void FindKeyAfterBinarySeek(const Slice& target, uint32_t index,
bool is_index_key_result);
Expand Down

0 comments on commit 5aac89e

Please sign in to comment.