Skip to content

Commit

Permalink
make L0 index/filter pinned memory usage predictable (#6911)
Browse files Browse the repository at this point in the history
Summary:
Memory pinned by `pin_l0_filter_and_index_blocks_in_cache` needs to be predictable based on user config. This PR makes sure
we do not pin extra memory for large files generated by intra-L0 (see #6889).
Pull Request resolved: #6911

Test Plan: unit test

Reviewed By: siying

Differential Revision: D21835818

Pulled By: ajkr

fbshipit-source-id: a11a088549d06bed8aacc2548d266e5983f0ead4
  • Loading branch information
ajkr authored and facebook-github-bot committed Jun 9, 2020
1 parent 5abda3b commit 02db03a
Show file tree
Hide file tree
Showing 20 changed files with 141 additions and 60 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
## Unreleased
### Behavior Changes
* Disable delete triggered compaction (NewCompactOnDeletionCollectorFactory) in universal compaction mode and num_levels = 1 in order to avoid a corruption bug.
* `pin_l0_filter_and_index_blocks_in_cache` no longer applies to L0 files larger than `1.5 * write_buffer_size` to give more predictable memory usage. Such L0 files may exist due to intra-L0 compaction, external file ingestion, or user dynamically changing `write_buffer_size` (note, however, that files that are already pinned will continue being pinned, even after such a dynamic change).

### Bug Fixes
* Fix consistency checking error swallowing in some cases when options.force_consistency_checks = true.
Expand Down
4 changes: 3 additions & 1 deletion db/builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,9 @@ Status BuildTable(
(internal_stats == nullptr) ? nullptr
: internal_stats->GetFileReadHist(0),
TableReaderCaller::kFlush, /*arena=*/nullptr,
/*skip_filter=*/false, level, /*smallest_compaction_key=*/nullptr,
/*skip_filter=*/false, level,
MaxFileSizeForL0MetaPin(mutable_cf_options),
/*smallest_compaction_key=*/nullptr,
/*largest_compaction_key*/ nullptr,
/*allow_unprepared_value*/ false));
s = it->status();
Expand Down
2 changes: 2 additions & 0 deletions db/compaction/compaction_job.cc
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,8 @@ Status CompactionJob::Run() {
compact_->compaction->output_level()),
TableReaderCaller::kCompactionRefill, /*arena=*/nullptr,
/*skip_filters=*/false, compact_->compaction->output_level(),
MaxFileSizeForL0MetaPin(
*compact_->compaction->mutable_cf_options()),
/*smallest_compaction_key=*/nullptr,
/*largest_compaction_key=*/nullptr,
/*allow_unprepared_value=*/false);
Expand Down
20 changes: 20 additions & 0 deletions db/db_compaction_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3184,6 +3184,15 @@ TEST_P(DBCompactionTestWithParam, IntraL0Compaction) {
options.level0_file_num_compaction_trigger = 5;
options.max_background_compactions = 2;
options.max_subcompactions = max_subcompactions_;
options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
options.write_buffer_size = 2 << 20; // 2MB

BlockBasedTableOptions table_options;
table_options.block_cache = NewLRUCache(64 << 20); // 64MB
table_options.cache_index_and_filter_blocks = true;
table_options.pin_l0_filter_and_index_blocks_in_cache = true;
options.table_factory.reset(new BlockBasedTableFactory(table_options));

DestroyAndReopen(options);

const size_t kValueSize = 1 << 20;
Expand Down Expand Up @@ -3214,6 +3223,7 @@ TEST_P(DBCompactionTestWithParam, IntraL0Compaction) {
ASSERT_OK(Put(Key(i + 1), value));
}
ASSERT_OK(Flush());
ASSERT_EQ(i + 1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
}
dbfull()->TEST_WaitForCompact();
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
Expand All @@ -3228,6 +3238,16 @@ TEST_P(DBCompactionTestWithParam, IntraL0Compaction) {
for (int i = 0; i < 2; ++i) {
ASSERT_GE(level_to_files[0][i].fd.file_size, 1 << 21);
}

// The index/filter in the file produced by intra-L0 should not be pinned.
// That means clearing unref'd entries in block cache and re-accessing the
// file produced by intra-L0 should bump the index block miss count.
uint64_t prev_index_misses =
TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS);
table_options.block_cache->EraseUnRefEntries();
ASSERT_EQ("", Get(Key(0)));
ASSERT_EQ(prev_index_misses + 1,
TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
}

TEST_P(DBCompactionTestWithParam, IntraL0CompactionDoesNotObsoleteDeletions) {
Expand Down
16 changes: 8 additions & 8 deletions db/forward_iterator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,9 @@ class ForwardLevelIterator : public InternalIterator {
prefix_extractor_, /*table_reader_ptr=*/nullptr,
/*file_read_hist=*/nullptr, TableReaderCaller::kUserIterator,
/*arena=*/nullptr, /*skip_filters=*/false, /*level=*/-1,
/*max_file_size_for_l0_meta_pin=*/0,
/*smallest_compaction_key=*/nullptr,
/*largest_compaction_key=*/nullptr,
allow_unprepared_value_);
/*largest_compaction_key=*/nullptr, allow_unprepared_value_);
file_iter_->SetPinnedItersMgr(pinned_iters_mgr_);
valid_ = false;
if (!range_del_agg.IsEmpty()) {
Expand Down Expand Up @@ -686,9 +686,9 @@ void ForwardIterator::RebuildIterators(bool refresh_sv) {
/*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr,
TableReaderCaller::kUserIterator, /*arena=*/nullptr,
/*skip_filters=*/false, /*level=*/-1,
MaxFileSizeForL0MetaPin(sv_->mutable_cf_options),
/*smallest_compaction_key=*/nullptr,
/*largest_compaction_key=*/nullptr,
allow_unprepared_value_));
/*largest_compaction_key=*/nullptr, allow_unprepared_value_));
}
BuildLevelIterators(vstorage);
current_ = nullptr;
Expand Down Expand Up @@ -764,9 +764,9 @@ void ForwardIterator::RenewIterators() {
/*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr,
TableReaderCaller::kUserIterator, /*arena=*/nullptr,
/*skip_filters=*/false, /*level=*/-1,
MaxFileSizeForL0MetaPin(svnew->mutable_cf_options),
/*smallest_compaction_key=*/nullptr,
/*largest_compaction_key=*/nullptr,
allow_unprepared_value_));
/*largest_compaction_key=*/nullptr, allow_unprepared_value_));
}

for (auto* f : l0_iters_) {
Expand Down Expand Up @@ -830,9 +830,9 @@ void ForwardIterator::ResetIncompleteIterators() {
/*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr,
TableReaderCaller::kUserIterator, /*arena=*/nullptr,
/*skip_filters=*/false, /*level=*/-1,
MaxFileSizeForL0MetaPin(sv_->mutable_cf_options),
/*smallest_compaction_key=*/nullptr,
/*largest_compaction_key=*/nullptr,
allow_unprepared_value_);
/*largest_compaction_key=*/nullptr, allow_unprepared_value_);
l0_iters_[i]->SetPinnedItersMgr(pinned_iters_mgr_);
}

Expand Down
3 changes: 2 additions & 1 deletion db/repair.cc
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,8 @@ class Repairer {
cfd->GetLatestMutableCFOptions()->prefix_extractor.get(),
/*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr,
TableReaderCaller::kRepair, /*arena=*/nullptr, /*skip_filters=*/false,
/*level=*/-1, /*smallest_compaction_key=*/nullptr,
/*level=*/-1, /*max_file_size_for_l0_meta_pin=*/0,
/*smallest_compaction_key=*/nullptr,
/*largest_compaction_key=*/nullptr,
/*allow_unprepared_value=*/false);
ParsedInternalKey parsed;
Expand Down
30 changes: 19 additions & 11 deletions db/table_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ Status TableCache::GetTableReader(
bool sequential_mode, bool record_read_stats, HistogramImpl* file_read_hist,
std::unique_ptr<TableReader>* table_reader,
const SliceTransform* prefix_extractor, bool skip_filters, int level,
bool prefetch_index_and_filter_in_cache) {
bool prefetch_index_and_filter_in_cache,
size_t max_file_size_for_l0_meta_pin) {
std::string fname =
TableFileName(ioptions_.cf_paths, fd.GetNumber(), fd.GetPathId());
std::unique_ptr<FSRandomAccessFile> file;
Expand All @@ -124,7 +125,8 @@ Status TableCache::GetTableReader(
TableReaderOptions(ioptions_, prefix_extractor, file_options,
internal_comparator, skip_filters, immortal_tables_,
false /* force_direct_prefetch */, level,
fd.largest_seqno, block_cache_tracer_),
fd.largest_seqno, block_cache_tracer_,
max_file_size_for_l0_meta_pin),
std::move(file_reader), fd.GetFileSize(), table_reader,
prefetch_index_and_filter_in_cache);
TEST_SYNC_POINT("TableCache::GetTableReader:0");
Expand All @@ -145,8 +147,8 @@ Status TableCache::FindTable(const FileOptions& file_options,
const SliceTransform* prefix_extractor,
const bool no_io, bool record_read_stats,
HistogramImpl* file_read_hist, bool skip_filters,
int level,
bool prefetch_index_and_filter_in_cache) {
int level, bool prefetch_index_and_filter_in_cache,
size_t max_file_size_for_l0_meta_pin) {
PERF_TIMER_GUARD_WITH_ENV(find_table_nanos, ioptions_.env);
Status s;
uint64_t number = fd.GetNumber();
Expand All @@ -170,7 +172,8 @@ Status TableCache::FindTable(const FileOptions& file_options,
s = GetTableReader(file_options, internal_comparator, fd,
false /* sequential mode */, record_read_stats,
file_read_hist, &table_reader, prefix_extractor,
skip_filters, level, prefetch_index_and_filter_in_cache);
skip_filters, level, prefetch_index_and_filter_in_cache,
max_file_size_for_l0_meta_pin);
if (!s.ok()) {
assert(table_reader == nullptr);
RecordTick(ioptions_.statistics, NO_FILE_ERRORS);
Expand All @@ -194,6 +197,7 @@ InternalIterator* TableCache::NewIterator(
RangeDelAggregator* range_del_agg, const SliceTransform* prefix_extractor,
TableReader** table_reader_ptr, HistogramImpl* file_read_hist,
TableReaderCaller caller, Arena* arena, bool skip_filters, int level,
size_t max_file_size_for_l0_meta_pin,
const InternalKey* smallest_compaction_key,
const InternalKey* largest_compaction_key, bool allow_unprepared_value) {
PERF_TIMER_GUARD(new_table_iterator_nanos);
Expand All @@ -211,7 +215,9 @@ InternalIterator* TableCache::NewIterator(
s = FindTable(file_options, icomparator, fd, &handle, prefix_extractor,
options.read_tier == kBlockCacheTier /* no_io */,
!for_compaction /* record_read_stats */, file_read_hist,
skip_filters, level);
skip_filters, level,
true /* prefetch_index_and_filter_in_cache */,
max_file_size_for_l0_meta_pin);
if (s.ok()) {
table_reader = GetTableReaderFromHandle(handle);
}
Expand Down Expand Up @@ -372,7 +378,7 @@ Status TableCache::Get(const ReadOptions& options,
GetContext* get_context,
const SliceTransform* prefix_extractor,
HistogramImpl* file_read_hist, bool skip_filters,
int level) {
int level, size_t max_file_size_for_l0_meta_pin) {
auto& fd = file_meta.fd;
std::string* row_cache_entry = nullptr;
bool done = false;
Expand All @@ -397,10 +403,12 @@ Status TableCache::Get(const ReadOptions& options,
Cache::Handle* handle = nullptr;
if (!done && s.ok()) {
if (t == nullptr) {
s = FindTable(
file_options_, internal_comparator, fd, &handle, prefix_extractor,
options.read_tier == kBlockCacheTier /* no_io */,
true /* record_read_stats */, file_read_hist, skip_filters, level);
s = FindTable(file_options_, internal_comparator, fd, &handle,
prefix_extractor,
options.read_tier == kBlockCacheTier /* no_io */,
true /* record_read_stats */, file_read_hist, skip_filters,
level, true /* prefetch_index_and_filter_in_cache */,
max_file_size_for_l0_meta_pin);
if (s.ok()) {
t = GetTableReaderFromHandle(handle);
}
Expand Down
11 changes: 7 additions & 4 deletions db/table_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ class TableCache {
const FileMetaData& file_meta, RangeDelAggregator* range_del_agg,
const SliceTransform* prefix_extractor, TableReader** table_reader_ptr,
HistogramImpl* file_read_hist, TableReaderCaller caller, Arena* arena,
bool skip_filters, int level, const InternalKey* smallest_compaction_key,
bool skip_filters, int level, size_t max_file_size_for_l0_meta_pin,
const InternalKey* smallest_compaction_key,
const InternalKey* largest_compaction_key, bool allow_unprepared_value);

// If a seek to internal key "k" in specified file finds an entry,
Expand All @@ -91,7 +92,7 @@ class TableCache {
GetContext* get_context,
const SliceTransform* prefix_extractor = nullptr,
HistogramImpl* file_read_hist = nullptr, bool skip_filters = false,
int level = -1);
int level = -1, size_t max_file_size_for_l0_meta_pin = 0);

// Return the range delete tombstone iterator of the file specified by
// `file_meta`.
Expand Down Expand Up @@ -135,7 +136,8 @@ class TableCache {
const bool no_io = false, bool record_read_stats = true,
HistogramImpl* file_read_hist = nullptr,
bool skip_filters = false, int level = -1,
bool prefetch_index_and_filter_in_cache = true);
bool prefetch_index_and_filter_in_cache = true,
size_t max_file_size_for_l0_meta_pin = 0);

// Get TableReader from a cache handle.
TableReader* GetTableReaderFromHandle(Cache::Handle* handle);
Expand Down Expand Up @@ -200,7 +202,8 @@ class TableCache {
std::unique_ptr<TableReader>* table_reader,
const SliceTransform* prefix_extractor = nullptr,
bool skip_filters = false, int level = -1,
bool prefetch_index_and_filter_in_cache = true);
bool prefetch_index_and_filter_in_cache = true,
size_t max_file_size_for_l0_meta_pin = 0);

// Create a key prefix for looking up the row cache. The prefix is of the
// format row_cache_id + fd_number + seq_no. Later, the user key can be
Expand Down
14 changes: 8 additions & 6 deletions db/version_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -754,7 +754,8 @@ class VersionBuilder::Rep {
Status LoadTableHandlers(InternalStats* internal_stats, int max_threads,
bool prefetch_index_and_filter_in_cache,
bool is_initial_load,
const SliceTransform* prefix_extractor) {
const SliceTransform* prefix_extractor,
size_t max_file_size_for_l0_meta_pin) {
assert(table_cache_ != nullptr);

size_t table_cache_capacity = table_cache_->get_cache()->GetCapacity();
Expand Down Expand Up @@ -823,7 +824,7 @@ class VersionBuilder::Rep {
file_meta->fd, &file_meta->table_reader_handle, prefix_extractor,
false /*no_io */, true /* record_read_stats */,
internal_stats->GetFileReadHist(level), false, level,
prefetch_index_and_filter_in_cache);
prefetch_index_and_filter_in_cache, max_file_size_for_l0_meta_pin);
if (file_meta->table_reader_handle != nullptr) {
// Load table_reader
file_meta->fd.table_reader = table_cache_->GetTableReaderFromHandle(
Expand Down Expand Up @@ -882,10 +883,11 @@ Status VersionBuilder::SaveTo(VersionStorageInfo* vstorage) {
Status VersionBuilder::LoadTableHandlers(
InternalStats* internal_stats, int max_threads,
bool prefetch_index_and_filter_in_cache, bool is_initial_load,
const SliceTransform* prefix_extractor) {
return rep_->LoadTableHandlers(internal_stats, max_threads,
prefetch_index_and_filter_in_cache,
is_initial_load, prefix_extractor);
const SliceTransform* prefix_extractor,
size_t max_file_size_for_l0_meta_pin) {
return rep_->LoadTableHandlers(
internal_stats, max_threads, prefetch_index_and_filter_in_cache,
is_initial_load, prefix_extractor, max_file_size_for_l0_meta_pin);
}

BaseReferencedVersionBuilder::BaseReferencedVersionBuilder(
Expand Down
3 changes: 2 additions & 1 deletion db/version_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ class VersionBuilder {
Status LoadTableHandlers(InternalStats* internal_stats, int max_threads,
bool prefetch_index_and_filter_in_cache,
bool is_initial_load,
const SliceTransform* prefix_extractor);
const SliceTransform* prefix_extractor,
size_t max_file_size_for_l0_meta_pin);

private:
class Rep;
Expand Down
3 changes: 2 additions & 1 deletion db/version_edit_handler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,8 @@ Status VersionEditHandler::LoadTables(ColumnFamilyData* cfd,
cfd->internal_stats(),
version_set_->db_options_->max_file_opening_threads,
prefetch_index_and_filter_in_cache, is_initial_load,
cfd->GetLatestMutableCFOptions()->prefix_extractor.get());
cfd->GetLatestMutableCFOptions()->prefix_extractor.get(),
MaxFileSizeForL0MetaPin(*cfd->GetLatestMutableCFOptions()));
if ((s.IsPathNotFound() || s.IsCorruption()) &&
no_error_if_table_files_missing_) {
s = Status::OK();
Expand Down
Loading

0 comments on commit 02db03a

Please sign in to comment.