Skip to content

Commit

Permalink
Ignore kBottommostFiles compaction logic when allow_ingest_behind (#1…
Browse files Browse the repository at this point in the history
…0767)

Summary:
fix for #10752 where RocksDB could be in an infinite compaction loop (with compaction reason kBottommostFiles)  if allow_ingest_behind is enabled and the bottommost level is unfilled.

Pull Request resolved: #10767

Test Plan: Added a unit test to reproduce the compaction loop.

Reviewed By: ajkr

Differential Revision: D40031861

Pulled By: ajkr

fbshipit-source-id: 71c4b02931fbe507a847632905404c9b8fa8c96b
  • Loading branch information
cbi42 authored and facebook-github-bot committed Oct 5, 2022
1 parent 00d697b commit eca47fb
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 14 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* Fixed a bug in iterator refresh which could segfault for DeleteRange users (#10739).
* Fixed a bug causing manual flush with `flush_opts.wait=false` to stall when database has stopped all writes (#10001).
* Fixed a bug in iterator refresh that was not freeing up SuperVersion, which could cause excessive resource pinniung (#10770).
* Fixed a bug where RocksDB could be doing compaction endlessly when allow_ingest_behind is true and the bottommost level is not filled (#10767).

### Performance Improvements
* Try to align the compaction output file boundaries to the next level ones, which can reduce more than 10% compaction load for the default level compaction. The feature is enabled by default, to disable, set `AdvancedColumnFamilyOptions.level_compaction_dynamic_file_size` to false. As a side effect, it can create SSTs larger than the target_file_size (capped at 2x target_file_size) or smaller files.
Expand Down
36 changes: 36 additions & 0 deletions db/db_compaction_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8194,6 +8194,42 @@ TEST_F(DBCompactionTest, BottomPriCompactionCountsTowardConcurrencyLimit) {
compact_range_thread.join();
}

TEST_F(DBCompactionTest, BottommostFileCompactionAllowIngestBehind) {
// allow_ingest_behind prevents seqnum zeroing, and could cause
// compaction loop with reason kBottommostFiles.
Options options = CurrentOptions();
options.env = env_;
options.compaction_style = kCompactionStyleLevel;
options.allow_ingest_behind = true;
options.comparator = BytewiseComparator();
DestroyAndReopen(options);

WriteOptions write_opts;
ASSERT_OK(db_->Put(write_opts, "infinite", "compaction loop"));
ASSERT_OK(db_->Put(write_opts, "infinite", "loop"));

ASSERT_OK(Flush());
MoveFilesToLevel(1);
ASSERT_OK(db_->Put(write_opts, "bumpseqnum", ""));
ASSERT_OK(Flush());
auto snapshot = db_->GetSnapshot();
// Bump up oldest_snapshot_seqnum_ in VersionStorageInfo.
db_->ReleaseSnapshot(snapshot);
bool compacted = false;
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"LevelCompactionPicker::PickCompaction:Return", [&](void* /* arg */) {
// There should not be a compaction.
compacted = true;
});
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
// Wait for compaction to be scheduled.
env_->SleepForMicroseconds(2000000);
ASSERT_FALSE(compacted);
// The following assert can be used to check for compaction loop:
// it used to wait forever before the fix.
// ASSERT_OK(dbfull()->TEST_WaitForCompact(true /* wait_unscheduled */));
}

#endif // !defined(ROCKSDB_LITE)

} // namespace ROCKSDB_NAMESPACE
Expand Down
21 changes: 12 additions & 9 deletions db/db_impl/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3674,14 +3674,16 @@ void DBImpl::ReleaseSnapshot(const Snapshot* s) {
if (oldest_snapshot > bottommost_files_mark_threshold_) {
CfdList cf_scheduled;
for (auto* cfd : *versions_->GetColumnFamilySet()) {
cfd->current()->storage_info()->UpdateOldestSnapshot(oldest_snapshot);
if (!cfd->current()
->storage_info()
->BottommostFilesMarkedForCompaction()
.empty()) {
SchedulePendingCompaction(cfd);
MaybeScheduleFlushOrCompaction();
cf_scheduled.push_back(cfd);
if (!cfd->ioptions()->allow_ingest_behind) {
cfd->current()->storage_info()->UpdateOldestSnapshot(oldest_snapshot);
if (!cfd->current()
->storage_info()
->BottommostFilesMarkedForCompaction()
.empty()) {
SchedulePendingCompaction(cfd);
MaybeScheduleFlushOrCompaction();
cf_scheduled.push_back(cfd);
}
}
}

Expand All @@ -3690,7 +3692,8 @@ void DBImpl::ReleaseSnapshot(const Snapshot* s) {
// mutex might be unlocked during the loop, making the result inaccurate.
SequenceNumber new_bottommost_files_mark_threshold = kMaxSequenceNumber;
for (auto* cfd : *versions_->GetColumnFamilySet()) {
if (CfdListContains(cf_scheduled, cfd)) {
if (CfdListContains(cf_scheduled, cfd) ||
cfd->ioptions()->allow_ingest_behind) {
continue;
}
new_bottommost_files_mark_threshold = std::min(
Expand Down
8 changes: 5 additions & 3 deletions db/db_impl/db_impl_compaction_flush.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3772,9 +3772,11 @@ void DBImpl::InstallSuperVersionAndScheduleWork(
// triggered soon anyway.
bottommost_files_mark_threshold_ = kMaxSequenceNumber;
for (auto* my_cfd : *versions_->GetColumnFamilySet()) {
bottommost_files_mark_threshold_ = std::min(
bottommost_files_mark_threshold_,
my_cfd->current()->storage_info()->bottommost_files_mark_threshold());
if (!my_cfd->ioptions()->allow_ingest_behind) {
bottommost_files_mark_threshold_ = std::min(
bottommost_files_mark_threshold_,
my_cfd->current()->storage_info()->bottommost_files_mark_threshold());
}
}

// Whenever we install new SuperVersion, we might need to issue new flushes or
Expand Down
8 changes: 6 additions & 2 deletions db/version_set.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2915,7 +2915,9 @@ void VersionStorageInfo::PrepareForVersionAppend(
GenerateFileIndexer();
GenerateLevelFilesBrief();
GenerateLevel0NonOverlapping();
GenerateBottommostFiles();
if (!immutable_options.allow_ingest_behind) {
GenerateBottommostFiles();
}
GenerateFileLocationIndex();
}

Expand Down Expand Up @@ -3355,7 +3357,9 @@ void VersionStorageInfo::ComputeCompactionScore(
}
}
ComputeFilesMarkedForCompaction();
ComputeBottommostFilesMarkedForCompaction();
if (!immutable_options.allow_ingest_behind) {
ComputeBottommostFilesMarkedForCompaction();
}
if (mutable_cf_options.ttl > 0) {
ComputeExpiredTtlFiles(immutable_options, mutable_cf_options.ttl);
}
Expand Down

0 comments on commit eca47fb

Please sign in to comment.