Skip to content

Commit

Permalink
Do not set bg error for compaction in retryable IO Error case (facebo…
Browse files Browse the repository at this point in the history
…ok#7899)

Summary:
When retryable IO error occurs during compaction, it is mapped to soft error and set the BG error. However, auto resume is not called to clean the soft error since compaction will reschedule by itself. In this change, When retryable IO error occurs during compaction, BG error is not set. User will be informed the error via EventHelper.

Pull Request resolved: facebook#7899

Test Plan: tested with error_handler_fs_test

Reviewed By: anand1976

Differential Revision: D26094097

Pulled By: zhichao-cao

fbshipit-source-id: c53424f11d237405592cd762f43cbbdf8da8234f
  • Loading branch information
zhichao-cao authored and facebook-github-bot committed Jan 28, 2021
1 parent 19210d5 commit 95013df
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 12 deletions.
2 changes: 2 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Rocksdb Change Log
## Unreleased
### Behavior Changes
* When retryable IO error occurs during compaction, it is mapped to soft error and set the BG error. However, auto resume is not called to clean the soft error since compaction will reschedule by itself. In this change, When retryable IO error occurs during compaction, BG error is not set. User will be informed the error via EventHelper.

## 6.17.0 (01/15/2021)
### Behavior Changes
Expand Down
3 changes: 3 additions & 0 deletions db/db_impl/db_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -951,6 +951,9 @@ class DBImpl : public DB {
// is only for the special test of CancelledCompactions
Status TEST_WaitForCompact(bool waitUnscheduled = false);

// Get the background error status
Status TEST_GetBGError();

// Return the maximum overlapping data (in bytes) at next level for any
// file at a level >= 1.
int64_t TEST_MaxNextLevelOverlappingBytes(
Expand Down
5 changes: 5 additions & 0 deletions db/db_impl/db_impl_debug.cc
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,11 @@ Status DBImpl::TEST_WaitForCompact(bool wait_unscheduled) {
return error_handler_.GetBGError();
}

Status DBImpl::TEST_GetBGError() {
InstrumentedMutexLock l(&mutex_);
return error_handler_.GetBGError();
}

void DBImpl::TEST_LockMutex() { mutex_.Lock(); }

void DBImpl::TEST_UnlockMutex() { mutex_.Unlock(); }
Expand Down
10 changes: 5 additions & 5 deletions db/error_handler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -417,11 +417,11 @@ const Status& ErrorHandler::SetBGError(const IOStatus& bg_io_err,
&new_bg_io_err, db_mutex_,
&auto_recovery);
if (BackgroundErrorReason::kCompaction == reason) {
Status bg_err(new_bg_io_err, Status::Severity::kSoftError);
if (bg_err.severity() > bg_error_.severity()) {
bg_error_ = bg_err;
}
recover_context_ = context;
// We map the retryable IO error during compaction to soft error. Since
// compaction can reschedule by itself. We will not set the BG error in
// this case
// TODO: a better way to set or clean the retryable IO error which
// happens during compaction SST file write.
return bg_error_;
} else if (BackgroundErrorReason::kFlushNoWAL == reason ||
BackgroundErrorReason::kManifestWriteNoWAL == reason) {
Expand Down
18 changes: 11 additions & 7 deletions db/error_handler_fs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -892,15 +892,17 @@ TEST_F(DBErrorHandlingFSTest, CompactionWriteRetryableError) {
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"CompactionJob::OpenCompactionOutputFile",
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"DBImpl::BackgroundCompaction:Finish",
[&](void*) { CancelAllBackgroundWork(dbfull()); });
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

ASSERT_OK(Put(Key(1), "val"));
s = Flush();
ASSERT_OK(s);

s = dbfull()->TEST_WaitForCompact();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);

s = dbfull()->TEST_GetBGError();
ASSERT_OK(s);
fault_fs_->SetFilesystemActive(true);
SyncPoint::GetInstance()->ClearAllCallBacks();
SyncPoint::GetInstance()->DisableProcessing();
Expand Down Expand Up @@ -940,14 +942,17 @@ TEST_F(DBErrorHandlingFSTest, CompactionWriteFileScopeError) {
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"CompactionJob::OpenCompactionOutputFile",
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"DBImpl::BackgroundCompaction:Finish",
[&](void*) { CancelAllBackgroundWork(dbfull()); });
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

ASSERT_OK(Put(Key(1), "val"));
s = Flush();
ASSERT_OK(s);

s = dbfull()->TEST_WaitForCompact();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
s = dbfull()->TEST_GetBGError();
ASSERT_OK(s);

fault_fs_->SetFilesystemActive(true);
SyncPoint::GetInstance()->ClearAllCallBacks();
Expand Down Expand Up @@ -2198,8 +2203,7 @@ TEST_F(DBErrorHandlingFSTest, CompactionWriteRetryableErrorAutoRecover) {
ASSERT_OK(s);

s = dbfull()->TEST_WaitForCompact();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);

ASSERT_OK(s);
TEST_SYNC_POINT("CompactionWriteRetryableErrorAutoRecover0");
SyncPoint::GetInstance()->ClearAllCallBacks();
SyncPoint::GetInstance()->DisableProcessing();
Expand Down

0 comments on commit 95013df

Please sign in to comment.