Remove the return value of SetBGError() (facebook#12792)
Summary:
The return value of `ErrorHandler::SetBGError(error)` is not well-defined: it can be `bg_error_` (regardless of whether `bg_error_` was actually set to the input error), an OK status, or [`recovery_error_`](https://github.com/facebook/rocksdb/blob/3ee4d5a11a882056b341a9a1694a71371a39f664/db/error_handler.cc#L669) from `StartRecoverFromRetryableBGIOError()`. The `recovery_error_` returned may itself be an OK status.

Only a few places use the return value of `SetBGError()`, and they don't need to. Using the return value may even be wrong: for example, in https://github.com/facebook/rocksdb/blob/3ee4d5a11a882056b341a9a1694a71371a39f664/db/db_impl/db_impl_write.cc#L2365 a non-OK `s` could be overwritten to OK. This PR changes `SetBGError()` to return void and cleans up the relevant code.
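To illustrate the hazard, here is a minimal, hypothetical sketch (a toy `Status` and a stand-in `SetBGError()`, not the real RocksDB types): when the returned status happens to be OK, assigning it back to the caller's `s` silently discards the original failure.

```cpp
// Hypothetical stand-ins for illustration only; not the real RocksDB classes.
#include <cassert>

class Status {
 public:
  static Status OK() { return Status(true); }
  static Status IOError() { return Status(false); }
  bool ok() const { return ok_; }

 private:
  explicit Status(bool ok) : ok_(ok) {}
  bool ok_;
};

// Stand-in for ErrorHandler::SetBGError(): before this PR it could return an
// OK status (e.g. recovery_error_ once recovery was scheduled) even though
// the input background error was not OK.
Status SetBGError(const Status& /*bg_err*/) { return Status::OK(); }

int main() {
  Status s = Status::IOError();

  // The hazard this PR removes: assigning the return value back to `s` can
  // replace a non-OK status with OK, so the caller stops reporting the
  // original failure.
  s = SetBGError(s);
  assert(s.ok());  // the original IO error has been silently lost

  // With a void SetBGError(), the caller keeps its own status instead:
  //   error_handler.SetBGError(s, reason);  // `s` stays non-OK
  return 0;
}
```

With a `void` return, call sites simply keep their own status, which is what the diff below does at every caller.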

Pull Request resolved: facebook#12792

Test Plan: existing unit tests; also went over all the places where the return value of `SetBGError()` is used.

Reviewed By: hx235

Differential Revision: D58904898

Pulled By: cbi42

fbshipit-source-id: d58a20ba5a40e3f35367c6034a32c755088c3653
cbi42 authored and facebook-github-bot committed Jun 27, 2024
1 parent 0d93c8a commit a31fe52
Showing 5 changed files with 45 additions and 51 deletions.
12 changes: 6 additions & 6 deletions db/db_impl/db_impl.cc
@@ -391,8 +391,8 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) {
     if (!s.ok()) {
       io_s = versions_->io_status();
       if (!io_s.ok()) {
-        s = error_handler_.SetBGError(io_s,
-                                      BackgroundErrorReason::kManifestWrite);
+        error_handler_.SetBGError(io_s,
+                                  BackgroundErrorReason::kManifestWrite);
       }
     }
   }
@@ -916,8 +916,8 @@ Status DBImpl::RegisterRecordSeqnoTimeWorker(const ReadOptions& read_options,
         read_options, write_options, &edit, &mutex_,
         directories_.GetDbDir());
     if (!s.ok() && versions_->io_status().IsIOError()) {
-      s = error_handler_.SetBGError(versions_->io_status(),
-                                    BackgroundErrorReason::kManifestWrite);
+      error_handler_.SetBGError(versions_->io_status(),
+                                BackgroundErrorReason::kManifestWrite);
     }
   }

@@ -1724,8 +1724,8 @@ Status DBImpl::ApplyWALToManifest(const ReadOptions& read_options,
       read_options, write_options, synced_wals, &mutex_,
       directories_.GetDbDir());
   if (!status.ok() && versions_->io_status().IsIOError()) {
-    status = error_handler_.SetBGError(versions_->io_status(),
-                                       BackgroundErrorReason::kManifestWrite);
+    error_handler_.SetBGError(versions_->io_status(),
+                              BackgroundErrorReason::kManifestWrite);
   }
   return status;
 }
7 changes: 3 additions & 4 deletions db/db_impl/db_impl_write.cc
@@ -1204,8 +1204,7 @@ void DBImpl::MemTableInsertStatusCheck(const Status& status) {
     mutex_.Lock();
     assert(!error_handler_.IsBGWorkStopped());
-    // Maybe change the return status to void?
-    error_handler_.SetBGError(status, BackgroundErrorReason::kMemTable)
-        .PermitUncheckedError();
+    error_handler_.SetBGError(status, BackgroundErrorReason::kMemTable);
     mutex_.Unlock();
   }
 }
@@ -2363,8 +2362,8 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) {
         read_options, write_options, &wal_deletion, &mutex_,
         directories_.GetDbDir());
     if (!s.ok() && versions_->io_status().IsIOError()) {
-      s = error_handler_.SetBGError(versions_->io_status(),
-                                    BackgroundErrorReason::kManifestWrite);
+      error_handler_.SetBGError(versions_->io_status(),
+                                BackgroundErrorReason::kManifestWrite);
     }
     if (!s.ok()) {
       return s;
62 changes: 27 additions & 35 deletions db/error_handler.cc
@@ -250,8 +250,6 @@ void ErrorHandler::CancelErrorRecovery() {
   EndAutoRecovery();
 }

-STATIC_AVOID_DESTRUCTION(const Status, kOkStatus){Status::OK()};
-
 // This is the main function for looking at an error during a background
 // operation and deciding the severity, and error recovery strategy. The high
 // level algorithm is as follows -
@@ -270,11 +268,11 @@ STATIC_AVOID_DESTRUCTION(const Status, kOkStatus){Status::OK()};
 // This can also get called as part of a recovery operation. In that case, we
 // also track the error separately in recovery_error_ so we can tell in the
 // end whether recovery succeeded or not
-const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err,
-                                              BackgroundErrorReason reason) {
+void ErrorHandler::HandleKnownErrors(const Status& bg_err,
+                                     BackgroundErrorReason reason) {
   db_mutex_->AssertHeld();
   if (bg_err.ok()) {
-    return kOkStatus;
+    return;
   }

   ROCKS_LOG_INFO(db_options_.info_log,
@@ -339,7 +337,7 @@ const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err,
     } else {
       // This error is less severe than previously encountered error. Don't
       // take any further action
-      return bg_error_;
+      return;
     }
   }

@@ -356,7 +354,6 @@ const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err,
   if (bg_error_.severity() >= Status::Severity::kHardError) {
     is_db_stopped_.store(true, std::memory_order_release);
   }
-  return bg_error_;
 }

 // This is the main function for looking at IO related error during the
@@ -383,14 +380,14 @@ const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err,
 // 3) for other cases, HandleKnownErrors(const Status& bg_err,
 // BackgroundErrorReason reason) will be called to handle other error cases
 // such as delegating to SstFileManager to handle no space error.
-const Status& ErrorHandler::SetBGError(const Status& bg_status,
-                                       BackgroundErrorReason reason) {
+void ErrorHandler::SetBGError(const Status& bg_status,
+                              BackgroundErrorReason reason) {
   db_mutex_->AssertHeld();
   Status tmp_status = bg_status;
   IOStatus bg_io_err = status_to_io_status(std::move(tmp_status));

   if (bg_io_err.ok()) {
-    return kOkStatus;
+    return;
   }
   ROCKS_LOG_WARN(db_options_.info_log, "Background IO error %s",
                  bg_io_err.ToString().c_str());
@@ -413,11 +410,11 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status,
     EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
                                           &bg_err, db_mutex_, &auto_recovery);
     recover_context_ = context;
-    return bg_error_;
-  } else if (bg_io_err.subcode() != IOStatus::SubCode::kNoSpace &&
-             (bg_io_err.GetScope() ==
-                  IOStatus::IOErrorScope::kIOErrorScopeFile ||
-              bg_io_err.GetRetryable())) {
+    return;
+  }
+  if (bg_io_err.subcode() != IOStatus::SubCode::kNoSpace &&
+      (bg_io_err.GetScope() == IOStatus::IOErrorScope::kIOErrorScopeFile ||
+       bg_io_err.GetRetryable())) {
     // Second, check if the error is a retryable IO error (file scope IO error
     // is also treated as retryable IO error in RocksDB write path). if it is
     // retryable error and its severity is higher than bg_error_, overwrite the
@@ -447,7 +444,7 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status,
           "ErrorHandler: Compaction will schedule by itself to resume\n");
       // Not used in this code path.
       new_bg_io_err.PermitUncheckedError();
-      return bg_error_;
+      return;
     }

     Status::Severity severity;
@@ -469,10 +466,11 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status,
     Status bg_err(new_bg_io_err, severity);
     CheckAndSetRecoveryAndBGError(bg_err);
     recover_context_ = context;
-    return StartRecoverFromRetryableBGIOError(bg_io_err);
-  } else {
-    return HandleKnownErrors(new_bg_io_err, reason);
+    StartRecoverFromRetryableBGIOError(bg_io_err);
+    return;
   }
+
+  HandleKnownErrors(new_bg_io_err, reason);
 }

 void ErrorHandler::AddFilesToQuarantine(
@@ -620,23 +618,23 @@ Status ErrorHandler::RecoverFromBGError(bool is_manual) {
   return s;
 }

-const Status& ErrorHandler::StartRecoverFromRetryableBGIOError(
+void ErrorHandler::StartRecoverFromRetryableBGIOError(
     const IOStatus& io_error) {
   db_mutex_->AssertHeld();
-  if (bg_error_.ok()) {
-    return bg_error_;
-  } else if (io_error.ok()) {
-    return kOkStatus;
-  } else if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) {
-    // Auto resume BG error is not enabled, directly return bg_error_.
-    return bg_error_;
-  } else if (end_recovery_) {
+  if (bg_error_.ok() || io_error.ok()) {
+    return;
+  }
+  if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) {
+    // Auto resume BG error is not enabled
+    return;
+  }
+  if (end_recovery_) {
     // Can temporarily release db mutex
     EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, bg_error_,
                                            Status::ShutdownInProgress(),
                                            db_mutex_);
     db_mutex_->AssertHeld();
-    return bg_error_;
+    return;
   }
   RecordStats({ERROR_HANDLER_AUTORESUME_COUNT}, {} /* int_histograms */);
   ROCKS_LOG_INFO(
@@ -664,12 +662,6 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError(

   recovery_thread_.reset(
       new port::Thread(&ErrorHandler::RecoverFromRetryableBGIOError, this));
-
-  if (recovery_error_.ok()) {
-    return recovery_error_;
-  } else {
-    return bg_error_;
-  }
 }

 // Automatic recover from Retryable BG IO error. Must be called after db
7 changes: 3 additions & 4 deletions db/error_handler.h
@@ -56,7 +56,7 @@ class ErrorHandler {
   Status::Severity GetErrorSeverity(BackgroundErrorReason reason,
                                     Status::Code code, Status::SubCode subcode);

-  const Status& SetBGError(const Status& bg_err, BackgroundErrorReason reason);
+  void SetBGError(const Status& bg_err, BackgroundErrorReason reason);

   Status GetBGError() const { return bg_error_; }

@@ -135,11 +135,10 @@ class ErrorHandler {
   // unsorted.
   autovector<uint64_t> files_to_quarantine_;

-  const Status& HandleKnownErrors(const Status& bg_err,
-                                  BackgroundErrorReason reason);
+  void HandleKnownErrors(const Status& bg_err, BackgroundErrorReason reason);
   Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery);
   void RecoverFromNoSpace();
-  const Status& StartRecoverFromRetryableBGIOError(const IOStatus& io_error);
+  void StartRecoverFromRetryableBGIOError(const IOStatus& io_error);
   void RecoverFromRetryableBGIOError();
   // First, if it is in recovery and the recovery_error is ok. Set the
   // recovery_error_ to bg_err. Second, if the severity is higher than the
8 changes: 6 additions & 2 deletions db/error_handler_fs_test.cc
@@ -851,13 +851,17 @@ TEST_F(DBErrorHandlingFSTest, DoubleManifestWriteError) {
   });
   SyncPoint::GetInstance()->EnableProcessing();
   s = Flush();
-  ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
+  ASSERT_TRUE(s.IsNoSpace());
+  ASSERT_EQ(dbfull()->TEST_GetBGError().severity(),
+            ROCKSDB_NAMESPACE::Status::Severity::kHardError);
   ASSERT_FALSE(dbfull()->TEST_GetFilesToQuarantine().empty());
   fault_fs_->SetFilesystemActive(true);

   // This Resume() will attempt to create a new manifest file and fail again
   s = dbfull()->Resume();
-  ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
+  ASSERT_TRUE(s.IsNoSpace());
+  ASSERT_EQ(dbfull()->TEST_GetBGError().severity(),
+            ROCKSDB_NAMESPACE::Status::Severity::kHardError);
   ASSERT_FALSE(dbfull()->TEST_GetFilesToQuarantine().empty());
   fault_fs_->SetFilesystemActive(true);
   SyncPoint::GetInstance()->ClearAllCallBacks();
