Skip to content

Commit

Permalink
Fix flush no wal IO error bug (facebook#8107)
Browse files Browse the repository at this point in the history
Summary:
There is bug in the current code base introduced in facebook#8049 , we still set the SST file write IO Error only case as hard error. Fix it by removing the logic.

Pull Request resolved: facebook#8107

Test Plan: make check, error_handler_fs_test

Reviewed By: anand1976

Differential Revision: D27321422

Pulled By: zhichao-cao

fbshipit-source-id: c014afc1553ca66b655e3bbf9d0bf6eb417ccf94
  • Loading branch information
zhichao-cao authored and facebook-github-bot committed Mar 26, 2021
1 parent 711881b commit af80a78
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 48 deletions.
40 changes: 18 additions & 22 deletions db/db_impl/db_impl_compaction_flush.cc
Original file line number Diff line number Diff line change
Expand Up @@ -260,18 +260,16 @@ Status DBImpl::FlushMemTableToOutputFile(
// be pessimistic and try write to a new MANIFEST.
// TODO: distinguish between MANIFEST write and CURRENT renaming
if (!versions_->io_status().ok()) {
if (total_log_size_ > 0) {
// If the WAL is empty, we use different error reason
error_handler_.SetBGError(io_s,
BackgroundErrorReason::kManifestWrite);
} else {
error_handler_.SetBGError(io_s,
BackgroundErrorReason::kManifestWriteNoWAL);
}
} else if (total_log_size_ > 0 || !log_io_s.ok()) {
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
// If WAL sync is successful (either WAL size is 0 or there is no IO
// error), all the Manifest write will be map to soft error.
// TODO: kManifestWriteNoWAL and kFlushNoWAL are misleading. Refactor is
// needed.
error_handler_.SetBGError(io_s,
BackgroundErrorReason::kManifestWriteNoWAL);
} else {
// If the WAL is empty, we use different error reason
// If WAL sync is successful (either WAL size is 0 or there is no IO
// error), all the other SST file write errors will be set as
// kFlushNoWAL.
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL);
}
} else {
Expand Down Expand Up @@ -687,18 +685,16 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
// be pessimistic and try write to a new MANIFEST.
// TODO: distinguish between MANIFEST write and CURRENT renaming
if (!versions_->io_status().ok()) {
if (total_log_size_ > 0) {
// If the WAL is empty, we use different error reason
error_handler_.SetBGError(io_s,
BackgroundErrorReason::kManifestWrite);
} else {
error_handler_.SetBGError(io_s,
BackgroundErrorReason::kManifestWriteNoWAL);
}
} else if (total_log_size_ > 0) {
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
// If WAL sync is successful (either WAL size is 0 or there is no IO
// error), all the Manifest write will be map to soft error.
// TODO: kManifestWriteNoWAL and kFlushNoWAL are misleading. Refactor
// is needed.
error_handler_.SetBGError(io_s,
BackgroundErrorReason::kManifestWriteNoWAL);
} else {
// If the WAL is empty, we use different error reason
// If WAL sync is successful (either WAL size is 0 or there is no IO
// error), all the other SST file write errors will be set as
// kFlushNoWAL.
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL);
}
} else {
Expand Down
52 changes: 26 additions & 26 deletions db/error_handler_fs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteRetryableError) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
s = dbfull()->Resume();
Expand All @@ -242,7 +242,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteRetryableError) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
s = dbfull()->Resume();
Expand All @@ -256,7 +256,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteRetryableError) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
s = dbfull()->Resume();
Expand Down Expand Up @@ -292,7 +292,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteFileScopeError) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
s = dbfull()->Resume();
Expand All @@ -306,7 +306,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteFileScopeError) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
s = dbfull()->Resume();
Expand All @@ -320,7 +320,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteFileScopeError) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
s = dbfull()->Resume();
Expand All @@ -340,7 +340,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteFileScopeError) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
s = dbfull()->Resume();
Expand Down Expand Up @@ -649,7 +649,7 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteRetryableError) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->ClearAllCallBacks();
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
Expand Down Expand Up @@ -695,7 +695,7 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteFileScopeError) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->ClearAllCallBacks();
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
Expand Down Expand Up @@ -1698,7 +1698,7 @@ TEST_F(DBErrorHandlingFSTest, MultiDBVariousErrors) {
// to soft error and trigger auto resume. During auto resume, SwitchMemtable
// is disabled to avoid small SST tables. Write can still be applied before
// the bg error is cleaned unless the memtable is full.
TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableeErrorAutoRecover1) {
TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableErrorAutoRecover1) {
// Activate the FS before the first resume
std::shared_ptr<ErrorHandlerFSListener> listener(
new ErrorHandlerFSListener());
Expand Down Expand Up @@ -1768,7 +1768,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableeErrorAutoRecover1) {
Destroy(options);
}

TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableeErrorAutoRecover2) {
TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableErrorAutoRecover2) {
// Activate the FS before the first resume
std::shared_ptr<ErrorHandlerFSListener> listener(
new ErrorHandlerFSListener());
Expand Down Expand Up @@ -1810,14 +1810,14 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableeErrorAutoRecover2) {
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_COUNT));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ASSERT_LE(0, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ASSERT_LE(0, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT));
HistogramData autoresume_retry;
options.statistics->histogramData(ERROR_HANDLER_AUTORESUME_RETRY_COUNT,
&autoresume_retry);
ASSERT_EQ(autoresume_retry.max, 1);
ASSERT_GE(autoresume_retry.max, 0);
ASSERT_OK(Put(Key(2), "val2", wo));
s = Flush();
// Since auto resume is successful, the bg error is cleaned, flush will
Expand All @@ -1827,7 +1827,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableeErrorAutoRecover2) {
Destroy(options);
}

TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover1) {
TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableErrorAutoRecover1) {
// Fail the first resume and make the second resume successful
std::shared_ptr<ErrorHandlerFSListener> listener(
new ErrorHandlerFSListener());
Expand Down Expand Up @@ -1876,7 +1876,7 @@ TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover1) {
Destroy(options);
}

TEST_F(DBErrorHandlingFSTest, FLushWritRetryableeErrorAutoRecover2) {
TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover2) {
// Activate the FS before the first resume
std::shared_ptr<ErrorHandlerFSListener> listener(
new ErrorHandlerFSListener());
Expand All @@ -1901,7 +1901,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableeErrorAutoRecover2) {

SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
ASSERT_EQ(listener->WaitForRecovery(5000000), true);
Expand All @@ -1916,7 +1916,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableeErrorAutoRecover2) {
Destroy(options);
}

TEST_F(DBErrorHandlingFSTest, FLushWritRetryableeErrorAutoRecover3) {
TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover3) {
// Fail all the resume and let user to resume
std::shared_ptr<ErrorHandlerFSListener> listener(
new ErrorHandlerFSListener());
Expand Down Expand Up @@ -1945,7 +1945,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableeErrorAutoRecover3) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover3:0");
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover3:1");
fault_fs_->SetFilesystemActive(true);
Expand All @@ -1965,7 +1965,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableeErrorAutoRecover3) {
Destroy(options);
}

TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover4) {
TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableErrorAutoRecover4) {
// Fail the first resume and does not do resume second time because
// the IO error severity is Fatal Error and not Retryable.
std::shared_ptr<ErrorHandlerFSListener> listener(
Expand Down Expand Up @@ -2001,7 +2001,7 @@ TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover4) {

SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover4:0");
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover4:2");
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
Expand All @@ -2025,7 +2025,7 @@ TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover4) {
Destroy(options);
}

TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover5) {
TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableErrorAutoRecover5) {
// During the resume, call DB->CLose, make sure the resume thread exist
// before close continues. Due to the shutdown, the resume is not successful
// and the FS does not become active, so close status is still IO error
Expand Down Expand Up @@ -2054,7 +2054,7 @@ TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover5) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover5:0");
// The first resume will cause recovery_error and its severity is the
// Fatal error
Expand All @@ -2074,7 +2074,7 @@ TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover5) {
Destroy(options);
}

TEST_F(DBErrorHandlingFSTest, FLushWritRetryableeErrorAutoRecover6) {
TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover6) {
// During the resume, call DB->CLose, make sure the resume thread exist
// before close continues. Due to the shutdown, the resume is not successful
// and the FS does not become active, so close status is still IO error
Expand Down Expand Up @@ -2109,7 +2109,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableeErrorAutoRecover6) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover6:0");
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover6:1");
fault_fs_->SetFilesystemActive(true);
Expand Down Expand Up @@ -2168,7 +2168,7 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteRetryableErrorAutoRecover) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
TEST_SYNC_POINT("ManifestWriteRetryableErrorAutoRecover:0");
fault_fs_->SetFilesystemActive(true);
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
Expand Down

0 comments on commit af80a78

Please sign in to comment.