Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion ydb/core/blobstorage/ut_blobstorage/restart_pdisk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Y_UNIT_TEST_SUITE(BSCRestartPDisk) {
auto& diskId = it->first;

NKikimrBlobStorage::TConfigRequest request;
request.SetIgnoreDegradedGroupsChecks(true);

NKikimrBlobStorage::TRestartPDisk* cmd = request.AddCommand()->MutableRestartPDisk();
auto pdiskId = cmd->MutableTargetPDiskId();
Expand All @@ -50,7 +51,7 @@ Y_UNIT_TEST_SUITE(BSCRestartPDisk) {
// Restarting third disk will not be allowed.
UNIT_ASSERT_C(!response.GetSuccess(), "Restart should've been prohibited");

UNIT_ASSERT_STRING_CONTAINS(response.GetErrorDescription(), "ExpectedStatus# DISINTEGRATED");
UNIT_ASSERT_STRING_CONTAINS(response.GetErrorDescription(), "Disintegrated");
break;
}
}
Expand Down
1 change: 1 addition & 0 deletions ydb/core/cms/sentinel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -984,6 +984,7 @@ class TSentinel: public TActorBootstrapped<TSentinel> {
command.SetPDiskId(id.DiskId);
command.SetStatus(info->GetStatus());
}
request->Record.MutableRequest()->SetIgnoreDisintegratedGroupsChecks(true);

NTabletPipe::SendData(SelfId(), CmsState->BSControllerPipe, request.Release(), ++SentinelState->ChangeRequestId);
}
Expand Down
5 changes: 4 additions & 1 deletion ydb/core/mind/bscontroller/cmds_box.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,8 +212,11 @@ namespace NKikimr::NBsController {

for (const auto& [id, slot] : pdisk->VSlotsOnPDisk) {
if (slot->Group) {
auto *m = VSlots.FindForUpdate(slot->VSlotId);
m->Status = NKikimrBlobStorage::EVDiskStatus::INIT_PENDING;
m->IsReady = false;
TGroupInfo *group = Groups.FindForUpdate(slot->Group->ID);

GroupFailureModelChanged.insert(slot->Group->ID);
group->CalculateGroupStatus();
}
}
Expand Down
1 change: 1 addition & 0 deletions ydb/core/mind/bscontroller/cmds_drive_status.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ namespace NKikimr::NBsController {
for (const auto& [id, slot] : pdisk->VSlotsOnPDisk) {
if (slot->Group) {
TGroupInfo *group = Groups.FindForUpdate(slot->Group->ID);
GroupFailureModelChanged.insert(group->ID);
group->CalculateGroupStatus();
}
}
Expand Down
48 changes: 38 additions & 10 deletions ydb/core/mind/bscontroller/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ namespace NKikimr::NBsController {

bool TBlobStorageController::CommitConfigUpdates(TConfigState& state, bool suppressFailModelChecking,
bool suppressDegradedGroupsChecking, bool suppressDisintegratedGroupsChecking,
TTransactionContext& txc, TString *errorDescription) {
TTransactionContext& txc, TString *errorDescription, NKikimrBlobStorage::TConfigResponse *response) {
NIceDb::TNiceDb db(txc.DB);

for (TGroupId groupId : state.GroupContentChanged) {
Expand All @@ -309,16 +309,20 @@ namespace NKikimr::NBsController {
}
}

bool errors = false;
std::vector<TGroupId> disintegratedByExpectedStatus;
std::vector<TGroupId> disintegrated;
std::vector<TGroupId> degraded;

if (!suppressDisintegratedGroupsChecking) {
for (auto&& [base, overlay] : state.Groups.Diff()) {
if (base && overlay->second) {
const TGroupInfo::TGroupStatus& prev = base->second->Status;
const TGroupInfo::TGroupStatus& status = overlay->second->Status;
if (status.ExpectedStatus == NKikimrBlobStorage::TGroupStatus::DISINTEGRATED &&
status.ExpectedStatus != prev.ExpectedStatus) { // status did really change
*errorDescription = TStringBuilder() << "GroupId# " << overlay->first
<< " ExpectedStatus# DISINTEGRATED";
return false;
disintegratedByExpectedStatus.push_back(overlay->first);
errors = true;
}
}
}
Expand All @@ -340,20 +344,44 @@ namespace NKikimr::NBsController {
// check the failure model
auto& checker = *topology.QuorumChecker;
if (!checker.CheckFailModelForGroup(failed)) {
*errorDescription = TStringBuilder() << "GroupId# " << groupId
<< " may lose data while modifying group";
return false;
disintegrated.push_back(groupId);
errors = true;
} else if (!suppressDegradedGroupsChecking && checker.IsDegraded(failed)) {
*errorDescription = TStringBuilder() << "GroupId# " << groupId
<< " may become DEGRADED while modifying group";
return false;
degraded.push_back(groupId);
errors = true;
}
} else {
Y_ABORT_UNLESS(group); // group must exist
}
}
}

if (errors) {
TStringStream msg;
if (!degraded.empty()) {
msg << "Degraded GroupIds# " << FormatList(degraded) << ' ';
if (response) {
response->MutableGroupsGetDegraded()->Add(degraded.begin(), degraded.end());
}
}
if (!disintegrated.empty()) {
msg << "Disintegrated GroupIds# " << FormatList(disintegrated) << ' ';
if (response) {
response->MutableGroupsGetDisintegrated()->Add(disintegrated.begin(), disintegrated.end());
}
}
if (!disintegratedByExpectedStatus.empty()) {
msg << "DisintegratedByExpectedStatus GroupIds# " << FormatList(disintegratedByExpectedStatus) << ' ';
if (response) {
response->MutableGroupsGetDisintegratedByExpectedStatus()->Add(disintegratedByExpectedStatus.begin(),
disintegratedByExpectedStatus.end());
}
}
*errorDescription = msg.Str();
errorDescription->pop_back();
return false;
}

// trim PDisks awaiting deletion
for (const TPDiskId& pdiskId : state.PDisksToRemove) {
TPDiskInfo *pdiskInfo = state.PDisks.FindForUpdate(pdiskId);
Expand Down
3 changes: 2 additions & 1 deletion ydb/core/mind/bscontroller/config_cmd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,8 @@ namespace NKikimr::NBsController {

const bool doLogCommand = Success && State->Changed();
Success = Success && Self->CommitConfigUpdates(*State, Cmd.GetIgnoreGroupFailModelChecks(),
Cmd.GetIgnoreDegradedGroupsChecks(), Cmd.GetIgnoreDisintegratedGroupsChecks(), txc, &Error);
Cmd.GetIgnoreDegradedGroupsChecks(), Cmd.GetIgnoreDisintegratedGroupsChecks(), txc, &Error,
Response);

Finish();
if (doLogCommand) {
Expand Down
1 change: 1 addition & 0 deletions ydb/core/mind/bscontroller/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <ydb/core/base/tablet_pipe.h>
#include <ydb/core/blobstorage/base/blobstorage_events.h>
#include <ydb/core/blobstorage/base/blobstorage_vdiskid.h>
#include <ydb/core/blobstorage/base/utility.h>
#include <ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.h>
#include <ydb/core/blobstorage/groupinfo/blobstorage_groupinfo_blobmap.h>
#include <ydb/core/blobstorage/groupinfo/blobstorage_groupinfo_sets.h>
Expand Down
9 changes: 5 additions & 4 deletions ydb/core/mind/bscontroller/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -499,12 +499,12 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
switch (Status) {
case NKikimrBlobStorage::EDriveStatus::UNKNOWN:
case NKikimrBlobStorage::EDriveStatus::BROKEN:
return false;

case NKikimrBlobStorage::EDriveStatus::ACTIVE:
case NKikimrBlobStorage::EDriveStatus::INACTIVE:
case NKikimrBlobStorage::EDriveStatus::FAULTY:
case NKikimrBlobStorage::EDriveStatus::TO_BE_REMOVED:
return false;

case NKikimrBlobStorage::EDriveStatus::ACTIVE:
return true;

case NKikimrBlobStorage::EDriveStatus::EDriveStatus_INT_MIN_SENTINEL_DO_NOT_USE_:
Expand Down Expand Up @@ -1572,7 +1572,8 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
void UpdateSystemViews();

bool CommitConfigUpdates(TConfigState& state, bool suppressFailModelChecking, bool suppressDegradedGroupsChecking,
bool suppressDisintegratedGroupsChecking, TTransactionContext& txc, TString *errorDescription);
bool suppressDisintegratedGroupsChecking, TTransactionContext& txc, TString *errorDescription,
NKikimrBlobStorage::TConfigResponse *response = nullptr);

void CommitSelfHealUpdates(TConfigState& state);
void CommitScrubUpdates(TConfigState& state, TTransactionContext& txc);
Expand Down
2 changes: 2 additions & 0 deletions ydb/core/mind/bscontroller/virtual_group.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ namespace NKikimr::NBsController {
group->SeenOperational = true;
}

GroupFailureModelChanged.insert(group->ID);
group->CalculateGroupStatus();

NKikimrBlobDepot::TBlobDepotConfig config;
Expand Down Expand Up @@ -127,6 +128,7 @@ namespace NKikimr::NBsController {
group->HiveId = cmd.HasHiveId() ? MakeMaybe(cmd.GetHiveId()) : Nothing();
group->Database = cmd.HasDatabase() ? MakeMaybe(cmd.GetDatabase()) : Nothing();
group->NeedAlter = true;
GroupFailureModelChanged.insert(group->ID);
group->CalculateGroupStatus();

NKikimrBlobDepot::TBlobDepotConfig config;
Expand Down
3 changes: 3 additions & 0 deletions ydb/core/protos/blobstorage_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -771,4 +771,7 @@ message TConfigResponse {
bool Success = 2;
string ErrorDescription = 3;
uint64 ConfigTxSeqNo = 4;
repeated uint32 GroupsGetDegraded = 5;
repeated uint32 GroupsGetDisintegrated = 6;
repeated uint32 GroupsGetDisintegratedByExpectedStatus = 7;
}