Skip to content

Commit 9bdd408

Browse files
authored
Merge 8452df1 into b949049
2 parents b949049 + 8452df1 commit 9bdd408

File tree

2 files changed

+14 additions, -7 deletions (lines changed)

2 files changed

+14 additions, -7 deletions (lines changed)

ydb/core/blobstorage/vdisk/syncer/blobstorage_syncer.cpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -577,7 +577,6 @@ namespace NKikimr {
577577
GInfo = msg->NewInfo;
578578

579579
// reconfigure guid recovery actor
580-
Y_ABORT_UNLESS(RecoverLostDataId != TActorId());
581580
ctx.Send(RecoverLostDataId, msg->Clone());
582581
}
583582

ydb/core/mind/bscontroller/self_heal.cpp

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ namespace NKikimr::NBsController {
4343
std::shared_ptr<TBlobStorageGroupInfo::TTopology> Topology;
4444
TBlobStorageGroupInfo::TGroupVDisks FailedGroupDisks;
4545
const bool IsSelfHealReasonDecommit;
46+
const bool IgnoreDegradedGroupsChecks;
4647
const bool DonorMode;
4748
THashSet<TVDiskID> PendingVDisks;
4849
THashMap<TActorId, TVDiskID> ActorToDiskMap;
@@ -51,14 +52,15 @@ namespace NKikimr::NBsController {
5152
public:
5253
TReassignerActor(TActorId controllerId, TGroupId groupId, TEvControllerUpdateSelfHealInfo::TGroupContent group,
5354
std::optional<TVDiskID> vdiskToReplace, std::shared_ptr<TBlobStorageGroupInfo::TTopology> topology,
54-
bool isSelfHealReasonDecommit, bool donorMode)
55+
bool isSelfHealReasonDecommit, bool ignoreDegradedGroupsChecks, bool donorMode)
5556
: ControllerId(controllerId)
5657
, GroupId(groupId)
5758
, Group(std::move(group))
5859
, VDiskToReplace(vdiskToReplace)
5960
, Topology(std::move(topology))
6061
, FailedGroupDisks(Topology.get())
6162
, IsSelfHealReasonDecommit(isSelfHealReasonDecommit)
63+
, IgnoreDegradedGroupsChecks(ignoreDegradedGroupsChecks)
6264
, DonorMode(donorMode)
6365
{}
6466

@@ -166,6 +168,7 @@ namespace NKikimr::NBsController {
166168
request->SetIgnoreGroupReserve(true);
167169
request->SetSettleOnlyOnOperationalDisks(true);
168170
request->SetIsSelfHealReasonDecommit(IsSelfHealReasonDecommit);
171+
request->SetIgnoreDegradedGroupsChecks(IgnoreDegradedGroupsChecks);
169172
request->SetAllowUnusableDisks(true);
170173
if (VDiskToReplace) {
171174
ev->SelfHeal = true;
@@ -427,9 +430,11 @@ namespace NKikimr::NBsController {
427430

428431
// check if it is possible to move anything out
429432
bool isSelfHealReasonDecommit;
430-
if (const auto v = FindVDiskToReplace(group.Content, now, group.Topology.get(), &isSelfHealReasonDecommit)) {
433+
bool ignoreDegradedGroupsChecks;
434+
if (const auto v = FindVDiskToReplace(group.Content, now, group.Topology.get(), &isSelfHealReasonDecommit,
435+
&ignoreDegradedGroupsChecks)) {
431436
group.ReassignerActorId = Register(new TReassignerActor(ControllerId, group.GroupId, group.Content,
432-
*v, group.Topology, isSelfHealReasonDecommit, DonorMode));
437+
*v, group.Topology, isSelfHealReasonDecommit, ignoreDegradedGroupsChecks, DonorMode));
433438
} else {
434439
++counter; // this group can't be reassigned right now
435440

@@ -484,7 +489,8 @@ namespace NKikimr::NBsController {
484489
ADD_RECORD_WITH_TIMESTAMP_TO_OPERATION_LOG(GroupLayoutSanitizerOperationLog,
485490
"Start sanitizing GroupId# " << group.GroupId << " GroupGeneration# " << group.Content.Generation);
486491
group.ReassignerActorId = Register(new TReassignerActor(ControllerId, group.GroupId, group.Content,
487-
std::nullopt, group.Topology, false /*isSelfHealReasonDecommit*/, DonorMode));
492+
std::nullopt, group.Topology, false /*isSelfHealReasonDecommit*/,
493+
false /*ignoreDegradedGroupsChecks*/, DonorMode));
488494
}
489495
}
490496
}
@@ -534,7 +540,8 @@ namespace NKikimr::NBsController {
534540
}
535541

536542
std::optional<TVDiskID> FindVDiskToReplace(const TEvControllerUpdateSelfHealInfo::TGroupContent& content,
537-
TMonotonic now, TBlobStorageGroupInfo::TTopology *topology, bool *isSelfHealReasonDecommit) {
543+
TMonotonic now, TBlobStorageGroupInfo::TTopology *topology, bool *isSelfHealReasonDecommit,
544+
bool *ignoreDegradedGroupsChecks) {
538545
// main idea of selfhealing is step-by-step healing of bad group; we can allow healing of group with more
539546
// than one disk missing, but we should not move next faulty disk until previous one is replicated, at least
540547
// partially (meaning only phantoms left)
@@ -553,7 +560,7 @@ namespace NKikimr::NBsController {
553560
}
554561
[[fallthrough]];
555562
case NKikimrBlobStorage::EVDiskStatus::INIT_PENDING:
556-
return std::nullopt; // don't touch group with replicating disks
563+
return std::nullopt; // don't touch group with replicating or starting disks
557564

558565
default:
559566
break;
@@ -579,6 +586,7 @@ namespace NKikimr::NBsController {
579586
continue; // this group will become degraded when applying self-heal logic, skip disk
580587
}
581588
*isSelfHealReasonDecommit = vdisk.IsSelfHealReasonDecommit;
589+
*ignoreDegradedGroupsChecks = checker.IsDegraded(failedByReadiness);
582590
return vdiskId;
583591
}
584592
}

0 commit comments

Comments
 (0)