@@ -43,6 +43,7 @@ namespace NKikimr::NBsController {
43
43
std::shared_ptr<TBlobStorageGroupInfo::TTopology> Topology;
44
44
TBlobStorageGroupInfo::TGroupVDisks FailedGroupDisks;
45
45
const bool IsSelfHealReasonDecommit;
46
+ const bool IgnoreDegradedGroupsChecks;
46
47
const bool DonorMode;
47
48
THashSet<TVDiskID> PendingVDisks;
48
49
THashMap<TActorId, TVDiskID> ActorToDiskMap;
@@ -51,14 +52,15 @@ namespace NKikimr::NBsController {
51
52
public:
52
53
// Constructs a reassigner actor for a single group. Every argument is stored in the
// same-named member; the constructor body itself does no work.
//   controllerId               - stored in ControllerId.
//   groupId, group             - id and content of the group; `group` is taken by value and moved in.
//   vdiskToReplace             - optional VDisk id; the visible callers pass a concrete disk on the
//                                self-heal path and std::nullopt on the layout-sanitizer path.
//   topology                   - shared group topology; moved into the Topology member.
//   isSelfHealReasonDecommit   - stored and later copied into the outgoing request.
//   ignoreDegradedGroupsChecks - parameter added by this change; stored and later copied into the
//                                outgoing request via SetIgnoreDegradedGroupsChecks.
//   donorMode                  - stored in DonorMode.
TReassignerActor (TActorId controllerId, TGroupId groupId, TEvControllerUpdateSelfHealInfo::TGroupContent group,
53
54
std::optional<TVDiskID> vdiskToReplace, std::shared_ptr<TBlobStorageGroupInfo::TTopology> topology,
54
- bool isSelfHealReasonDecommit, bool donorMode)
55
+ bool isSelfHealReasonDecommit, bool ignoreDegradedGroupsChecks, bool donorMode)
55
56
: ControllerId(controllerId)
56
57
, GroupId(groupId)
57
58
, Group(std::move(group))
58
59
, VDiskToReplace(vdiskToReplace)
59
60
, Topology(std::move(topology))
60
61
// Reads the Topology *member* (not the moved-from `topology` parameter). This relies on
// Topology being declared before FailedGroupDisks so that it is already initialized here.
, FailedGroupDisks(Topology.get())
61
62
, IsSelfHealReasonDecommit(isSelfHealReasonDecommit)
63
+ , IgnoreDegradedGroupsChecks(ignoreDegradedGroupsChecks)
62
64
, DonorMode(donorMode)
63
65
{}
64
66
@@ -166,6 +168,7 @@ namespace NKikimr::NBsController {
166
168
request->SetIgnoreGroupReserve (true );
167
169
request->SetSettleOnlyOnOperationalDisks (true );
168
170
request->SetIsSelfHealReasonDecommit (IsSelfHealReasonDecommit);
171
+ request->SetIgnoreDegradedGroupsChecks (IgnoreDegradedGroupsChecks);
169
172
request->SetAllowUnusableDisks (true );
170
173
if (VDiskToReplace) {
171
174
ev->SelfHeal = true ;
@@ -427,9 +430,11 @@ namespace NKikimr::NBsController {
427
430
428
431
// check if it is possible to move anything out
429
432
bool isSelfHealReasonDecommit;
430
- if (const auto v = FindVDiskToReplace (group.Content , now, group.Topology .get (), &isSelfHealReasonDecommit)) {
433
+ bool ignoreDegradedGroupsChecks;
434
+ if (const auto v = FindVDiskToReplace (group.Content , now, group.Topology .get (), &isSelfHealReasonDecommit,
435
+ &ignoreDegradedGroupsChecks)) {
431
436
group.ReassignerActorId = Register (new TReassignerActor (ControllerId, group.GroupId , group.Content ,
432
- *v, group.Topology , isSelfHealReasonDecommit, DonorMode));
437
+ *v, group.Topology , isSelfHealReasonDecommit, ignoreDegradedGroupsChecks, DonorMode));
433
438
} else {
434
439
++counter; // this group can't be reassigned right now
435
440
@@ -484,7 +489,8 @@ namespace NKikimr::NBsController {
484
489
ADD_RECORD_WITH_TIMESTAMP_TO_OPERATION_LOG (GroupLayoutSanitizerOperationLog,
485
490
" Start sanitizing GroupId# " << group.GroupId << " GroupGeneration# " << group.Content .Generation );
486
491
group.ReassignerActorId = Register (new TReassignerActor (ControllerId, group.GroupId , group.Content ,
487
- std::nullopt, group.Topology , false /* isSelfHealReasonDecommit*/ , DonorMode));
492
+ std::nullopt, group.Topology , false /* isSelfHealReasonDecommit*/ ,
493
+ false /* ignoreDegradedGroupsChecks*/ , DonorMode));
488
494
}
489
495
}
490
496
}
@@ -534,7 +540,8 @@ namespace NKikimr::NBsController {
534
540
}
535
541
536
542
std::optional<TVDiskID> FindVDiskToReplace (const TEvControllerUpdateSelfHealInfo::TGroupContent& content,
537
- TMonotonic now, TBlobStorageGroupInfo::TTopology *topology, bool *isSelfHealReasonDecommit) {
543
+ TMonotonic now, TBlobStorageGroupInfo::TTopology *topology, bool *isSelfHealReasonDecommit,
544
+ bool *ignoreDegradedGroupsChecks) {
538
545
// main idea of selfhealing is step-by-step healing of bad group; we can allow healing of group with more
539
546
// than one disk missing, but we should not move next faulty disk until previous one is replicated, at least
540
547
// partially (meaning only phantoms left)
@@ -553,7 +560,7 @@ namespace NKikimr::NBsController {
553
560
}
554
561
[[fallthrough]];
555
562
case NKikimrBlobStorage::EVDiskStatus::INIT_PENDING:
556
- return std::nullopt; // don't touch group with replicating disks
563
+ return std::nullopt; // don't touch group with replicating or starting disks
557
564
558
565
default :
559
566
break ;
@@ -579,6 +586,7 @@ namespace NKikimr::NBsController {
579
586
continue ; // this group will become degraded when applying self-heal logic, skip disk
580
587
}
581
588
*isSelfHealReasonDecommit = vdisk.IsSelfHealReasonDecommit ;
589
+ *ignoreDegradedGroupsChecks = checker.IsDegraded (failedByReadiness);
582
590
return vdiskId;
583
591
}
584
592
}
0 commit comments