@@ -36,6 +36,38 @@ namespace NKikimr::NCms {
36
36
using namespace NNodeWhiteboard ;
37
37
using namespace NKikimrCms ;
38
38
39
+ namespace {
40
+
41
+ constexpr size_t MAX_ISSUES_TO_STORE = 100 ;
42
+
43
+ TAction::TIssue ConvertIssue (const TReason& reason) {
44
+ TAction::TIssue issue;
45
+ switch (reason.GetType ()) {
46
+ case TReason::EType::Generic:
47
+ issue.SetType (TAction::TIssue::GENERIC);
48
+ break ;
49
+ case TReason::EType::TooManyUnavailableVDisks:
50
+ issue.SetType (TAction::TIssue::TOO_MANY_UNAVAILABLE_VDISKS);
51
+ break ;
52
+ case TReason::EType::TooManyUnavailableStateStorageRings:
53
+ issue.SetType (TAction::TIssue::TOO_MANY_UNAVAILABLE_STATE_STORAGE_RINGS);
54
+ break ;
55
+ case TReason::EType::DisabledNodesLimitReached:
56
+ issue.SetType (TAction::TIssue::DISABLED_NODES_LIMIT_REACHED);
57
+ break ;
58
+ case TReason::EType::TenantDisabledNodesLimitReached:
59
+ issue.SetType (TAction::TIssue::TENANT_DISABLED_NODES_LIMIT_REACHED);
60
+ break ;
61
+ case TReason::EType::SysTabletsNodeLimitReached:
62
+ issue.SetType (TAction::TIssue::SYS_TABLETS_NODE_LIMIT_REACHED);
63
+ break ;
64
+ }
65
+ issue.SetMessage (reason.GetMessage ());
66
+ return issue;
67
+ }
68
+
69
+ } // anonymous namespace
70
+
39
71
void TCms::DefaultSignalTabletActive (const TActorContext &)
40
72
{
41
73
// must be empty
@@ -326,6 +358,8 @@ bool TCms::CheckPermissionRequest(const TPermissionRequest &request,
326
358
};
327
359
328
360
auto point = ClusterInfo->PushRollbackPoint ();
361
+ size_t storedIssues = 0 ;
362
+ size_t processedActions = 0 ;
329
363
for (const auto &action : request.GetActions ()) {
330
364
TDuration permissionDuration = State->Config .DefaultPermissionDuration ;
331
365
if (request.HasDuration ())
@@ -352,28 +386,40 @@ bool TCms::CheckPermissionRequest(const TPermissionRequest &request,
352
386
353
387
auto *permission = response.AddPermissions ();
354
388
permission->MutableAction ()->CopyFrom (action);
389
+ permission->MutableAction ()->ClearIssue ();
355
390
permission->SetDeadline (error.Deadline .GetValue ());
356
391
AddPermissionExtensions (action, *permission);
357
392
358
393
ClusterInfo->AddTempLocks (action, &ctx);
359
394
} else {
360
395
LOG_DEBUG (ctx, NKikimrServices::CMS, " Result: %s (reason: %s)" ,
361
- ToString (error.Code ).data (), error.Reason .data ());
396
+ ToString (error.Code ).data (), error.Reason .GetMessage (). data ());
362
397
363
398
if (CodesRate[response.GetStatus ().GetCode ()] > CodesRate[error.Code ]) {
364
399
response.MutableStatus ()->SetCode (error.Code );
365
- response.MutableStatus ()->SetReason (error.Reason );
400
+ response.MutableStatus ()->SetReason (error.Reason . GetMessage () );
366
401
if (error.Code == TStatus::DISALLOW_TEMP
367
402
|| error.Code == TStatus::ERROR_TEMP)
368
403
response.SetDeadline (error.Deadline .GetValue ());
369
404
}
370
405
406
+ if (schedule) {
407
+ auto *scheduledAction = scheduled.AddActions ();
408
+ scheduledAction->CopyFrom (action);
409
+
410
+ // Limit stored issues to avoid overloading the local database
411
+ if (storedIssues < MAX_ISSUES_TO_STORE) {
412
+ *scheduledAction->MutableIssue () = ConvertIssue (error.Reason );
413
+ ++storedIssues;
414
+ } else {
415
+ scheduledAction->ClearIssue ();
416
+ }
417
+ }
418
+
371
419
if (!allowPartial)
372
420
break ;
373
-
374
- if (schedule)
375
- scheduled.AddActions ()->CopyFrom (action);
376
421
}
422
+ ++processedActions;
377
423
}
378
424
ClusterInfo->RollbackLocks (point);
379
425
@@ -396,9 +442,21 @@ bool TCms::CheckPermissionRequest(const TPermissionRequest &request,
396
442
if (schedule && response.GetStatus ().GetCode () != TStatus::ALLOW_PARTIAL) {
397
443
if (response.GetStatus ().GetCode () == TStatus::DISALLOW_TEMP
398
444
|| response.GetStatus ().GetCode () == TStatus::ERROR_TEMP)
399
- scheduled.MutableActions ()->CopyFrom (request.GetActions ());
400
- else
445
+ {
446
+ if (!allowPartial) {
447
+ // Only the first problem action was scheduled during
448
+ // the actions check loop. Merge it with rest actions.
449
+ Y_ABORT_UNLESS (scheduled.ActionsSize () == 1 );
450
+ TAction::TIssue issue = std::move (*scheduled.MutableActions ()->begin ()->MutableIssue ());
451
+ scheduled.MutableActions ()->CopyFrom (request.GetActions ());
452
+ for (auto &action : *scheduled.MutableActions ()) {
453
+ action.ClearIssue ();
454
+ }
455
+ *scheduled.MutableActions (processedActions)->MutableIssue () = std::move (issue);
456
+ }
457
+ } else {
401
458
scheduled.ClearActions ();
459
+ }
402
460
}
403
461
404
462
return response.GetStatus ().GetCode () == TStatus::ALLOW
@@ -701,26 +759,32 @@ bool TCms::TryToLockStateStorageReplica(const TAction& action,
701
759
case MODE_MAX_AVAILABILITY:
702
760
if (restartRings + lockedRings > 1 ) {
703
761
error.Code = TStatus::DISALLOW_TEMP;
704
- error.Reason = TStringBuilder () << " Too many unavailable state storage rings"
705
- << " . Restarting rings: "
706
- << (currentRingState == TStateStorageRingInfo::Restart ? restartRings : restartRings - 1 )
707
- << " . Temporary (for a 2 minutes) locked rings: "
708
- << (currentRingState == TStateStorageRingInfo::Locked ? lockedRings + 1 : lockedRings)
709
- << " . Maximum allowed number of unavailable rings for this mode: " << 1 ;
762
+ error.Reason = TReason (
763
+ TStringBuilder () << " Too many unavailable state storage rings"
764
+ << " . Restarting rings: "
765
+ << (currentRingState == TStateStorageRingInfo::Restart ? restartRings : restartRings - 1 )
766
+ << " . Temporary (for a 2 minutes) locked rings: "
767
+ << (currentRingState == TStateStorageRingInfo::Locked ? lockedRings + 1 : lockedRings)
768
+ << " . Maximum allowed number of unavailable rings for this mode: " << 1 ,
769
+ TReason::EType::TooManyUnavailableStateStorageRings
770
+ );
710
771
error.Deadline = defaultDeadline;
711
772
return false ;
712
773
}
713
774
break ;
714
775
case MODE_KEEP_AVAILABLE:
715
776
if (restartRings + lockedRings + disabledRings > (nToSelect - 1 ) / 2 ) {
716
777
error.Code = TStatus::DISALLOW_TEMP;
717
- error.Reason = TStringBuilder () << " Too many unavailable state storage rings"
718
- << " . Restarting rings: "
719
- << (currentRingState == TStateStorageRingInfo::Restart ? restartRings : restartRings - 1 )
720
- << " . Temporary (for a 2 minutes) locked rings: "
721
- << (currentRingState == TStateStorageRingInfo::Locked ? lockedRings + 1 : lockedRings)
722
- << " . Disabled rings: " << disabledRings
723
- << " . Maximum allowed number of unavailable rings for this mode: " << (nToSelect - 1 ) / 2 ;
778
+ error.Reason = TReason (
779
+ TStringBuilder () << " Too many unavailable state storage rings"
780
+ << " . Restarting rings: "
781
+ << (currentRingState == TStateStorageRingInfo::Restart ? restartRings : restartRings - 1 )
782
+ << " . Temporary (for a 2 minutes) locked rings: "
783
+ << (currentRingState == TStateStorageRingInfo::Locked ? lockedRings + 1 : lockedRings)
784
+ << " . Disabled rings: " << disabledRings
785
+ << " . Maximum allowed number of unavailable rings for this mode: " << (nToSelect - 1 ) / 2 ,
786
+ TReason::EType::TooManyUnavailableStateStorageRings
787
+ );
724
788
error.Deadline = defaultDeadline;
725
789
return false ;
726
790
}
@@ -1484,6 +1548,13 @@ void TCms::CheckAndEnqueueRequest(TEvCms::TEvPermissionRequest::TPtr &ev, const
1484
1548
ev, TStatus::WRONG_REQUEST, " Priority value is out of range" , ctx);
1485
1549
}
1486
1550
1551
+ for (const auto &action : rec.GetActions ()) {
1552
+ if (action.HasIssue ()) {
1553
+ return ReplyWithError<TEvCms::TEvPermissionResponse>(
1554
+ ev, TStatus::WRONG_REQUEST, TStringBuilder () << " Action issue is read-only" , ctx);
1555
+ }
1556
+ }
1557
+
1487
1558
EnqueueRequest (ev.Release (), ctx);
1488
1559
}
1489
1560
0 commit comments