@@ -439,20 +439,6 @@ namespace NKikimr {
439
439
return InFlightCount > 0 && TActivationContext::Monotonic () - LastUpdate > StuckQueueThreshold;
440
440
}
441
441
442
- void ResetQueue () {
443
- InFlightCount = 0 ;
444
- InFlightCost = 0 ;
445
- InFlightBytes = 0 ;
446
-
447
- *SkeletonFrontInFlightCount = 0 ;
448
- *SkeletonFrontInFlightCost = 0 ;
449
- *SkeletonFrontInFlightBytes = 0 ;
450
- *SkeletonFrontCostProcessed = 0 ;
451
-
452
- Msgs.clear ();
453
- UpdateState ();
454
- }
455
-
456
442
TString GenerateHtmlState () const {
457
443
// NOTE: warning policy:
458
444
// 1. For InFlightCount and InFlightCost we output them in yellow, if
@@ -701,13 +687,15 @@ namespace NKikimr {
701
687
NMonGroup::TSyncerGroup SyncerMonGroup;
702
688
NMonGroup::TVDiskStateGroup VDiskMonGroup;
703
689
NMonGroup::TCostGroup CostGroup;
704
- NMonGroup::TMalfunctionGroup MalfunctionGroup ;
690
+ NMonGroup::TTimerGroup TimerGroup ;
705
691
TVDiskIncarnationGuid VDiskIncarnationGuid;
706
692
bool HasUnreadableBlobs = false ;
707
693
TInstant LastSanitizeTime = TInstant::Zero();
708
694
TInstant LastSanitizeWithErrorTime = TInstant::Zero();
709
695
ui64 NextUniqueMessageId = 1 ;
710
696
697
+ TMonotonic StartTimestamp = TMonotonic::Zero();
698
+
711
699
static constexpr TDuration StuckQueueCheckPeriod = TDuration::Seconds(60 );
712
700
713
701
ui64 AllocateMessageId () {
@@ -812,6 +800,8 @@ namespace NKikimr {
812
800
ActiveActors.Insert (SkeletonId, __FILE__, __LINE__, ctx, NKikimrServices::BLOBSTORAGE);
813
801
814
802
SetupMonitoring (ctx);
803
+ StartTimestamp = TActivationContext::Monotonic ();
804
+ TimerGroup.SkeletonFrontUptimeSeconds () = 0 ;
815
805
Become (&TThis::StateLocalRecoveryInProgress);
816
806
}
817
807
@@ -2077,18 +2067,20 @@ namespace NKikimr {
2077
2067
}
2078
2068
2079
2069
void HandleWakeup (const TActorContext& ctx) {
2070
+ TMonotonic now = TActivationContext::Monotonic ();
2071
+ TimerGroup.SkeletonFrontUptimeSeconds () = (now - StartTimestamp).Seconds ();
2080
2072
for (TIntQueueClass* queue : { IntQueueAsyncGets.get (), IntQueueFastGets.get (),
2081
2073
IntQueueDiscover.get (), IntQueueLowGets.get (), IntQueueLogPuts.get (),
2082
2074
IntQueueHugePutsForeground.get (), IntQueueHugePutsBackground.get () }) {
2083
2075
if (queue->IsStuck ()) {
2084
- queue->DropWithError (ctx, *this );
2085
- queue->ResetQueue ();
2086
- DisconnectClients (ctx);
2087
2076
LOG_CRIT_S (ctx, NKikimrServices::BS_SKELETON, VCtx->VDiskLogPrefix
2088
- << " Stuck internal queue detected, dropping queues , "
2077
+ << " Stuck internal queue detected, restarting VDisk , "
2089
2078
<< " Queue.Name# " << queue->Name
2090
2079
<< " Marker# BSVSF08" );
2091
- ++MalfunctionGroup.DroppingStuckInternalQueue ();
2080
+ TActorId wardenId = MakeBlobStorageNodeWardenID (SelfId ().NodeId ());
2081
+ ctx.Send (wardenId, new TEvBlobStorage::TEvAskRestartVDisk (
2082
+ Config->BaseInfo .PDiskId , SelfVDiskId));
2083
+ return ;
2092
2084
}
2093
2085
}
2094
2086
Schedule (StuckQueueCheckPeriod, new TEvents::TEvWakeup);
@@ -2266,7 +2258,7 @@ namespace NKikimr {
2266
2258
, SyncerMonGroup(VDiskCounters, " subsystem" , " syncer" )
2267
2259
, VDiskMonGroup(VDiskCounters, " subsystem" , " state" )
2268
2260
, CostGroup(VDiskCounters, " subsystem" , " cost" )
2269
- , MalfunctionGroup (VDiskCounters, " subsystem" , " malfunction " )
2261
+ , TimerGroup (VDiskCounters, " subsystem" , " timer " )
2270
2262
{
2271
2263
ReplMonGroup.ReplUnreplicatedVDisks () = 1 ;
2272
2264
VDiskMonGroup.VDiskState (NKikimrWhiteboard::EVDiskState::Initial);
0 commit comments