@@ -35,6 +35,10 @@ namespace {
3535 static constexpr TDuration OfflineFollowerWaitFirst = TDuration::Seconds(4 );
3636 static constexpr TDuration OfflineFollowerWaitRetry = TDuration::Seconds(15 );
3737
// Retry policy for failed log-channel garbage collection requests.
// The two *BackoffMs values are the arguments used to construct TTablet::GcBackoffTimer;
// going by their names they are the initial and the maximum retry delay, in milliseconds.
38+ constexpr ui64 GcErrorInitialBackoffMs = 1 ;
39+ constexpr ui64 GcErrorMaxBackoffMs = 10000 ;
// Upper bound for GcTryCounter: once the counter reaches this value no further TEvLogGcRetry is scheduled.
// NOTE(review): the total-time estimate below depends on how the backoff timer grows its delay,
// which is not visible here — confirm against the timer implementation.
40+ constexpr ui64 GcMaxErrors = 25 ; // ~1.13 min in total
41+
3842}
3943
4044ui64 TTablet::TabletID () const {
@@ -1323,6 +1327,16 @@ void TTablet::Handle(TEvBlobStorage::TEvCollectGarbageResult::TPtr &ev) {
13231327
13241328 TEvBlobStorage::TEvCollectGarbageResult *msg = ev->Get ();
13251329
// Local helper that decides what to do after a batch of collect-garbage results has been processed.
// Both visible call sites invoke it only when GcInFly == 0.
1330+ auto handleNextGcLogChannel = [&]() {
// A newer step is queued: hand it to GcLogChannel and zero GcNextStep in the same expression.
1331+ if (GcNextStep != 0 ) {
1332+ GcLogChannel (std::exchange (GcNextStep, 0 ));
// Otherwise retry only if failures were counted, no retry is already pending,
// and fewer than GcMaxErrors retries have been scheduled so far.
1333+ } else if (GcFailCount > 0 && !GcPendingRetry && GcTryCounter < GcMaxErrors) {
1334+ ++GcTryCounter;
// Mark the retry as pending so a second TEvLogGcRetry is not scheduled while this one is outstanding.
1335+ GcPendingRetry = true ;
// Delay comes from GcBackoffTimer.NextBackoffMs(), interpreted as milliseconds.
1336+ Schedule (TDuration::MilliSeconds (GcBackoffTimer.NextBackoffMs ()), new TEvTabletBase::TEvLogGcRetry ());
1337+ }
1338+ };
1339+
13261340 switch (msg->Status ) {
13271341 case NKikimrProto::RACE:
13281342 case NKikimrProto::BLOCKED:
@@ -1335,18 +1349,27 @@ void TTablet::Handle(TEvBlobStorage::TEvCollectGarbageResult::TPtr &ev) {
13351349 }
13361350 break ;
13371351 case NKikimrProto::OK:
1338- if (GcInFly == 0 && GcForStepAckRequest) {
1339- const auto & req = *GcForStepAckRequest->Get ();
1340- const ui32 gen = StateStorageInfo.KnownGeneration ;
1341- if (std::tie (req.Generation , req.Step ) <= std::tie (gen, GcInFlyStep)) {
1342- Send (GcForStepAckRequest->Sender , new TEvTablet::TEvGcForStepAckResponse (gen, GcInFlyStep));
1343- GcForStepAckRequest = nullptr ;
1352+ if (GcInFly == 0 ) {
1353+ if (GcFailCount == 0 ) {
1354+ GcConfirmedStep = GcInFlyStep;
1355+ GcTryCounter = 0 ;
1356+ GcBackoffTimer.Reset ();
1357+ if (GcForStepAckRequest) {
1358+ const auto & req = *GcForStepAckRequest->Get ();
1359+ const ui32 gen = StateStorageInfo.KnownGeneration ;
1360+ if (std::tie (req.Generation , req.Step ) <= std::tie (gen, GcConfirmedStep)) {
1361+ Send (GcForStepAckRequest->Sender , new TEvTablet::TEvGcForStepAckResponse (gen, GcConfirmedStep));
1362+ GcForStepAckRequest = nullptr ;
1363+ }
1364+ }
13441365 }
1366+ handleNextGcLogChannel ();
13451367 }
1346- [[fallthrough]];
1347- default : // silently ignore unrecognized errors (assume temporary)
1348- if (GcInFly == 0 && GcNextStep != 0 ) {
1349- GcLogChannel (std::exchange (GcNextStep, 0 ));
1368+ return ;
1369+ default :
1370+ ++GcFailCount;
1371+ if (GcInFly == 0 ) {
1372+ handleNextGcLogChannel ();
13501373 }
13511374 return ;
13521375 }
@@ -1357,8 +1380,8 @@ void TTablet::Handle(TEvBlobStorage::TEvCollectGarbageResult::TPtr &ev) {
13571380void TTablet::Handle (TEvTablet::TEvGcForStepAckRequest::TPtr& ev) {
13581381 const auto & req = *ev->Get ();
13591382 const ui32 gen = StateStorageInfo.KnownGeneration ;
1360- if (GcInFly == 0 && std::tie (req.Generation , req.Step ) <= std::tie (gen, GcInFlyStep )) {
1361- Send (ev->Sender , new TEvTablet::TEvGcForStepAckResponse (gen, GcInFlyStep ));
1383+ if (GcInFly == 0 && std::tie (req.Generation , req.Step ) <= std::tie (gen, GcConfirmedStep )) {
1384+ Send (ev->Sender , new TEvTablet::TEvGcForStepAckResponse (gen, GcConfirmedStep ));
13621385 } else {
13631386 GcForStepAckRequest = ev;
13641387 }
@@ -1367,6 +1390,8 @@ void TTablet::Handle(TEvTablet::TEvGcForStepAckRequest::TPtr& ev) {
13671390void TTablet::GcLogChannel (ui32 step) {
13681391 const ui64 tabletid = TabletID ();
13691392 const ui32 gen = StateStorageInfo.KnownGeneration ;
1393+ GcPendingRetry = false ;
1394+ GcFailCount = 0 ;
13701395
13711396 if (GcInFly != 0 || Graph.SyncCommit .SyncStep != 0 && Graph.SyncCommit .SyncStep <= step) {
13721397 if (GcInFlyStep < step) {
@@ -1414,6 +1439,12 @@ void TTablet::GcLogChannel(ui32 step) {
14141439 GcNextStep = 0 ;
14151440}
14161441
// Re-issues log-channel GC for GcInFlyStep, but only when a retry is marked pending (GcPendingRetry),
// nothing is in flight (GcInFly == 0) and GcInFlyStep is ahead of GcConfirmedStep; otherwise does nothing.
// Presumably invoked when the scheduled TEvLogGcRetry event fires — that handler is not visible here, confirm.
// NOTE(review): when the guard fails, GcPendingRetry is left untouched; in the visible code the flag is
// only cleared inside GcLogChannel — verify a stale flag cannot block later retries.
1442+ void TTablet::RetryGcRequests () {
1443+ if (GcPendingRetry && GcInFly == 0 && GcInFlyStep > GcConfirmedStep) {
1444+ GcLogChannel (GcInFlyStep);
1445+ }
1446+ }
1447+
14171448void TTablet::SpreadFollowerAuxUpdate (const TString& auxUpdate) {
14181449 for (auto &xpair : LeaderInfo) {
14191450 SendFollowerAuxUpdate (xpair.second , xpair.first , auxUpdate);
@@ -1938,7 +1969,12 @@ TTablet::TTablet(const TActorId &launcher, TTabletStorageInfo *info, TTabletSetu
19381969 , DiscoveredLastBlocked(Max<ui32>())
19391970 , GcInFly(0 )
19401971 , GcInFlyStep(0 )
1972+ , GcConfirmedStep(0 )
19411973 , GcNextStep(0 )
1974+ , GcTryCounter(0 )
1975+ , GcBackoffTimer(GcErrorInitialBackoffMs, GcErrorMaxBackoffMs)
1976+ , GcPendingRetry(false )
1977+ , GcFailCount(0 )
19421978 , ResourceProfiles(profiles)
19431979 , TxCacheQuota(txCacheQuota)
19441980{
0 commit comments