Skip to content

Commit 53903ba

Browse files
committed
Register VPuts in RootCause tracker
1 parent c053875 commit 53903ba

File tree

10 files changed: 39 additions and 60 deletions.

ydb/core/blobstorage/base/utility.h

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -147,28 +147,5 @@ namespace NKikimr {
147147
TActorId NotifyId;
148148
};
149149

150-
template<class T>
151-
class TParameterByHandleClass {
152-
public:
153-
TParameterByHandleClass(const T& putLog, const T& fastRead, const T& async)
154-
: Parameters({ putLog, async, async, fastRead, async, async, async })
155-
{}
156-
157-
TParameterByHandleClass(const T& common)
158-
: TParameterByHandleClass(common, common, common)
159-
{}
160-
161-
const T& Get(NKikimrBlobStorage::EPutHandleClass handleClass) {
162-
return Parameters[(ui32)handleClass];
163-
}
164-
165-
const T& Get(NKikimrBlobStorage::EGetHandleClass handleClass) {
166-
return Parameters[(ui32)handleClass + 3];
167-
}
168-
169-
private:
170-
std::array<T, 7> Parameters;
171-
};
172-
173150
} // NKikimr
174151

ydb/core/blobstorage/dsproxy/dsproxy.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -513,9 +513,8 @@ IActor* CreateBlobStorageGroupEjectedProxy(ui32 groupId, TIntrusivePtr<TDsProxyN
513513

514514
struct TBlobStorageProxyParameters {
515515
bool UseActorSystemTimeInBSQueue = false;
516-
TDuration RequestReportingThrottlerDelay = TDuration::Seconds(1);
517-
TParameterByHandleClass<TDuration> LongRequestThreshold =
518-
TParameterByHandleClass<TDuration>(TDuration::Seconds(60));
516+
TDuration RequestReportingThrottlerDelay = TDuration::Seconds(60);
517+
TDuration LongRequestThreshold = TDuration::Seconds(50);
519518

520519
const TControlWrapper& EnablePutBatching;
521520
const TControlWrapper& EnableVPatch;

ydb/core/blobstorage/dsproxy/dsproxy_get.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -383,8 +383,9 @@ class TBlobStorageGroupGetRequest : public TBlobStorageGroupRequestActor {
383383
success);
384384
A_LOG_LOG_S(true, success ? NLog::PRI_INFO : NLog::PRI_NOTICE, "BPG68", "Result# " << evResult->Print(false));
385385

386+
bool allowToReport = AllowToReport(GetImpl.GetHandleClass());
386387
if (TActivationContext::Now() - StartTime >= LongRequestThreshold) {
387-
if (AllowToReport(GetImpl.GetHandleClass())) {
388+
if (allowToReport) {
388389
R_LOG_WARN_S("BPG71", "TEvGet Request was being processed for more than " << LongRequestThreshold
389390
<< ", serialized RootCause# " << RootCauseTrack.ToString());
390391
}

ydb/core/blobstorage/dsproxy/dsproxy_impl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ class TBlobStorageGroupProxy : public TActorBootstrapped<TBlobStorageGroupProxy>
124124
TMemorizableControlWrapper SlowDiskThreshold;
125125
TMemorizableControlWrapper PredictedDelayMultiplier;
126126

127-
TParameterByHandleClass<TDuration> LongRequestThreshold;
127+
TDuration LongRequestThreshold;
128128

129129
TAccelerationParams GetAccelerationParams();
130130

ydb/core/blobstorage/dsproxy/dsproxy_put.cpp

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -280,11 +280,11 @@ class TBlobStorageGroupPutRequest : public TBlobStorageGroupRequestActor {
280280
GetTotalTimeMs(record.GetTimestamps()) - GetVDiskTimeMs(record.GetTimestamps()),
281281
NKikimrBlobStorage::EPutHandleClass_Name(PutImpl.GetPutHandleClass()),
282282
NKikimrProto::EReplyStatus_Name(status));
283-
//if (RootCauseTrack.IsOn) {
284-
// RootCauseTrack.OnReply(cookie.GetCauseIdx(),
285-
// GetTotalTimeMs(record.GetTimestamps()) - GetVDiskTimeMs(record.GetTimestamps()),
286-
// GetVDiskTimeMs(record.GetTimestamps()));
287-
//}
283+
if (RootCauseTrack.IsOn) {
284+
RootCauseTrack.OnReply(record.GetCookie(),
285+
GetTotalTimeMs(record.GetTimestamps()) - GetVDiskTimeMs(record.GetTimestamps()),
286+
GetVDiskTimeMs(record.GetTimestamps()));
287+
}
288288

289289
if (status == NKikimrProto::BLOCKED || status == NKikimrProto::DEADLINE) {
290290
TString error = TStringBuilder() << "Got VPutResult status# " << status << " from VDiskId# " << vdiskId;
@@ -362,14 +362,14 @@ class TBlobStorageGroupPutRequest : public TBlobStorageGroupRequestActor {
362362
}
363363

364364
// Handle put results
365-
//bool isCauseRegistered = !RootCauseTrack.IsOn;
365+
bool isCauseRegistered = !RootCauseTrack.IsOn;
366366
TPutImpl::TPutResultVec putResults;
367367
for (auto &item : record.GetItems()) {
368-
//if (!isCauseRegistered) {
369-
// isCauseRegistered = RootCauseTrack.OnReply(cookie.GetCauseIdx(),
370-
// GetTotalTimeMs(record.GetTimestamps()) - GetVDiskTimeMs(record.GetTimestamps()),
371-
// GetVDiskTimeMs(record.GetTimestamps()));
372-
//}
368+
if (!isCauseRegistered) {
369+
isCauseRegistered = RootCauseTrack.OnReply(record.GetCookie(),
370+
GetTotalTimeMs(record.GetTimestamps()) - GetVDiskTimeMs(record.GetTimestamps()),
371+
GetVDiskTimeMs(record.GetTimestamps()));
372+
}
373373

374374
Y_ABORT_UNLESS(item.HasStatus());
375375
Y_ABORT_UNLESS(item.HasBlobID());
@@ -477,8 +477,11 @@ class TBlobStorageGroupPutRequest : public TBlobStorageGroupRequestActor {
477477
}
478478

479479
if (TActivationContext::Monotonic() - StartTime >= LongRequestThreshold) {
480-
if (AllowToReport(HandleClass)) {
481-
R_LOG_WARN_S("BPG71", "TEvGet Request was being processed for more than " << LongRequestThreshold
480+
bool allowToReport = AllowToReport(HandleClass);
481+
R_LOG_WARN_S("DEBUG", TActivationContext::Monotonic() - StartTime << " " << LongRequestThreshold << " " << allowToReport << " "
482+
<< NKikimrBlobStorage::EPutHandleClass_Name(PutImpl.GetPutHandleClass()));
483+
if (allowToReport) {
484+
R_LOG_WARN_S("BPP71", "TEvPut Request was being processed for more than " << LongRequestThreshold
482485
<< ", serialized RootCause# " << RootCauseTrack.ToString());
483486
}
484487
}
@@ -677,12 +680,10 @@ class TBlobStorageGroupPutRequest : public TBlobStorageGroupRequestActor {
677680
void UpdatePengingVDiskResponseCount(const TDeque<TPutImpl::TPutEvent>& putEvents) {
678681
for (auto& event : putEvents) {
679682
std::visit([&](auto& event) {
680-
//Y_ABORT_UNLESS(event->Record.HasCookie());
681-
//TCookie cookie(event->Record.GetCookie());
682-
//if (RootCauseTrack.IsOn) {
683-
// cookie.SetCauseIdx(RootCauseTrack.RegisterCause());
684-
// event->Record.SetCookie(cookie);
685-
//}
683+
ui64 causeIdx = RootCauseTrack.RegisterCause();
684+
if (event->Record.HasCookie() && RootCauseTrack.IsOn) {
685+
event->Record.SetCookie(causeIdx);
686+
}
686687
const ui32 orderNumber = Info->GetOrderNumber(VDiskIDFromVDiskID(event->Record.GetVDiskID()));
687688
Y_ABORT_UNLESS(orderNumber < WaitingVDiskResponseCount.size());
688689
WaitingVDiskCount += !WaitingVDiskResponseCount[orderNumber]++;

ydb/core/blobstorage/dsproxy/dsproxy_request.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ namespace NKikimr {
112112
},
113113
.NodeLayout = TNodeLayoutInfoPtr(NodeLayoutInfo),
114114
.AccelerationParams = GetAccelerationParams(),
115-
.LongRequestThreshold = LongRequestThreshold.Get(ev->Get()->GetHandleClass),
115+
.LongRequestThreshold = LongRequestThreshold,
116116
}),
117117
ev->Get()->Deadline
118118
);
@@ -226,7 +226,7 @@ namespace NKikimr {
226226
.Stats = PerDiskStats,
227227
.EnableRequestMod3x3ForMinLatency = enableRequestMod3x3ForMinLatency,
228228
.AccelerationParams = GetAccelerationParams(),
229-
.LongRequestThreshold = LongRequestThreshold.Get(ev->Get()->HandleClass),
229+
.LongRequestThreshold = LongRequestThreshold,
230230
}),
231231
ev->Get()->Deadline
232232
);
@@ -501,7 +501,7 @@ namespace NKikimr {
501501
.Stats = PerDiskStats,
502502
.EnableRequestMod3x3ForMinLatency = enableRequestMod3x3ForMinLatency,
503503
.AccelerationParams = GetAccelerationParams(),
504-
.LongRequestThreshold = LongRequestThreshold.Get(ev->Get()->HandleClass),
504+
.LongRequestThreshold = LongRequestThreshold,
505505
}),
506506
ev->Get()->Deadline
507507
);
@@ -524,6 +524,7 @@ namespace NKikimr {
524524
.Tactic = tactic,
525525
.EnableRequestMod3x3ForMinLatency = enableRequestMod3x3ForMinLatency,
526526
.AccelerationParams = GetAccelerationParams(),
527+
.LongRequestThreshold = LongRequestThreshold,
527528
}),
528529
TInstant::Max()
529530
);

ydb/core/blobstorage/dsproxy/dsproxy_request_reporting.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@ namespace NKikimr {
55
static std::array<std::atomic<bool>, 7> ReportPermissions;
66

77
bool AllowToReport(NKikimrBlobStorage::EPutHandleClass handleClass) {
8-
return ReportPermissions[(ui32)handleClass].exchange(false);
8+
return ReportPermissions[(ui32)handleClass - 1].exchange(false);
99
}
1010

1111
bool AllowToReport(NKikimrBlobStorage::EGetHandleClass handleClass) {
12-
return ReportPermissions[(ui32)handleClass + 3].exchange(false);
12+
return ReportPermissions[(ui32)handleClass - 1 + 3].exchange(false);
1313
}
1414

1515
class TRequestReportingThrottler : public TActorBootstrapped<TRequestReportingThrottler> {
@@ -19,8 +19,8 @@ class TRequestReportingThrottler : public TActorBootstrapped<TRequestReportingTh
1919
{}
2020

2121
void Bootstrap() {
22-
Schedule(UpdatePermissionsDelay, new TEvents::TEvWakeup);
2322
Become(&TThis::StateFunc);
23+
HandleWakeup();
2424
}
2525

2626
STRICT_STFUNC(StateFunc,
@@ -29,7 +29,7 @@ class TRequestReportingThrottler : public TActorBootstrapped<TRequestReportingTh
2929

3030
private:
3131
void HandleWakeup() {
32-
for (auto& permission : ReportPermissions) {
32+
for (std::atomic<bool>& permission : ReportPermissions) {
3333
permission.store(true);
3434
}
3535
Schedule(UpdatePermissionsDelay, new TEvents::TEvWakeup);

ydb/core/blobstorage/dsproxy/root_cause.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ struct TRootCause {
7676
}
7777

7878
ui64 RegisterCause() {
79-
if (IsOn && Items.size() < InvalidCauseIdx - 1) {
79+
if (Items.size() < InvalidCauseIdx - 1) {
8080
Items.emplace_back(CurrentCauseIdx, GetCycleCountFast(), false);
8181
return Items.size() - 1;
8282
} else {
@@ -85,7 +85,7 @@ struct TRootCause {
8585
}
8686

8787
ui64 RegisterAccelerate() {
88-
if (IsOn && Items.size() < InvalidCauseIdx - 1) {
88+
if (Items.size() < InvalidCauseIdx - 1) {
8989
Items.emplace_back(CurrentCauseIdx, GetCycleCountFast(), true);
9090
return Items.size() - 1;
9191
} else {

ydb/core/blobstorage/nodewarden/node_warden.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,8 @@ namespace NKikimr {
3535
bool CachePDisks = false;
3636
bool CacheVDisks = false;
3737
bool EnableVDiskCooldownTimeout = false;
38-
TDuration RequestReportingThrottlerDelay = TDuration::Seconds(1);
39-
TParameterByHandleClass<TDuration> LongRequestThreshold = TParameterByHandleClass<TDuration>(
40-
TDuration::Seconds(20), TDuration::Seconds(20), TDuration::Seconds(60)
41-
);
38+
TDuration RequestReportingThrottlerDelay = TDuration::Seconds(60);
39+
TDuration LongRequestThreshold = TDuration::Seconds(50);
4240

4341
// debugging options
4442
bool VDiskReplPausedAtStart = false;

ydb/core/blobstorage/nodewarden/node_warden_impl.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,8 @@ void TNodeWarden::Bootstrap() {
265265
StartDistributedConfigKeeper();
266266

267267
HandleGroupPendingQueueTick();
268+
269+
StartRequestReportingThrottler();
268270
}
269271

270272
void TNodeWarden::HandleReadCache() {

0 commit comments

Comments
 (0)