Skip to content

Commit 11be8af

Browse files
committed
Fix scrubbing and flapping unittest
1 parent bc5b8c1 commit 11be8af

File tree

9 files changed

+171
-95
lines changed

9 files changed

+171
-95
lines changed

ydb/core/blobstorage/backpressure/queue.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -180,6 +180,10 @@ class TBlobStorageQueue {
180180
return Queues.InFlight.size();
181181
}
182182

183+
ui64 GetInFlightCost() const {
184+
return InFlightCost;
185+
}
186+
183187
void UpdateCostModel(TInstant now, const NKikimrBlobStorage::TVDiskCostSettings& settings,
184188
const TBlobStorageGroupType& type);
185189
void InvalidateCosts();

ydb/core/blobstorage/backpressure/queue_backpressure_client.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -365,7 +365,11 @@ class TVDiskBackpressureClientActor : public TActorBootstrapped<TVDiskBackpressu
365365
<< " msgId# " << msgId << " sequenceId# " << sequenceId
366366
<< " expectedMsgId# " << expectedMsgId << " expectedSequenceId# " << expectedSequenceId
367367
<< " status# " << NKikimrProto::EReplyStatus_Name(status)
368-
<< " ws# " << NKikimrBlobStorage::TWindowFeedback_EStatus_Name(ws));
368+
<< " ws# " << NKikimrBlobStorage::TWindowFeedback_EStatus_Name(ws)
369+
<< " InFlightCost# " << Queue.GetInFlightCost()
370+
<< " InFlightCount# " << Queue.InFlightCount()
371+
<< " ItemsWaiting# " << Queue.GetItemsWaiting()
372+
<< " BytesWaiting# " << Queue.GetBytesWaiting());
369373

370374
switch (ws) {
371375
case NKikimrBlobStorage::TWindowFeedback::IncorrectMsgId:

ydb/core/blobstorage/backpressure/queue_backpressure_server.h

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -196,9 +196,7 @@ namespace NKikimr {
196196
}
197197
}
198198

199-
TWindowStatus *Processed(bool checkMsgId, const TMessageId &msgId, ui64 cost, TWindowStatus *opStatus) {
200-
Y_UNUSED(checkMsgId);
201-
Y_UNUSED(msgId);
199+
TWindowStatus *Processed(bool /*checkMsgId*/, const TMessageId& /*msgId*/, ui64 cost, TWindowStatus *opStatus) {
202200
Y_ABORT_UNLESS(Cost >= cost);
203201
Cost -= cost;
204202
--InFlight;

ydb/core/blobstorage/dsproxy/dsproxy_get_impl.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -293,12 +293,13 @@ void TGetImpl::PrepareRequests(TLogContext &logCtx, TDeque<std::unique_ptr<TEvBl
293293
msg->SetId(ReaderTabletData->Id);
294294
msg->SetGeneration(ReaderTabletData->Generation);
295295
}
296-
R_LOG_DEBUG_SX(logCtx, "BPG14", "Send get to orderNumber# " << get.OrderNumber
297-
<< " vget# " << vget->ToString());
298296
}
299297

300298
for (auto& vget : gets) {
301299
if (vget) {
300+
R_LOG_DEBUG_SX(logCtx, "BPG14", "Send get to orderNumber# "
301+
<< Info->GetOrderNumber(VDiskIDFromVDiskID(vget->Record.GetVDiskID()))
302+
<< " vget# " << vget->ToString());
302303
outVGets.push_back(std::move(vget));
303304
++RequestIndex;
304305
}

ydb/core/blobstorage/ut_blobstorage/scrub_fast.cpp

Lines changed: 73 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -16,64 +16,95 @@ void Test() {
1616

1717
TString data = TString::Uninitialized(8_MB);
1818
memset(data.Detach(), 'X', data.size());
19-
TLogoBlobID id(1, 1, 1, 0, data.size(), 0);
2019

21-
{ // write data to group
22-
TActorId sender = runtime->AllocateEdgeActor(1);
23-
runtime->WrapInActorContext(sender, [&] {
24-
SendToBSProxy(sender, info->GroupID, new TEvBlobStorage::TEvPut(id, data, TInstant::Max()));
25-
});
26-
auto res = env.WaitForEdgeActorEvent<TEvBlobStorage::TEvPutResult>(sender);
27-
UNIT_ASSERT_VALUES_EQUAL(res->Get()->Status, NKikimrProto::OK);
28-
}
20+
for (ui32 step = 1; step < 100; ++step) {
21+
TLogoBlobID id(1, 1, step, 0, data.size(), 0);
2922

30-
auto checkReadable = [&](NKikimrProto::EReplyStatus status) {
31-
TActorId sender = runtime->AllocateEdgeActor(1);
32-
runtime->WrapInActorContext(sender, [&] {
33-
SendToBSProxy(sender, info->GroupID, new TEvBlobStorage::TEvGet(id, 0, 0, TInstant::Max(),
34-
NKikimrBlobStorage::EGetHandleClass::FastRead));
35-
});
36-
auto res = env.WaitForEdgeActorEvent<TEvBlobStorage::TEvGetResult>(sender);
37-
UNIT_ASSERT_VALUES_EQUAL(res->Get()->Status, NKikimrProto::OK);
38-
UNIT_ASSERT_VALUES_EQUAL(res->Get()->ResponseSz, 1);
39-
auto& r = res->Get()->Responses[0];
40-
UNIT_ASSERT_VALUES_EQUAL(r.Status, status);
41-
if (status == NKikimrProto::OK) {
42-
UNIT_ASSERT_VALUES_EQUAL(r.Buffer.ConvertToString(), data);
23+
{ // write data to group
24+
TActorId sender = runtime->AllocateEdgeActor(1);
25+
runtime->WrapInActorContext(sender, [&] {
26+
SendToBSProxy(sender, info->GroupID, new TEvBlobStorage::TEvPut(id, data, TInstant::Max()));
27+
});
28+
auto res = env.WaitForEdgeActorEvent<TEvBlobStorage::TEvPutResult>(sender);
29+
UNIT_ASSERT_VALUES_EQUAL(res->Get()->Status, NKikimrProto::OK);
4330
}
44-
};
4531

46-
checkReadable(NKikimrProto::OK);
32+
auto checkReadable = [&] {
33+
TActorId sender = runtime->AllocateEdgeActor(1);
34+
runtime->WrapInActorContext(sender, [&] {
35+
SendToBSProxy(sender, info->GroupID, new TEvBlobStorage::TEvGet(id, 0, 0, TInstant::Max(),
36+
NKikimrBlobStorage::EGetHandleClass::FastRead));
37+
});
38+
auto res = env.WaitForEdgeActorEvent<TEvBlobStorage::TEvGetResult>(sender);
39+
UNIT_ASSERT_VALUES_EQUAL(res->Get()->Status, NKikimrProto::OK);
40+
UNIT_ASSERT_VALUES_EQUAL(res->Get()->ResponseSz, 1);
41+
auto& r = res->Get()->Responses[0];
42+
UNIT_ASSERT_VALUES_EQUAL(r.Status, NKikimrProto::OK);
43+
UNIT_ASSERT_VALUES_EQUAL(r.Buffer.ConvertToString(), data);
44+
45+
ui32 partsMask = 0;
46+
for (ui32 i = 0; i < info->GetTotalVDisksNum(); ++i) {
47+
const TVDiskID& vdiskId = info->GetVDiskId(i);
48+
env.WithQueueId(vdiskId, NKikimrBlobStorage::EVDiskQueueId::GetFastRead, [&](TActorId queueId) {
49+
const TActorId sender = runtime->AllocateEdgeActor(1);
50+
auto ev = TEvBlobStorage::TEvVGet::CreateExtremeDataQuery(vdiskId, TInstant::Max(),
51+
NKikimrBlobStorage::EGetHandleClass::FastRead);
52+
ev->AddExtremeQuery(id, 0, 0);
53+
runtime->Send(new IEventHandle(queueId, sender, ev.release()), sender.NodeId());
54+
auto reply = env.WaitForEdgeActorEvent<TEvBlobStorage::TEvVGetResult>(sender);
55+
auto& record = reply->Get()->Record;
56+
UNIT_ASSERT_VALUES_EQUAL(record.GetStatus(), NKikimrProto::OK);
57+
UNIT_ASSERT_VALUES_EQUAL(record.ResultSize(), 1);
58+
for (const auto& result : record.GetResult()) {
59+
if (result.GetStatus() == NKikimrProto::OK) {
60+
const TLogoBlobID& id = LogoBlobIDFromLogoBlobID(result.GetBlobID());
61+
UNIT_ASSERT(id.PartId());
62+
const ui32 partIdx = id.PartId() - 1;
63+
const ui32 mask = 1 << partIdx;
64+
UNIT_ASSERT(!(partsMask & mask));
65+
partsMask |= mask;
66+
} else {
67+
UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), NKikimrProto::NODATA);
68+
}
69+
}
70+
});
71+
}
72+
UNIT_ASSERT_VALUES_EQUAL(partsMask, (1 << info->Type.TotalPartCount()) - 1);
73+
};
4774

48-
for (ui32 i = 0; i < info->GetTotalVDisksNum(); ++i) {
49-
const TActorId vdiskActorId = info->GetActorId(i);
75+
checkReadable();
5076

51-
ui32 nodeId, pdiskId;
52-
std::tie(nodeId, pdiskId, std::ignore) = DecomposeVDiskServiceId(vdiskActorId);
53-
auto it = env.PDiskMockStates.find(std::make_pair(nodeId, pdiskId));
54-
Y_ABORT_UNLESS(it != env.PDiskMockStates.end());
77+
for (ui32 i = 0; i < info->GetTotalVDisksNum(); ++i) {
78+
const TActorId vdiskActorId = info->GetActorId(i);
5579

56-
const TActorId sender = runtime->AllocateEdgeActor(vdiskActorId.NodeId());
57-
env.Runtime->Send(new IEventHandle(vdiskActorId, sender, new TEvBlobStorage::TEvCaptureVDiskLayout), sender.NodeId());
58-
auto res = env.WaitForEdgeActorEvent<TEvBlobStorage::TEvCaptureVDiskLayoutResult>(sender);
80+
ui32 nodeId, pdiskId;
81+
std::tie(nodeId, pdiskId, std::ignore) = DecomposeVDiskServiceId(vdiskActorId);
82+
auto it = env.PDiskMockStates.find(std::make_pair(nodeId, pdiskId));
83+
Y_ABORT_UNLESS(it != env.PDiskMockStates.end());
5984

60-
for (auto& item : res->Get()->Layout) {
61-
using T = TEvBlobStorage::TEvCaptureVDiskLayoutResult;
62-
if (item.Database == T::EDatabase::LogoBlobs && item.RecordType == T::ERecordType::HugeBlob) {
63-
const TDiskPart& part = item.Location;
64-
it->second->SetCorruptedArea(part.ChunkIdx, part.Offset, part.Offset + part.Size, true);
65-
break;
85+
const TActorId sender = runtime->AllocateEdgeActor(vdiskActorId.NodeId());
86+
env.Runtime->Send(new IEventHandle(vdiskActorId, sender, new TEvBlobStorage::TEvCaptureVDiskLayout), sender.NodeId());
87+
auto res = env.WaitForEdgeActorEvent<TEvBlobStorage::TEvCaptureVDiskLayoutResult>(sender);
88+
89+
for (auto& item : res->Get()->Layout) {
90+
using T = TEvBlobStorage::TEvCaptureVDiskLayoutResult;
91+
if (item.Database == T::EDatabase::LogoBlobs && item.RecordType == T::ERecordType::HugeBlob && item.BlobId.FullID() == id) {
92+
const TDiskPart& part = item.Location;
93+
it->second->SetCorruptedArea(part.ChunkIdx, part.Offset, part.Offset + 1 + RandomNumber(part.Size), true);
94+
break;
95+
}
6696
}
97+
98+
checkReadable();
6799
}
68100

69-
checkReadable(NKikimrProto::OK);
101+
env.Sim(TDuration::Seconds(60));
70102
}
71-
72-
env.Sim(TDuration::Seconds(60));
73103
}
74104

75105
Y_UNIT_TEST_SUITE(ScrubFast) {
76106
Y_UNIT_TEST(SingleBlob) {
77107
Test();
78108
}
79109
}
110+

ydb/core/blobstorage/vdisk/scrub/blob_recovery_impl.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -99,15 +99,15 @@ namespace NKikimr {
9999

100100
// a map to fill upon receiving VGet result
101101
struct TPerBlobInfo {
102-
const TInstant Deadline;
103102
std::weak_ptr<TInFlightContext> Context;
104103
TEvRecoverBlobResult::TItem *Item; // item to update
105104
ui32 BlobReplyCounter = 0; // number of unreplied queries for this blob
106105
};
107106
std::unordered_multimap<TLogoBlobID, TPerBlobInfo, THash<TLogoBlobID>> VGetResultMap;
107+
std::set<std::tuple<TVDiskIdShort, TLogoBlobID>> GetsInFlight;
108108

109109
void AddBlobQuery(const TLogoBlobID& id, NMatrix::TVectorType needed, const std::shared_ptr<TInFlightContext>& context, TEvRecoverBlobResult::TItem *item);
110-
void AddExtremeQuery(const TVDiskID& vdiskId, const TLogoBlobID& id, TInstant deadline, ui32 worstReplySize);
110+
void AddExtremeQuery(const TVDiskID& vdiskId, const TLogoBlobID& id, TInstant deadline, ui32 idxInSubgroup);
111111
void SendPendingQueries();
112112
void Handle(TEvBlobStorage::TEvVGetResult::TPtr ev);
113113
NKikimrProto::EReplyStatus ProcessItemData(TEvRecoverBlobResult::TItem& item);

ydb/core/blobstorage/vdisk/scrub/blob_recovery_process.cpp

Lines changed: 73 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -7,46 +7,56 @@ namespace NKikimr {
77
STLOG(PRI_DEBUG, BS_VDISK_SCRUB, VDS32, VDISKP(LogPrefix, "AddBlobQuery"), (SelfId, SelfId()),
88
(Id, id), (Needed, needed), (RequestId, context->RequestId));
99
const TInstant deadline = context->Iterator->first;
10-
const TBlobStorageGroupType& gtype = Info->Type;
1110
TBlobStorageGroupInfo::TOrderNums nums;
1211
Info->GetTopology().PickSubgroup(id.Hash(), nums);
1312
ui32 blobReplyCounter = 0;
1413
for (ui32 i = 0; i < nums.size(); ++i) {
1514
const TVDiskID& vdiskId = Info->GetVDiskId(i); // obtain VDisk
16-
if (TVDiskIdShort(vdiskId) == VCtx->ShortSelfVDisk) {
17-
continue;
15+
if (TVDiskIdShort(vdiskId) != VCtx->ShortSelfVDisk) {
16+
AddExtremeQuery(vdiskId, id, deadline, i);
17+
++blobReplyCounter;
1818
}
19+
}
20+
VGetResultMap.emplace(id, TPerBlobInfo{context, item, blobReplyCounter});
21+
}
22+
23+
void TBlobRecoveryActor::AddExtremeQuery(const TVDiskID& vdiskId, const TLogoBlobID& id, TInstant deadline, ui32 idxInSubgroup) {
24+
const auto [_, inserted] = GetsInFlight.emplace(vdiskId, id);
25+
26+
ui32 worstReplySize = 0;
27+
if (inserted) {
28+
const TBlobStorageGroupType& gtype = Info->Type;
1929
switch (TIngress::IngressMode(gtype)) {
2030
case TIngress::EMode::GENERIC:
21-
ui32 maxSize;
22-
maxSize = 0;
2331
if (gtype.GetErasure() == TBlobStorageGroupType::ErasureMirror3dc) {
24-
maxSize += gtype.PartSize(TLogoBlobID(id, i % 3 + 1));
32+
worstReplySize = gtype.PartSize(TLogoBlobID(id, idxInSubgroup % 3 + 1));
2533
} else {
2634
for (ui32 k = 0; k < gtype.TotalPartCount(); ++k) {
27-
maxSize += i >= gtype.TotalPartCount() || k == i ? gtype.PartSize(TLogoBlobID(id, k + 1)) : 0;
35+
worstReplySize += idxInSubgroup >= gtype.TotalPartCount() || k == idxInSubgroup
36+
? gtype.PartSize(TLogoBlobID(id, k + 1)) : 0;
2837
}
2938
}
30-
AddExtremeQuery(vdiskId, id, deadline, maxSize);
3139
break;
3240

3341
case TIngress::EMode::MIRROR3OF4:
34-
AddExtremeQuery(vdiskId, id, deadline, gtype.PartSize(TLogoBlobID(id, 1)) +
35-
gtype.PartSize(TLogoBlobID(id, 2)));
42+
for (ui32 i = 0; i < 2; ++i) {
43+
if (idxInSubgroup % 2 == i || idxInSubgroup >= 4) {
44+
worstReplySize += gtype.PartSize(TLogoBlobID(id, i + 1));
45+
}
46+
}
3647
break;
3748
}
38-
++blobReplyCounter;
3949
}
40-
VGetResultMap.emplace(id, TPerBlobInfo{context->Iterator->first, context, item, blobReplyCounter});
41-
}
4250

43-
void TBlobRecoveryActor::AddExtremeQuery(const TVDiskID& vdiskId, const TLogoBlobID& id, TInstant deadline, ui32 worstReplySize) {
4451
STLOG(PRI_DEBUG, BS_VDISK_SCRUB, VDS33, VDISKP(LogPrefix, "AddExtremeQuery"), (SelfId, SelfId()),
45-
(VDiskId, vdiskId), (Id, id), (WorstReplySize, worstReplySize));
52+
(VDiskId, vdiskId), (Id, id), (WorstReplySize, worstReplySize), (AlreadyInFlight, !inserted));
53+
if (!inserted) { // the request is already in flight
54+
return;
55+
}
4656

4757
TQuery& query = Queries[vdiskId];
4858

49-
const ui32 maxReplySize = 10000000; // FIXME
59+
const ui32 maxReplySize = 32_MB;
5060
if (query.VGet && query.WorstReplySize + worstReplySize > maxReplySize) { // send the request on overflow
5161
query.Pending.push_back(std::move(query.VGet));
5262
query.WorstReplySize = 0;
@@ -79,42 +89,67 @@ namespace NKikimr {
7989
STLOG(PRI_DEBUG, BS_VDISK_SCRUB, VDS35, VDISKP(LogPrefix, "received TEvVGetResult"), (SelfId, SelfId()),
8090
(Msg, ev->Get()->ToString()));
8191

92+
const TInstant now = TActivationContext::Now();
8293
const auto& record = ev->Get()->Record;
94+
const TVDiskID vdiskId = VDiskIDFromVDiskID(record.GetVDiskID());
95+
std::unordered_map<TLogoBlobID, TInstant, THash<TLogoBlobID>> rerequest;
96+
std::unordered_set<TLogoBlobID> done;
97+
8398
for (const auto& res : record.GetResult()) {
8499
const TLogoBlobID& id = LogoBlobIDFromLogoBlobID(res.GetBlobID());
85100
const TLogoBlobID& fullId = id.FullID(); // whole blob id
86-
auto r = VGetResultMap.equal_range(fullId);
87-
for (auto it = r.first; it != r.second; ) {
101+
done.insert(fullId);
102+
const NKikimrProto::EReplyStatus status = res.GetStatus();
103+
auto [begin, end] = VGetResultMap.equal_range(fullId);
104+
for (auto it = begin; it != end; ) {
88105
TPerBlobInfo& info = it->second;
89106
if (auto context = info.Context.lock()) { // context acquired, request is still intact
90-
auto& item = *info.Item; // only here we can access item, after obtaining context pointer
91-
TRope data = ev->Get()->GetBlobData(res);
92-
bool update = false;
93-
if (res.GetStatus() == NKikimrProto::OK && data) {
94-
item.SetPartData(id, std::move(data));
95-
update = true;
96-
}
97-
const bool term = !--info.BlobReplyCounter;
98-
if (item.Status == NKikimrProto::UNKNOWN && (term || update)) {
99-
const NKikimrProto::EReplyStatus prevStatus = std::exchange(item.Status, ProcessItemData(item));
100-
if (item.Status == NKikimrProto::UNKNOWN && term) { // not enough parts to fulfill request
101-
item.Status = NKikimrProto::NODATA;
107+
if (status == NKikimrProto::DEADLINE && now < context->Iterator->first) {
108+
auto& deadline = rerequest[fullId];
109+
deadline = Max(deadline, context->Iterator->first);
110+
} else {
111+
auto& item = *info.Item; // only here we can access item, after obtaining context pointer
112+
TRope data = ev->Get()->GetBlobData(res);
113+
bool update = false;
114+
if (res.GetStatus() == NKikimrProto::OK && data) {
115+
item.SetPartData(id, std::move(data));
116+
update = true;
117+
}
118+
const bool term = !--info.BlobReplyCounter;
119+
if (item.Status == NKikimrProto::UNKNOWN && (term || update)) {
120+
const NKikimrProto::EReplyStatus prevStatus = std::exchange(item.Status, ProcessItemData(item));
121+
if (item.Status == NKikimrProto::UNKNOWN && term) { // not enough parts to fulfill request
122+
item.Status = NKikimrProto::NODATA;
123+
}
124+
STLOG(PRI_DEBUG, BS_VDISK_SCRUB, VDS36, VDISKP(LogPrefix, "processing item"),
125+
(SelfId, SelfId()), (RequestId, context->RequestId), (Id, id),
126+
(Status, res.GetStatus()), (Last, term), (DataUpdated, update),
127+
(EntryStatus, prevStatus), (ExitStatus, item.Status));
128+
}
129+
if (item.Status != NKikimrProto::UNKNOWN && !--context->NumUnrespondedBlobs) { // request fully completed
130+
context->SendResult(SelfId());
131+
InFlight.erase(context->Iterator);
132+
}
133+
if (term) { // this was the last reply for current blob
134+
it = VGetResultMap.erase(it);
135+
continue;
102136
}
103-
STLOG(PRI_DEBUG, BS_VDISK_SCRUB, VDS36, VDISKP(LogPrefix, "processing item"),
104-
(SelfId, SelfId()), (RequestId, context->RequestId), (Id, id),
105-
(Status, res.GetStatus()), (Last, term), (DataUpdated, update),
106-
(EntryStatus, prevStatus), (ExitStatus, item.Status));
107-
}
108-
if (item.Status != NKikimrProto::UNKNOWN && !--context->NumUnrespondedBlobs) {
109-
context->SendResult(SelfId());
110-
InFlight.erase(context->Iterator);
111137
}
112138
++it;
113139
} else { // request deadlined or canceled, we erase it from the map
114140
it = VGetResultMap.erase(it);
115141
}
116142
}
117143
}
144+
145+
for (const auto& id : done) {
146+
const size_t n = GetsInFlight.erase(std::make_tuple(vdiskId, id));
147+
Y_DEBUG_ABORT_UNLESS(n == 1);
148+
}
149+
for (const auto& [id, deadline] : rerequest) {
150+
AddExtremeQuery(vdiskId, id, deadline, Info->GetTopology().GetIdxInSubgroup(vdiskId, id.Hash()));
151+
}
152+
SendPendingQueries();
118153
}
119154

120155
NKikimrProto::EReplyStatus TBlobRecoveryActor::ProcessItemData(TEvRecoverBlobResult::TItem& item) {

0 commit comments

Comments
 (0)