@@ -7,46 +7,56 @@ namespace NKikimr {
7
7
STLOG (PRI_DEBUG, BS_VDISK_SCRUB, VDS32, VDISKP (LogPrefix, " AddBlobQuery" ), (SelfId, SelfId ()),
8
8
(Id, id), (Needed, needed), (RequestId, context->RequestId ));
9
9
const TInstant deadline = context->Iterator ->first ;
10
- const TBlobStorageGroupType& gtype = Info->Type ;
11
10
TBlobStorageGroupInfo::TOrderNums nums;
12
11
Info->GetTopology ().PickSubgroup (id.Hash (), nums);
13
12
ui32 blobReplyCounter = 0 ;
14
13
for (ui32 i = 0 ; i < nums.size (); ++i) {
15
14
const TVDiskID& vdiskId = Info->GetVDiskId (i); // obtain VDisk
16
- if (TVDiskIdShort (vdiskId) == VCtx->ShortSelfVDisk ) {
17
- continue ;
15
+ if (TVDiskIdShort (vdiskId) != VCtx->ShortSelfVDisk ) {
16
+ AddExtremeQuery (vdiskId, id, deadline, i);
17
+ ++blobReplyCounter;
18
18
}
19
+ }
20
+ VGetResultMap.emplace (id, TPerBlobInfo{context, item, blobReplyCounter});
21
+ }
22
+
23
+ void TBlobRecoveryActor::AddExtremeQuery (const TVDiskID& vdiskId, const TLogoBlobID& id, TInstant deadline, ui32 idxInSubgroup) {
24
+ const auto [_, inserted] = GetsInFlight.emplace (vdiskId, id);
25
+
26
+ ui32 worstReplySize = 0 ;
27
+ if (inserted) {
28
+ const TBlobStorageGroupType& gtype = Info->Type ;
19
29
switch (TIngress::IngressMode (gtype)) {
20
30
case TIngress::EMode::GENERIC:
21
- ui32 maxSize;
22
- maxSize = 0 ;
23
31
if (gtype.GetErasure () == TBlobStorageGroupType::ErasureMirror3dc) {
24
- maxSize + = gtype.PartSize (TLogoBlobID (id, i % 3 + 1 ));
32
+ worstReplySize = gtype.PartSize (TLogoBlobID (id, idxInSubgroup % 3 + 1 ));
25
33
} else {
26
34
for (ui32 k = 0 ; k < gtype.TotalPartCount (); ++k) {
27
- maxSize += i >= gtype.TotalPartCount () || k == i ? gtype.PartSize (TLogoBlobID (id, k + 1 )) : 0 ;
35
+ worstReplySize += idxInSubgroup >= gtype.TotalPartCount () || k == idxInSubgroup
36
+ ? gtype.PartSize (TLogoBlobID (id, k + 1 )) : 0 ;
28
37
}
29
38
}
30
- AddExtremeQuery (vdiskId, id, deadline, maxSize);
31
39
break ;
32
40
33
41
case TIngress::EMode::MIRROR3OF4:
34
- AddExtremeQuery (vdiskId, id, deadline, gtype.PartSize (TLogoBlobID (id, 1 )) +
35
- gtype.PartSize (TLogoBlobID (id, 2 )));
42
+ for (ui32 i = 0 ; i < 2 ; ++i) {
43
+ if (idxInSubgroup % 2 == i || idxInSubgroup >= 4 ) {
44
+ worstReplySize += gtype.PartSize (TLogoBlobID (id, i + 1 ));
45
+ }
46
+ }
36
47
break ;
37
48
}
38
- ++blobReplyCounter;
39
49
}
40
- VGetResultMap.emplace (id, TPerBlobInfo{context->Iterator ->first , context, item, blobReplyCounter});
41
- }
42
50
43
- void TBlobRecoveryActor::AddExtremeQuery (const TVDiskID& vdiskId, const TLogoBlobID& id, TInstant deadline, ui32 worstReplySize) {
44
51
STLOG (PRI_DEBUG, BS_VDISK_SCRUB, VDS33, VDISKP (LogPrefix, " AddExtremeQuery" ), (SelfId, SelfId ()),
45
- (VDiskId, vdiskId), (Id, id), (WorstReplySize, worstReplySize));
52
+ (VDiskId, vdiskId), (Id, id), (WorstReplySize, worstReplySize), (AlreadyInFlight, !inserted));
53
+ if (!inserted) { // the request is already in flight
54
+ return ;
55
+ }
46
56
47
57
TQuery& query = Queries[vdiskId];
48
58
49
- const ui32 maxReplySize = 10000000 ; // FIXME
59
+ const ui32 maxReplySize = 32_MB;
50
60
if (query.VGet && query.WorstReplySize + worstReplySize > maxReplySize) { // send the request on overflow
51
61
query.Pending .push_back (std::move (query.VGet ));
52
62
query.WorstReplySize = 0 ;
@@ -79,42 +89,67 @@ namespace NKikimr {
79
89
STLOG (PRI_DEBUG, BS_VDISK_SCRUB, VDS35, VDISKP (LogPrefix, " received TEvVGetResult" ), (SelfId, SelfId ()),
80
90
(Msg, ev->Get ()->ToString ()));
81
91
92
+ const TInstant now = TActivationContext::Now ();
82
93
const auto & record = ev->Get ()->Record ;
94
+ const TVDiskID vdiskId = VDiskIDFromVDiskID (record.GetVDiskID ());
95
+ std::unordered_map<TLogoBlobID, TInstant, THash<TLogoBlobID>> rerequest;
96
+ std::unordered_set<TLogoBlobID> done;
97
+
83
98
for (const auto & res : record.GetResult ()) {
84
99
const TLogoBlobID& id = LogoBlobIDFromLogoBlobID (res.GetBlobID ());
85
100
const TLogoBlobID& fullId = id.FullID (); // whole blob id
86
- auto r = VGetResultMap.equal_range (fullId);
87
- for (auto it = r.first ; it != r.second ; ) {
101
+ done.insert (fullId);
102
+ const NKikimrProto::EReplyStatus status = res.GetStatus ();
103
+ auto [begin, end] = VGetResultMap.equal_range (fullId);
104
+ for (auto it = begin; it != end; ) {
88
105
TPerBlobInfo& info = it->second ;
89
106
if (auto context = info.Context .lock ()) { // context acquired, request is still intact
90
- auto & item = *info.Item ; // only here we can access item, after obtaining context pointer
91
- TRope data = ev->Get ()->GetBlobData (res);
92
- bool update = false ;
93
- if (res.GetStatus () == NKikimrProto::OK && data) {
94
- item.SetPartData (id, std::move (data));
95
- update = true ;
96
- }
97
- const bool term = !--info.BlobReplyCounter ;
98
- if (item.Status == NKikimrProto::UNKNOWN && (term || update)) {
99
- const NKikimrProto::EReplyStatus prevStatus = std::exchange (item.Status , ProcessItemData (item));
100
- if (item.Status == NKikimrProto::UNKNOWN && term) { // not enough parts to fulfill request
101
- item.Status = NKikimrProto::NODATA;
107
+ if (status == NKikimrProto::DEADLINE && now < context->Iterator ->first ) {
108
+ auto & deadline = rerequest[fullId];
109
+ deadline = Max (deadline, context->Iterator ->first );
110
+ } else {
111
+ auto & item = *info.Item ; // only here we can access item, after obtaining context pointer
112
+ TRope data = ev->Get ()->GetBlobData (res);
113
+ bool update = false ;
114
+ if (res.GetStatus () == NKikimrProto::OK && data) {
115
+ item.SetPartData (id, std::move (data));
116
+ update = true ;
117
+ }
118
+ const bool term = !--info.BlobReplyCounter ;
119
+ if (item.Status == NKikimrProto::UNKNOWN && (term || update)) {
120
+ const NKikimrProto::EReplyStatus prevStatus = std::exchange (item.Status , ProcessItemData (item));
121
+ if (item.Status == NKikimrProto::UNKNOWN && term) { // not enough parts to fulfill request
122
+ item.Status = NKikimrProto::NODATA;
123
+ }
124
+ STLOG (PRI_DEBUG, BS_VDISK_SCRUB, VDS36, VDISKP (LogPrefix, " processing item" ),
125
+ (SelfId, SelfId ()), (RequestId, context->RequestId ), (Id, id),
126
+ (Status, res.GetStatus ()), (Last, term), (DataUpdated, update),
127
+ (EntryStatus, prevStatus), (ExitStatus, item.Status ));
128
+ }
129
+ if (item.Status != NKikimrProto::UNKNOWN && !--context->NumUnrespondedBlobs ) { // request fully completed
130
+ context->SendResult (SelfId ());
131
+ InFlight.erase (context->Iterator );
132
+ }
133
+ if (term) { // this was the last reply for current blob
134
+ it = VGetResultMap.erase (it);
135
+ continue ;
102
136
}
103
- STLOG (PRI_DEBUG, BS_VDISK_SCRUB, VDS36, VDISKP (LogPrefix, " processing item" ),
104
- (SelfId, SelfId ()), (RequestId, context->RequestId ), (Id, id),
105
- (Status, res.GetStatus ()), (Last, term), (DataUpdated, update),
106
- (EntryStatus, prevStatus), (ExitStatus, item.Status ));
107
- }
108
- if (item.Status != NKikimrProto::UNKNOWN && !--context->NumUnrespondedBlobs ) {
109
- context->SendResult (SelfId ());
110
- InFlight.erase (context->Iterator );
111
137
}
112
138
++it;
113
139
} else { // request deadlined or canceled, we erase it from the map
114
140
it = VGetResultMap.erase (it);
115
141
}
116
142
}
117
143
}
144
+
145
+ for (const auto & id : done) {
146
+ const size_t n = GetsInFlight.erase (std::make_tuple (vdiskId, id));
147
+ Y_DEBUG_ABORT_UNLESS (n == 1 );
148
+ }
149
+ for (const auto & [id, deadline] : rerequest) {
150
+ AddExtremeQuery (vdiskId, id, deadline, Info->GetTopology ().GetIdxInSubgroup (vdiskId, id.Hash ()));
151
+ }
152
+ SendPendingQueries ();
118
153
}
119
154
120
155
NKikimrProto::EReplyStatus TBlobRecoveryActor::ProcessItemData (TEvRecoverBlobResult::TItem& item) {
0 commit comments