Skip to content

Commit 0fd7910

Browse files
correct counters
1 parent fd60ff0 commit 0fd7910

File tree

5 files changed

+184
-223
lines changed

5 files changed

+184
-223
lines changed

ydb/core/tx/columnshard/blobs_action/abstract/blob_set.h

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,55 @@ class TTabletByBlob {
4444

4545
};
4646

47+
class TBlobsByGenStep {
48+
private:
49+
struct Comparator {
50+
bool operator<(const TLogoBlobID& l, const TLogoBlobID& r) const {
51+
TGenStep gsl(l);
52+
TGenStep gsr(l);
53+
if (gsl == gsr) {
54+
return l < r;
55+
} else {
56+
return gsl < gsr;
57+
}
58+
}
59+
};
60+
std::set<TLogoBlobID, Comparator> Blobs;
61+
public:
62+
[[nodiscard]] bool Add(const TLogoBlobID& blobId) {
63+
return Blobs.emplace(blobId).second;
64+
}
65+
[[nodiscard]] bool Remove(const TLogoBlobID& blobId) {
66+
return Blobs.erase(blobId);
67+
}
68+
ui32 GetSize() const {
69+
return Blobs.size();
70+
}
71+
72+
TGenStep GetMinGenStepVerified() const {
73+
AFL_VERIFY(Blobs.size());
74+
return TGenStep(*Blobs.begin());
75+
}
76+
77+
template <class TActor>
78+
bool ExtractFront(const TGenStep border, const ui32 countLimit, const TActor& actor) {
79+
ui32 idx = 0;
80+
for (auto it = Blobs.begin(); it != Blobs.end(); ++it) {
81+
TGenStep gs(*it);
82+
if (border < gs) {
83+
return true;
84+
}
85+
if (++idx > countLimit) {
86+
Blobs.erase(Blobs.begin(), it);
87+
return false;
88+
}
89+
actor(gs, *it);
90+
}
91+
Blobs.clear();
92+
return true;
93+
}
94+
};
95+
4796
class TTabletsByBlob {
4897
private:
4998
THashMap<TUnifiedBlobId, THashSet<TTabletId>> Data;

ydb/core/tx/columnshard/blobs_action/bs/blob_manager.cpp

Lines changed: 57 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,8 @@ TBlobManager::TBlobManager(TIntrusivePtr<TTabletStorageInfo> tabletInfo, ui32 ge
129129
, CurrentGen(gen)
130130
, CurrentStep(0)
131131
{
132+
BlobsManagerCounters.CurrentGen->Set(CurrentGen);
133+
BlobsManagerCounters.CurrentStep->Set(CurrentStep);
132134
}
133135

134136
void TBlobManager::RegisterControls(NKikimr::TControlBoard& /*icb*/) {
@@ -151,43 +153,19 @@ bool TBlobManager::LoadState(IBlobManagerDb& db, const TTabletId selfTabletId) {
151153
return false;
152154
}
153155

154-
for (auto it = BlobsToDelete.GetIterator(); it.IsValid(); ++it) {
155-
BlobsManagerCounters.OnDeleteBlobMarker(it.GetBlobId().BlobSize());
156-
}
157-
BlobsManagerCounters.OnBlobsDelete(BlobsToDelete);
156+
BlobsManagerCounters.OnBlobsToDelete(BlobsToDelete);
158157

159158
// Build the list of steps that cannot be garbage collected before Keep flag is set on the blobs
160-
THashSet<TGenStep> genStepsWithBlobsToKeep;
161-
std::map<TGenStep, std::set<TLogoBlobID>> blobsToKeepLocal;
159+
TBlobsByGenStep blobsToKeepLocal;
162160
for (const auto& unifiedBlobId : blobsToKeep) {
163161
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("add_blob_to_keep", unifiedBlobId.ToStringNew());
164162
TLogoBlobID blobId = unifiedBlobId.GetLogoBlobId();
165-
TGenStep genStep(blobId);
166-
Y_ABORT_UNLESS(LastCollectedGenStep < genStep);
167-
168-
AFL_VERIFY(blobsToKeepLocal[genStep].emplace(blobId).second)("blob_to_keep_double", unifiedBlobId.ToStringNew());
169-
BlobsManagerCounters.OnKeepMarker(blobId.BlobSize());
170-
const ui64 groupId = dsGroupSelector.GetGroup(blobId);
171-
// Keep + DontKeep (probably in different gen:steps)
172-
// GC could go through it to a greater LastCollectedGenStep
173-
if (BlobsToDelete.Contains(SelfTabletId, TUnifiedBlobId(groupId, blobId))) {
174-
continue;
175-
}
163+
Y_ABORT_UNLESS(LastCollectedGenStep < TGenStep(blobId));
176164

177-
genStepsWithBlobsToKeep.insert(genStep);
165+
AFL_VERIFY(blobsToKeepLocal.Add(blobId))("blob_to_keep_double", unifiedBlobId.ToStringNew());
178166
}
179167
std::swap(blobsToKeepLocal, BlobsToKeep);
180-
BlobsManagerCounters.OnBlobsKeep(BlobsToKeep);
181-
182-
AllocatedGenSteps.clear();
183-
for (const auto& gs : genStepsWithBlobsToKeep) {
184-
AllocatedGenSteps.push_back(new TAllocatedGenStep(gs));
185-
}
186-
AllocatedGenSteps.push_back(new TAllocatedGenStep({ CurrentGen, 0 }));
187-
188-
Sort(AllocatedGenSteps.begin(), AllocatedGenSteps.end(), [](const TAllocatedGenStepConstPtr& a, const TAllocatedGenStepConstPtr& b) {
189-
return a->GenStep < b->GenStep;
190-
});
168+
BlobsManagerCounters.OnBlobsToKeep(BlobsToKeep);
191169

192170
return true;
193171
}
@@ -280,50 +258,47 @@ void TBlobManager::DrainDeleteTo(const TGenStep& dest, TGCContext& gcContext) {
280258
}
281259
}
282260

283-
bool TBlobManager::DrainKeepTo(const TGenStep& dest, TGCContext& gcContext, const bool controlCapacity) {
284-
AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("event", "PreparePerGroupGCRequests")("gen_step", dest)("gs_blobs_to_keep_count", BlobsToKeep.size());
285-
for (; BlobsToKeep.size() && (!controlCapacity || !gcContext.IsFull()); BlobsToKeep.erase(BlobsToKeep.begin())) {
286-
auto gsBlobs = BlobsToKeep.begin();
287-
TGenStep genStep = gsBlobs->first;
261+
bool TBlobManager::DrainKeepTo(const TGenStep& dest, TGCContext& gcContext) {
262+
AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("event", "PreparePerGroupGCRequests")("gen_step", dest)("gs_blobs_to_keep_count", BlobsToKeep.GetSize());
263+
264+
const auto pred = [&](const TGenStep& genStep, const TLogoBlobID& logoBlobId) {
288265
AFL_VERIFY(LastCollectedGenStep < genStep)("last", LastCollectedGenStep.ToString())("gen", genStep.ToString());
289-
if (dest < genStep) {
290-
return true;
291-
}
292-
for (auto&& keepBlobIt : gsBlobs->second) {
293-
const ui32 blobGroup = TabletInfo->GroupFor(keepBlobIt.Channel(), keepBlobIt.Generation());
294-
TBlobAddress bAddress(blobGroup, keepBlobIt.Channel());
295-
const TUnifiedBlobId keepUnified(blobGroup, keepBlobIt);
296-
gcContext.MutableKeepsToErase().emplace_back(keepUnified);
297-
if (BlobsToDelete.ExtractBlobTo(keepUnified, gcContext.MutableExtractedToRemoveFromDB())) {
298-
if (keepBlobIt.Generation() == CurrentGen) {
299-
AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep", keepUnified.ToStringNew());
300-
continue;
301-
}
302-
if (gcContext.GetSharedBlobsManager()->BuildStoreCategories({ keepUnified }).GetDirect().IsEmpty()) {
303-
AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep_not_direct", keepUnified.ToStringNew());
304-
continue;
305-
}
306-
AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep_old", keepUnified.ToStringNew());
307-
gcContext.MutablePerGroupGCListsInFlight()[bAddress].DontKeepList.insert(keepBlobIt);
308-
} else {
309-
AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_keep", keepUnified.ToStringNew());
310-
gcContext.MutablePerGroupGCListsInFlight()[bAddress].KeepList.insert(keepBlobIt);
266+
const ui32 blobGroup = TabletInfo->GroupFor(logoBlobId.Channel(), logoBlobId.Generation());
267+
TBlobAddress bAddress(blobGroup, logoBlobId.Channel());
268+
const TUnifiedBlobId keepUnified(blobGroup, logoBlobId);
269+
gcContext.MutableKeepsToErase().emplace_back(keepUnified);
270+
if (BlobsToDelete.ExtractBlobTo(keepUnified, gcContext.MutableExtractedToRemoveFromDB())) {
271+
if (logoBlobId.Generation() == CurrentGen) {
272+
AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep", keepUnified.ToStringNew());
273+
continue;
274+
}
275+
if (gcContext.GetSharedBlobsManager()->BuildStoreCategories({ keepUnified }).GetDirect().IsEmpty()) {
276+
AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep_not_direct", keepUnified.ToStringNew());
277+
continue;
311278
}
279+
AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep_old", keepUnified.ToStringNew());
280+
gcContext.MutablePerGroupGCListsInFlight()[bAddress].DontKeepList.insert(logoBlobId);
281+
} else {
282+
AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_keep", keepUnified.ToStringNew());
283+
gcContext.MutablePerGroupGCListsInFlight()[bAddress].KeepList.insert(logoBlobId);
312284
}
313-
}
314-
return BlobsToKeep.empty();
285+
};
286+
287+
return BlobsToKeep.ExtractFront(dest, gcContext.GetFreeSpace(), pred);
315288
}
316289

317290
std::shared_ptr<NBlobOperations::NBlobStorage::TGCTask> TBlobManager::BuildGCTask(const TString& storageId,
318291
const std::shared_ptr<TBlobManager>& manager, const std::shared_ptr<NDataSharing::TStorageSharedBlobsManager>& sharedBlobsInfo,
319292
const std::shared_ptr<NBlobOperations::TRemoveGCCounters>& counters) noexcept {
320293
AFL_VERIFY(!CollectGenStepInFlight);
321-
if (BlobsToKeep.empty() && BlobsToDelete.IsEmpty() && LastCollectedGenStep == TGenStep{ CurrentGen, CurrentStep }) {
294+
if (BlobsToKeep.IsEmpty() && BlobsToDelete.IsEmpty() && LastCollectedGenStep == TGenStep{ CurrentGen, CurrentStep }) {
295+
BlobsManagerCounters.GCCounters.SkipCollectionEmpty->Add(1);
322296
ACFL_DEBUG("event", "TBlobManager::BuildGCTask skip")("current_gen", CurrentGen)("current_step", CurrentStep);
323297
return nullptr;
324298
}
325299

326300
if (AppData()->TimeProvider->Now() - PreviousGCTime < NYDBTest::TControllers::GetColumnShardController()->GetOverridenGCPeriod(TDuration::Seconds(GC_INTERVAL_SECONDS))) {
301+
BlobsManagerCounters.GCCounters.SkipCollectionThrottling->Add(1);
327302
return nullptr;
328303
}
329304

@@ -334,21 +309,12 @@ std::shared_ptr<NBlobOperations::NBlobStorage::TGCTask> TBlobManager::BuildGCTas
334309
AFL_VERIFY(newCollectGenSteps.size());
335310
AFL_VERIFY(newCollectGenSteps.front() == LastCollectedGenStep);
336311
if (GCBarrierPreparation != LastCollectedGenStep) {
337-
if (!GCBarrierPreparation.Generation()) {
338-
for (auto&& newCollectGenStep : newCollectGenSteps) {
339-
if (!DrainKeepTo(newCollectGenStep, gcContext)) {
340-
break;
341-
}
342-
if (newCollectGenStep.Generation() == CurrentGen) {
343-
CollectGenStepInFlight = std::max(CollectGenStepInFlight.value_or(newCollectGenStep), newCollectGenStep);
344-
}
345-
}
346-
AFL_VERIFY(LastCollectedGenStep <= CollectGenStepInFlight)("last", LastCollectedGenStep)("collect", CollectGenStepInFlight);
347-
} else {
348-
AFL_VERIFY(GCBarrierPreparation.Generation() != CurrentGen);
312+
if (GCBarrierPreparation.Generation()) {
313+
AFL_VERIFY(GCBarrierPreparation.Generation() < CurrentGen);
349314
AFL_VERIFY(LastCollectedGenStep <= GCBarrierPreparation);
350-
CollectGenStepInFlight = GCBarrierPreparation;
351-
AFL_VERIFY(DrainKeepTo(*CollectGenStepInFlight, gcContext, false));
315+
if (DrainKeepTo(GCBarrierPreparation, gcContext)) {
316+
CollectGenStepInFlight = GCBarrierPreparation;
317+
}
352318
}
353319
} else {
354320
DrainDeleteTo(LastCollectedGenStep, gcContext);
@@ -368,30 +334,33 @@ std::shared_ptr<NBlobOperations::NBlobStorage::TGCTask> TBlobManager::BuildGCTas
368334
gcContext.InitializeFirst(TabletInfo);
369335
FirstGC = false;
370336
}
337+
if (!BlobsToKeep.IsEmpty()) {
338+
AFL_VERIFY(*CollectGenStepInFlight < BlobsToKeep.GetMinGenStepVerified());
339+
}
340+
AFL_VERIFY(LastCollectedGenStep < *CollectGenStepInFlight);
371341
}
372-
AFL_VERIFY(LastCollectedGenStep <= *CollectGenStepInFlight);
373342
AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("notice", "collect_gen_step")("value", CollectGenStepInFlight)("current_gen", CurrentGen);
374343

375-
const bool isFull = gcContext.IsFull();
344+
if (gcContext.IsFull()) {
345+
PreviousGCTime = TInstant::Zero();
346+
}
376347

348+
BlobsManagerCounters.OnGCTask(gcContext.GetKeepsToErase().size(), gcContext.GetExtractedToRemoveFromDB().GetSize(), gcContext.IsFull(), !!CollectGenStepInFlight);
377349
auto removeCategories = sharedBlobsInfo->BuildRemoveCategories(std::move(gcContext.MutableExtractedToRemoveFromDB()));
378-
379350
auto result = std::make_shared<NBlobOperations::NBlobStorage::TGCTask>(storageId, std::move(gcContext.MutablePerGroupGCListsInFlight()),
380351
CollectGenStepInFlight, std::move(gcContext.MutableKeepsToErase()), manager, std::move(removeCategories), counters, TabletInfo->TabletID, CurrentGen);
381352
if (result->IsEmpty()) {
353+
BlobsManagerCounters.OnEmptyGCTask();
382354
CollectGenStepInFlight = {};
383355
return nullptr;
384356
}
385357

386-
if (isFull) {
387-
PreviousGCTime = TInstant::Zero();
388-
}
389-
390358
return result;
391359
}
392360

393361
TBlobBatch TBlobManager::StartBlobBatch() {
394362
++CurrentStep;
363+
BlobsManagerCounters.CurrentStep->Set(CurrentStep);
395364
AFL_VERIFY(TabletInfo->Channels.size() > 2);
396365
const auto& channel = TabletInfo->Channels[(CurrentStep % (TabletInfo->Channels.size() - 2)) + 2];
397366
++CountersUpdate.BatchesStarted;
@@ -419,11 +388,9 @@ void TBlobManager::DoSaveBlobBatchOnComplete(TBlobBatch&& blobBatch) {
419388
AFL_VERIFY(genStep > edgeGenStep)("gen_step", genStep)("edge_gen_step", edgeGenStep)("blob_id", blobId.ToStringNew());
420389
AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_keep", logoBlobId.ToString());
421390

422-
BlobsManagerCounters.OnKeepMarker(logoBlobId.BlobSize());
423-
AFL_VERIFY(BlobsToKeep[genStep].emplace(logoBlobId).second);
391+
AFL_VERIFY(BlobsToKeep.Add(logoBlobId));
392+
BlobsManagerCounters.OnBlobsToKeep(BlobsToKeep);
424393
}
425-
BlobsManagerCounters.OnBlobsKeep(BlobsToKeep);
426-
427394
blobBatch.BatchInfo->GenStepRef.Reset();
428395
}
429396

@@ -459,12 +426,11 @@ void TBlobManager::DeleteBlobOnComplete(const TTabletId tabletId, const TUnified
459426
if (!IsBlobInUsage(blobId)) {
460427
LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delete Blob " << blobId);
461428
AFL_VERIFY(BlobsToDelete.Add(tabletId, blobId));
462-
BlobsManagerCounters.OnDeleteBlobMarker(blobId.BlobSize());
463-
BlobsManagerCounters.OnBlobsDelete(BlobsToDelete);
429+
BlobsManagerCounters.OnBlobsToDelete(BlobsToDelete);
464430
} else {
465-
BlobsManagerCounters.OnDeleteBlobDelayedMarker(blobId.BlobSize());
466431
LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delay Delete Blob " << blobId);
467-
BlobsToDeleteDelayed.Add(tabletId, blobId);
432+
AFL_VERIFY(BlobsToDeleteDelayed.Add(tabletId, blobId));
433+
BlobsManagerCounters.OnBlobsToDeleteDelayed(BlobsToDeleteDelayed);
468434
}
469435
}
470436

@@ -486,7 +452,7 @@ void TBlobManager::OnGCFinishedOnComplete(const std::optional<TGenStep>& genStep
486452

487453
void TBlobManager::OnGCStartOnExecute(const std::optional<TGenStep>& genStep, IBlobManagerDb& db) {
488454
if (genStep) {
489-
AFL_VERIFY(LastCollectedGenStep <= *genStep)("last", LastCollectedGenStep)("prepared", genStep);
455+
AFL_VERIFY(LastCollectedGenStep < *genStep)("last", LastCollectedGenStep)("prepared", genStep);
490456
db.SaveGCBarrierPreparation(*genStep);
491457
}
492458
}
@@ -503,8 +469,8 @@ void TBlobManager::OnBlobFree(const TUnifiedBlobId& blobId) {
503469
// Check if the blob is marked for delayed deletion
504470
if (BlobsToDeleteDelayed.ExtractBlobTo(blobId, BlobsToDelete)) {
505471
AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("blob_id", blobId)("event", "blob_delayed_deleted");
506-
BlobsManagerCounters.OnBlobsDelete(BlobsToDelete);
507-
BlobsManagerCounters.OnDeleteBlobMarker(blobId.GetLogoBlobId().BlobSize());
472+
BlobsManagerCounters.OnBlobsToDelete(BlobsToDelete);
473+
BlobsManagerCounters.OnBlobsToDeleteDelayed(BlobsToDeleteDelayed);
508474
}
509475
}
510476

ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ class TBlobManager : public IBlobManager, public TCommonBlobsTracker {
145145
ui32 CurrentStep;
146146
std::optional<TGenStep> CollectGenStepInFlight;
147147
// Lists of blobs that need Keep flag to be set
148-
std::map<TGenStep, std::set<TLogoBlobID>> BlobsToKeep;
148+
TBlobsByGenStep BlobsToKeep;
149149
// Lists of blobs that need DoNotKeep flag to be set
150150
TTabletsByBlob BlobsToDelete;
151151

@@ -239,7 +239,7 @@ class TBlobManager : public IBlobManager, public TCommonBlobsTracker {
239239
bool ExtractEvicted(TEvictedBlob& evict, TEvictMetadata& meta, bool fromDropped = false);
240240

241241
// Returns the in-flight collect gen:step when one is set; otherwise the
// greater of GCBarrierPreparation and LastCollectedGenStep.
TGenStep EdgeGenStep() const {
    if (CollectGenStepInFlight) {
        return *CollectGenStepInFlight;
    }
    return std::max(GCBarrierPreparation, LastCollectedGenStep);
}
244244
};
245245

0 commit comments

Comments
 (0)