Skip to content

Commit a70152e

Browse files
committed
Check space on replication writes to PDisk
1 parent d44df6c commit a70152e

File tree

10 files changed

+370
-75
lines changed

10 files changed

+370
-75
lines changed

ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -95,16 +95,19 @@ struct TPDiskMockState::TImpl {
9595
}
9696
}
9797

98-
void UpdateStatusFlags(i64 chunksDelta) {
98+
void UpdateStatusFlags() {
9999
switch (SpaceColorPolicy) {
100100
case ESpaceColorPolicy::SharedQuota: {
101-
NKikimrBlobStorage::TPDiskSpaceColor::E newColor =
102-
ChunkSharedQuota->EstimateSpaceColor(chunksDelta, &Occupancy);
103-
if (chunksDelta < 0) {
104-
ChunkSharedQuota->Release(-chunksDelta);
105-
} else if (chunksDelta > 0) {
106-
ChunkSharedQuota->ForceAllocate(chunksDelta);
101+
i64 before = ChunkSharedQuota->GetFree();
102+
i64 now = GetNumFreeChunks();
103+
if (before < now) {
104+
ChunkSharedQuota->Release(now - before);
105+
} else if (before > now) {
106+
ChunkSharedQuota->ForceAllocate(before - now);
107107
}
108+
109+
NKikimrBlobStorage::TPDiskSpaceColor::E newColor =
110+
ChunkSharedQuota->EstimateSpaceColor(0, &Occupancy);
108111
SetStatusFlags(SpaceColorToStatusFlag(newColor));
109112
break;
110113
}
@@ -128,7 +131,6 @@ struct TPDiskMockState::TImpl {
128131

129132
Y_ABORT_UNLESS(chunkIdx != TotalChunks);
130133

131-
UpdateStatusFlags(1);
132134
return chunkIdx;
133135
}
134136

@@ -213,7 +215,6 @@ struct TPDiskMockState::TImpl {
213215
owner.ChunkData.erase(chunkIdx);
214216
}
215217

216-
UpdateStatusFlags(-(i64)owner.ReservedChunks.size());
217218
FreeChunks.merge(owner.ReservedChunks);
218219
AdjustFreeChunks();
219220
}
@@ -232,7 +233,6 @@ struct TPDiskMockState::TImpl {
232233
const bool inserted = FreeChunks.insert(chunkIdx).second;
233234
Y_ABORT_UNLESS(inserted);
234235

235-
UpdateStatusFlags(-1);
236236
AdjustFreeChunks();
237237
}
238238

@@ -520,8 +520,6 @@ class TPDiskMockActor : public TActorBootstrapped<TPDiskMockActor> {
520520
break;
521521
} else {
522522
owner.Slain = true;
523-
Impl.UpdateStatusFlags(-(i64)owner.ReservedChunks.size());
524-
Impl.UpdateStatusFlags(-(i64)owner.CommittedChunks.size());
525523
Impl.FreeChunks.merge(owner.ReservedChunks);
526524
Impl.FreeChunks.merge(owner.CommittedChunks);
527525
Impl.AdjustFreeChunks();
@@ -717,6 +715,7 @@ class TPDiskMockActor : public TActorBootstrapped<TPDiskMockActor> {
717715
if (Impl.GetNumFreeChunks() < msg->SizeChunks) {
718716
PDISK_MOCK_LOG(NOTICE, PDM09, "received TEvChunkReserve", (Msg, msg->ToString()), (Error, "no free chunks"));
719717
res->Status = NKikimrProto::OUT_OF_SPACE;
718+
res->StatusFlags = GetStatusFlags() | ui32(NKikimrBlobStorage::StatusNotEnoughDiskSpaceForOperation);
720719
res->ErrorReason = "no free chunks";
721720
} else {
722721
PDISK_MOCK_LOG(DEBUG, PDM07, "received TEvChunkReserve", (Msg, msg->ToString()), (VDiskId, owner->VDiskId));
@@ -790,9 +789,11 @@ class TPDiskMockActor : public TActorBootstrapped<TPDiskMockActor> {
790789
if (!msg->ChunkIdx) { // allocate chunk
791790
if (!Impl.GetNumFreeChunks()) {
792791
res->Status = NKikimrProto::OUT_OF_SPACE;
792+
res->StatusFlags = GetStatusFlags() | ui32(NKikimrBlobStorage::StatusNotEnoughDiskSpaceForOperation);
793793
res->ErrorReason = "no free chunks";
794794
} else {
795795
msg->ChunkIdx = res->ChunkIdx = Impl.AllocateChunk(*owner);
796+
res->StatusFlags = GetStatusFlags();
796797
}
797798
}
798799
if (msg->ChunkIdx) {
@@ -931,6 +932,7 @@ class TPDiskMockActor : public TActorBootstrapped<TPDiskMockActor> {
931932
}
932933

933934
NPDisk::TStatusFlags GetStatusFlags() {
935+
Impl.UpdateStatusFlags();
934936
return Impl.StatusFlags;
935937
}
936938

ydb/core/blobstorage/ut_blobstorage/lib/env.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ struct TEnvironmentSetup {
5757
const ui32 ReplMaxQuantumBytes = 0;
5858
const ui32 ReplMaxDonorNotReadyCount = 0;
5959
const ui64 PDiskSize = 10_TB;
60+
const ui64 PDiskChunkSize = 0;
6061
const bool TrackSharedQuotaInPDiskMock = false;
6162
};
6263

@@ -75,10 +76,11 @@ struct TEnvironmentSetup {
7576
const auto key = std::make_pair(nodeId, pdiskId);
7677
TIntrusivePtr<TPDiskMockState>& state = Env.PDiskMockStates[key];
7778
if (!state) {
79+
ui64 chunkSize = Env.Settings.PDiskChunkSize ? Env.Settings.PDiskChunkSize : cfg->ChunkSize;
7880
TPDiskMockState::ESpaceColorPolicy spaceColorPolicy = Env.Settings.TrackSharedQuotaInPDiskMock
7981
? TPDiskMockState::ESpaceColorPolicy::SharedQuota
8082
: TPDiskMockState::ESpaceColorPolicy::None;
81-
state.Reset(new TPDiskMockState(nodeId, pdiskId, cfg->PDiskGuid, Env.Settings.PDiskSize, cfg->ChunkSize,
83+
state.Reset(new TPDiskMockState(nodeId, pdiskId, cfg->PDiskGuid, Env.Settings.PDiskSize, chunkSize,
8284
cfg->ReadOnly, Env.Settings.DiskType, spaceColorPolicy));
8385
}
8486
const TActorId& actorId = ctx.Register(CreatePDiskMockActor(state), TMailboxType::HTSwap, poolId);

ydb/core/blobstorage/ut_blobstorage/replication.cpp

Lines changed: 157 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -359,11 +359,12 @@ Y_UNIT_TEST_SUITE(Replication) {
359359

360360
struct TTestCtx : public TTestCtxBase {
361361
public:
362-
TTestCtx(TBlobStorageGroupType erasure, ui64 pdiskSize, ui32 groupsCount = 3)
362+
TTestCtx(TBlobStorageGroupType erasure, ui64 pdiskSize, ui32 groupsCount)
363363
: TTestCtxBase(TEnvironmentSetup::TSettings{
364364
.NodeCount = erasure.BlobSubgroupSize(),
365365
.Erasure = erasure,
366366
.PDiskSize = pdiskSize,
367+
.PDiskChunkSize = 32_MB,
367368
.TrackSharedQuotaInPDiskMock = true,
368369
})
369370
, PDiskSize(pdiskSize)
@@ -393,58 +394,148 @@ struct TTestCtx : public TTestCtxBase {
393394
};
394395

395396
Y_UNIT_TEST_SUITE(ReplicationSpace) {
396-
void TestSpace() {
397+
398+
struct TVDiskStats {
399+
double Occupancy;
400+
bool IsReplicated;
401+
};
402+
403+
TVDiskID VDiskIdFromVSlot(const NKikimrBlobStorage::TBaseConfig::TVSlot& vslot) {
404+
return TVDiskID(vslot.GetGroupId(), vslot.GetGroupGeneration(), vslot.GetFailRealmIdx(),
405+
vslot.GetFailDomainIdx(), vslot.GetVDiskIdx());;
406+
}
407+
408+
void TestSpace(ui64 diskSize, ui64 blobSize, float usedSpaceFraction, bool donorMode) {
397409
TBlobStorageGroupType erasure = TBlobStorageGroupType::ErasureMirror3dc;
398-
ui64 diskSize = 3_GB;
399-
TTestCtx ctx(erasure, diskSize);
410+
TTestCtx ctx(erasure, diskSize, 2);
400411
ctx.Initialize();
401412

402-
ui32 chosenNodeId = 0;
413+
// disable self-heal
414+
ctx.Env->UpdateSettings(false, donorMode, false);
415+
416+
ui64 perDiskDataSize = diskSize * usedSpaceFraction;
417+
ui64 dataSize = perDiskDataSize;
418+
419+
// assure that all groups are green
420+
for (ui32 groupId : ctx.Groups) {
421+
auto status = ctx.GetGroupStatus(groupId);
422+
UNIT_ASSERT(status->Get()->Status == NKikimrProto::OK);
423+
Ctest << "Group# " << groupId << " Status# " << status->Get()->ToString() << Endl;
424+
UNIT_ASSERT(!status->Get()->StatusFlags.Check(NKikimrBlobStorage::StatusDiskSpaceCyan));
425+
}
426+
427+
// write data
428+
for (ui32 groupId : ctx.Groups) {
429+
ctx.WriteCompressedData(TTestCtxBase::TDataProfile{
430+
.GroupId = groupId,
431+
.TotalSize = dataSize,
432+
.BlobSize = blobSize,
433+
.DelayBetweenPuts = TDuration::Seconds(1),
434+
.Erasure = erasure,
435+
.CookieStrategy = TTestCtxBase::TDataProfile::ECookieStrategy::WithSamePlacement,
436+
});
437+
Ctest << "Data written for group " << groupId << Endl;
438+
}
439+
440+
// wait for compaction to finish
441+
ctx.Env->Sim(TDuration::Minutes(360));
442+
443+
// assure that all groups are green
444+
for (ui32 groupId : ctx.Groups) {
445+
auto status = ctx.GetGroupStatus(groupId);
446+
UNIT_ASSERT(status->Get()->Status == NKikimrProto::OK);
447+
Ctest << "Group# " << groupId << " Status# " << status->Get()->ToString() << Endl;
448+
UNIT_ASSERT(!status->Get()->StatusFlags.Check(NKikimrBlobStorage::StatusDiskSpaceCyan));
449+
}
450+
451+
auto getVDiskStats = [&](const TVDiskID& vdiskId) -> TVDiskStats {
452+
double occupancy;
453+
bool isReplicated;
454+
ctx.Env->WithQueueId(vdiskId, NKikimrBlobStorage::EVDiskQueueId::PutTabletLog, [&](TActorId queueId) {
455+
ctx.Env->Runtime->Send(new IEventHandle(queueId, ctx.Edge, new TEvBlobStorage::TEvVStatus()), queueId.NodeId());
456+
auto res = ctx.Env->WaitForEdgeActorEvent<TEvBlobStorage::TEvVStatusResult>(ctx.Edge, false, TInstant::Max());
457+
occupancy = 1 - res->Get()->Record.GetApproximateFreeSpaceShare();
458+
isReplicated = res->Get()->Record.GetReplicated();
459+
});
460+
461+
return { occupancy, isReplicated };
462+
};
463+
464+
TVDiskID chosenVDiskId;
403465
ui32 chosenPDiskId = 0;
466+
ui32 chosenNodeId = 0;
404467

405-
for (const auto& vslot : ctx.BaseConfig.GetVSlot()) {
406-
if (vslot.GetGroupId() == ctx.Groups[0]) {
407-
chosenNodeId = vslot.GetVSlotId().GetNodeId();
408-
chosenPDiskId = vslot.GetVSlotId().GetPDiskId();
409-
break;
468+
// reassign vdisk
469+
{
470+
// choose pdisk with low space
471+
ctx.FetchBaseConfig();
472+
for (const auto& vslot : ctx.BaseConfig.GetVSlot()) {
473+
if (vslot.GetGroupId() == ctx.Groups[0]) {
474+
TVDiskStats stats = getVDiskStats(VDiskIdFromVSlot(vslot));
475+
Ctest << "VDisk# " << VDiskIdFromVSlot(vslot).ToString() << " " << stats.Occupancy << " " << stats.IsReplicated << Endl;
476+
if (stats.Occupancy > 1 - usedSpaceFraction) {
477+
chosenNodeId = vslot.GetVSlotId().GetNodeId();
478+
chosenPDiskId = vslot.GetVSlotId().GetPDiskId();
479+
break;
480+
}
481+
}
410482
}
411-
}
483+
UNIT_ASSERT(chosenNodeId != 0);
412484

413-
UNIT_ASSERT(chosenNodeId != 0);
485+
NKikimrBlobStorage::TConfigRequest request;
486+
for (const auto& vslot : ctx.BaseConfig.GetVSlot()) {
487+
if (vslot.GetGroupId() == ctx.Groups[1]) {
488+
TVDiskID vdiskId = VDiskIdFromVSlot(vslot);
489+
TVDiskStats stats = getVDiskStats(vdiskId);
490+
if (stats.Occupancy > 1 - usedSpaceFraction) {
491+
chosenVDiskId = vdiskId;
492+
NKikimrBlobStorage::TReassignGroupDisk* cmd = request.AddCommand()->MutableReassignGroupDisk();
493+
cmd->SetGroupId(vslot.GetGroupId());
494+
cmd->SetGroupGeneration(vslot.GetGroupGeneration());
495+
cmd->SetFailRealmIdx(vslot.GetFailRealmIdx());
496+
cmd->SetFailDomainIdx(vslot.GetFailDomainIdx());
497+
cmd->SetVDiskIdx(vslot.GetVDiskIdx());
498+
auto* target = cmd->MutableTargetPDiskId();
499+
target->SetNodeId(chosenNodeId);
500+
target->SetPDiskId(chosenPDiskId);
501+
break;
502+
}
503+
}
504+
}
505+
auto res = ctx.Env->Invoke(request);
506+
UNIT_ASSERT_C(res.GetSuccess(), res.GetErrorDescription());
507+
UNIT_ASSERT_C(res.GetStatus(0).GetSuccess(), res.GetStatus(0).GetErrorDescription());
508+
}
414509

415-
// disable self-heal
416-
ctx.Env->UpdateSettings(false, true, false);
510+
Ctest << "Chosen PDisk# [" << chosenNodeId << ":" << chosenPDiskId <<
511+
"] chosen VDiskId# " << chosenVDiskId.ToString() << Endl;
417512

418-
ui64 perDiskDataSize = diskSize * 0.6;
419-
ui64 dataSize = perDiskDataSize * ctx.NodeCount / 3;
513+
// wait for replication to stuck
514+
ctx.Env->Sim(TDuration::Minutes(360));
420515

516+
// check that all groups are YELLOW at worst
421517
for (ui32 groupId : ctx.Groups) {
422-
ctx.WriteCompressedData(groupId, dataSize, 8_MB);
423-
Ctest << "DATA WRITTEN FOR GROUP " << groupId << Endl;
518+
auto status = ctx.GetGroupStatus(groupId);
519+
UNIT_ASSERT(status->Get()->Status == NKikimrProto::OK);
520+
Ctest << "Group# " << groupId << " Status# " << status->Get()->ToString() << Endl;
521+
UNIT_ASSERT(!status->Get()->StatusFlags.Check(NKikimrBlobStorage::StatusDiskSpacePreOrange));
424522
}
425523

426-
Ctest << "REASSIGN DISK" << Endl;
524+
// disable donor mode to free space immediately
525+
ctx.Env->UpdateSettings(false, false, false);
427526

428-
// find vdisk from another node and move it to the chosen
527+
// reassign second vdisk from chosen pdisk
429528
{
430529
NKikimrBlobStorage::TConfigRequest request;
431-
ui32 groupToMove = 1;
432530
for (const auto& vslot : ctx.BaseConfig.GetVSlot()) {
433-
if (vslot.GetGroupId() != ctx.Groups[groupToMove]) {
434-
continue;
435-
}
436-
const ui32 nodeId = vslot.GetVSlotId().GetNodeId();
437-
if (nodeId != chosenNodeId) {
438-
NKikimrBlobStorage::TReassignGroupDisk* cmd = request.AddCommand()->MutableReassignGroupDisk();
439-
cmd->SetGroupId(vslot.GetGroupId());
440-
cmd->SetGroupGeneration(vslot.GetGroupGeneration());
441-
cmd->SetFailRealmIdx(vslot.GetFailRealmIdx());
442-
cmd->SetFailDomainIdx(vslot.GetFailDomainIdx());
443-
cmd->SetVDiskIdx(vslot.GetVDiskIdx());
444-
auto* target = cmd->MutableTargetPDiskId();
445-
target->SetNodeId(chosenNodeId);
446-
target->SetPDiskId(chosenPDiskId);
447-
if (++groupToMove == ctx.GroupsCount) {
531+
if (vslot.GetGroupId() == ctx.Groups[0]) {
532+
if (vslot.GetVSlotId().GetNodeId() == chosenNodeId && vslot.GetVSlotId().GetPDiskId() == chosenPDiskId) {
533+
NKikimrBlobStorage::TReassignGroupDisk* cmd = request.AddCommand()->MutableReassignGroupDisk();
534+
cmd->SetGroupId(vslot.GetGroupId());
535+
cmd->SetGroupGeneration(vslot.GetGroupGeneration());
536+
cmd->SetFailRealmIdx(vslot.GetFailRealmIdx());
537+
cmd->SetFailDomainIdx(vslot.GetFailDomainIdx());
538+
cmd->SetVDiskIdx(vslot.GetVDiskIdx());
448539
break;
449540
}
450541
}
@@ -453,21 +544,39 @@ Y_UNIT_TEST_SUITE(ReplicationSpace) {
453544
UNIT_ASSERT_C(res.GetSuccess(), res.GetErrorDescription());
454545
UNIT_ASSERT_C(res.GetStatus(0).GetSuccess(), res.GetStatus(0).GetErrorDescription());
455546
}
456-
Ctest << "DISK REASSIGNED" << Endl;
457547

458-
ctx.Env->Sim(TDuration::Minutes(3600));
548+
Ctest << "Evicting second VDisk" << Endl;
549+
550+
// wait for replication
551+
ctx.Env->Sim(TDuration::Hours(12));
459552

460-
ctx.Env->Runtime->WrapInActorContext(ctx.Edge, [&] {
461-
SendToBSProxy(ctx.Edge, ctx.GroupId, new TEvBlobStorage::TEvStatus(TInstant::Max()));
462-
});
463-
auto res = ctx.Env->WaitForEdgeActorEvent<TEvBlobStorage::TEvStatusResult>(ctx.Edge,
464-
false, TInstant::Max());
465-
UNIT_ASSERT(res->Get()->Status == NKikimrProto::OK);
466-
Ctest << "FLAGS " << res->Get()->ToString() << Endl;
467-
UNIT_ASSERT(!res->Get()->StatusFlags.Check(NKikimrBlobStorage::StatusDiskSpaceRed));
553+
// check that chosen VDisk finished replication
554+
{
555+
ctx.FetchBaseConfig();
556+
for (const auto& vslot : ctx.BaseConfig.GetVSlot()) {
557+
if (vslot.GetGroupId() == ctx.Groups[1]) {
558+
TVDiskID vdiskId = VDiskIdFromVSlot(vslot);
559+
TVDiskStats stats = getVDiskStats(vdiskId);
560+
UNIT_ASSERT_C(stats.IsReplicated, "Unreplicated VDiskId# " << vdiskId.ToString()
561+
<< " Occupancy# " << stats.Occupancy);
562+
}
563+
}
564+
}
565+
}
566+
567+
Y_UNIT_TEST(HugeBlobsWithDonor) {
568+
TestSpace(4_GB, 8_MB, 0.5, true);
569+
}
570+
571+
Y_UNIT_TEST(SmallBlobsWithDonor) {
572+
TestSpace(4_GB, 100_KB, 0.5, true);
573+
}
574+
575+
Y_UNIT_TEST(HugeBlobsNoDonor) {
576+
TestSpace(4_GB, 8_MB, 0.5, false);
468577
}
469578

470-
Y_UNIT_TEST(Mirror3dc) {
471-
TestSpace();
579+
Y_UNIT_TEST(SmallBlobsNoDonor) {
580+
TestSpace(4_GB, 100_KB, 0.5, false);
472581
}
473582
}

0 commit comments

Comments
 (0)