Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions ydb/core/cms/cms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -927,7 +927,7 @@ bool TCms::TryToLockVDisk(const TActionOptions& opts,
return false;
}

auto counters = CreateErasureCounter(ClusterInfo->BSGroup(groupId).Erasure.GetErasure(), vdisk, groupId);
auto counters = CreateErasureCounter(ClusterInfo->BSGroup(groupId).Erasure.GetErasure(), vdisk, groupId, TabletCounters);
counters->CountGroupState(ClusterInfo, State->Config.DefaultRetryTime, duration, error);

switch (opts.AvailabilityMode) {
Expand All @@ -942,10 +942,11 @@ bool TCms::TryToLockVDisk(const TActionOptions& opts,
}
break;
case MODE_FORCE_RESTART:
if ( counters->GroupAlreadyHasLockedDisks() && opts.PartialPermissionAllowed) {
if (counters->GroupAlreadyHasLockedDisks() && !counters->GroupHasMoreThanOneDiskPerNode() && opts.PartialPermissionAllowed) {
TabletCounters->Cumulative()[COUNTER_PARTIAL_PERMISSIONS_OPTIMIZED].Increment(1);
error.Code = TStatus::DISALLOW_TEMP;
error.Reason = "You cannot get two or more disks from the same group at the same time"
" without specifying the PartialPermissionAllowed parameter";
" in partial permissions allowed mode";
error.Deadline = defaultDeadline;
return false;
}
Expand Down
21 changes: 21 additions & 0 deletions ydb/core/cms/cms_maintenance_api_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,27 @@ Y_UNIT_TEST_SUITE(TMaintenanceApiTest) {
UNIT_ASSERT_VALUES_EQUAL(a2.reason(), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
UNIT_ASSERT(a2.reason_details().Contains("too many unavailable vdisks"));
}

// Creates a maintenance task in weak availability mode that locks node 0 of a
// test environment configured with mirror-3-dc erasure and three data centers,
// and expects the single lock action to be performed immediately.
// NOTE(review): TTestEnvOpts(3) presumably requests a three-node cluster - confirm.
Y_UNIT_TEST(SimplifiedMirror3DC) {
    TTestEnvOpts envOptions(3);
    envOptions.DataCenterCount = 3;
    envOptions.UseMirror3dcErasure = true;
    TCmsTestEnv env(envOptions);

    auto taskResult = env.CheckMaintenanceTaskCreate(
        "task-1",
        Ydb::StatusIds::SUCCESS,
        Ydb::Maintenance::AVAILABILITY_MODE_WEAK,
        MakeActionGroup(MakeLockAction(env.GetNodeId(0), TDuration::Minutes(10))));

    // Exactly one action group is expected in the result ...
    UNIT_ASSERT_VALUES_EQUAL(taskResult.action_group_states().size(), 1);

    // ... containing exactly one action ...
    const auto &groupState = taskResult.action_group_states(0);
    UNIT_ASSERT_VALUES_EQUAL(groupState.action_states().size(), 1);

    // ... and that action must already have been performed.
    const auto &lockState = groupState.action_states(0);
    UNIT_ASSERT_VALUES_EQUAL(lockState.status(), ActionState::ACTION_STATUS_PERFORMED);
}
}

} // namespace NKikimr::NCmsTest
68 changes: 46 additions & 22 deletions ydb/core/cms/cms_ut_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,20 +239,21 @@ class TFakeTenantPool : public TActorBootstrapped<TFakeTenantPool> {

void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseConfig *config,
ui32 pdisks, ui32 vdiskPerPdisk = 4, const TNodeTenantsMap &tenants = {}, bool useMirror3dcErasure = false)
{
{
constexpr ui32 MIRROR_3DC_VDISKS_COUNT = 9;
constexpr ui32 BLOCK_4_2_VDISKS_COUNT = 8;

ui32 numNodes = runtime.GetNodeCount();
ui32 numNodeGroups = pdisks * vdiskPerPdisk;
ui32 vdisksPerNode = pdisks * vdiskPerPdisk;
ui32 numGroups;

if (numNodes < 9)
useMirror3dcErasure = false;

if (useMirror3dcErasure)
numGroups = numNodes * numNodeGroups / 9;
else if (numNodes >= 8)
numGroups = numNodes * numNodeGroups / 8;
numGroups = numNodes * vdisksPerNode / MIRROR_3DC_VDISKS_COUNT;
else if (numNodes >= BLOCK_4_2_VDISKS_COUNT)
numGroups = numNodes * vdisksPerNode / BLOCK_4_2_VDISKS_COUNT;
else
numGroups = numNodes * numNodeGroups;
numGroups = numNodes * vdisksPerNode;

ui32 maxOneGroupVdisksPerNode = useMirror3dcErasure && numNodes < MIRROR_3DC_VDISKS_COUNT ? 3 : 1;

auto now = runtime.GetTimeProvider()->Now();
for (ui32 groupId = 0; groupId < numGroups; ++groupId) {
Expand All @@ -261,7 +262,7 @@ void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC
group.SetGroupGeneration(1);
if (useMirror3dcErasure)
group.SetErasureSpecies("mirror-3-dc");
else if (numNodes >= 8)
else if (numNodes >= BLOCK_4_2_VDISKS_COUNT)
group.SetErasureSpecies("block-4-2");
else
group.SetErasureSpecies("none");
Expand All @@ -284,12 +285,18 @@ void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC
} else {
node.SystemStateInfo.AddRoles("Storage");
}

ui32 groupShift = (nodeIndex / 8) * pdisks * vdiskPerPdisk;
if (numNodes < 8)
groupShift = nodeIndex * numNodeGroups;
if (useMirror3dcErasure)
groupShift = (nodeIndex / 9) * pdisks * vdiskPerPdisk;

ui32 groupsPerNode = vdisksPerNode / maxOneGroupVdisksPerNode;
ui32 groupShift;
if (useMirror3dcErasure) {
ui32 groupNodesSize = MIRROR_3DC_VDISKS_COUNT / maxOneGroupVdisksPerNode;
groupShift = (nodeIndex / groupNodesSize) * groupsPerNode;
} else if (numNodes >= BLOCK_4_2_VDISKS_COUNT) {
ui32 groupNodesSize = BLOCK_4_2_VDISKS_COUNT / maxOneGroupVdisksPerNode;
groupShift = (nodeIndex / groupNodesSize) * groupsPerNode;
} else {
groupShift = nodeIndex * groupsPerNode;
}

for (ui32 pdiskIndex = 0; pdiskIndex < pdisks; ++pdiskIndex) {
auto pdiskId = nodeId * pdisks + pdiskIndex;
Expand All @@ -316,12 +323,28 @@ void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC

for (ui8 vdiskIndex = 0; vdiskIndex < vdiskPerPdisk; ++vdiskIndex) {
ui32 vdiskId = pdiskIndex * vdiskPerPdisk + vdiskIndex;
ui32 groupId = groupShift + vdiskId;
ui32 groupId = groupShift + vdiskId / maxOneGroupVdisksPerNode;

if (groupId >= config->GroupSize()) {
break;
}

ui32 failRealm = 0;
if (useMirror3dcErasure)
failRealm = (nodeIndex % 9) / 3;
if (useMirror3dcErasure) {
if (numNodes >= MIRROR_3DC_VDISKS_COUNT) {
failRealm = (nodeIndex % MIRROR_3DC_VDISKS_COUNT) / 3;
} else {
failRealm = nodeIndex % 3;
}
}

TVDiskID id = {(ui8)groupId, 1, (ui8)failRealm, (ui8)(nodeIndex % 8), (ui8)0};
TVDiskID id = {
(ui8)groupId,
1,
(ui8)failRealm,
(ui8)(nodeIndex % BLOCK_4_2_VDISKS_COUNT),
(ui8)(vdiskId % maxOneGroupVdisksPerNode)
};

auto &vdisk = node.VDiskStateInfo[id];
VDiskIDFromVDiskID(id, vdisk.MutableVDiskId());
Expand All @@ -339,7 +362,8 @@ void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC
vdiskConfig.SetGroupId(groupId);
vdiskConfig.SetGroupGeneration(1);
vdiskConfig.SetFailRealmIdx(failRealm);
vdiskConfig.SetFailDomainIdx(nodeIndex % 8);
vdiskConfig.SetFailDomainIdx(nodeIndex % BLOCK_4_2_VDISKS_COUNT);
vdiskConfig.SetVDiskIdx(vdiskId % maxOneGroupVdisksPerNode);

config->MutableGroup(groupId)->AddVSlotId()
->CopyFrom(vdiskConfig.GetVSlotId());
Expand Down
12 changes: 11 additions & 1 deletion ydb/core/cms/cms_ut_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -411,14 +411,15 @@ class TCmsTestEnv : public TTestBasicRuntime {
Ydb::Maintenance::MaintenanceTaskResult CheckMaintenanceTaskCreate(
const TString &taskUid,
Ydb::StatusIds::StatusCode code,
Ydb::Maintenance::AvailabilityMode availabilityMode,
const Ts&... actionGroups)
{
auto ev = std::make_unique<NCms::TEvCms::TEvCreateMaintenanceTaskRequest>();
ev->Record.SetUserSID("test-user");

auto *req = ev->Record.MutableRequest();
req->mutable_task_options()->set_task_uid(taskUid);
req->mutable_task_options()->set_availability_mode(Ydb::Maintenance::AVAILABILITY_MODE_STRONG);
req->mutable_task_options()->set_availability_mode(availabilityMode);
AddActionGroups(*req, actionGroups...);

SendToPipe(CmsId, Sender, ev.release(), 0, GetPipeConfigWithRetries());
Expand All @@ -430,6 +431,15 @@ class TCmsTestEnv : public TTestBasicRuntime {
return rec.GetResult();
}

// Convenience overload for callers that do not specify an availability mode:
// forwards to the overload that takes one, passing AVAILABILITY_MODE_STRONG.
template <typename... Ts>
Ydb::Maintenance::MaintenanceTaskResult CheckMaintenanceTaskCreate(
    const TString &taskUid,
    Ydb::StatusIds::StatusCode code,
    const Ts&... actionGroups)
{
    const auto strongMode = Ydb::Maintenance::AVAILABILITY_MODE_STRONG;
    return CheckMaintenanceTaskCreate(taskUid, code, strongMode, actionGroups...);
}

void EnableBSBaseConfig();
void DisableBSBaseConfig();

Expand Down
38 changes: 28 additions & 10 deletions ydb/core/cms/erasure_checkers.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#include "erasure_checkers.h"

#include <ydb/core/protos/counters_cms.pb.h>
#include <ydb/core/tablet/tablet_counters.h>

namespace NKikimr::NCms {

bool TErasureCounterBase::IsDown(const TVDiskInfo &vdisk, TClusterInfoPtr info, TDuration &retryTime, TErrorInfo &error) {
Expand Down Expand Up @@ -43,6 +46,10 @@ bool TErasureCounterBase::GroupAlreadyHasLockedDisks() const {
return HasAlreadyLockedDisks;
}

// Returns the cached HasMoreThanOneDiskPerNode flag.
// The flag is initialised to false by the constructor and is recomputed by
// CountGroupState(), which sets it when the group has more VDisks than
// distinct nodes hosting them.
bool TErasureCounterBase::GroupHasMoreThanOneDiskPerNode() const {
return HasMoreThanOneDiskPerNode;
}

static TString DumpVDisksInfo(const THashMap<TVDiskID, TString>& vdisks, TClusterInfoPtr info) {
if (vdisks.empty()) {
return "<empty>";
Expand Down Expand Up @@ -121,11 +128,18 @@ bool TErasureCounterBase::CountVDisk(const TVDiskInfo &vdisk, TClusterInfoPtr in
}

void TErasureCounterBase::CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo &error) {
for (const auto &vdId : info->BSGroup(GroupId).VDisks) {
if (vdId != VDisk.VDiskId)
CountVDisk(info->VDisk(vdId), info, retryTime, duration, error);
const auto& group = info->BSGroup(GroupId);

TSet<ui32> groupNodes;
for (const auto &vdId : group.VDisks) {
const auto &vd = info->VDisk(vdId);
if (vd.VDiskId != VDisk.VDiskId)
CountVDisk(vd, info, retryTime, duration, error);
groupNodes.insert(vd.NodeId);
}

HasMoreThanOneDiskPerNode = group.VDisks.size() > groupNodes.size();

if (Locked && error.Code == TStatus::DISALLOW) {
HasAlreadyLockedDisks = true;
}
Expand All @@ -136,10 +150,11 @@ void TErasureCounterBase::CountGroupState(TClusterInfoPtr info, TDuration retryT
bool TDefaultErasureCounter::CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo &error,
TInstant &defaultDeadline, bool allowPartial) const
{
if (HasAlreadyLockedDisks && allowPartial) {
if (HasAlreadyLockedDisks && !HasMoreThanOneDiskPerNode && allowPartial) {
CmsCounters->Cumulative()[COUNTER_PARTIAL_PERMISSIONS_OPTIMIZED].Increment(1);
error.Code = TStatus::DISALLOW_TEMP;
error.Reason = "You cannot get two or more disks from the same group at the same time"
" without specifying the PartialPermissionAllowed parameter";
" in partial permissions allowed mode";
error.Deadline = defaultDeadline;
return false;
}
Expand Down Expand Up @@ -170,10 +185,11 @@ bool TDefaultErasureCounter::CheckForKeepAvailability(TClusterInfoPtr info, TErr
bool TMirror3dcCounter::CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo &error,
TInstant &defaultDeadline, bool allowPartial) const
{
if (HasAlreadyLockedDisks && allowPartial) {
if (HasAlreadyLockedDisks && !HasMoreThanOneDiskPerNode && allowPartial) {
CmsCounters->Cumulative()[COUNTER_PARTIAL_PERMISSIONS_OPTIMIZED].Increment(1);
error.Code = TStatus::DISALLOW_TEMP;
error.Reason = "You cannot get two or more disks from the same group at the same time"
" without specifying the PartialPermissionAllowed parameter";
" in partial permissions allowed mode";
error.Deadline = defaultDeadline;
return false;
}
Expand Down Expand Up @@ -237,7 +253,9 @@ void TMirror3dcCounter::CountGroupState(TClusterInfoPtr info, TDuration retryTim
++DataCenterDisabledNodes[VDisk.VDiskId.FailRealm];
}

TSimpleSharedPtr<IErasureCounter> CreateErasureCounter(TErasureType::EErasureSpecies es, const TVDiskInfo &vdisk, ui32 groupId) {
TSimpleSharedPtr<IErasureCounter> CreateErasureCounter(TErasureType::EErasureSpecies es,
const TVDiskInfo &vdisk, ui32 groupId, TTabletCountersBase* cmsCounters)
{
switch (es) {
case TErasureType::ErasureNone:
case TErasureType::ErasureMirror3:
Expand All @@ -257,9 +275,9 @@ TSimpleSharedPtr<IErasureCounter> CreateErasureCounter(TErasureType::EErasureSpe
case TErasureType::Erasure2Plus2Block:
case TErasureType::Erasure2Plus2Stripe:
case TErasureType::ErasureMirror3of4:
return TSimpleSharedPtr<IErasureCounter>(new TDefaultErasureCounter(vdisk, groupId));
return TSimpleSharedPtr<IErasureCounter>(new TDefaultErasureCounter(vdisk, groupId, cmsCounters));
case TErasureType::ErasureMirror3dc:
return TSimpleSharedPtr<IErasureCounter>(new TMirror3dcCounter(vdisk, groupId));
return TSimpleSharedPtr<IErasureCounter>(new TMirror3dcCounter(vdisk, groupId, cmsCounters));
default:
Y_ABORT("Unknown erasure type: %d", es);
}
Expand Down
20 changes: 14 additions & 6 deletions ydb/core/cms/erasure_checkers.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class IErasureCounter {
virtual ~IErasureCounter() = default;

virtual bool GroupAlreadyHasLockedDisks() const = 0;
virtual bool GroupHasMoreThanOneDiskPerNode() const = 0;
virtual bool CheckForMaxAvailability(TClusterInfoPtr info, TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const = 0;
virtual bool CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const = 0;
virtual void CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo& error) = 0;
Expand All @@ -33,29 +34,35 @@ class TErasureCounterBase: public IErasureCounter {
const TVDiskInfo& VDisk;
const ui32 GroupId;
bool HasAlreadyLockedDisks;
bool HasMoreThanOneDiskPerNode;

TTabletCountersBase* CmsCounters;

protected:
bool IsDown(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration& retryTime, TErrorInfo& error);
bool IsLocked(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration& retryTime, TDuration& duration, TErrorInfo& error);
bool CountVDisk(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo& error) override;

public:
TErasureCounterBase(const TVDiskInfo& vdisk, ui32 groupId)
TErasureCounterBase(const TVDiskInfo& vdisk, ui32 groupId, TTabletCountersBase* cmsCounters)
: VDisk(vdisk)
, GroupId(groupId)
, HasAlreadyLockedDisks(false)
, HasMoreThanOneDiskPerNode(false)
, CmsCounters(cmsCounters)
{
}

bool GroupAlreadyHasLockedDisks() const final;
bool GroupHasMoreThanOneDiskPerNode() const final;
bool CheckForMaxAvailability(TClusterInfoPtr info, TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const final;
void CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo &error) override;
};

class TDefaultErasureCounter: public TErasureCounterBase {
public:
TDefaultErasureCounter(const TVDiskInfo& vdisk, ui32 groupId)
: TErasureCounterBase(vdisk, groupId)
TDefaultErasureCounter(const TVDiskInfo& vdisk, ui32 groupId, TTabletCountersBase* cmsCounters)
: TErasureCounterBase(vdisk, groupId, cmsCounters)
{
}

Expand All @@ -69,15 +76,16 @@ class TMirror3dcCounter: public TErasureCounterBase {
bool CountVDisk(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo& error) override;

public:
TMirror3dcCounter(const TVDiskInfo& vdisk, ui32 groupId)
: TErasureCounterBase(vdisk, groupId)
TMirror3dcCounter(const TVDiskInfo& vdisk, ui32 groupId, TTabletCountersBase* cmsCounters)
: TErasureCounterBase(vdisk, groupId, cmsCounters)
{
}

bool CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const override;
void CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo &error) override;
};

TSimpleSharedPtr<IErasureCounter> CreateErasureCounter(TErasureType::EErasureSpecies es, const TVDiskInfo& vdisk, ui32 groupId);
TSimpleSharedPtr<IErasureCounter> CreateErasureCounter(TErasureType::EErasureSpecies es,
const TVDiskInfo &vdisk, ui32 groupId, TTabletCountersBase* cmsCounters);

} // namespace NKikimr::NCms
2 changes: 2 additions & 0 deletions ydb/core/protos/counters_cms.proto
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ enum ESimpleCounters {

// Cumulative counters of the CMS tablet; the C++ code indexes them through
// Cumulative()[...] on the tablet counters object.
enum ECumulativeCounters {
// Placeholder entry occupying value 0.
COUNTER_CUMULATIVE_IGNORE = 0;

// Incremented each time a request is rejected with DISALLOW_TEMP because the
// group already has locked disks while partial permissions are allowed.
COUNTER_PARTIAL_PERMISSIONS_OPTIMIZED = 1 [(CounterOpts) = {Name: "PartialPermissionsOptimized"}];
}

enum EPercentileCounters {
Expand Down
Loading