Skip to content

Add node interlace in TScore for group mapper #15951

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 28 additions & 11 deletions ydb/core/mind/bscontroller/group_layout_checker.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,21 +72,25 @@ namespace NKikimr::NBsController {
TEntityId RealmGroup;
TEntityId Realm;
TEntityId Domain;
TEntityId Device;

TPDiskLayoutPosition() = default;

TPDiskLayoutPosition(TEntityId realmGroup, TEntityId realm, TEntityId domain)
TPDiskLayoutPosition(TEntityId realmGroup, TEntityId realm, TEntityId domain, TEntityId device)
: RealmGroup(realmGroup)
, Realm(realm)
, Domain(domain)
, Device(device)
{}

TPDiskLayoutPosition(TDomainMapper& mapper, const TNodeLocation& location, TPDiskId pdiskId, const TGroupGeometryInfo& geom) {
TStringStream realmGroup, realm, domain;
TStringStream realmGroup, realm, domain, device;
ui32 deviceLevelEnd = TNodeLocation::TKeys::E::Unit + 1;
const std::pair<int, TStringStream*> levels[] = {
{geom.GetRealmLevelBegin(), &realmGroup},
{Max(geom.GetRealmLevelEnd(), geom.GetDomainLevelBegin()), &realm},
{Max(geom.GetRealmLevelEnd(), geom.GetDomainLevelEnd()), &domain}
{Max(geom.GetRealmLevelEnd(), geom.GetDomainLevelEnd()), &domain},
{Max(geom.GetRealmLevelEnd(), geom.GetDomainLevelEnd(), deviceLevelEnd), &device}
};
auto addLevel = [&](int key, const TString& value) {
for (const auto& [reference, stream] : levels) {
Expand All @@ -102,14 +106,15 @@ namespace NKikimr::NBsController {
RealmGroup = mapper(realmGroup.Str());
Realm = mapper(realm.Str());
Domain = mapper(domain.Str());
Device = mapper(device.Str());
}

TString ToString() const {
return TStringBuilder() << "{" << RealmGroup << "." << Realm << "." << Domain << "}";
return TStringBuilder() << "{" << RealmGroup << "." << Realm << "." << Domain << "." << Device << "}";
}

auto AsTuple() const {
return std::tie(RealmGroup, Realm, Domain);
return std::tie(RealmGroup, Realm, Domain, Device);
}

friend bool operator ==(const TPDiskLayoutPosition& x, const TPDiskLayoutPosition& y) {
Expand All @@ -124,12 +129,13 @@ namespace NKikimr::NBsController {
struct TScore {
ui32 RealmInterlace = 0;
ui32 DomainInterlace = 0;
ui32 DeviceInterlace = 0;
ui32 RealmGroupScatter = 0;
ui32 RealmScatter = 0;
ui32 DomainScatter = 0;

auto AsTuple() const {
return std::make_tuple(RealmInterlace, DomainInterlace, RealmGroupScatter, RealmScatter, DomainScatter);
return std::make_tuple(RealmInterlace, DomainInterlace, DeviceInterlace, RealmGroupScatter, RealmScatter, DomainScatter);
}

bool BetterThan(const TScore& other) const {
Expand All @@ -141,12 +147,13 @@ namespace NKikimr::NBsController {
}

static TScore Max() {
return {::Max<ui32>(), ::Max<ui32>(), ::Max<ui32>(), ::Max<ui32>(), ::Max<ui32>()};
return {::Max<ui32>(), ::Max<ui32>(), ::Max<ui32>(), ::Max<ui32>(), ::Max<ui32>(), ::Max<ui32>()};
}

TString ToString() const {
return TStringBuilder() << "{RealmInterlace# " << RealmInterlace
<< " DomainInterlace# " << DomainInterlace
<< " DeviceInterlace# " << DeviceInterlace
<< " RealmGroupScatter# " << RealmGroupScatter
<< " RealmScatter# " << RealmScatter
<< " DomainScatter# " << DomainScatter
Expand All @@ -168,6 +175,8 @@ namespace NKikimr::NBsController {
TStackVec<THashMap<TEntityId, ui32>, 32> NumDisksPerDomain;
THashMap<TEntityId, ui32> NumDisksPerDomainTotal;

THashMap<TEntityId, ui32> NumDisksPerDevice;

TGroupLayout(const TBlobStorageGroupInfo::TTopology& topology)
: Topology(topology)
, NumDisksInRealm(Topology.GetTotalFailRealmsNum())
Expand All @@ -187,6 +196,8 @@ namespace NKikimr::NBsController {
NumDisksInDomain[domainIdx] += value;
NumDisksPerDomain[domainIdx][pos.Domain] += value;
NumDisksPerDomainTotal[pos.Domain] += value;

NumDisksPerDevice[pos.Device] += value;
}

void AddDisk(const TPDiskLayoutPosition& pos, ui32 orderNumber) {
Expand All @@ -201,12 +212,18 @@ namespace NKikimr::NBsController {
const TVDiskIdShort vdisk = Topology.GetVDiskId(orderNumber);
const ui32 domainIdx = Topology.GetFailDomainOrderNumber(vdisk);

const auto& disksPerRealm = NumDisksPerRealm[vdisk.FailRealm][pos.Realm];
const auto& disksPerDomain = NumDisksPerDomain[domainIdx][pos.Domain];

const ui32 disksOnDevice = NumDisksPerDevice[pos.Device];

return {
.RealmInterlace = NumDisksPerRealmTotal[pos.Realm] - NumDisksPerRealm[vdisk.FailRealm][pos.Realm],
.DomainInterlace = NumDisksPerDomainTotal[pos.Domain] - NumDisksPerDomain[domainIdx][pos.Domain],
.RealmInterlace = NumDisksPerRealmTotal[pos.Realm] - disksPerRealm,
.DomainInterlace = NumDisksPerDomainTotal[pos.Domain] - disksPerDomain,
.DeviceInterlace = disksOnDevice,
.RealmGroupScatter = NumDisks - NumDisksPerRealmGroup[pos.RealmGroup],
.RealmScatter = NumDisksInRealm[vdisk.FailRealm] - NumDisksPerRealm[vdisk.FailRealm][pos.Realm],
.DomainScatter = NumDisksInDomain[domainIdx] - NumDisksPerDomain[domainIdx][pos.Domain],
.RealmScatter = NumDisksInRealm[vdisk.FailRealm] - disksPerRealm,
.DomainScatter = NumDisksInDomain[domainIdx] - disksPerDomain,
};
}

Expand Down
6 changes: 3 additions & 3 deletions ydb/core/mind/bscontroller/group_mapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ namespace NKikimr::NBsController {

static std::pair<TPDiskLayoutPosition, TPDiskLayoutPosition> MakeRange(const TPDiskLayoutPosition& x, TEntityId& scope) {
scope = x.Domain;
return {x, x};
return {{x.RealmGroup, x.Realm, x.Domain, TEntityId::Min()}, {x.RealmGroup, x.Realm, x.Domain, TEntityId::Max()}};
}
};

Expand All @@ -400,7 +400,7 @@ namespace NKikimr::NBsController {

static std::pair<TPDiskLayoutPosition, TPDiskLayoutPosition> MakeRange(const TPDiskLayoutPosition& x, TEntityId& scope) {
scope = x.Realm;
return {{x.RealmGroup, x.Realm, TEntityId::Min()}, {x.RealmGroup, x.Realm, TEntityId::Max()}};
return {{x.RealmGroup, x.Realm, TEntityId::Min(), TEntityId::Min()}, {x.RealmGroup, x.Realm, TEntityId::Max(), TEntityId::Max()}};
}
};

Expand All @@ -410,7 +410,7 @@ namespace NKikimr::NBsController {

static std::pair<TPDiskLayoutPosition, TPDiskLayoutPosition> MakeRange(const TPDiskLayoutPosition& x, TEntityId& scope) {
scope = x.RealmGroup;
return {{x.RealmGroup, TEntityId::Min(), TEntityId::Min()}, {x.RealmGroup, TEntityId::Max(), TEntityId::Max()}};
return {{x.RealmGroup, TEntityId::Min(), TEntityId::Min(), TEntityId::Min()}, {x.RealmGroup, TEntityId::Max(), TEntityId::Max(), TEntityId::Max()}};
}
};

Expand Down
54 changes: 50 additions & 4 deletions ydb/core/mind/bscontroller/group_mapper_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -592,10 +592,6 @@ Y_UNIT_TEST_SUITE(TGroupMapperTest) {
TestBlock42(1);
}

Y_UNIT_TEST(Block42_2disk) {
TestBlock42(2);
}

Y_UNIT_TEST(Mirror3dc) {
TTestContext context(6, 3, 3, 3, 3);
TGroupMapper mapper(TTestContext::CreateGroupGeometry(TBlobStorageGroupType::ErasureMirror3dc));
Expand All @@ -613,6 +609,23 @@ Y_UNIT_TEST_SUITE(TGroupMapperTest) {
context.CheckIfGroupsAreMappedCompact();
}

// Checks that a mirror-3-dc group can be allocated on a minimal layout made of
// three location entries, each of which carries three disks.
Y_UNIT_TEST(Mirror3dc3Nodes) {
// Each node has 3 disks.
// NOTE(review): the last tuple field is the per-node disk count (per the note
// above); the first and third fields differ between entries, so presumably the
// three nodes are in three distinct datacenters/racks -- confirm against the
// TTestContext constructor.
TTestContext context(
{
{1, 1, 1, 1, 3},
{2, 1, 2, 1, 3},
{3, 1, 3, 1, 3},
}
);

// Explicit geometry instead of the defaults used by the other Mirror3dc test.
// NOTE(review): presumably 3 fail realms x 3 fail domains x 1 vdisk, followed by
// realm/domain level bounds -- confirm against CreateGroupGeometry.
TGroupMapper mapper(TTestContext::CreateGroupGeometry(TBlobStorageGroupType::ErasureMirror3dc, 3, 3, 1, 10, 20, 10, 256));
context.PopulateGroupMapper(mapper, 9);

// The test only requires AllocateGroup to return a non-zero value; the resulting
// layout in `group` is not inspected.
TGroupMapper::TGroupDefinition group;
UNIT_ASSERT_UNEQUAL(0, context.AllocateGroup(mapper, group));
}

Y_UNIT_TEST(NonUniformCluster) {
std::vector<std::tuple<ui32, ui32, ui32, ui32, ui32>> disks;
for (ui32 rack = 0, body = 0; rack < 12; ++rack) {
Expand All @@ -637,6 +650,39 @@ Y_UNIT_TEST_SUITE(TGroupMapperTest) {
}
}

// Reallocation scenario: one disk of an existing block-4-2 group is replaced and
// the test expects the replacement to be TPDiskId(9, 1), i.e. the disk on node 9,
// rather than any other free disk in the fixture (node 3 is declared with two disks).
Y_UNIT_TEST(InterlacedRacksWithoutInterlacedNodes) {
// Nine location entries. Per the inline notes, the third field is the rack, the
// fourth is the node and the fifth is the number of disks on that node.
// NOTE(review): the first two fields are identical everywhere -- presumably
// datacenter/room; confirm against the TTestContext constructor.
TTestContext context(
{
{1, 1, 1, 1, 1}, // node 1
{1, 1, 2, 2, 1},
{1, 1, 3, 3, 2}, // node 3 has two disks
{1, 1, 4, 4, 1},
{1, 1, 5, 5, 1},
{1, 1, 6, 6, 1},
{1, 1, 2, 7, 1}, // node 7 is in the same rack as node 2
{1, 1, 8, 8, 1},
{1, 1, 3, 9, 1}, // node 9 is in the same rack as node 3
}
);

TGroupMapper mapper(TTestContext::CreateGroupGeometry(TBlobStorageGroupType::Erasure4Plus2Block));
context.PopulateGroupMapper(mapper, 8);

// Hand-build the initial group: a single outer entry holding 8 inner slots.
TGroupMapper::TGroupDefinition group;
group.emplace_back(TVector<TVector<TPDiskId>>(8));
auto& g = group[0];

// Slot i receives TPDiskId(i + 1, 1), so slots 0..7 hold ids (1,1) .. (8,1).
// NOTE(review): presumably TPDiskId is (nodeId, pdiskId), i.e. disk 1 of nodes 1..8.
for (int i = 0; i < 8; i++) {
g[i].emplace_back(TPDiskId(i + 1, 1));
}

// Register the hand-built layout under id 1.
context.SetGroup(1, group);

// Reallocate group 1, passing TPDiskId(8, 1) -- the disk currently in slot 7.
// NOTE(review): presumably the third argument lists the disks to be replaced.
TGroupMapper::TGroupDefinition newGroup = context.ReallocateGroup(mapper, 1, {TPDiskId(8, 1)});

// Slot 7 must now hold TPDiskId(9, 1); the formatted group is printed on failure.
UNIT_ASSERT_EQUAL_C(TPDiskId(9, 1), newGroup[0][7][0], context.FormatGroup(newGroup));
}

Y_UNIT_TEST(NonUniformCluster2) {
std::vector<std::tuple<ui32, ui32, ui32, ui32, ui32>> disks;
for (ui32 rack = 0, body = 0; rack < 12; ++rack) {
Expand Down
Loading