Skip to content

Commit c9c8784

Browse files
authored
Support migration from conventional config to distconf (#12659)
1 parent ae22fee commit c9c8784

26 files changed

+831
-151
lines changed

ydb/core/blobstorage/nodewarden/distconf.h

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -442,18 +442,13 @@ namespace NKikimr::NStorage {
442442
}
443443
const auto& bsConfig = config.GetBlobStorageConfig();
444444

445-
if (!bsConfig.HasAutoconfigSettings()) {
445+
if (!bsConfig.HasDefineBox()) {
446446
return;
447447
}
448-
const auto& autoconfigSettings = bsConfig.GetAutoconfigSettings();
449-
450-
if (!autoconfigSettings.HasDefineBox()) {
451-
return;
452-
}
453-
const auto& defineBox = autoconfigSettings.GetDefineBox();
448+
const auto& defineBox = bsConfig.GetDefineBox();
454449

455450
THashMap<ui64, const NKikimrBlobStorage::TDefineHostConfig*> defineHostConfigMap;
456-
for (const auto& defineHostConfig : autoconfigSettings.GetDefineHostConfig()) {
451+
for (const auto& defineHostConfig : bsConfig.GetDefineHostConfig()) {
457452
defineHostConfigMap.emplace(defineHostConfig.GetHostConfigId(), &defineHostConfig);
458453
}
459454

ydb/core/blobstorage/nodewarden/distconf_fsm.cpp

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ namespace NKikimr::NStorage {
4646
}
4747

4848
void TDistributedConfigKeeper::SwitchToError(const TString& reason) {
49-
STLOG(PRI_ERROR, BS_NODE, NWDC38, "SwitchToError", (RootState, RootState), (Reason, reason));
49+
STLOG(PRI_NOTICE, BS_NODE, NWDC38, "SwitchToError", (RootState, RootState), (Reason, reason));
5050
if (Scepter) {
5151
UnbecomeRoot();
5252
}
@@ -163,6 +163,11 @@ namespace NKikimr::NStorage {
163163

164164
STLOG(PRI_DEBUG, BS_NODE, NWDC31, "ProcessCollectConfigs", (RootState, RootState), (NodeQuorum, nodeQuorum),
165165
(ConfigQuorum, configQuorum), (Res, *res));
166+
167+
if (nodeQuorum && !configQuorum) {
168+
// check whether there is a quorum of no-distconf configs across the cluster (placeholder: nothing is done here yet)
169+
}
170+
166171
if (!nodeQuorum || !configQuorum) {
167172
return "no quorum for CollectConfigs";
168173
}
@@ -327,14 +332,8 @@ namespace NKikimr::NStorage {
327332
NKikimrBlobStorage::TStorageConfig *configToPropose = nullptr;
328333
std::optional<NKikimrBlobStorage::TStorageConfig> propositionBase;
329334

330-
bool canPropose = false;
331-
if (StorageConfig->HasBlobStorageConfig()) {
332-
if (const auto& bsConfig = StorageConfig->GetBlobStorageConfig(); bsConfig.HasAutoconfigSettings()) {
333-
if (const auto& settings = bsConfig.GetAutoconfigSettings(); settings.HasDefineBox()) {
334-
canPropose = true;
335-
}
336-
}
337-
}
335+
auto& sc = *StorageConfig;
336+
const bool canPropose = sc.HasBlobStorageConfig() && sc.GetBlobStorageConfig().HasDefineBox();
338337

339338
STLOG(PRI_DEBUG, BS_NODE, NWDC59, "ProcessCollectConfigs", (BaseConfig, baseConfig),
340339
(PersistedConfig, persistedConfig), (ProposedConfig, proposedConfig), (CanPropose, canPropose));
@@ -378,12 +377,12 @@ namespace NKikimr::NStorage {
378377
}
379378
UpdateFingerprint(configToPropose);
380379

381-
const bool error = StorageConfig && configToPropose->GetGeneration() <= StorageConfig->GetGeneration();
380+
const bool error = configToPropose->GetGeneration() <= sc.GetGeneration();
382381

383382
STLOG(error ? PRI_ERROR : PRI_INFO, BS_NODE, NWDC60, "ProcessCollectConfigs proposing config",
384383
(ConfigToPropose, *configToPropose),
385384
(PropositionBase, propositionBase),
386-
(StorageConfig, StorageConfig),
385+
(StorageConfig, sc),
387386
(BaseConfig, static_cast<bool>(baseConfig)),
388387
(PersistedConfig, static_cast<bool>(persistedConfig)),
389388
(ProposedConfig, static_cast<bool>(proposedConfig)),

ydb/core/blobstorage/nodewarden/node_warden_impl.cpp

Lines changed: 88 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,9 @@ void TNodeWarden::Bootstrap() {
392392

393393
// fill in a base storage config (from the file)
394394
NKikimrConfig::TAppConfig appConfig;
395+
if (Cfg->DomainsConfig) {
396+
appConfig.MutableDomainsConfig()->CopyFrom(*Cfg->DomainsConfig);
397+
}
395398
appConfig.MutableBlobStorageConfig()->CopyFrom(Cfg->BlobStorageConfig);
396399
appConfig.MutableNameserviceConfig()->CopyFrom(Cfg->NameserviceConfig);
397400
TString errorReason;
@@ -1014,11 +1017,26 @@ bool NKikimr::NStorage::DeriveStorageConfig(const NKikimrConfig::TAppConfig& app
10141017
const auto& ssFrom = bsFrom.GetServiceSet();
10151018
auto *ssTo = bsTo->MutableServiceSet();
10161019

1017-
ssTo->MutableAvailabilityDomains()->CopyFrom(ssFrom.GetAvailabilityDomains());
1020+
// update availability domains if set
1021+
if (ssFrom.AvailabilityDomainsSize()) {
1022+
ssTo->MutableAvailabilityDomains()->CopyFrom(ssFrom.GetAvailabilityDomains());
1023+
}
1024+
1025+
// replace replication broker configuration
10181026
if (ssFrom.HasReplBrokerConfig()) {
10191027
ssTo->MutableReplBrokerConfig()->CopyFrom(ssFrom.GetReplBrokerConfig());
1028+
} else {
1029+
ssTo->ClearReplBrokerConfig();
10201030
}
1021-
if (!ssTo->PDisksSize() && !ssTo->VDisksSize() && !ssTo->GroupsSize()) {
1031+
1032+
const auto hasStaticGroupInfo = [](const NKikimrBlobStorage::TNodeWardenServiceSet& ss) {
1033+
return ss.PDisksSize() && ss.VDisksSize() && ss.GroupsSize();
1034+
};
1035+
1036+
// update static group information unless distconf is enabled
1037+
if (!hasStaticGroupInfo(ssFrom) && bsFrom.HasAutoconfigSettings()) {
1038+
// distconf enabled, keep it as is
1039+
} else if (!hasStaticGroupInfo(*ssTo)) {
10221040
ssTo->MutablePDisks()->CopyFrom(ssFrom.GetPDisks());
10231041
ssTo->MutableVDisks()->CopyFrom(ssFrom.GetVDisks());
10241042
ssTo->MutableGroups()->CopyFrom(ssFrom.GetGroups());
@@ -1031,16 +1049,16 @@ bool NKikimr::NStorage::DeriveStorageConfig(const NKikimrConfig::TAppConfig& app
10311049
};
10321050

10331051
auto pdiskKey = [](const auto *item) {
1034-
return TStringBuilder() << "PDisk NodeId# " << item->GetNodeID() << " PDiskId# " << item->GetPDiskID();
1052+
return TStringBuilder() << "PDisk [" << item->GetNodeID() << ':' << item->GetPDiskID() << ']';
10351053
};
10361054

10371055
auto vdiskKey = [](const auto *item) {
1038-
return TStringBuilder() << "VDisk NodeId# " << item->GetNodeID() << " PDiskId# " << item->GetPDiskID()
1039-
<< " VDiskSlotId# " << item->GetVDiskSlotID();
1056+
return TStringBuilder() << "VSlot [" << item->GetNodeID() << ':' << item->GetPDiskID() << ':'
1057+
<< item->GetVDiskSlotID() << ']';
10401058
};
10411059

10421060
auto groupKey = [](const auto *item) {
1043-
return TStringBuilder() << "group GroupId# " << item->GetGroupID();
1061+
return TStringBuilder() << "group " << item->GetGroupID();
10441062
};
10451063

10461064
auto duplicateKey = [&](auto&& key) { return error(std::move(key), "duplicate key in existing StorageConfig"); };
@@ -1064,7 +1082,13 @@ bool NKikimr::NStorage::DeriveStorageConfig(const NKikimrConfig::TAppConfig& app
10641082
}
10651083
}
10661084
if (!pdiskMap.empty()) {
1067-
*errorReason = "some PDisks were added in newly provided configuration";
1085+
TStringStream err;
1086+
err << "some static PDisks were removed in newly provided configuration:";
1087+
for (const auto& [id, _] : pdiskMap) {
1088+
const auto& [nodeId, pdiskId] = id;
1089+
err << " [" << nodeId << ':' << pdiskId << ']';
1090+
}
1091+
*errorReason = std::move(err.Str());
10681092
return false;
10691093
}
10701094

@@ -1096,7 +1120,13 @@ bool NKikimr::NStorage::DeriveStorageConfig(const NKikimrConfig::TAppConfig& app
10961120
}
10971121
}
10981122
if (!vdiskMap.empty()) {
1099-
*errorReason = "some VDisks were added in newly provided configuration";
1123+
TStringStream err;
1124+
err << "some static VDisks were removed in newly provided configuration:";
1125+
for (const auto& [id, _] : vdiskMap) {
1126+
const auto& [nodeId, pdiskId, vdiskSlotId] = id;
1127+
err << " [" << nodeId << ':' << pdiskId << ':' << vdiskSlotId << ']';
1128+
}
1129+
*errorReason = std::move(err.Str());
11001130
return false;
11011131
}
11021132

@@ -1116,12 +1146,20 @@ bool NKikimr::NStorage::DeriveStorageConfig(const NKikimrConfig::TAppConfig& app
11161146
}
11171147
}
11181148
if (!groupMap.empty()) {
1119-
*errorReason = "some groups were added in newly provided configuration";
1149+
*errorReason = "some static groups were removed in newly provided configuration";
11201150
return false;
11211151
}
11221152
}
11231153
}
11241154

1155+
// copy define box
1156+
if (bsFrom.HasDefineBox()) {
1157+
bsTo->MutableDefineBox()->CopyFrom(bsFrom.GetDefineBox());
1158+
} else {
1159+
bsTo->ClearDefineBox();
1160+
}
1161+
bsTo->MutableDefineHostConfig()->CopyFrom(bsFrom.GetDefineHostConfig());
1162+
11251163
// copy nameservice-related things
11261164
if (!appConfig.HasNameserviceConfig()) {
11271165
*errorReason = "origin config missing mandatory NameserviceConfig section";
@@ -1148,7 +1186,47 @@ bool NKikimr::NStorage::DeriveStorageConfig(const NKikimrConfig::TAppConfig& app
11481186
// and copy ClusterUUID from there too
11491187
config->SetClusterUUID(nsFrom.GetClusterUUID());
11501188

1151-
// TODO(alexvru): apply SS, SSB, SB configs from there too
1189+
if (appConfig.HasDomainsConfig()) {
1190+
const auto& domains = appConfig.GetDomainsConfig();
1191+
1192+
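// we expect exactly one domain; with any other count the StateStorage, StateStorageBoard and SchemeBoard configs are left untouched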
1193+
if (domains.DomainSize() == 1) {
1194+
const auto& domain = domains.GetDomain(0);
1195+
1196+
auto updateConfig = [&](bool needMerge, auto *to, const auto& from) {
1197+
if (needMerge) {
1198+
char prefix[TActorId::MaxServiceIDLength] = {0};
1199+
auto toInfo = BuildStateStorageInfo(prefix, *to);
1200+
auto fromInfo = BuildStateStorageInfo(prefix, from);
1201+
if (toInfo->NToSelect != fromInfo->NToSelect) {
1202+
*errorReason = "NToSelect differs";
1203+
return false;
1204+
} else if (toInfo->SelectAllReplicas() != fromInfo->SelectAllReplicas()) {
1205+
*errorReason = "StateStorage rings differ";
1206+
return false;
1207+
}
1208+
}
1209+
1210+
to->CopyFrom(from);
1211+
return true;
1212+
};
1213+
1214+
// find state storage setup for that domain
1215+
for (const auto& ss : domains.GetStateStorage()) {
1216+
if (domain.SSIdSize() == 1 && ss.GetSSId() == domain.GetSSId(0)) {
1217+
const bool hadStateStorageConfig = config->HasStateStorageConfig();
1218+
const bool hadStateStorageBoardConfig = config->HasStateStorageBoardConfig();
1219+
const bool hadSchemeBoardConfig = config->HasSchemeBoardConfig();
1220+
if (!updateConfig(hadStateStorageConfig, config->MutableStateStorageConfig(), ss) ||
1221+
!updateConfig(hadStateStorageBoardConfig, config->MutableStateStorageBoardConfig(), ss) ||
1222+
!updateConfig(hadSchemeBoardConfig, config->MutableSchemeBoardConfig(), ss)) {
1223+
return false;
1224+
}
1225+
break;
1226+
}
1227+
}
1228+
}
1229+
}
11521230

11531231
return true;
11541232
}

ydb/core/grpc_services/rpc_bsconfig.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,20 @@ class TReplaceStorageConfigRequest : public TBSConfigRequestGrpc<TReplaceStorage
101101
void FillDistconfResult(NKikimrBlobStorage::TEvNodeConfigInvokeOnRootResult& /*record*/,
102102
Ydb::BSConfig::ReplaceStorageConfigResult& /*result*/)
103103
{}
104+
105+
bool IsDistconfEnableQuery() const {
106+
NKikimrConfig::TAppConfig newConfig;
107+
try {
108+
newConfig = NYaml::Parse(GetProtoRequest()->yaml_config());
109+
} catch (const std::exception&) {
110+
return false; // the YAML could not be parsed: assume this config does not enable distconf
111+
}
112+
if (!newConfig.HasBlobStorageConfig()) {
113+
return false;
114+
}
115+
const NKikimrConfig::TBlobStorageConfig& bsConfig = newConfig.GetBlobStorageConfig();
116+
return bsConfig.HasAutoconfigSettings();
117+
}
104118
};
105119

106120
class TFetchStorageConfigRequest : public TBSConfigRequestGrpc<TFetchStorageConfigRequest, TEvFetchStorageConfigRequest,
@@ -124,6 +138,10 @@ class TFetchStorageConfigRequest : public TBSConfigRequestGrpc<TFetchStorageConf
124138
Ydb::BSConfig::FetchStorageConfigResult& result) {
125139
result.set_yaml_config(record.GetFetchStorageConfig().GetYAML());
126140
}
141+
142+
bool IsDistconfEnableQuery() const {
143+
return false;
144+
}
127145
};
128146

129147
void DoReplaceBSConfig(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) {

ydb/core/grpc_services/rpc_bsconfig_base.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ class TBSConfigRequestGrpc : public TRpcOperationRequestActor<TDerived, TRequest
178178

179179
void Handle(TEvNodeWardenStorageConfig::TPtr ev) {
180180
auto *self = Self();
181-
if (ev->Get()->Config->GetGeneration()) { // distconf
181+
if (ev->Get()->Config->GetGeneration() || self->IsDistconfEnableQuery()) { // distconf (will be) enabled
182182
auto ev = std::make_unique<NStorage::TEvNodeConfigInvokeOnRoot>();
183183
self->FillDistconfQuery(*ev);
184184
self->Send(MakeBlobStorageNodeWardenID(self->SelfId().NodeId()), ev.release());

ydb/core/mind/bscontroller/bsc.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,13 +230,13 @@ void TBlobStorageController::ApplyStorageConfig() {
230230
auto ev = std::make_unique<TEvBlobStorage::TEvControllerConfigRequest>();
231231
auto& r = ev->Record;
232232
auto *request = r.MutableRequest();
233-
for (const auto& hostConfig : autoconfigSettings.GetDefineHostConfig()) {
233+
for (const auto& hostConfig : bsConfig.GetDefineHostConfig()) {
234234
auto *cmd = request->AddCommand();
235235
cmd->MutableDefineHostConfig()->CopyFrom(hostConfig);
236236
}
237237
auto *cmd = request->AddCommand();
238238
auto *defineBox = cmd->MutableDefineBox();
239-
defineBox->CopyFrom(autoconfigSettings.GetDefineBox());
239+
defineBox->CopyFrom(bsConfig.GetDefineBox());
240240
defineBox->SetBoxId(1);
241241
for (auto& host : *defineBox->MutableHost()) {
242242
const ui32 nodeId = host.GetEnforcedNodeId();

ydb/core/protos/config.proto

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -289,10 +289,6 @@ message TBlobStorageConfig {
289289
optional bool AutomaticBoxManagement = 4; // invoke BSC DefineHostConfig/DefineBox automatically (true when unset)
290290
optional bool AutomaticBootstrap = 9; // whether bootstrap should be performed automatically; PROHIBITED for production
291291

292-
// filled in by config parser, not by user; required for automatic static group creation
293-
repeated NKikimrBlobStorage.TDefineHostConfig DefineHostConfig = 5;
294-
optional NKikimrBlobStorage.TDefineBox DefineBox = 6;
295-
296292
// generation of the distconf section; when set, one can automatically apply in-filesystem config
297293
optional uint64 Generation = 8;
298294

@@ -334,6 +330,10 @@ message TBlobStorageConfig {
334330
reserved 7; // TCostMetricsSettings, moved to ICB
335331
optional TVDiskPerformanceSettings VDiskPerformanceSettings = 8;
336332
optional TVDiskBalancingConfig VDiskBalancingConfig = 9;
333+
334+
// filled in by config parser, not by user; required for correct distconf operation
335+
repeated NKikimrBlobStorage.TDefineHostConfig DefineHostConfig = 10;
336+
optional NKikimrBlobStorage.TDefineBox DefineBox = 11;
337337
}
338338

339339
message TBlobStorageFormatConfig {

ydb/library/yaml_config/ut_transform/canondata/test_transform.TestYamlConfigTransformations.test_basic_args0-dump_/block-4-2.yaml.result.json

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,62 @@
651651
[
652652
1
653653
]
654+
},
655+
"DefineHostConfig":
656+
[
657+
{
658+
"HostConfigId":1,
659+
"Drive":
660+
[
661+
{
662+
"Path":"/dev/disk/by-partlabel/ydb_disk_ssd_01",
663+
"Type":"SSD"
664+
},
665+
{
666+
"Path":"/dev/disk/by-partlabel/ydb_disk_ssd_02",
667+
"Type":"SSD"
668+
}
669+
]
670+
}
671+
],
672+
"DefineBox":
673+
{
674+
"BoxId":1,
675+
"Host":
676+
[
677+
{
678+
"HostConfigId":1,
679+
"EnforcedNodeId":1
680+
},
681+
{
682+
"HostConfigId":1,
683+
"EnforcedNodeId":2
684+
},
685+
{
686+
"HostConfigId":1,
687+
"EnforcedNodeId":3
688+
},
689+
{
690+
"HostConfigId":1,
691+
"EnforcedNodeId":4
692+
},
693+
{
694+
"HostConfigId":1,
695+
"EnforcedNodeId":5
696+
},
697+
{
698+
"HostConfigId":1,
699+
"EnforcedNodeId":6
700+
},
701+
{
702+
"HostConfigId":1,
703+
"EnforcedNodeId":7
704+
},
705+
{
706+
"HostConfigId":1,
707+
"EnforcedNodeId":8
708+
}
709+
]
654710
}
655711
},
656712
"ChannelProfileConfig":

0 commit comments

Comments
 (0)