Skip to content

[stable-25-1-1] Distconfig fixes part 2 #16294

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions ydb/core/base/blobstorage.h
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,8 @@ struct TEvBlobStorage {
EvControllerShredResponse = 0x1003162a,
EvControllerFetchConfigRequest = 0x1003162b,
EvControllerFetchConfigResponse = 0x1003162c,
EvControllerDistconfRequest = 0x1003162d,
EvControllerDistconfResponse = 0x1003162e,

// BSC interface result section
EvControllerNodeServiceSetUpdate = 0x10031802,
Expand Down Expand Up @@ -2523,6 +2525,8 @@ struct TEvBlobStorage {
struct TEvControllerShredResponse;
struct TEvControllerFetchConfigRequest;
struct TEvControllerFetchConfigResponse;
struct TEvControllerDistconfRequest;
struct TEvControllerDistconfResponse;

struct TEvMonStreamQuery;
struct TEvMonStreamActorDeathNote;
Expand Down
33 changes: 17 additions & 16 deletions ydb/core/blobstorage/base/blobstorage_console_events.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ namespace NKikimr {
NKikimrBlobStorage::TEvControllerProposeConfigRequest, EvControllerProposeConfigRequest> {
TEvControllerProposeConfigRequest() = default;

TEvControllerProposeConfigRequest(ui64 configHash, ui64 configVersion) {
TEvControllerProposeConfigRequest(ui64 configHash, ui64 configVersion, bool distconf) {
Record.SetConfigHash(configHash);
Record.SetConfigVersion(configVersion);
Record.SetDistconf(distconf);
}

TString ToString() const override {
Expand All @@ -32,14 +33,8 @@ namespace NKikimr {
NKikimrBlobStorage::TEvControllerConsoleCommitRequest, EvControllerConsoleCommitRequest> {
TEvControllerConsoleCommitRequest() = default;

TEvControllerConsoleCommitRequest(
const TString& yamlConfig,
bool allowUnknownFields = false,
bool bypassMetadataChecks = false) {

TEvControllerConsoleCommitRequest(const TString& yamlConfig) {
Record.SetYAML(yamlConfig);
Record.SetAllowUnknownFields(allowUnknownFields);
Record.SetBypassMetadataChecks(bypassMetadataChecks);
}

TString ToString() const override {
Expand Down Expand Up @@ -82,14 +77,9 @@ namespace NKikimr {
NKikimrBlobStorage::TEvControllerReplaceConfigRequest, EvControllerReplaceConfigRequest> {
TEvControllerReplaceConfigRequest() = default;

TEvControllerReplaceConfigRequest(
std::optional<TString> clusterYaml,
std::optional<TString> storageYaml,
std::optional<bool> switchDedicatedStorageSection,
bool dedicatedConfigMode,
bool allowUnknownFields,
bool bypassMetadataChecks) {

TEvControllerReplaceConfigRequest(std::optional<TString> clusterYaml, std::optional<TString> storageYaml,
std::optional<bool> switchDedicatedStorageSection, bool dedicatedConfigMode, bool allowUnknownFields,
bool bypassMetadataChecks, bool enableConfigV2, bool disableConfigV2) {
if (clusterYaml) {
Record.SetClusterYaml(*clusterYaml);
}
Expand All @@ -102,6 +92,11 @@ namespace NKikimr {
Record.SetDedicatedConfigMode(dedicatedConfigMode);
Record.SetAllowUnknownFields(allowUnknownFields);
Record.SetBypassMetadataChecks(bypassMetadataChecks);
if (enableConfigV2) {
Record.SetSwitchEnableConfigV2(true);
} else if (disableConfigV2) {
Record.SetSwitchEnableConfigV2(false);
}
}

TString ToString() const override {
Expand Down Expand Up @@ -130,4 +125,10 @@ namespace NKikimr {
// Event type that pairs the NKikimrBlobStorage::TEvControllerFetchConfigResponse protobuf record with the
// EvControllerFetchConfigResponse event id. It adds no members or constructors of its own.
struct TEvBlobStorage::TEvControllerFetchConfigResponse : TEventPB<TEvControllerFetchConfigResponse,
NKikimrBlobStorage::TEvControllerFetchConfigResponse, EvControllerFetchConfigResponse> {};

// Event type that pairs the NKikimrBlobStorage::TEvControllerDistconfRequest protobuf record with the
// EvControllerDistconfRequest event id. It adds no members or constructors of its own, so callers fill
// the protobuf record directly.
struct TEvBlobStorage::TEvControllerDistconfRequest : TEventPB<TEvControllerDistconfRequest,
NKikimrBlobStorage::TEvControllerDistconfRequest, EvControllerDistconfRequest> {};

// Event type that pairs the NKikimrBlobStorage::TEvControllerDistconfResponse protobuf record with the
// EvControllerDistconfResponse event id. It adds no members or constructors of its own.
// NOTE(review): presumably the reply to TEvControllerDistconfRequest — confirm against the handler.
struct TEvBlobStorage::TEvControllerDistconfResponse : TEventPB<TEvControllerDistconfResponse,
NKikimrBlobStorage::TEvControllerDistconfResponse, EvControllerDistconfResponse> {};

}
9 changes: 7 additions & 2 deletions ydb/core/blobstorage/nodewarden/distconf.h
Original file line number Diff line number Diff line change
Expand Up @@ -351,8 +351,13 @@ namespace NKikimr::NStorage {
bool HasQuorum() const;
void ProcessCollectConfigs(TEvGather::TCollectConfigs *res);

using TProcessCollectConfigsResult = std::variant<std::monostate, TString, NKikimrBlobStorage::TStorageConfig>;
TProcessCollectConfigsResult ProcessCollectConfigs(TEvGather::TCollectConfigs *res, const TString *selfAssemblyUUID);
// Result of ProcessCollectConfigs(res, selfAssemblyUUID).
struct TProcessCollectConfigsResult {
// What the collection step decided:
//   std::monostate                       -- nothing to propose;
//   TString                              -- error description;
//   NKikimrBlobStorage::TStorageConfig   -- configuration to be proposed.
std::variant<std::monostate, TString, NKikimrBlobStorage::TStorageConfig> Outcome;
// Set when a node quorum exists but a config quorum does not, and the nodes whose base config has
// self-management disabled themselves form a node quorum; Outcome is left as std::monostate then.
bool IsDistconfDisabledQuorum = false;
};
TProcessCollectConfigsResult ProcessCollectConfigs(TEvGather::TCollectConfigs *res,
std::optional<TStringBuf> selfAssemblyUUID);

std::optional<TString> ProcessProposeStorageConfig(TEvGather::TProposeStorageConfig *res);

struct TExConfigError : yexception {};
Expand Down
19 changes: 14 additions & 5 deletions ydb/core/blobstorage/nodewarden/distconf_console.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,14 @@ namespace NKikimr::NStorage {
return; // still waiting for previous one
}

ProposeRequestInFlight = true;

if (!StorageConfig || !StorageConfig->HasConfigComposite()) {
return; // no config yet
// send empty proposition just to connect to console
auto ev = std::make_unique<TEvBlobStorage::TEvControllerProposeConfigRequest>();
ev->Record.SetDistconf(true);
NTabletPipe::SendData(SelfId(), ConsolePipeId, ev.release(), ++ProposeRequestCookie);
return;
}

Y_ABORT_UNLESS(MainConfigYamlVersion);
Expand All @@ -51,11 +57,14 @@ namespace NKikimr::NStorage {
MainConfigFetchYamlHash, *MainConfigYamlVersion));
ProposedConfigHashVersion.emplace(MainConfigFetchYamlHash, *MainConfigYamlVersion);
NTabletPipe::SendData(SelfId(), ConsolePipeId, new TEvBlobStorage::TEvControllerProposeConfigRequest(
MainConfigFetchYamlHash, *MainConfigYamlVersion), ++ProposeRequestCookie);
ProposeRequestInFlight = true;
MainConfigFetchYamlHash, *MainConfigYamlVersion, true), ++ProposeRequestCookie);
}

void TDistributedConfigKeeper::Handle(TEvBlobStorage::TEvControllerValidateConfigResponse::TPtr ev) {
STLOG(PRI_DEBUG, BS_NODE, NWDC10, "received TEvControllerValidateConfigResponse",
(Sender, ev->Sender), (Cookie, ev->Cookie), (Record, ev->Get()->Record),
(ConsoleConfigValidationQ.size, ConsoleConfigValidationQ.size()));

auto& q = ConsoleConfigValidationQ;
auto pred = [&](const auto& item) {
const auto& [actorId, yaml, cookie] = item;
Expand Down Expand Up @@ -114,7 +123,7 @@ namespace NKikimr::NStorage {
break;

case NKikimrBlobStorage::TEvControllerProposeConfigResponse::ReverseCommit:
Y_DEBUG_ABORT();
// just do nothing, we didn't have the config in distconf, possibly it is being enabled
break;
}
}
Expand Down Expand Up @@ -204,7 +213,7 @@ namespace NKikimr::NStorage {
if (!fetched) { // fill in 'to-be-fetched' version of config with version incremented by one
try {
auto metadata = NYamlConfig::GetMainMetadata(yaml);
metadata.Cluster = metadata.Cluster.value_or("unknown"); // TODO: fix this
metadata.Cluster = metadata.Cluster.value_or(AppData()->ClusterName);
metadata.Version = metadata.Version.value_or(0) + 1;
temp = NYamlConfig::ReplaceMetadata(yaml, metadata);
} catch (const std::exception& ex) {
Expand Down
50 changes: 32 additions & 18 deletions ydb/core/blobstorage/nodewarden/distconf_fsm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,16 +110,17 @@ namespace NKikimr::NStorage {
}

void TDistributedConfigKeeper::ProcessCollectConfigs(TEvGather::TCollectConfigs *res) {
TOverloaded handler{
[&](std::monostate&&) {
auto r = ProcessCollectConfigs(res, std::nullopt);
std::visit(TOverloaded{
[&](std::monostate&) {
STLOG(PRI_DEBUG, BS_NODE, NWDC61, "ProcessCollectConfigs: monostate");
RootState = ERootState::RELAX;
},
[&](TString&& error) {
[&](TString& error) {
STLOG(PRI_DEBUG, BS_NODE, NWDC63, "ProcessCollectConfigs: error", (Error, error));
SwitchToError(error);
},
[&](NKikimrBlobStorage::TStorageConfig&& proposedConfig) {
[&](NKikimrBlobStorage::TStorageConfig& proposedConfig) {
STLOG(PRI_DEBUG, BS_NODE, NWDC64, "ProcessCollectConfigs: proposed new config",
(ProposedConfig, proposedConfig));
TEvScatter task;
Expand All @@ -130,12 +131,11 @@ namespace NKikimr::NStorage {
propose->MutableConfig()->Swap(&proposedConfig);
IssueScatterTask(TActorId(), std::move(task));
}
};
std::visit(handler, ProcessCollectConfigs(res, nullptr));
}, r.Outcome);
}

TDistributedConfigKeeper::TProcessCollectConfigsResult TDistributedConfigKeeper::ProcessCollectConfigs(
TEvGather::TCollectConfigs *res, const TString *selfAssemblyUUID) {
TEvGather::TCollectConfigs *res, std::optional<TStringBuf> selfAssemblyUUID) {
auto generateSuccessful = [&](auto&& callback) {
for (const auto& item : res->GetNodes()) {
for (const auto& node : item.GetNodeIds()) {
Expand Down Expand Up @@ -171,10 +171,24 @@ namespace NKikimr::NStorage {

if (nodeQuorum && !configQuorum) {
// check if there is quorum of no-distconf config along the cluster
// Feeds the callback every node id listed under a collected item whose base config has
// self-management switched off; items with it enabled contribute nothing.
auto generateNodesWithoutDistconf = [&](auto&& callback) {
    for (const auto& entry : res->GetNodes()) {
        const bool selfManagementEnabled = entry.GetBaseConfig().GetSelfManagementConfig().GetEnabled();
        if (!selfManagementEnabled) {
            for (const auto& nodeId : entry.GetNodeIds()) {
                callback(nodeId);
            }
        }
    }
};
if (HasNodeQuorum(*StorageConfig, generateNodesWithoutDistconf)) {
// yes, distconf is disabled on the majority of the nodes, so we can't do anything about it
return {.IsDistconfDisabledQuorum = true};
}
}

if (!nodeQuorum || !configQuorum) {
return "no quorum for CollectConfigs";
return {"no quorum for CollectConfigs"};
}

// TODO: validate self-assembly UUID
Expand Down Expand Up @@ -223,7 +237,7 @@ namespace NKikimr::NStorage {
(BaseConfigs.size, baseConfigs.size()));
Y_DEBUG_ABORT("Multiple nonintersecting node sets have quorum of BaseConfig");
Halt();
return "Multiple nonintersecting node sets have quorum of BaseConfig";
return {"Multiple nonintersecting node sets have quorum of BaseConfig"};
}
NKikimrBlobStorage::TStorageConfig *baseConfig = nullptr;
for (auto& [meta, info] : baseConfigs) {
Expand Down Expand Up @@ -289,13 +303,13 @@ namespace NKikimr::NStorage {
(Generation, generation), (Configs, configs));
Y_DEBUG_ABORT("Multiple nonintersecting node sets have quorum of persistent config");
Halt();
return "Multiple nonintersecting node sets have quorum of persistent config";
return {"Multiple nonintersecting node sets have quorum of persistent config"};
}
Y_ABORT_UNLESS(configs.size() == 1);
persistedConfig = configs.front();
}
if (maxSeenGeneration && (!persistedConfig || persistedConfig->GetGeneration() < maxSeenGeneration)) {
return "couldn't obtain quorum for configuration that was seen in effect";
return {"couldn't obtain quorum for configuration that was seen in effect"};
}

// let's try to find possibly proposed config, but without a quorum, and try to reconstruct it
Expand All @@ -311,7 +325,7 @@ namespace NKikimr::NStorage {
(PersistentConfig, *persistedConfig), (ProposedConfig, config));
Y_DEBUG_ABORT("persistently proposed config has too big generation");
Halt();
return "persistently proposed config has too big generation";
return {"persistently proposed config has too big generation"};
}
}
if (proposedConfig && (proposedConfig->GetGeneration() != config.GetGeneration() ||
Expand Down Expand Up @@ -361,11 +375,11 @@ namespace NKikimr::NStorage {
if (!CurrentSelfAssemblyUUID) {
CurrentSelfAssemblyUUID.emplace(CreateGuidAsString());
}
selfAssemblyUUID = &CurrentSelfAssemblyUUID.value();
selfAssemblyUUID.emplace(CurrentSelfAssemblyUUID.value());
}
propositionBase.emplace(*baseConfig);
if (auto error = GenerateFirstConfig(baseConfig, *selfAssemblyUUID)) {
return *error;
if (auto error = GenerateFirstConfig(baseConfig, TString(*selfAssemblyUUID))) {
return {*error};
}
configToPropose = baseConfig;
}
Expand All @@ -392,12 +406,12 @@ namespace NKikimr::NStorage {

if (error) {
Y_DEBUG_ABORT("incorrect config proposition");
return "incorrect config proposition";
return {"incorrect config proposition"};
}

if (propositionBase) {
if (auto error = ValidateConfig(*propositionBase)) {
return TStringBuilder() << "failed to propose configuration, base config contains errors: " << *error;
return {TStringBuilder() << "failed to propose configuration, base config contains errors: " << *error};
}
if (auto error = ValidateConfigUpdate(*propositionBase, *configToPropose)) {
Y_FAIL_S("incorrect config proposed: " << *error);
Expand All @@ -408,7 +422,7 @@ namespace NKikimr::NStorage {
}
}

return std::move(*configToPropose);
return {std::move(*configToPropose)};
}

return {};
Expand Down
Loading
Loading