Skip to content

Commit

Permalink
Add -r changeclusterkey tool for snapshot restore (apple#7687)
Browse files Browse the repository at this point in the history
* Debug version of fdbserver -r changeclusterkey tool

* Remove debug symbols; move function to Coordination.actor.cpp

* Format and add traces

* fix comments
  • Loading branch information
sfc-gh-clin authored Jul 26, 2022
1 parent 98bff11 commit 5045844
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 4 deletions.
12 changes: 12 additions & 0 deletions fdbserver/CoordinatedState.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,3 +368,15 @@ Future<Void> MovableCoordinatedState::setExclusive(Value v) {
// Forwards the move request to MovableCoordinatedStateImpl::move, handing it the
// implementation object owned by this wrapper together with the connection string `nc`.
Future<Void> MovableCoordinatedState::move(ClusterConnectionString const& nc) {
	auto* self = impl.get();
	return MovableCoordinatedStateImpl::move(self, nc);
}

// Decodes `movableVal` as a MovableValue (MovableCoordinatedStateV2 protocol version).
// When its `other` field is present and begins with `oldClusterKey`, that prefix is
// swapped for `newClusterKey` and the re-serialized MovableValue is returned.
// In every other case an empty Optional is returned, meaning "nothing to rewrite".
Optional<Value> updateCCSInMovableValue(ValueRef movableVal, KeyRef oldClusterKey, KeyRef newClusterKey) {
	MovableValue decoded = BinaryReader::fromStringRef<MovableValue>(
	    movableVal, IncludeVersion(ProtocolVersion::withMovableCoordinatedStateV2()));

	// Guard: no embedded connection string, or one that belongs to a different cluster key.
	if (!decoded.other.present() || !decoded.other.get().startsWith(oldClusterKey)) {
		return Optional<Value>();
	}

	TraceEvent(SevDebug, "UpdateCCSInMovableValue").detail("OldConnectionString", decoded.other.get());
	decoded.other = decoded.other.get().removePrefix(oldClusterKey).withPrefix(newClusterKey);
	return Optional<Value>(
	    BinaryWriter::toValue(decoded, IncludeVersion(ProtocolVersion::withMovableCoordinatedStateV2())));
}
75 changes: 75 additions & 0 deletions fdbserver/Coordination.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -776,3 +776,78 @@ ACTOR Future<Void> coordinationServer(std::string dataFolder,
throw;
}
}

// Opens the coordinator kv-store with file prefix "coordination-" under `datafolder`,
// replaces every occurrence of `oldClusterKey` it recognizes with `newClusterKey`,
// and commits the result.
//
// Parameters:
//   datafolder    - directory handed to OnDemandStore for the coordinator's data.
//   newClusterKey - cluster key to write; the caller must keep the referenced memory
//                   alive until the returned future is ready (KeyRef is non-owning).
//   oldClusterKey - cluster key to look for; same lifetime requirement.
// Returns a future that becomes ready once the store commit has finished.
ACTOR Future<Void> changeClusterDescription(std::string datafolder, KeyRef newClusterKey, KeyRef oldClusterKey) {
	state UID myID = deterministicRandom()->randomUniqueID();
	state OnDemandStore store(datafolder, myID, "coordination-");
	RangeResult res = wait(store->readRange(allKeys));
	// Context: in the coordinators' kv-store the cluster description and the random id always
	// appear together as the cluster key. The old cluster key (oldCKey below) can show up in
	// the following places:
	// 1. oldCKey is a key in the store: the value is a binary-encoded _GenerationRegVal_ which
	//    contains a different cluster key (either movedFrom or moveTo).
	// 2. oldCKey appears inside a key used for forwarding:
	//    2.1: the prefix is _fwdKeys.begin_: the value is the new connection string.
	//    2.2: the prefix is _fwdTimeKeys.begin_: the value is the time.
	// 3. oldCKey does not appear in any key but in a value:
	//    3.1: it is in the value of a forwarding message (see 2.1).
	//    3.2: it is inside the value of a _GenerationRegVal_ (see 1), which is a cluster
	//         connection string. The cluster would probably still work if this were left
	//         untouched, but to be safe it is updated as well.
	for (auto& [key, value] : res) {
		if (key.startsWith(fwdTimeKeys.begin) && key.removePrefix(fwdTimeKeys.begin) == oldClusterKey) {
			// Case 2.2. This exact match is tested before the fwdKeys prefix so that it is still
			// reached if fwdTimeKeys.begin happens to begin with fwdKeys.begin.
			// NOTE(review): the two prefixes are defined outside this function - confirm.
			store->clear(singleKeyRange(key));
			store->set(KeyValueRef(newClusterKey.withPrefix(fwdTimeKeys.begin), value));
		} else if (key.startsWith(fwdKeys.begin)) {
			if (key.removePrefix(fwdKeys.begin) == oldClusterKey) {
				// Case 2.1: the forwarding entry is keyed by the old cluster key; re-key it.
				store->clear(singleKeyRange(key));
				store->set(KeyValueRef(newClusterKey.withPrefix(fwdKeys.begin), value));
			} else if (value.startsWith(oldClusterKey)) {
				// Case 3.1: the forwarding target is the old cluster key; rewrite the value.
				store->set(KeyValueRef(key, value.removePrefix(oldClusterKey).withPrefix(newClusterKey)));
			}
		} else if (key.startsWith(fwdTimeKeys.begin)) {
			// Forwarding timestamp that belongs to some other cluster key. Its value is a time,
			// not a _GenerationRegVal_, so it must not reach the parsing branch below.
		} else if (key == oldClusterKey) {
			// Case 1: the generation register itself is keyed by the old cluster key.
			store->clear(singleKeyRange(key));
			store->set(KeyValueRef(newClusterKey, value));
		} else {
			// Case 3.2: parse the value and patch any embedded connection string.
			GenerationRegVal regVal = BinaryReader::fromStringRef<GenerationRegVal>(value, IncludeVersion());
			if (regVal.val.present()) {
				Optional<Value> newVal = updateCCSInMovableValue(regVal.val.get(), oldClusterKey, newClusterKey);
				if (newVal.present()) {
					regVal.val = newVal.get();
					store->set(KeyValueRef(
					    key, BinaryWriter::toValue(regVal, IncludeVersion(ProtocolVersion::withGenerationRegVal()))));
				}
			}
		}
	}
	wait(store->commit());
	return Void();
}

// Walks the immediate subdirectories of `dataFolder` and, for each one that contains a file
// whose name begins with "coordination-", starts changeClusterDescription() on it.
// The returned future is ready once every started rewrite has completed.
Future<Void> coordChangeClusterKey(std::string dataFolder, KeyRef newClusterKey, KeyRef oldClusterKey) {
	TraceEvent(SevInfo, "CoordChangeClusterKey")
	    .detail("DataFolder", dataFolder)
	    .detail("NewClusterKey", newClusterKey)
	    .detail("OldClusterKey", oldClusterKey);

	// True when at least one file name in the listing starts with the coordinator store prefix.
	auto holdsCoordinatorState = [](const std::vector<std::string>& fileNames) {
		for (const auto& fileName : fileNames) {
			if (fileName.rfind("coordination-", 0) == 0) {
				return true;
			}
		}
		return false;
	};

	std::vector<Future<Void>> pending;
	const std::string absDataFolder = abspath(dataFolder);
	for (const auto& subDir : platform::listDirectories(absDataFolder)) {
		if (subDir == "." || subDir == "..") {
			continue;
		}
		std::string processDir = dataFolder + "/" + subDir;
		TraceEvent(SevInfo, "UpdatingCoordDataForProcess").detail("ProcessDataDir", processDir);
		if (holdsCoordinatorState(platform::listFiles(processDir, ""))) {
			pending.push_back(changeClusterDescription(processDir, newClusterKey, oldClusterKey));
		}
	}
	return waitForAll(pending);
}
59 changes: 55 additions & 4 deletions fdbserver/fdbserver.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
#include "fdbserver/TesterInterface.actor.h"
#include "fdbserver/WorkerInterface.actor.h"
#include "fdbserver/pubsub.h"
#include "fdbserver/OnDemandStore.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "flow/ArgParseUtil.h"
#include "flow/DeterministicRandom.h"
Expand Down Expand Up @@ -111,7 +112,8 @@ enum {
OPT_TRACECLOCK, OPT_NUMTESTERS, OPT_DEVHELP, OPT_PRINT_CODE_PROBES, OPT_ROLLSIZE, OPT_MAXLOGS, OPT_MAXLOGSSIZE, OPT_KNOB, OPT_UNITTESTPARAM, OPT_TESTSERVERS, OPT_TEST_ON_SERVERS, OPT_METRICSCONNFILE,
OPT_METRICSPREFIX, OPT_LOGGROUP, OPT_LOCALITY, OPT_IO_TRUST_SECONDS, OPT_IO_TRUST_WARN_ONLY, OPT_FILESYSTEM, OPT_PROFILER_RSS_SIZE, OPT_KVFILE,
OPT_TRACE_FORMAT, OPT_WHITELIST_BINPATH, OPT_BLOB_CREDENTIAL_FILE, OPT_CONFIG_PATH, OPT_USE_TEST_CONFIG_DB, OPT_FAULT_INJECTION, OPT_PROFILER, OPT_PRINT_SIMTIME,
OPT_FLOW_PROCESS_NAME, OPT_FLOW_PROCESS_ENDPOINT, OPT_IP_TRUSTED_MASK, OPT_KMS_CONN_DISCOVERY_URL_FILE, OPT_KMS_CONN_VALIDATION_TOKEN_DETAILS, OPT_KMS_CONN_GET_ENCRYPTION_KEYS_ENDPOINT
OPT_FLOW_PROCESS_NAME, OPT_FLOW_PROCESS_ENDPOINT, OPT_IP_TRUSTED_MASK, OPT_KMS_CONN_DISCOVERY_URL_FILE, OPT_KMS_CONN_VALIDATION_TOKEN_DETAILS, OPT_KMS_CONN_GET_ENCRYPTION_KEYS_ENDPOINT,
OPT_NEW_CLUSTER_KEY
};

CSimpleOpt::SOption g_rgOptions[] = {
Expand Down Expand Up @@ -205,9 +207,11 @@ CSimpleOpt::SOption g_rgOptions[] = {
{ OPT_FLOW_PROCESS_NAME, "--process-name", SO_REQ_SEP },
{ OPT_FLOW_PROCESS_ENDPOINT, "--process-endpoint", SO_REQ_SEP },
{ OPT_IP_TRUSTED_MASK, "--trusted-subnet-", SO_REQ_SEP },
{ OPT_NEW_CLUSTER_KEY, "--new-cluster-key", SO_REQ_SEP },
{ OPT_KMS_CONN_DISCOVERY_URL_FILE, "--discover-kms-conn-url-file", SO_REQ_SEP},
{ OPT_KMS_CONN_VALIDATION_TOKEN_DETAILS, "--kms-conn-validation-token-details", SO_REQ_SEP},
{ OPT_KMS_CONN_GET_ENCRYPTION_KEYS_ENDPOINT, "--kms-conn-get-encryption-keys-endpoint", SO_REQ_SEP},

TLS_OPTION_FLAGS,
SO_END_OF_OPTIONS
};
Expand Down Expand Up @@ -735,6 +739,17 @@ static void printUsage(const char* name, bool devhelp) {
" - FDB_DUMP_STARTKEY: start key for the dump, default is empty\n"
" - FDB_DUMP_ENDKEY: end key for the dump, default is \"\\xff\\xff\"\n"
" - FDB_DUMP_DEBUG: print key-values to stderr in escaped format\n");

printf(
"\n"
"The 'changedescription' role replaces the old cluster key in all coordinators' data file to the specified "
"new cluster key,\n"
"which is passed in by '--new-cluster-key'. In particular, cluster key means '[description]:[id]'.\n"
"'--datadir' is supposed to point to the top level directory of FDB's data, where subdirectories are for "
"each process's data.\n"
"The given cluster file passed in by '-C, --cluster-file' is considered to contain the old cluster key.\n"
"It is used before restoring a snapshotted cluster to let the cluster have a different cluster key.\n"
"Please make sure run it on every host in the cluster with the same '--new-cluster-key'.\n");
} else {
printOptionUsage("--dev-help", "Display developer-specific help and exit.");
}
Expand Down Expand Up @@ -980,10 +995,12 @@ void restoreRoleFilesHelper(std::string dirSrc, std::string dirToMove, std::stri

namespace {
enum class ServerRole {
ChangeClusterKey,
ConsistencyCheck,
CreateTemplateDatabase,
DSLTest,
FDBD,
FlowProcess,
KVFileGenerateIOLogChecksums,
KVFileIntegrityCheck,
KVFileDump,
Expand All @@ -996,13 +1013,12 @@ enum class ServerRole {
SkipListTest,
Test,
VersionedMapTest,
UnitTests,
FlowProcess
UnitTests
};
struct CLIOptions {
std::string commandLine;
std::string fileSystemPath, dataFolder, connFile, seedConnFile, seedConnString, logFolder = ".", metricsConnFile,
metricsPrefix;
metricsPrefix, newClusterKey;
std::string logGroup = "default";
uint64_t rollsize = TRACE_DEFAULT_ROLL_SIZE;
uint64_t maxLogsSize = TRACE_DEFAULT_MAX_LOGS_SIZE;
Expand Down Expand Up @@ -1250,6 +1266,8 @@ struct CLIOptions {
role = ServerRole::UnitTests;
else if (!strcmp(sRole, "flowprocess"))
role = ServerRole::FlowProcess;
else if (!strcmp(sRole, "changeclusterkey"))
role = ServerRole::ChangeClusterKey;
else {
fprintf(stderr, "ERROR: Unknown role `%s'\n", sRole);
printHelpTeaser(argv[0]);
Expand Down Expand Up @@ -1653,6 +1671,19 @@ struct CLIOptions {
knobs.emplace_back("rest_kms_connector_get_encryption_keys_endpoint", args.OptionArg());
break;
}
case OPT_NEW_CLUSTER_KEY: {
newClusterKey = args.OptionArg();
try {
ClusterConnectionString ccs;
// make sure the new cluster key is in valid format
ccs.parseKey(newClusterKey);
} catch (Error& e) {
std::cerr << "Invalid cluster key(description:id) '" << newClusterKey << "' from --new-cluster-key"
<< std::endl;
flushAndExit(FDB_EXIT_ERROR);
}
break;
}
}
}

Expand Down Expand Up @@ -1748,6 +1779,21 @@ struct CLIOptions {
flushAndExit(FDB_EXIT_ERROR);
}

if (role == ServerRole::ChangeClusterKey) {
bool error = false;
if (!newClusterKey.size()) {
fprintf(stderr, "ERROR: please specify --new-cluster-key\n");
error = true;
} else if (connectionFile->getConnectionString().clusterKey() == newClusterKey) {
fprintf(stderr, "ERROR: the new cluster key is the same as the old one\n");
error = true;
}
if (error) {
printHelpTeaser(argv[0]);
flushAndExit(FDB_EXIT_ERROR);
}
}

// Interpret legacy "maxLogs" option in the most sensible and unsurprising way we can while eliminating its code
// path
if (maxLogsSet) {
Expand Down Expand Up @@ -2272,6 +2318,11 @@ int main(int argc, char* argv[]) {
} else if (role == ServerRole::KVFileDump) {
f = stopAfter(KVFileDump(opts.kvFile));
g_network->run();
} else if (role == ServerRole::ChangeClusterKey) {
Key newClusterKey(opts.newClusterKey);
Key oldClusterKey = opts.connectionFile->getConnectionString().clusterKey();
f = stopAfter(coordChangeClusterKey(opts.dataFolder, newClusterKey, oldClusterKey));
g_network->run();
}

int rc = FDB_EXIT_SUCCESS;
Expand Down
5 changes: 5 additions & 0 deletions fdbserver/include/fdbserver/CoordinationInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,4 +236,9 @@ Future<Void> coordinationServer(std::string const& dataFolder,
Reference<ConfigNode> const&,
ConfigBroadcastInterface const&);

// Parses `movableVal` as a serialized MovableValue. If the old cluster key is present in it,
// returns the re-serialized value with the old cluster key replaced by the new one;
// otherwise returns an empty Optional.
Optional<Value> updateCCSInMovableValue(ValueRef movableVal, KeyRef oldClusterKey, KeyRef newClusterKey);

// Replaces `oldClusterKey` with `newClusterKey` in the coordinator data found under `dataFolder`.
// NOTE(review): `dataFolder` is presumably the top-level data directory whose subdirectories
// hold each process's data - confirm against the implementation.
Future<Void> coordChangeClusterKey(std::string dataFolder, KeyRef newClusterKey, KeyRef oldClusterKey);

#endif

0 comments on commit 5045844

Please sign in to comment.