Skip to content

Commit

Permalink
MXS-5022 Cleanup replication setup
Browse files Browse the repository at this point in the history
Replication setup is no longer retried as any failure is likely
final.
  • Loading branch information
ekorh475 committed Jun 12, 2024
1 parent abb607b commit 58bddfc
Show file tree
Hide file tree
Showing 7 changed files with 30 additions and 50 deletions.
2 changes: 1 addition & 1 deletion system-test/maxtest/include/maxtest/galera_cluster.hh
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class GaleraCluster : public MariaDBCluster
public:
GaleraCluster(mxt::SharedData* shared);

bool start_replication() override;
bool setup_replication() override;

std::string get_srv_cnf_filename(int node) override;

Expand Down
5 changes: 3 additions & 2 deletions system-test/maxtest/include/maxtest/mariadb_nodes.hh
Original file line number Diff line number Diff line change
Expand Up @@ -268,11 +268,12 @@ public:
int stop_slaves();

/**
* Start replication in manner relevant to the cluster.
* Start replication in a manner relevant to the cluster. Called on a blank cluster, so it must
* also generate users.
*
* @return True on success
*/
virtual bool start_replication() = 0;
virtual bool setup_replication() = 0;

/**
* Check if the cluster is replicating or otherwise properly synced. May also attempt light fixes.
Expand Down
2 changes: 1 addition & 1 deletion system-test/maxtest/include/maxtest/replication_cluster.hh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class ReplicationCluster : public MariaDBCluster
public:
ReplicationCluster(SharedData* shared);

bool start_replication() override;
bool setup_replication() override;

/**
* Wait until slaves are up-to-date. Only considers the first gtid-triplet.
Expand Down
2 changes: 1 addition & 1 deletion system-test/maxtest/src/galera_cluster.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ GaleraCluster::GaleraCluster(mxt::SharedData* shared)
{
}

bool GaleraCluster::start_replication()
bool GaleraCluster::setup_replication()
{
int local_result = stop_nodes() ? 0 : 1;

Expand Down
29 changes: 2 additions & 27 deletions system-test/maxtest/src/mariadb_nodes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -628,46 +628,21 @@ bool MariaDBCluster::prepare_for_test()
if (reset_servers())
{
log.log_msgf("%s reset. Starting replication.", namec);
start_replication();

int attempts = 0;
bool cluster_ok = false;

while (!cluster_ok && attempts < 10)
{
if (attempts > 0)
{
log.log_msgf("Iteration %i, %s is still broken, waiting.", attempts, namec);
sleep(10);
}
if (check_fix_replication())
{
cluster_ok = true;
}
attempts++;
}

if (cluster_ok)
if (setup_replication())
{
log.log_msgf("%s is replicating/synced.", namec);
rval = prepare_servers_for_test();
}
else
{
log.add_failure("%s is still broken.", namec);
}
}
else
{
logger().add_failure("Server preparation on %s failed.", name().c_str());
log.log_msgf("Failed to reset servers of %s.", name().c_str());
}
}
else
{
rval = true;
}

disconnect();
return rval;
}

Expand Down
38 changes: 21 additions & 17 deletions system-test/maxtest/src/replication_cluster.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,32 +83,34 @@ const std::string& ReplicationCluster::type_string() const
return type_mariadb;
}

bool ReplicationCluster::start_replication()
bool ReplicationCluster::setup_replication()
{
const int n = N;
// Generate users on all nodes.
// TODO: most users can be generated just on the master once replication is on.
bool gtids_reset = true;
for (int i = 0; i < n; i++)
{
create_users(i);
}

ping_or_open_admin_connections();

// At this point, the servers have conflicting gtids but identical data. Set gtids manually so
// replication can start.
bool reset_ok = true;
for (int i = 0; i < n; i++)
{
auto conn = backend(i)->admin_connection();
if (!conn->try_cmd("RESET MASTER;") || !conn->try_cmd("SET GLOBAL gtid_slave_pos='0-1-0'"))
if (create_users(i))
{
reset_ok = false;
// The servers now have conflicting gtids but identical data. Set gtids manually so
// replication can start.
auto conn = backend(i)->admin_connection();
if (!conn->try_cmd("RESET MASTER;") || !conn->try_cmd("SET GLOBAL gtid_slave_pos='0-1-0'"))
{
gtids_reset = false;
logger().log_msgf("Gtid reset failed on %s. Cannot setup replication.",
backend(i)->vm_node().name());
}
}
else
{
gtids_reset = false;
}
}

bool rval = false;
if (reset_ok)
if (gtids_reset)
{
bool repl_ok = true;
// Finally, begin replication.
Expand All @@ -118,16 +120,18 @@ bool ReplicationCluster::start_replication()
auto conn = backend(i)->admin_connection();
if (!conn->try_cmd(change_master) || !conn->try_cmd("START SLAVE;"))
{
logger().log_msgf("Failed to start replication on %s. Cannot setup replication.",
backend(i)->vm_node().name());
repl_ok = false;
}
}

if (repl_ok)
if (repl_ok && sync_slaves(0, 5))
{
logger().log_msgf("Replication setup success on %s.", name().c_str());
rval = true;
}
}

return rval;
}

Expand Down
2 changes: 1 addition & 1 deletion system-test/maxtest/src/testconnections.cc
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ int TestConnections::prepare_for_test(int argc, char* argv[])
if (galera && restart_galera && m_shared.settings.mdbci_test)
{
galera->stop_nodes();
galera->start_replication();
galera->setup_replication();
}

if (m_check_nodes)
Expand Down

0 comments on commit 58bddfc

Please sign in to comment.