Skip to content

Commit c268d06

Browse files
authored
[Bugfix] Disable maintenance before healthcheck (#299)
1 parent bf907ed commit c268d06

File tree

1 file changed

+42
-35
lines changed

1 file changed

+42
-35
lines changed

service/upgrade_manager.go

Lines changed: 42 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,27 +1006,20 @@ func (m *upgradeManager) processUpgradePlan(ctx context.Context, plan UpgradePla
10061006
return recordFailure(errors.Wrap(err, "Failed to restart dbserver"))
10071007
}
10081008

1009-
m.log.Info().Msg("Disabling supervision")
1010-
if err := m.disableSupervision(ctx); err != nil {
1011-
return recordFailure(errors.Wrap(err, "Failed to disable supervision"))
1012-
}
1013-
m.log.Info().Msg("Disabled supervision")
1014-
1015-
defer func() {
1016-
m.log.Info().Msg("Enabling supervision")
1017-
if err := m.enableSupervision(ctx); err != nil {
1018-
recordFailure(errors.Wrap(err, "Failed to enable supervision"))
1009+
if err := m.withMaintenance(ctx, recordFailure)(func() error {
1010+
// Wait until dbserver restarted
1011+
if err := m.waitUntilUpgradeServerStarted(ctx); err != nil {
1012+
return recordFailure(errors.Wrap(err, "DBServer restart in upgrade mode did not succeed"))
10191013
}
1020-
}()
10211014

1022-
// Wait until dbserver restarted
1023-
if err := m.waitUntilUpgradeServerStarted(ctx); err != nil {
1024-
return recordFailure(errors.Wrap(err, "DBServer restart in upgrade mode did not succeed"))
1025-
}
1015+
// Wait until all dbservers respond
1016+
if err := m.waitUntil(ctx, m.areDBServersResponding, "DBServers are not yet all responding: %v"); err != nil {
1017+
return recordFailure(errors.Wrap(err, "Not all DBServers are responding in time"))
1018+
}
10261019

1027-
// Wait until all dbservers respond
1028-
if err := m.waitUntil(ctx, m.areDBServersResponding, "DBServers are not yet all responding: %v"); err != nil {
1029-
return recordFailure(errors.Wrap(err, "Not all DBServers are responding in time"))
1020+
return nil
1021+
}); err != nil {
1022+
return err
10301023
}
10311024

10321025
// Wait until cluster healthy
@@ -1074,28 +1067,22 @@ func (m *upgradeManager) processUpgradePlan(ctx context.Context, plan UpgradePla
10741067
return recordFailure(errors.Wrap(err, "Failed to restart single server"))
10751068
}
10761069

1077-
m.log.Info().Msg("Disabling supervision")
1078-
if err := m.disableSupervision(ctx); err != nil {
1079-
return recordFailure(errors.Wrap(err, "Failed to disable supervision"))
1080-
}
1070+
if err := m.withMaintenance(ctx, recordFailure)(func() error {
1071+
// Wait until single server restarted
1072+
if err := m.waitUntilUpgradeServerStarted(ctx); err != nil {
1073+
return recordFailure(errors.Wrap(err, "Single server restart in upgrade mode did not succeed"))
1074+
}
10811075

1082-
m.log.Info().Msg("Disabled supervision")
1083-
defer func() {
1084-
m.log.Info().Msg("Enabling supervision")
1085-
if err := m.enableSupervision(ctx); err != nil {
1086-
recordFailure(errors.Wrap(err, "Failed to enable supervision"))
1076+
// Wait until all single servers respond
1077+
if err := m.waitUntil(ctx, m.areSingleServersResponding, "Active failover single server is not yet responding: %v"); err != nil {
1078+
return recordFailure(errors.Wrap(err, "Not all single servers are responding in time"))
10871079
}
1088-
}()
10891080

1090-
// Wait until single server restarted
1091-
if err := m.waitUntilUpgradeServerStarted(ctx); err != nil {
1092-
return recordFailure(errors.Wrap(err, "Single server restart in upgrade mode did not succeed"))
1081+
return nil
1082+
}); err != nil {
1083+
return err
10931084
}
10941085

1095-
// Wait until all single servers respond
1096-
if err := m.waitUntil(ctx, m.areSingleServersResponding, "Active failover single server is not yet responding: %v"); err != nil {
1097-
return recordFailure(errors.Wrap(err, "Not all single servers are responding in time"))
1098-
}
10991086
return nil
11001087
}
11011088
if err := upgrade(); err != nil {
@@ -1160,6 +1147,26 @@ func (m *upgradeManager) processUpgradePlan(ctx context.Context, plan UpgradePla
11601147
return nil
11611148
}
11621149

1150+
// withMaintenance wraps an upgrade action with the supervision
// disable/enable steps.
//
// It returns a function that accepts the action f and, when invoked:
//   - logs and calls m.disableSupervision(ctx); if that fails, f is NOT run
//     and the wrapped error is passed through recordFailure and returned;
//   - otherwise registers a deferred m.enableSupervision(ctx) and then runs f,
//     returning f's result unchanged.
//
// The deferred enable runs whether f succeeds or fails. If enabling fails, the
// wrapped error is handed to recordFailure, but recordFailure's return value
// is discarded, so the caller still only sees the error returned by f.
//
// NOTE(review): the deferred enableSupervision reuses the same ctx; if ctx is
// already cancelled or expired by the time f returns, re-enabling supervision
// may fail as well - confirm this is acceptable.
1151+
func (m *upgradeManager) withMaintenance(ctx context.Context, recordFailure func(err error) error) func(func() error) error {
1152+
return func(f func() error) error {
1153+
m.log.Info().Msg("Disabling supervision")
1154+
if err := m.disableSupervision(ctx); err != nil {
1155+
return recordFailure(errors.Wrap(err, "Failed to disable supervision"))
1156+
}
1157+
1158+
m.log.Info().Msg("Disabled supervision")
1159+
defer func() {
1160+
m.log.Info().Msg("Enabling supervision")
1161+
if err := m.enableSupervision(ctx); err != nil {
1162+
recordFailure(errors.Wrap(err, "Failed to enable supervision"))
1163+
}
1164+
}()
1165+
1166+
return f()
1167+
}
1168+
}
1169+
11631170
// finishUpgradePlan is called at the end of the upgrade process.
11641171
// It shows the user that everything is ready & what versions we have now.
11651172
func (m *upgradeManager) finishUpgradePlan(ctx context.Context, plan UpgradePlan) error {

0 commit comments

Comments
 (0)