Skip to content

Fixed single server upgrade race condition. #237

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# ArangoDB Starter Changelog

## Changes from 0.14.5 to 0.14.6

- Fixed race condition in Single Server upgrade

## Changes from 0.14.4 to 0.14.5

- Implement special upgrade procedure for upgrading from 3.4.6
Expand Down
51 changes: 28 additions & 23 deletions service/upgrade_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,16 +265,19 @@ func (m *upgradeManager) StartDatabaseUpgrade(ctx context.Context) error {
// Fetch mode
config, myPeer, mode := m.upgradeManagerContext.ClusterConfig()

if !mode.HasAgency() {
// Run upgrade without agency
go m.runSingleServerUpgradeProcess(ctx, myPeer, mode)
if mode.IsSingleMode() {
// Run upgrade without agency (i.e., SingleServer)

// Create a new context to be independent of ctx
timeoutContext, _ := context.WithTimeout(context.Background(), time.Minute*5)
go m.runSingleServerUpgradeProcess(timeoutContext, myPeer, mode)
return nil
}

// Check cluster health
if mode.IsClusterMode() {
if err := m.isClusterHealthy(ctx); err != nil {
return maskAny(errors.Wrap(err, "Found unhealthy cluster"))
return maskAny(errors.Wrap(err, "Cannot upgrade unhealthy cluster"))
}
}

Expand Down Expand Up @@ -356,7 +359,7 @@ func (m *upgradeManager) StartDatabaseUpgrade(ctx context.Context) error {
}
}

m.log.Info().Msg("Applied special update procedure for 3.4.6")
m.log.Info().Msg("Applied special upgrade procedure for 3.4.6")
}

// Create upgrade plan
Expand Down Expand Up @@ -1163,31 +1166,33 @@ func (m *upgradeManager) finishUpgradePlan(ctx context.Context, plan UpgradePlan

// runSingleServerUpgradeProcess runs the entire upgrade process of a single server until it is finished.
func (m *upgradeManager) runSingleServerUpgradeProcess(ctx context.Context, myPeer *Peer, mode ServiceMode) {
// Unlock when we're done
// Cleanup when we're done
defer func() {
m.upgradeServerType = ""
m.updateNeeded = false
}()

if mode.IsSingleMode() {
// Restart the single server in auto-upgrade mode
m.log.Info().Msg("Upgrading single server")
m.upgradeServerType = ServerTypeSingle
m.updateNeeded = true
if err := m.upgradeManagerContext.RestartServer(ServerTypeSingle); err != nil {
m.log.Error().Err(err).Msg("Failed to restart single server")
return
}
if !mode.IsSingleMode() {
m.log.Info().Msg("Not in Single Server Mode, aborting.")
return
}
// Restart the single server in auto-upgrade mode
m.log.Info().Msg("Upgrading single server")
m.upgradeServerType = ServerTypeSingle
m.updateNeeded = true
if err := m.upgradeManagerContext.RestartServer(ServerTypeSingle); err != nil {
m.log.Error().Err(err).Msg("Failed to restart single server")
return
}

// Wait until single server restarted
if err := m.waitUntilUpgradeServerStarted(ctx); err != nil {
return
}
// Wait until single server restarted
if err := m.waitUntilUpgradeServerStarted(ctx); err != nil {
return
}

// Wait until all single servers respond
if err := m.waitUntil(ctx, m.areSingleServersResponding, "Single server is not yet responding: %v"); err != nil {
return
}
// Wait until all single servers respond
if err := m.waitUntil(ctx, m.areSingleServersResponding, "Single server is not yet responding: %v"); err != nil {
return
}

// We're done
Expand Down