Skip to content

Bug fix/scale up error #275

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 26, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/apis/deployment/v1alpha/member_status_list.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ func (l MemberStatusList) SelectMemberToRemove() (MemberStatus, error) {
if len(l) > 0 {
// Try to find a not ready member
for _, m := range l {
if m.Phase == MemberPhaseNone {
if m.Phase == MemberPhaseNone || !m.Conditions.IsTrue(ConditionTypeReady) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do two loops. PhaseNone has higher priority.

return m, nil
}
}
Expand Down
16 changes: 12 additions & 4 deletions pkg/deployment/cluster_scaling_integration.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,11 +192,19 @@ func (ci *clusterScalingIntegration) updateClusterServerCount(ctx context.Contex
}
coordinatorCount := spec.Coordinators.GetCount()
dbserverCount := spec.DBServers.GetCount()
if err := arangod.SetNumberOfServers(ctx, c.Connection(), coordinatorCount, dbserverCount); err != nil {
if expectSuccess {
log.Debug().Err(err).Msg("Failed to set number of servers")

ci.lastNumberOfServers.mutex.Lock()
lastNumberOfServers := ci.lastNumberOfServers.NumberOfServers
ci.lastNumberOfServers.mutex.Unlock()

// This is to prevent unnecessary updates that may override some values written by the WebUI (in the case of an update loop)
if coordinatorCount != lastNumberOfServers.GetCoordinators() && dbserverCount != lastNumberOfServers.GetDBServers() {
if err := arangod.SetNumberOfServers(ctx, c.Connection(), coordinatorCount, dbserverCount); err != nil {
if expectSuccess {
log.Debug().Err(err).Msg("Failed to set number of servers")
}
return false, maskAny(err)
}
return false, maskAny(err)
}

// Success, now update internal state
Expand Down
15 changes: 12 additions & 3 deletions pkg/deployment/reconcile/plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -366,13 +366,22 @@ func createScalePlan(log zerolog.Logger, members api.MemberStatusList, group api
if m, err := members.SelectMemberToRemove(); err != nil {
log.Warn().Err(err).Str("role", group.AsRole()).Msg("Failed to select member to remove")
} else {
if group == api.ServerGroupDBServers {

log.Debug().
Str("member-id", m.ID).
Str("phase", string(m.Phase)).
Msg("Found member to remove")
if m.Conditions.IsTrue(api.ConditionTypeReady) {
if group == api.ServerGroupDBServers {
plan = append(plan,
api.NewAction(api.ActionTypeCleanOutMember, group, m.ID),
)
}
plan = append(plan,
api.NewAction(api.ActionTypeCleanOutMember, group, m.ID),
api.NewAction(api.ActionTypeShutdownMember, group, m.ID),
)
}
plan = append(plan,
api.NewAction(api.ActionTypeShutdownMember, group, m.ID),
api.NewAction(api.ActionTypeRemoveMember, group, m.ID),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Put this logic into plan execution and keep the plan as it was. Reason: this reduces the time window in which one can screw things up by killing a running dbserver.

)
log.Debug().
Expand Down
2 changes: 1 addition & 1 deletion pkg/deployment/resources/secret_hashes.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ import (
// ValidateSecretHashes checks the hash of used secrets
// against the stored ones.
// If a hash is different, the deployment is marked
// with a SecretChangedCondition and the operator will no
// with a SecretChangedCondition and the operator will not
// touch it until this is resolved.
func (r *Resources) ValidateSecretHashes() error {
// validate performs a secret hash comparison for a single secret.
Expand Down