Skip to content

Commit

Permalink
controller/volume: avoid waiting for all replicas to be healthy
Browse files Browse the repository at this point in the history
    The case we hit is a volume with three replicas, one of which is not
    ready. The engine state then could not be changed to running, even
    though there seem to be enough replicas for the engine to run in that case.

    So, when a replica is not running, we can continue on to check the next
    replica instead of skipping this reconcile round. One healthy replica
    should be enough for the engine to run.

Signed-off-by: Vicente Cheng <vicente.cheng@suse.com>
  • Loading branch information
Vicente-Cheng committed Jul 27, 2023
1 parent ee3a4c0 commit d7bf2cb
Showing 1 changed file with 10 additions and 4 deletions.
14 changes: 10 additions & 4 deletions controller/volume_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1690,6 +1690,7 @@ func (c *VolumeController) openVolumeDependentResources(v *longhorn.Volume, e *l
}

replicaAddressMap := map[string]string{}
anyRunningReplica := false
for _, r := range rs {
// Ignore unscheduled replicas
if r.Spec.NodeID == "" {
Expand All @@ -1707,10 +1708,12 @@ func (c *VolumeController) openVolumeDependentResources(v *longhorn.Volume, e *l
if r.Status.CurrentState == longhorn.InstanceStateError {
continue
}
// wait for all potentially healthy replicas become running
if r.Status.CurrentState != longhorn.InstanceStateRunning {
log.WithField("replica", r.Name).Debug("Replica is not running yet, just skip this round reconcile.")
return nil
// wait at least one replica is running
if r.Status.CurrentState == longhorn.InstanceStateRunning {
anyRunningReplica = true
} else {
log.WithField("replica", r.Name).Debug("Replica is not running yet, just continue to check next.")
continue
}
if r.Status.IP == "" {
log.WithField("replica", r.Name).Warn("Replica is running but IP is empty")
Expand All @@ -1726,6 +1729,9 @@ func (c *VolumeController) openVolumeDependentResources(v *longhorn.Volume, e *l
}
replicaAddressMap[r.Name] = imutil.GetURL(r.Status.StorageIP, r.Status.Port)
}
if !anyRunningReplica {
return nil
}
if len(replicaAddressMap) == 0 {
return fmt.Errorf("no healthy or scheduled replica for starting")
}
Expand Down

0 comments on commit d7bf2cb

Please sign in to comment.