Skip to content

Commit

Permalink
fix(manager): add detailed message when deleting pdb is not allowed
Browse files Browse the repository at this point in the history
Longhorn 9183

Signed-off-by: Jian Wang <jian.wang@suse.com>
  • Loading branch information
w13915984028 committed Aug 5, 2024
1 parent 1286504 commit 1c374c0
Showing 1 changed file with 18 additions and 16 deletions.
34 changes: 18 additions & 16 deletions controller/instance_manager_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -814,12 +814,13 @@ func (imc *InstanceManagerController) syncInstanceManagerPDB(im *longhorn.Instan
return nil
}

canDeletePDB, err := imc.canDeleteInstanceManagerPDB(im)
canDeletePDB, msg, err := imc.canDeleteInstanceManagerPDB(im)
if err != nil {
return err
}

if !canDeletePDB {
imc.logger.Infof("Node %v is marked unschedulable but removing %v PDB is blocked: %s ", im.Name, imc.controllerID, msg)
return nil
}

Expand All @@ -842,12 +843,13 @@ func (imc *InstanceManagerController) syncInstanceManagerPDB(im *longhorn.Instan
}

if clusterAutoscalerEnabled {
canDeletePDB, err := imc.canDeleteInstanceManagerPDB(im)
canDeletePDB, msg, err := imc.canDeleteInstanceManagerPDB(im)
if err != nil {
return err
}

if !canDeletePDB {
imc.logger.Infof("Node %v is marked unschedulable but removing %v PDB get message: %s. Autoscaler skips it", im.Name, imc.controllerID, msg)
if imPDB == nil {
return imc.createInstanceManagerPDB(im)
}
Expand Down Expand Up @@ -920,52 +922,52 @@ func (imc *InstanceManagerController) deleteInstanceManagerPDB(im *longhorn.Inst
return nil
}

func (imc *InstanceManagerController) canDeleteInstanceManagerPDB(im *longhorn.InstanceManager) (bool, error) {
func (imc *InstanceManagerController) canDeleteInstanceManagerPDB(im *longhorn.InstanceManager) (bool, string, error) {
// If there is no engine instance process inside the engine instance manager,
// it means that all volumes are detached.
// We can delete the PodDisruptionBudget for the engine instance manager.
if im.Spec.Type == longhorn.InstanceManagerTypeEngine {
if len(im.Status.InstanceEngines)+len(im.Status.Instances) == 0 { // nolint: staticcheck
return true, nil
return true, "", nil
}
return false, nil
return false, "some instances are still running", nil
}

// Make sure that the instance manager is of type replica
if im.Spec.Type != longhorn.InstanceManagerTypeReplica && im.Spec.Type != longhorn.InstanceManagerTypeAllInOne {
return false, fmt.Errorf("the instance manager %v has invalid type: %v ", im.Name, im.Spec.Type)
return false, "", fmt.Errorf("the instance manager %v has invalid type: %v ", im.Name, im.Spec.Type)
}

// Must wait for all volumes detached from the current node first.
// This also means that we must wait until the PDB of engine instance manager
// on the current node is deleted
allVolumeDetached, err := imc.areAllVolumesDetachedFromNode(im.Spec.NodeID)
if err != nil {
return false, err
return false, "", err
}
if !allVolumeDetached {
return false, nil
return false, "some volumes are still attached", nil
}

nodeDrainingPolicy, err := imc.ds.GetSettingValueExisted(types.SettingNameNodeDrainPolicy)
if err != nil {
return false, err
return false, "", err
}
if nodeDrainingPolicy == string(types.NodeDrainPolicyAlwaysAllow) {
return true, nil
return true, "", nil
}

replicasOnCurrentNode, err := imc.ds.ListReplicasByNodeRO(im.Spec.NodeID)
if err != nil {
if datastore.ErrorIsNotFound(err) {
return true, nil
return true, "", nil
}
return false, err
return false, "", err
}

if nodeDrainingPolicy == string(types.NodeDrainPolicyBlockForEviction) && len(replicasOnCurrentNode) > 0 {
// We must wait for ALL replicas to be evicted before removing the PDB.
return false, nil
return false, "some replicas block eviction", nil
}

targetReplicas := []*longhorn.Replica{}
Expand All @@ -987,7 +989,7 @@ func (imc *InstanceManagerController) canDeleteInstanceManagerPDB(im *longhorn.I

pdbProtectedHealthyReplicas, err := imc.ds.ListVolumePDBProtectedHealthyReplicasRO(replica.Spec.VolumeName)
if err != nil {
return false, err
return false, "", err
}
for _, pdbProtectedHealthyReplica := range pdbProtectedHealthyReplicas {
if pdbProtectedHealthyReplica.Spec.NodeID != im.Spec.NodeID {
Expand All @@ -1005,11 +1007,11 @@ func (imc *InstanceManagerController) canDeleteInstanceManagerPDB(im *longhorn.I
replica.Spec.NodeID == im.Spec.NodeID

if !hasPDBOnAnotherNode && !isUnusedReplicaOnCurrentNode {
return false, nil
return false, "no pdb on another node, wait", nil
}
}

return true, nil
return true, "", nil
}

func (imc *InstanceManagerController) areAllVolumesDetachedFromNode(nodeName string) (bool, error) {
Expand Down

0 comments on commit 1c374c0

Please sign in to comment.