Skip to content

Commit

Permalink
feat(setting): add tolerate node disk empty selector volume setting
Browse files Browse the repository at this point in the history
ref: longhorn/longhorn 4826

Signed-off-by: Jack Lin <jack.lin@suse.com>
  • Loading branch information
ChanYiLin committed Aug 1, 2023
1 parent a5b67e5 commit ea666e5
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 42 deletions.
7 changes: 6 additions & 1 deletion controller/volume_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -2304,10 +2304,15 @@ func (c *VolumeController) listReadySchedulableAndScheduledNodes(volume *longhor
return nil, err
}

allowEmptyNodeSelectorVolume, err := c.ds.GetSettingAsBool(types.SettingNameAllowEmptyNodeSelectorVolume)
if err != nil {
return nil, errors.Wrapf(err, "failed to get %v setting", types.SettingNameAllowEmptyNodeSelectorVolume)
}

filteredReadyNodes := readyNodes
if len(volume.Spec.NodeSelector) != 0 {
for nodeName, node := range readyNodes {
if !types.IsSelectorsInTags(node.Spec.Tags, volume.Spec.NodeSelector) {
if !types.IsSelectorsInTags(node.Spec.Tags, volume.Spec.NodeSelector, allowEmptyNodeSelectorVolume) {
delete(filteredReadyNodes, nodeName)
}
}
Expand Down
67 changes: 46 additions & 21 deletions scheduler/replica_scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,9 @@ func (rcs *ReplicaScheduler) getDiskCandidates(nodeInfo map[string]*longhorn.Nod

nodeSoftAntiAffinity, err := rcs.ds.GetSettingAsBool(types.SettingNameReplicaSoftAntiAffinity)
if err != nil {
logrus.Errorf("error getting replica soft anti-affinity setting: %v", err)
err = errors.Wrapf(err, "failed to get %v setting", types.SettingNameReplicaSoftAntiAffinity)
multiError.Append(util.NewMultiError(err.Error()))
return map[string]*Disk{}, multiError
}

if volume.Spec.ReplicaSoftAntiAffinity != longhorn.ReplicaSoftAntiAffinityDefault &&
Expand All @@ -157,7 +159,9 @@ func (rcs *ReplicaScheduler) getDiskCandidates(nodeInfo map[string]*longhorn.Nod

zoneSoftAntiAffinity, err := rcs.ds.GetSettingAsBool(types.SettingNameReplicaZoneSoftAntiAffinity)
if err != nil {
logrus.Errorf("Error getting replica zone soft anti-affinity setting: %v", err)
err = errors.Wrapf(err, "failed to get %v setting", types.SettingNameReplicaZoneSoftAntiAffinity)
multiError.Append(util.NewMultiError(err.Error()))
return map[string]*Disk{}, multiError
}
if volume.Spec.ReplicaZoneSoftAntiAffinity != longhorn.ReplicaZoneSoftAntiAffinityDefault &&
volume.Spec.ReplicaZoneSoftAntiAffinity != "" {
Expand Down Expand Up @@ -202,14 +206,21 @@ func (rcs *ReplicaScheduler) getDiskCandidates(nodeInfo map[string]*longhorn.Nod
return result
}

allowEmptyNodeSelectorVolume, err := rcs.ds.GetSettingAsBool(types.SettingNameAllowEmptyNodeSelectorVolume)
if err != nil {
err = errors.Wrapf(err, "failed to get %v setting", types.SettingNameAllowEmptyNodeSelectorVolume)
multiError.Append(util.NewMultiError(err.Error()))
return map[string]*Disk{}, multiError
}

unusedNodes := map[string]*longhorn.Node{}
unusedNodesInNewZones := map[string]*longhorn.Node{}
nodesInUnusedZones := map[string]*longhorn.Node{}
nodesWithEvictingReplicas := getNodesWithEvictingReplicas(replicas, nodeInfo)

for nodeName, node := range nodeInfo {
// Filter Nodes. If the Nodes don't match the tags, don't bother marking them as candidates.
if !types.IsSelectorsInTags(node.Spec.Tags, volume.Spec.NodeSelector) {
if !types.IsSelectorsInTags(node.Spec.Tags, volume.Spec.NodeSelector, allowEmptyNodeSelectorVolume) {
continue
}
if _, ok := usedNodes[nodeName]; !ok {
Expand Down Expand Up @@ -291,6 +302,11 @@ func (rcs *ReplicaScheduler) filterNodeDisksForReplica(node *longhorn.Node, disk
multiError = util.NewMultiError()
preferredDisks = map[string]*Disk{}

allowEmptyDiskSelectorVolume, err := rcs.ds.GetSettingAsBool(types.SettingNameAllowEmptyDiskSelectorVolume)
if err != nil {
logrus.Errorf("Failed to get allow empty tag volume setting: %v", err)
}

if len(disks) == 0 {
multiError.Append(util.NewMultiError(longhorn.ErrorReplicaScheduleDiskUnavailable))
return preferredDisks, multiError
Expand Down Expand Up @@ -349,7 +365,7 @@ func (rcs *ReplicaScheduler) filterNodeDisksForReplica(node *longhorn.Node, disk
}

// Check if the Disk's Tags are valid.
if !types.IsSelectorsInTags(diskSpec.Tags, volume.Spec.DiskSelector) {
if !types.IsSelectorsInTags(diskSpec.Tags, volume.Spec.DiskSelector, allowEmptyDiskSelectorVolume) {
multiError.Append(util.NewMultiError(longhorn.ErrorReplicaScheduleTagsNotFulfilled))
continue
}
Expand Down Expand Up @@ -445,7 +461,11 @@ func (rcs *ReplicaScheduler) CheckAndReuseFailedReplica(replicas map[string]*lon
availableNodeDisksMap := map[string]map[string]struct{}{}
reusableNodeReplicasMap := map[string][]*longhorn.Replica{}
for _, r := range replicas {
if !rcs.isFailedReplicaReusable(r, volume, allNodesInfo, hardNodeAffinity) {
isReusable, err := rcs.isFailedReplicaReusable(r, volume, allNodesInfo, hardNodeAffinity)
if err != nil {
return nil, err
}
if !isReusable {
continue
}

Expand Down Expand Up @@ -539,29 +559,34 @@ func (rcs *ReplicaScheduler) RequireNewReplica(replicas map[string]*longhorn.Rep
return lastDegradedAt.Add(waitInterval).Sub(now) + time.Second
}

func (rcs *ReplicaScheduler) isFailedReplicaReusable(r *longhorn.Replica, v *longhorn.Volume, nodeInfo map[string]*longhorn.Node, hardNodeAffinity string) bool {
func (rcs *ReplicaScheduler) isFailedReplicaReusable(r *longhorn.Replica, v *longhorn.Volume, nodeInfo map[string]*longhorn.Node, hardNodeAffinity string) (bool, error) {
if r.Spec.FailedAt == "" {
return false
return false, nil
}
if r.Spec.NodeID == "" || r.Spec.DiskID == "" {
return false
return false, nil
}
if r.Spec.RebuildRetryCount >= FailedReplicaMaxRetryCount {
return false
return false, nil
}
if r.Status.EvictionRequested {
return false
return false, nil
}
if hardNodeAffinity != "" && r.Spec.NodeID != hardNodeAffinity {
return false
return false, nil
}
if isReady, _ := rcs.ds.CheckEngineImageReadiness(r.Spec.EngineImage, r.Spec.NodeID); !isReady {
return false
return false, nil
}

allowEmptyDiskSelectorVolume, err := rcs.ds.GetSettingAsBool(types.SettingNameAllowEmptyDiskSelectorVolume)
if err != nil {
return false, errors.Wrapf(err, "failed to get %v setting", types.SettingNameAllowEmptyDiskSelectorVolume)
}

node, exists := nodeInfo[r.Spec.NodeID]
if !exists {
return false
return false, nil
}
diskFound := false
for diskName, diskStatus := range node.Status.DiskStatus {
Expand Down Expand Up @@ -590,30 +615,30 @@ func (rcs *ReplicaScheduler) isFailedReplicaReusable(r *longhorn.Replica, v *lon
diskFound = true
diskSpec, exists := node.Spec.Disks[diskName]
if !exists {
return false
return false, nil
}
if !diskSpec.AllowScheduling || diskSpec.EvictionRequested {
return false
return false, nil
}
if !types.IsSelectorsInTags(diskSpec.Tags, v.Spec.DiskSelector) {
return false
if !types.IsSelectorsInTags(diskSpec.Tags, v.Spec.DiskSelector, allowEmptyDiskSelectorVolume) {
return false, nil
}
}
}
if !diskFound {
return false
return false, nil
}

im, err := rcs.ds.GetInstanceManagerByInstance(r)
if err != nil {
logrus.Errorf("failed to get instance manager when checking replica %v is reusable: %v", r.Name, err)
return false
return false, nil
}
if im.DeletionTimestamp != nil || im.Status.CurrentState != longhorn.InstanceManagerStateRunning {
return false
return false, nil
}

return true
return true, nil
}

// IsPotentiallyReusableReplica is used to check if a failed replica is potentially reusable.
Expand Down
30 changes: 30 additions & 0 deletions types/setting.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ const (
SettingNameV2DataEngine = SettingName("v2-data-engine")
SettingNameV2DataEngineHugepageLimit = SettingName("v2-data-engine-hugepage-limit")
SettingNameOfflineReplicaRebuilding = SettingName("offline-replica-rebuilding")
SettingNameAllowEmptyNodeSelectorVolume = SettingName("allow-empty-node-selector-volume")
SettingNameAllowEmptyDiskSelectorVolume = SettingName("allow-empty-disk-selector-volume")
)

var (
Expand Down Expand Up @@ -179,6 +181,8 @@ var (
SettingNameV2DataEngine,
SettingNameV2DataEngineHugepageLimit,
SettingNameOfflineReplicaRebuilding,
SettingNameAllowEmptyNodeSelectorVolume,
SettingNameAllowEmptyDiskSelectorVolume,
}
)

Expand Down Expand Up @@ -277,6 +281,8 @@ var (
SettingNameV2DataEngine: SettingDefinitionV2DataEngine,
SettingNameV2DataEngineHugepageLimit: SettingDefinitionV2DataEngineHugepageLimit,
SettingNameOfflineReplicaRebuilding: SettingDefinitionOfflineReplicaRebuilding,
SettingNameAllowEmptyNodeSelectorVolume: SettingDefinitionAllowEmptyNodeSelectorVolume,
SettingNameAllowEmptyDiskSelectorVolume: SettingDefinitionAllowEmptyDiskSelectorVolume,
}

SettingDefinitionBackupTarget = SettingDefinition{
Expand Down Expand Up @@ -1103,6 +1109,26 @@ var (
ReadOnly: true,
Default: "1024",
}

SettingDefinitionAllowEmptyNodeSelectorVolume = SettingDefinition{
DisplayName: "Allow Scheduling Empty Node Selector Volumes To Any Node",
Description: "Allow replica of the volume without node selector to be scheduled on node with tags, default true",
Category: SettingCategoryScheduling,
Type: SettingTypeBool,
Required: true,
ReadOnly: false,
Default: "true",
}

SettingDefinitionAllowEmptyDiskSelectorVolume = SettingDefinition{
DisplayName: "Allow Scheduling Empty Disk Selector Volumes To Any Disk",
Description: "Allow replica of the volume without disk selector to be scheduled on disk with tags, default true",
Category: SettingCategoryScheduling,
Type: SettingTypeBool,
Required: true,
ReadOnly: false,
Default: "true",
}
)

type NodeDownPodDeletionPolicy string
Expand Down Expand Up @@ -1198,6 +1224,10 @@ func ValidateSetting(name, value string) (err error) {
fallthrough
case SettingNameV2DataEngine:
fallthrough
case SettingNameAllowEmptyNodeSelectorVolume:
fallthrough
case SettingNameAllowEmptyDiskSelectorVolume:
fallthrough
case SettingNameAllowCollectingLonghornUsage:
if value != "true" && value != "false" {
return fmt.Errorf("value %v of setting %v should be true or false", value, sName)
Expand Down
8 changes: 7 additions & 1 deletion types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -995,12 +995,18 @@ func GetLHVolumeAttachmentNameFromVolumeName(volName string) string {

// IsSelectorsInTags checks if all the selectors are present in the tags slice.
// It returns true if all selectors are found, false otherwise.
func IsSelectorsInTags(tags, selectors []string) bool {
func IsSelectorsInTags(tags, selectors []string, allowEmptySelector bool) bool {
if !sort.StringsAreSorted(tags) {
logrus.Debug("BUG: Tags are not sorted, sorting now")
sort.Strings(tags)
}

if len(selectors) == 0 {
if !allowEmptySelector && len(tags) != 0 {
return false
}
}

for _, selector := range selectors {
index := sort.SearchStrings(tags, selector)
// If the selector is not found or the index is out of bounds, return false.
Expand Down
50 changes: 31 additions & 19 deletions types/types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,43 +123,55 @@ func (s *TestSuite) TestParseToleration(c *C) {

func (s *TestSuite) TestIsSelectorsInTags(c *C) {
type testCase struct {
inputTags []string
inputSelectors []string
inputTags []string
inputSelectors []string
allowEmptySelector bool

expected bool
}
testCases := map[string]testCase{
"selectors exist": {
inputTags: []string{"aaa", "bbb", "ccc"},
inputSelectors: []string{"aaa", "bbb", "ccc"},
expected: true,
inputTags: []string{"aaa", "bbb", "ccc"},
inputSelectors: []string{"aaa", "bbb", "ccc"},
allowEmptySelector: true,
expected: true,
},
"selectors mis-matched": {
inputTags: []string{"aaa", "bbb", "ccc"},
inputSelectors: []string{"aaa", "b", "ccc"},
expected: false,
inputTags: []string{"aaa", "bbb", "ccc"},
inputSelectors: []string{"aaa", "b", "ccc"},
allowEmptySelector: true,
expected: false,
},
"selectors empty": {
inputTags: []string{"aaa", "bbb", "ccc"},
inputSelectors: []string{},
expected: true,
"selectors empty and tolerate": {
inputTags: []string{"aaa", "bbb", "ccc"},
inputSelectors: []string{},
allowEmptySelector: true,
expected: true,
},
"selectors empty and not tolerate": {
inputTags: []string{"aaa", "bbb", "ccc"},
inputSelectors: []string{},
allowEmptySelector: false,
expected: false,
},
"tags unsorted": {
inputTags: []string{"bbb", "aaa", "ccc"},
inputSelectors: []string{"aaa", "bbb", "ccc"},
expected: true,
inputTags: []string{"bbb", "aaa", "ccc"},
inputSelectors: []string{"aaa", "bbb", "ccc"},
allowEmptySelector: true,
expected: true,
},
"tags empty": {
inputTags: []string{},
inputSelectors: []string{"aaa", "bbb", "ccc"},
expected: false,
inputTags: []string{},
inputSelectors: []string{"aaa", "bbb", "ccc"},
allowEmptySelector: true,
expected: false,
},
}

for testName, testCase := range testCases {
fmt.Printf("testing %v\n", testName)

actual := IsSelectorsInTags(testCase.inputTags, testCase.inputSelectors)
actual := IsSelectorsInTags(testCase.inputTags, testCase.inputSelectors, testCase.allowEmptySelector)
c.Assert(actual, Equals, testCase.expected, Commentf(TestErrResultFmt, testName))
}
}

0 comments on commit ea666e5

Please sign in to comment.