Skip to content

Commit

Permalink
fix(schedule): do not schedule replicas to a disk
Browse files Browse the repository at this point in the history
If the file system format of the disk is `ext4` and the volume size
of the replica is bigger than 16TiB, the scheduler will not schedule
replicas to this disk.

Ref: 7423

Signed-off-by: James Lu <james.lu@suse.com>
  • Loading branch information
mantissahz authored and David Ko committed Dec 28, 2023
1 parent baa097e commit a09d8e5
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 4 deletions.
3 changes: 3 additions & 0 deletions controller/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ const (

TestCustomResourceDefinitionName = "test-crd"
TestVolumeAttachmentName = "test-volume"

TestDiskPathFSType = "ext4"
)

var (
Expand Down Expand Up @@ -539,6 +541,7 @@ func newNode(name, namespace string, allowScheduling bool, status longhorn.Condi
},
DiskUUID: TestDiskID1,
Type: longhorn.DiskTypeFilesystem,
FSType: TestDiskPathFSType,
},
},
},
Expand Down
12 changes: 12 additions & 0 deletions controller/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,7 @@ func (nc *NodeController) syncDiskStatus(node *longhorn.Node, collectedDataInfo

for _, diskInfoMap := range readyDiskInfoMap {
nc.updateReadyDiskStatusReadyCondition(node, diskInfoMap)
nc.updateReadyDiskStatusFileSystemType(node, diskInfoMap)
}

return nc.updateDiskStatusSchedulableCondition(node)
Expand Down Expand Up @@ -842,6 +843,17 @@ func (nc *NodeController) updateReadyDiskStatusReadyCondition(node *longhorn.Nod
}
}

// updateReadyDiskStatusFileSystemType copies the collected file system type
// into the node's disk status entries.
//
// For each disk in diskInfoMap, the matching entry of node.Status.DiskStatus
// is looked up by disk name. FSType is set from the collected DiskStat.Type
// only when the entry's DiskUUID equals the collected DiskUUID and the entry
// is a filesystem-type disk. The entry is stored back into the status map in
// every case.
func (nc *NodeController) updateReadyDiskStatusFileSystemType(node *longhorn.Node, diskInfoMap map[string]*monitor.CollectedDiskInfo) {
	statuses := node.Status.DiskStatus
	for name, collected := range diskInfoMap {
		status := statuses[name]

		sameDisk := status.DiskUUID == collected.DiskUUID
		if sameDisk && status.Type == longhorn.DiskTypeFilesystem {
			// Only filesystem-backed disks whose identity matches the
			// collected data get their FSType refreshed.
			status.FSType = collected.DiskStat.Type
		}

		statuses[name] = status
	}
}

func (nc *NodeController) updateDiskStatusSchedulableCondition(node *longhorn.Node) error {
log := getLoggerForNode(nc.logger, node)

Expand Down
23 changes: 19 additions & 4 deletions controller/node_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ func (s *NodeControllerSuite) TestUpdateDiskStatus(c *C) {
StorageScheduled: 0,
StorageAvailable: 0,
Type: longhorn.DiskTypeFilesystem,
FSType: TestDiskPathFSType,
},
}
node2 := newNode(TestNode2, TestNamespace, true, longhorn.ConditionStatusUnknown, "")
Expand All @@ -482,7 +483,8 @@ func (s *NodeControllerSuite) TestUpdateDiskStatus(c *C) {
Conditions: []longhorn.Condition{
newNodeCondition(longhorn.DiskConditionTypeSchedulable, longhorn.ConditionStatusUnknown, ""),
},
Type: longhorn.DiskTypeFilesystem,
Type: longhorn.DiskTypeFilesystem,
FSType: TestDiskPathFSType,
},
}

Expand Down Expand Up @@ -555,6 +557,7 @@ func (s *NodeControllerSuite) TestUpdateDiskStatus(c *C) {
},
DiskUUID: TestDiskID1,
Type: longhorn.DiskTypeFilesystem,
FSType: TestDiskPathFSType,
},
},
},
Expand All @@ -570,7 +573,8 @@ func (s *NodeControllerSuite) TestUpdateDiskStatus(c *C) {
Conditions: []longhorn.Condition{
newNodeCondition(longhorn.DiskConditionTypeSchedulable, longhorn.ConditionStatusUnknown, ""),
},
Type: longhorn.DiskTypeFilesystem,
Type: longhorn.DiskTypeFilesystem,
FSType: TestDiskPathFSType,
},
},
},
Expand Down Expand Up @@ -627,6 +631,7 @@ func (s *NodeControllerSuite) TestCleanDiskStatus(c *C) {
StorageScheduled: 0,
StorageAvailable: 0,
Type: longhorn.DiskTypeFilesystem,
FSType: TestDiskPathFSType,
},
}

Expand Down Expand Up @@ -691,6 +696,7 @@ func (s *NodeControllerSuite) TestCleanDiskStatus(c *C) {
ScheduledReplica: map[string]int64{},
DiskUUID: TestDiskID1,
Type: longhorn.DiskTypeFilesystem,
FSType: TestDiskPathFSType,
},
},
},
Expand All @@ -704,6 +710,7 @@ func (s *NodeControllerSuite) TestCleanDiskStatus(c *C) {
StorageScheduled: 0,
StorageAvailable: 0,
Type: longhorn.DiskTypeFilesystem,
FSType: TestDiskPathFSType,
},
},
},
Expand Down Expand Up @@ -757,6 +764,7 @@ func (s *NodeControllerSuite) TestDisableDiskOnFilesystemChange(c *C) {
},
DiskUUID: "new-uuid",
Type: longhorn.DiskTypeFilesystem,
FSType: TestDiskPathFSType,
},
}

Expand All @@ -766,6 +774,7 @@ func (s *NodeControllerSuite) TestDisableDiskOnFilesystemChange(c *C) {
StorageScheduled: 0,
StorageAvailable: 0,
Type: longhorn.DiskTypeFilesystem,
FSType: TestDiskPathFSType,
},
}

Expand Down Expand Up @@ -830,6 +839,7 @@ func (s *NodeControllerSuite) TestDisableDiskOnFilesystemChange(c *C) {
ScheduledReplica: map[string]int64{},
DiskUUID: "new-uuid",
Type: longhorn.DiskTypeFilesystem,
FSType: TestDiskPathFSType,
},
},
},
Expand All @@ -843,6 +853,7 @@ func (s *NodeControllerSuite) TestDisableDiskOnFilesystemChange(c *C) {
StorageScheduled: 0,
StorageAvailable: 0,
Type: longhorn.DiskTypeFilesystem,
FSType: TestDiskPathFSType,
},
},
},
Expand Down Expand Up @@ -884,7 +895,8 @@ func (s *NodeControllerSuite) TestCreateDefaultInstanceManager(c *C) {
Conditions: []longhorn.Condition{
newNodeCondition(longhorn.DiskConditionTypeSchedulable, longhorn.ConditionStatusTrue, ""),
},
Type: longhorn.DiskTypeFilesystem,
Type: longhorn.DiskTypeFilesystem,
FSType: TestDiskPathFSType,
},
}

Expand Down Expand Up @@ -948,6 +960,7 @@ func (s *NodeControllerSuite) TestCreateDefaultInstanceManager(c *C) {
ScheduledReplica: map[string]int64{},
DiskUUID: TestDiskID1,
Type: longhorn.DiskTypeFilesystem,
FSType: TestDiskPathFSType,
},
},
},
Expand Down Expand Up @@ -1000,7 +1013,8 @@ func (s *NodeControllerSuite) TestCleanupRedundantInstanceManagers(c *C) {
Conditions: []longhorn.Condition{
newNodeCondition(longhorn.DiskConditionTypeSchedulable, longhorn.ConditionStatusTrue, ""),
},
Type: longhorn.DiskTypeFilesystem,
Type: longhorn.DiskTypeFilesystem,
FSType: TestDiskPathFSType,
},
}

Expand Down Expand Up @@ -1085,6 +1099,7 @@ func (s *NodeControllerSuite) TestCleanupRedundantInstanceManagers(c *C) {
ScheduledReplica: map[string]int64{},
DiskUUID: TestDiskID1,
Type: longhorn.DiskTypeFilesystem,
FSType: TestDiskPathFSType,
},
},
},
Expand Down
8 changes: 8 additions & 0 deletions scheduler/replica_scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package scheduler

import (
"fmt"
"strings"
"time"

"github.com/pkg/errors"
Expand Down Expand Up @@ -320,6 +321,13 @@ func (rcs *ReplicaScheduler) filterNodeDisksForReplica(node *longhorn.Node, disk
continue
}

volumeSize := volume.Spec.Size
// Skip disks whose file system cannot hold a replica file of this size.
// unix.Statfs cannot differentiate the ext2/ext3/ext4 file systems, so every
// "ext*" file system is checked against the ext4 limit.
if (strings.HasPrefix(diskStatus.FSType, "ext") && volumeSize >= util.MaxExt4VolumeSize) || (diskStatus.FSType == "xfs" && volumeSize >= util.MaxXfsVolumeSize) {
	// Report the file system type (FSType) that triggered the rejection, not the disk type.
	logrus.Debugf("Volume %v size %v is not compatible with the file system %v of the disk %v", volume.Name, volumeSize, diskStatus.FSType, diskName)
	continue
}

if requireSchedulingCheck {
info, err := rcs.GetDiskSchedulingInfo(diskSpec, diskStatus)
if err != nil {
Expand Down
10 changes: 10 additions & 0 deletions util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,13 @@ import (
)

const (
KiB = 1024
MiB = 1024 * KiB
GiB = 1024 * MiB
TiB = 1024 * GiB
PiB = 1024 * TiB
EiB = 1024 * PiB

VolumeStackPrefix = "volume-"
ControllerServiceName = "controller"
ReplicaServiceName = "replica"
Expand All @@ -67,6 +74,9 @@ const (
SizeAlignment = 2 * 1024 * 1024
MinimalVolumeSize = 10 * 1024 * 1024

MaxExt4VolumeSize = 16 * TiB
MaxXfsVolumeSize = 8*EiB - 1

RandomIDLenth = 8

DeterministicUUIDNamespace = "08958d54-65cd-4d87-8627-9831a1eab170" // Arbitrarily generated.
Expand Down

0 comments on commit a09d8e5

Please sign in to comment.