Skip to content

Commit

Permalink
UPSTREAM: 1919: Add reserved-volume-attachments
Browse files Browse the repository at this point in the history
Pick both commits in kubernetes-sigs#1919 to OCP.

Introduce --reserved-volume-attachments cmd line option that disables
loading list of volume attachments from instance metadata.

Instance metadata has volume attachments that were present on instance
boot, which may be different than the current nr. of attachments. In
addition, it may include CSI volumes that were not drained before reboot,
and thus the value may be completely wrong.

--reserved-volume-attachments simply reserves given nr. of attachments for
any system disks. Typically it should be 1 for the root volume.
  • Loading branch information
jsafrane committed Apr 29, 2024
1 parent 2e2e277 commit f6e6350
Show file tree
Hide file tree
Showing 9 changed files with 376 additions and 175 deletions.
1 change: 1 addition & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ func main() {
driver.WithExtraVolumeTags(options.ControllerOptions.ExtraVolumeTags),
driver.WithMode(options.DriverMode),
driver.WithVolumeAttachLimit(options.NodeOptions.VolumeAttachLimit),
driver.WithReservedVolumeAttachments(options.NodeOptions.ReservedVolumeAttachments),
driver.WithKubernetesClusterID(options.ControllerOptions.KubernetesClusterID),
driver.WithAwsSdkDebugLog(options.ControllerOptions.AwsSdkDebugLog),
driver.WithWarnOnInvalidTag(options.ControllerOptions.WarnOnInvalidTag),
Expand Down
8 changes: 8 additions & 0 deletions cmd/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,14 @@ func GetOptions(fs *flag.FlagSet) *Options {
klog.ErrorS(err, "failed to validate and apply logging configuration")
}

if mode != driver.ControllerMode {
// nodeOptions must have been populated from the cmdline, validate them.
if err := nodeOptions.Validate(); err != nil {
klog.Error(err.Error())
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}
}

if *version {
versionInfo, err := driver.GetVersionJSON()
if err != nil {
Expand Down
19 changes: 18 additions & 1 deletion cmd/options/node_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ limitations under the License.
package options

import (
"fmt"

flag "github.com/spf13/pflag"
)

Expand All @@ -31,8 +33,23 @@ type NodeOptions struct {
// itself (dynamically discovering the maximum number of attachable volume per EC2 machine type, see also
// https://github.com/kubernetes-sigs/aws-ebs-csi-driver/issues/347).
VolumeAttachLimit int64

// ReservedVolumeAttachments specifies number of volume attachments reserved for system use.
// Typically 1 for the root disk, but may be larger when more system disks are attached to nodes.
// This option is not used when --volume-attach-limit is specified.
// When -1, the amount of reserved attachments is loaded from instance metadata that captured state at node boot
// and may include not only system disks but also CSI volumes (and therefore it may be wrong).
ReservedVolumeAttachments int
}

func (o *NodeOptions) AddFlags(fs *flag.FlagSet) {
fs.Int64Var(&o.VolumeAttachLimit, "volume-attach-limit", -1, "Value for the maximum number of volumes attachable per node. If specified, the limit applies to all nodes. If not specified, the value is approximated from the instance type.")
fs.Int64Var(&o.VolumeAttachLimit, "volume-attach-limit", -1, "Value for the maximum number of volumes attachable per node. If specified, the limit applies to all nodes and overrides --reserved-volume-attachments. If not specified, the value is approximated from the instance type.")
fs.IntVar(&o.ReservedVolumeAttachments, "reserved-volume-attachments", -1, "Number of volume attachments reserved for system use. Not used when --volume-attach-limit is specified. The total amount of volume attachments for a node is computed as: <nr. of attachments for corresponding instance type> - <number of NICs, if relevant to the instance type> - <reserved-volume-attachments value>. When -1, the amount of reserved attachments is loaded from instance metadata that captured state at node boot and may include not only system disks but also CSI volumes.")
}

func (o *NodeOptions) Validate() error {
if o.VolumeAttachLimit != -1 && o.ReservedVolumeAttachments != -1 {
return fmt.Errorf("only one of --volume-attach-limit and --reserved-volume-attachments may be specified")
}
return nil
}
53 changes: 53 additions & 0 deletions cmd/options/node_options_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,56 @@ func TestNodeOptions(t *testing.T) {
})
}
}

func TestValidate(t *testing.T) {
testCases := []struct {
name string
options *NodeOptions
expectError bool
}{
{
name: "valid VolumeAttachLimit",
options: &NodeOptions{
VolumeAttachLimit: 42,
ReservedVolumeAttachments: -1,
},
expectError: false,
},
{
name: "valid ReservedVolumeAttachments",
options: &NodeOptions{
VolumeAttachLimit: -1,
ReservedVolumeAttachments: 42,
},
expectError: false,
},
{
name: "default options",
options: &NodeOptions{
VolumeAttachLimit: -1,
ReservedVolumeAttachments: -1,
},
expectError: false,
},
{
name: "both options set",
options: &NodeOptions{
VolumeAttachLimit: 1,
ReservedVolumeAttachments: 1,
},
expectError: true,
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
err := tc.options.Validate()
if tc.expectError && err == nil {
t.Errorf("Expected error, got nil")
}
if !tc.expectError && err != nil {
t.Errorf("Unexpected error: %v", err)
}
})
}
}
44 changes: 44 additions & 0 deletions cmd/options_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"testing"

flag "github.com/spf13/pflag"
"k8s.io/klog/v2"

"github.com/kubernetes-sigs/aws-ebs-csi-driver/pkg/driver"
)
Expand Down Expand Up @@ -51,6 +52,9 @@ func TestGetOptions(t *testing.T) {
awsSdkDebugFlagValue := true
VolumeAttachLimitFlagName := "volume-attach-limit"
var VolumeAttachLimit int64 = 42
reservedVolumeAttachmentsFlagName := "reserved-volume-attachments"
reservedVolumeAttachments := -1


args := append([]string{
"aws-ebs-csi-driver",
Expand Down Expand Up @@ -107,6 +111,13 @@ func TestGetOptions(t *testing.T) {
if options.NodeOptions.VolumeAttachLimit != VolumeAttachLimit {
t.Fatalf("expected VolumeAttachLimit to be %d but it is %d", VolumeAttachLimit, options.NodeOptions.VolumeAttachLimit)
}
reservedVolumeAttachmentsFlag := flagSet.Lookup(reservedVolumeAttachmentsFlagName)
if reservedVolumeAttachmentsFlag == nil {
t.Fatalf("expected %q flag to be added but it is not", reservedVolumeAttachmentsFlagName)
}
if options.NodeOptions.ReservedVolumeAttachments != reservedVolumeAttachments {
t.Fatalf("expected reservedVolumeAttachmentsFlagName to be %d but it is %d", reservedVolumeAttachments, options.NodeOptions.ReservedVolumeAttachments)
}
}

return options
Expand Down Expand Up @@ -188,6 +199,39 @@ func TestGetOptions(t *testing.T) {
}
},
},
{
name: "both volume-attach-limit and reserved-volume-attachments specified",
testFunc: func(t *testing.T) {
oldOSExit := klog.OsExit
defer func() { klog.OsExit = oldOSExit }()

var exitCode int
calledExit := false
testExit := func(code int) {
exitCode = code
calledExit = true
}
klog.OsExit = testExit

oldArgs := os.Args
defer func() { os.Args = oldArgs }()
os.Args = []string{
"aws-ebs-csi-driver",
"--volume-attach-limit=10",
"--reserved-volume-attachments=10",
}

flagSet := flag.NewFlagSet("test-flagset", flag.ContinueOnError)
_ = GetOptions(flagSet)

if exitCode != 1 {
t.Fatalf("expected exit code 1 but got %d", exitCode)
}
if !calledExit {
t.Fatalf("expect osExit to be called, but wasn't")
}
},
},
}

for _, tc := range testCases {
Expand Down
21 changes: 14 additions & 7 deletions pkg/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,14 @@ type Driver struct {
}

type DriverOptions struct {
endpoint string
extraTags map[string]string
mode Mode
volumeAttachLimit int64
kubernetesClusterID string
awsSdkDebugLog bool
warnOnInvalidTag bool
endpoint string
extraTags map[string]string
mode Mode
volumeAttachLimit int64
reservedVolumeAttachments int
kubernetesClusterID string
awsSdkDebugLog bool
warnOnInvalidTag bool
}

func NewDriver(options ...func(*DriverOptions)) (*Driver, error) {
Expand Down Expand Up @@ -181,6 +182,12 @@ func WithVolumeAttachLimit(volumeAttachLimit int64) func(*DriverOptions) {
}
}

func WithReservedVolumeAttachments(reservedVolumeAttachments int) func(*DriverOptions) {
return func(o *DriverOptions) {
o.reservedVolumeAttachments = reservedVolumeAttachments
}
}

func WithKubernetesClusterID(clusterID string) func(*DriverOptions) {
return func(o *DriverOptions) {
o.kubernetesClusterID = clusterID
Expand Down
9 changes: 9 additions & 0 deletions pkg/driver/driver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,15 @@ func TestWithVolumeAttachLimit(t *testing.T) {
}
}

func TestWithVolumeAttachLimitFromMetadata(t *testing.T) {
value := 10
options := &DriverOptions{}
WithReservedVolumeAttachments(value)(options)
if options.reservedVolumeAttachments != value {
t.Fatalf("expected reservedVolumeAttachments option got set to %d but is set to %d", value, options.reservedVolumeAttachments)
}
}

func TestWithClusterID(t *testing.T) {
var id string = "test-cluster-id"
options := &DriverOptions{}
Expand Down
9 changes: 7 additions & 2 deletions pkg/driver/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -739,15 +739,20 @@ func (d *nodeService) getVolumesLimit() int64 {

isNitro := cloud.IsNitroInstanceType(instanceType)
availableAttachments := cloud.GetMaxAttachments(isNitro)
blockVolumes := d.metadata.GetNumBlockDeviceMappings()

reservedVolumeAttachments := d.driverOptions.reservedVolumeAttachments
if reservedVolumeAttachments == -1 {
reservedVolumeAttachments = d.metadata.GetNumBlockDeviceMappings() + 1 // +1 for the root device
}


// For Nitro instances, attachments are shared between EBS volumes, ENIs and NVMe instance stores
if isNitro {
enis := d.metadata.GetNumAttachedENIs()
nvmeInstanceStoreVolumes := cloud.GetNVMeInstanceStoreVolumesForInstanceType(instanceType)
availableAttachments = availableAttachments - enis - nvmeInstanceStoreVolumes
}
availableAttachments = availableAttachments - blockVolumes - 1 // -1 for root device
availableAttachments = availableAttachments - reservedVolumeAttachments
if availableAttachments < 0 {
availableAttachments = 0
}
Expand Down
Loading

0 comments on commit f6e6350

Please sign in to comment.