Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Fast Snapshot Restores #1554

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions docs/fast-snapshot-restores.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Fast Snapshot Restores

The EBS CSI Driver provides support for [Fast Snapshot Restores (FSR)](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-fast-snapshot-restore.html) via `VolumeSnapshotClass.parameters.fastSnapshotRestoreAvailabilityZones`.

Amazon EBS fast snapshot restore (FSR) enables you to create a volume from a snapshot that is fully initialized at creation. This eliminates the latency of I/O operations on a block when it is accessed for the first time. Volumes that are created using fast snapshot restore instantly deliver all of their provisioned performance.

Availability zones are specified as a comma-separated list. Spaces in the list are ignored.

**Example**
```yaml
apiVersion: snapshot.storage.k8s.io/v1
kind: VolumeSnapshotClass
metadata:
  name: csi-aws-vsc
driver: ebs.csi.aws.com
deletionPolicy: Delete
parameters:
  fastSnapshotRestoreAvailabilityZones: "us-east-1a, us-east-1b"
```

## Prerequisites

- Install the [Kubernetes Volume Snapshot CRDs](https://github.com/kubernetes-csi/external-snapshotter/tree/master/client/config/crd) and external-snapshotter sidecar. For installation instructions, see [CSI Snapshotter Usage](https://github.com/kubernetes-csi/external-snapshotter#usage).

- The EBS CSI Driver must be given permission to access the [`EnableFastSnapshotRestores` EC2 API](https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_EnableFastSnapshotRestores.html). This example snippet can be used in an IAM policy to grant access to `EnableFastSnapshotRestores`:

```json
{
"Effect": "Allow",
"Action": [
"ec2:EnableFastSnapshotRestores"
],
"Resource": "*"
}
```

## Failure Mode

Before creating the snapshot, the driver attempts to verify that every availability zone provided is valid for fast snapshot restore; if a zone is not recognized, the request is rejected and no snapshot is created. If the `EnableFastSnapshotRestores` API call fails after the snapshot has been created, the driver will hard-fail the request and delete the snapshot. This is to ensure that the snapshot is not left in an inconsistent state.
torredil marked this conversation as resolved.
Show resolved Hide resolved
31 changes: 31 additions & 0 deletions pkg/cloud/cloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -874,6 +874,24 @@ func (c *cloud) ec2SnapshotResponseToStruct(ec2Snapshot *ec2.Snapshot) *Snapshot
return snapshot
}

// EnableFastSnapshotRestores asks EC2 to enable fast snapshot restores for the
// snapshot identified by snapshotID in each of the given availability zones.
//
// If the API call itself fails, the response is nil and the API error is
// returned unchanged. If the call succeeds but the response lists any
// unsuccessful items, the response is returned together with an error that
// describes them. Otherwise the response is returned with a nil error.
func (c *cloud) EnableFastSnapshotRestores(ctx context.Context, availabilityZones []string, snapshotID string) (*ec2.EnableFastSnapshotRestoresOutput, error) {
	klog.V(4).InfoS("Creating Fast Snapshot Restores", "snapshotID", snapshotID, "availabilityZones", availabilityZones)

	input := &ec2.EnableFastSnapshotRestoresInput{
		AvailabilityZones: aws.StringSlice(availabilityZones),
		SourceSnapshotIds: aws.StringSlice([]string{snapshotID}),
	}

	output, err := c.ec2.EnableFastSnapshotRestoresWithContext(ctx, input)
	if err != nil {
		return nil, err
	}

	// A successful API call can still report per-snapshot failures.
	if failed := output.Unsuccessful; len(failed) > 0 {
		return output, fmt.Errorf("failed to create fast snapshot restores for snapshot %s: %v", snapshotID, failed)
	}
	return output, nil
}

func (c *cloud) getVolume(ctx context.Context, request *ec2.DescribeVolumesInput) (*ec2.Volume, error) {
var volumes []*ec2.Volume
var nextToken *string
Expand Down Expand Up @@ -1236,6 +1254,19 @@ func (c *cloud) randomAvailabilityZone(ctx context.Context) (string, error) {
return zones[0], nil
}

// AvailabilityZones returns the set of availability zone names reported by the
// EC2 DescribeAvailabilityZones API for this cloud's EC2 client.
//
// The result is a set keyed by zone name. Entries in the response that are nil
// or carry no zone name are skipped. If the API call fails, the error is
// wrapped and a nil map is returned.
func (c *cloud) AvailabilityZones(ctx context.Context) (map[string]struct{}, error) {
	response, err := c.ec2.DescribeAvailabilityZonesWithContext(ctx, &ec2.DescribeAvailabilityZonesInput{})
	if err != nil {
		return nil, fmt.Errorf("error describing availability zones: %w", err)
	}

	zones := make(map[string]struct{}, len(response.AvailabilityZones))
	for _, zone := range response.AvailabilityZones {
		// SDK response fields are pointers; skip incomplete entries instead of
		// panicking on a nil dereference.
		if zone == nil || zone.ZoneName == nil {
			continue
		}
		zones[*zone.ZoneName] = struct{}{}
	}
	return zones, nil
}

func volumeModificationDone(state string) bool {
if state == ec2.VolumeModificationStateCompleted || state == ec2.VolumeModificationStateOptimizing {
return true
Expand Down
2 changes: 2 additions & 0 deletions pkg/cloud/cloud_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,6 @@ type Cloud interface {
GetSnapshotByName(ctx context.Context, name string) (snapshot *Snapshot, err error)
GetSnapshotByID(ctx context.Context, snapshotID string) (snapshot *Snapshot, err error)
ListSnapshots(ctx context.Context, volumeID string, maxResults int64, nextToken string) (listSnapshotsResponse *ListSnapshotsResponse, err error)
EnableFastSnapshotRestores(ctx context.Context, availabilityZones []string, snapshotID string) (*ec2.EnableFastSnapshotRestoresOutput, error)
AvailabilityZones(ctx context.Context) (map[string]struct{}, error)
}
136 changes: 136 additions & 0 deletions pkg/cloud/cloud_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1061,6 +1061,142 @@ func TestCreateSnapshot(t *testing.T) {
}
}

// TestEnableFastSnapshotRestores exercises cloud.EnableFastSnapshotRestores
// against a mocked EC2 client. It covers three outcomes: a fully successful
// response, a response that reports unsuccessful items without an API error,
// and an API error.
func TestEnableFastSnapshotRestores(t *testing.T) {
	testCases := []struct {
		name              string
		snapshotID        string
		availabilityZones []string
		// expOutput is the response the mocked EC2 client returns.
		expOutput *ec2.EnableFastSnapshotRestoresOutput
		// mockErr is the error the mocked EC2 client returns. It is kept
		// separate from expErr so that the "unsuccessful response" case really
		// reaches the code path that inspects response.Unsuccessful.
		mockErr error
		// expErr is nil when success is expected; otherwise its message must
		// be contained in the error returned by EnableFastSnapshotRestores.
		expErr error
	}{
		{
			name:              "success: normal",
			snapshotID:        "snap-test-id",
			availabilityZones: []string{"us-west-2a", "us-west-2b"},
			expOutput: &ec2.EnableFastSnapshotRestoresOutput{
				Successful: []*ec2.EnableFastSnapshotRestoreSuccessItem{{
					AvailabilityZone: aws.String("us-west-2a,us-west-2b"),
					SnapshotId:       aws.String("snap-test-id")}},
				Unsuccessful: []*ec2.EnableFastSnapshotRestoreErrorItem{},
			},
			mockErr: nil,
			expErr:  nil,
		},
		{
			name:              "fail: unsuccessful response",
			snapshotID:        "snap-test-id",
			availabilityZones: []string{"us-west-2a", "invalid-zone"},
			expOutput: &ec2.EnableFastSnapshotRestoresOutput{
				Unsuccessful: []*ec2.EnableFastSnapshotRestoreErrorItem{{
					SnapshotId: aws.String("snap-test-id"),
					FastSnapshotRestoreStateErrors: []*ec2.EnableFastSnapshotRestoreStateErrorItem{
						{AvailabilityZone: aws.String("us-west-2a,invalid-zone"),
							Error: &ec2.EnableFastSnapshotRestoreStateError{
								Message: aws.String("failed to create fast snapshot restore")}},
					},
				}},
			},
			// The API call itself succeeds; the failure is carried in the response.
			mockErr: nil,
			expErr:  fmt.Errorf("failed to create fast snapshot restores for snapshot"),
		},
		{
			name:              "fail: error",
			snapshotID:        "",
			availabilityZones: nil,
			expOutput:         nil,
			mockErr:           fmt.Errorf("EnableFastSnapshotRestores error"),
			expErr:            fmt.Errorf("EnableFastSnapshotRestores error"),
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			mockCtrl := gomock.NewController(t)
			// Deferred so the expectations are still verified when the
			// subtest exits early through t.Fatalf.
			defer mockCtrl.Finish()
			mockEC2 := NewMockEC2(mockCtrl)
			c := newCloud(mockEC2)

			ctx := context.Background()
			// Exactly one call is required; AnyTimes would also accept zero calls.
			mockEC2.EXPECT().EnableFastSnapshotRestoresWithContext(gomock.Eq(ctx), gomock.Any()).Return(tc.expOutput, tc.mockErr).Times(1)

			response, err := c.EnableFastSnapshotRestores(ctx, tc.availabilityZones, tc.snapshotID)

			if tc.expErr != nil {
				if err == nil {
					t.Fatalf("EnableFastSnapshotRestores() failed: expected error %v, got nothing", tc.expErr)
				}
				if !strings.Contains(err.Error(), tc.expErr.Error()) {
					t.Fatalf("EnableFastSnapshotRestores() failed: expected error containing %q, got %v", tc.expErr.Error(), err)
				}
				return
			}

			if err != nil {
				t.Fatalf("EnableFastSnapshotRestores() failed: expected no error, got: %v", err)
			}
			if response == nil || len(response.Successful) == 0 || len(response.Unsuccessful) > 0 {
				t.Fatalf("EnableFastSnapshotRestores() failed: expected successful response, got %v", response)
			}
			if *response.Successful[0].SnapshotId != tc.snapshotID {
				t.Fatalf("EnableFastSnapshotRestores() failed: expected successful response to have SnapshotId %s, got %s", tc.snapshotID, *response.Successful[0].SnapshotId)
			}
			az := strings.Split(*response.Successful[0].AvailabilityZone, ",")
			if !reflect.DeepEqual(az, tc.availabilityZones) {
				t.Fatalf("EnableFastSnapshotRestores() failed: expected successful response to have AvailabilityZone %v, got %v", tc.availabilityZones, az)
			}
		})
	}
}

// TestAvailabilityZones exercises cloud.AvailabilityZones against a mocked EC2
// client. It checks that a zone name returned by DescribeAvailabilityZones
// appears in the resulting set, and that an API error is reported to the caller.
func TestAvailabilityZones(t *testing.T) {
	testCases := []struct {
		name string
		// availabilityZone is the zone name expected in the returned set.
		availabilityZone string
		// expOutput and expErr are what the mocked EC2 client returns.
		expOutput *ec2.DescribeAvailabilityZonesOutput
		expErr    error
	}{
		{
			name:             "success: normal",
			availabilityZone: expZone,
			expOutput: &ec2.DescribeAvailabilityZonesOutput{
				AvailabilityZones: []*ec2.AvailabilityZone{
					{ZoneName: aws.String(expZone)},
				}},
			expErr: nil,
		},
		{
			name:             "fail: error",
			availabilityZone: "",
			expOutput:        nil,
			expErr:           fmt.Errorf("TestAvailabilityZones error"),
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			mockCtrl := gomock.NewController(t)
			// Deferred so the expectations are still verified when the
			// subtest exits early through t.Fatalf.
			defer mockCtrl.Finish()
			mockEC2 := NewMockEC2(mockCtrl)
			c := newCloud(mockEC2)

			ctx := context.Background()
			// Exactly one call is required; AnyTimes would also accept zero calls.
			mockEC2.EXPECT().DescribeAvailabilityZonesWithContext(gomock.Eq(ctx), gomock.Any()).Return(tc.expOutput, tc.expErr).Times(1)

			az, err := c.AvailabilityZones(ctx)

			if tc.expErr != nil {
				if err == nil {
					t.Fatalf("AvailabilityZones() failed: expected error, got nothing")
				}
				// The implementation wraps the API error, so match on its message.
				if !strings.Contains(err.Error(), tc.expErr.Error()) {
					t.Fatalf("AvailabilityZones() failed: expected error containing %q, got %v", tc.expErr.Error(), err)
				}
				return
			}

			if err != nil {
				t.Fatalf("AvailabilityZones() failed: expected no error, got: %v", err)
			}
			if _, ok := az[tc.availabilityZone]; !ok {
				// Print the whole set; the map value is an empty struct and
				// carries no diagnostic information.
				t.Fatalf("AvailabilityZones() failed: expected to find %s, got %v", tc.availabilityZone, az)
			}
		})
	}
}

func TestDeleteSnapshot(t *testing.T) {
testCases := []struct {
name string
Expand Down
1 change: 1 addition & 0 deletions pkg/cloud/ec2_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,5 @@ type EC2 interface {
DescribeVolumesModificationsWithContext(ctx aws.Context, input *ec2.DescribeVolumesModificationsInput, opts ...request.Option) (*ec2.DescribeVolumesModificationsOutput, error)
DescribeAvailabilityZonesWithContext(ctx aws.Context, input *ec2.DescribeAvailabilityZonesInput, opts ...request.Option) (*ec2.DescribeAvailabilityZonesOutput, error)
CreateTagsWithContext(ctx aws.Context, input *ec2.CreateTagsInput, opts ...request.Option) (*ec2.CreateTagsOutput, error)
EnableFastSnapshotRestoresWithContext(ctx aws.Context, input *ec2.EnableFastSnapshotRestoresInput, opts ...request.Option) (*ec2.EnableFastSnapshotRestoresOutput, error)
}
30 changes: 30 additions & 0 deletions pkg/cloud/mock_cloud.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions pkg/cloud/mock_ec2.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions pkg/driver/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,12 @@ const (
TagKeyPrefix = "tagSpecification"
)

// constants of keys in snapshot parameters
const (
// FastSnapshotRestoreAvailabilityZones represents key for fast snapshot restore availability zones.
// The value is all lower-case because CreateSnapshot matches it against lower-cased parameter keys.
FastSnapshotRestoreAvailabilityZones = "fastsnapshotrestoreavailabilityzones"
)

// constants for volume tags and their values
const (
// ResourceLifecycleTagPrefix is prefix of tag for provisioned EBS volume that
Expand Down
32 changes: 30 additions & 2 deletions pkg/driver/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -605,15 +605,19 @@ func (d *controllerService) CreateSnapshot(ctx context.Context, req *csi.CreateS
}

var vscTags []string
var fsrAvailabilityZones []string
vsProps := new(template.VolumeSnapshotProps)
for key, value := range req.GetParameters() {
switch key {
switch strings.ToLower(key) {
case VolumeSnapshotNameKey:
vsProps.VolumeSnapshotName = value
case VolumeSnapshotNamespaceKey:
vsProps.VolumeSnapshotNamespace = value
case VolumeSnapshotContentNameKey:
vsProps.VolumeSnapshotContentName = value
case FastSnapshotRestoreAvailabilityZones:
f := strings.ReplaceAll(value, " ", "")
fsrAvailabilityZones = strings.Split(f, ",")
default:
if strings.HasPrefix(key, TagKeyPrefix) {
vscTags = append(vscTags, value)
Expand Down Expand Up @@ -649,11 +653,35 @@ func (d *controllerService) CreateSnapshot(ctx context.Context, req *csi.CreateS
Tags: snapshotTags,
}

snapshot, err = d.cloud.CreateSnapshot(ctx, volumeID, opts)
// Check if the availability zone is supported for fast snapshot restore
if len(fsrAvailabilityZones) > 0 {
zones, error := d.cloud.AvailabilityZones(ctx)
if error != nil {
klog.ErrorS(error, "failed to get availability zones")
} else {
klog.V(4).InfoS("Availability Zones", "zone", zones)
for _, az := range fsrAvailabilityZones {
if _, ok := zones[az]; !ok {
return nil, status.Errorf(codes.InvalidArgument, "Availability zone %s is not supported for fast snapshot restore", az)
}
}
}
}

snapshot, err = d.cloud.CreateSnapshot(ctx, volumeID, opts)
if err != nil {
return nil, status.Errorf(codes.Internal, "Could not create snapshot %q: %v", snapshotName, err)
}

if len(fsrAvailabilityZones) > 0 {
_, err := d.cloud.EnableFastSnapshotRestores(ctx, fsrAvailabilityZones, snapshot.SnapshotID)
if err != nil {
if _, err = d.cloud.DeleteSnapshot(ctx, snapshot.SnapshotID); err != nil {
return nil, status.Errorf(codes.Internal, "Could not delete snapshot ID %q: %v", snapshotName, err)
}
return nil, status.Errorf(codes.Internal, "Failed to create Fast Snapshot Restores for snapshot ID %q: %v", snapshotName, err)
}
}
return newCreateSnapshotResponse(snapshot)
}

Expand Down
Loading