Skip to content

Commit

Permalink
Merge pull request #12056 from stgraber/zfs-delegate
Browse files Browse the repository at this point in the history
Add support for ZFS delegation
  • Loading branch information
tomponline authored Jul 27, 2023
2 parents e16cc57 + 4bb5f1d commit 6fd175c
Show file tree
Hide file tree
Showing 22 changed files with 405 additions and 81 deletions.
4 changes: 4 additions & 0 deletions doc/api-extensions.md
Original file line number Diff line number Diff line change
Expand Up @@ -2269,3 +2269,7 @@ each `instance`, `network`, `network forward` and `network load-balancer`.
## `storage_api_remote_volume_snapshot_copy`

This allows copying storage volume snapshots to and from remotes.

## `zfs_delegate`
This adds a new Boolean `zfs.delegate` configuration key for volumes on storage pools that use the ZFS driver.
When enabled and a suitable system is in use (requires ZFS 2.2 or higher), the ZFS dataset will be delegated to the container, allowing for its use through the `zfs` command line tool.
1 change: 1 addition & 0 deletions doc/reference/storage_zfs.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ Key | Type | Condition | Default
`snapshots.schedule` | string | custom volume | same as `snapshots.schedule` | {{snapshot_schedule_format}}
`zfs.blocksize` | string | | same as `volume.zfs.blocksize` | Size of the ZFS block in range from 512 to 16 MiB (must be power of 2) - for block volume, a maximum value of 128 KiB will be used even if a higher value is set
`zfs.block_mode` | bool | | same as `volume.zfs.block_mode` | Whether to use a formatted `zvol` rather than a {spellexception}`dataset` (`zfs.block_mode` can be set only for custom storage volumes; use `volume.zfs.block_mode` to enable ZFS block mode for all storage volumes in the pool, including instance volumes)
`zfs.delegate` | bool | ZFS 2.2 or higher | same as `volume.zfs.delegate` | Controls whether to delegate the ZFS dataset and anything underneath it to the container(s) using it. Allows the use of the `zfs` command in the container.
`zfs.remove_snapshots` | bool | | same as `volume.zfs.remove_snapshots` or `false` | Remove snapshots as needed
`zfs.use_refquota` | bool | | same as `volume.zfs.use_refquota` or `false` | Use `refquota` instead of `quota` for space
`zfs.reserve_space` | bool | | same as `volume.zfs.reserve_space` or `false` | Use `reservation`/`refreservation` along with `quota`/`refquota`
Expand Down
6 changes: 3 additions & 3 deletions lxd/daemon_storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func daemonStorageMount(s *state.State) error {
}

// Mount volume.
err = pool.MountCustomVolume(project.Default, volumeName, nil)
_, err = pool.MountCustomVolume(project.Default, volumeName, nil)
if err != nil {
return fmt.Errorf("Failed to mount storage volume %q: %w", source, err)
}
Expand Down Expand Up @@ -191,7 +191,7 @@ func daemonStorageValidate(s *state.State, target string) error {
}

// Mount volume.
err = pool.MountCustomVolume(project.Default, volumeName, nil)
_, err = pool.MountCustomVolume(project.Default, volumeName, nil)
if err != nil {
return fmt.Errorf("Failed to mount storage volume %q: %w", target, err)
}
Expand Down Expand Up @@ -317,7 +317,7 @@ func daemonStorageMove(s *state.State, storageType string, target string) error
}

// Mount volume.
err = pool.MountCustomVolume(project.Default, volumeName, nil)
_, err = pool.MountCustomVolume(project.Default, volumeName, nil)
if err != nil {
return fmt.Errorf("Failed to mount storage volume %q: %w", target, err)
}
Expand Down
49 changes: 32 additions & 17 deletions lxd/device/disk.go
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ func (d *disk) Register() error {
}

// Try to mount the volume that should already be mounted to reinitialise the ref counter.
err = d.pool.MountCustomVolume(storageProjectName, d.config["source"], nil)
_, err = d.pool.MountCustomVolume(storageProjectName, d.config["source"], nil)
if err != nil {
return err
}
Expand Down Expand Up @@ -604,13 +604,26 @@ func (d *disk) startContainer() (*deviceConfig.RunConfig, error) {
if d.config["pool"] != "" {
var err error
var revertFunc func()
var mountInfo *storagePools.MountInfo

revertFunc, srcPath, err = d.mountPoolVolume()
revertFunc, srcPath, mountInfo, err = d.mountPoolVolume()
if err != nil {
return nil, diskSourceNotFoundError{msg: "Failed mounting volume", err: err}
}

revert.Add(revertFunc)

// Handle post hooks.
runConf.PostHooks = append(runConf.PostHooks, func() error {
for _, hook := range mountInfo.PostHooks {
err := hook(d.inst)
if err != nil {
return err
}
}

return nil
})
}

// Mount the source in the instance devices directory.
Expand Down Expand Up @@ -821,7 +834,7 @@ func (d *disk) startVM() (*deviceConfig.RunConfig, error) {
return &runConf, nil
}

revertFunc, mount.DevPath, err = d.mountPoolVolume()
revertFunc, mount.DevPath, _, err = d.mountPoolVolume()
if err != nil {
return nil, diskSourceNotFoundError{msg: "Failed mounting volume", err: err}
}
Expand Down Expand Up @@ -1208,11 +1221,13 @@ func (w *cgroupWriter) Set(version cgroup.Backend, controller string, key string
}

// mountPoolVolume mounts the pool volume specified in d.config["source"] from pool specified in d.config["pool"]
// and return the mount path. If the instance type is container volume will be shifted if needed.
func (d *disk) mountPoolVolume() (func(), string, error) {
// and returns the mount path and MountInfo struct. If the instance type is container, the volume will be shifted if needed.
func (d *disk) mountPoolVolume() (func(), string, *storagePools.MountInfo, error) {
revert := revert.New()
defer revert.Fail()

var mountInfo *storagePools.MountInfo

// Deal with mounting storage volumes created via the storage api. Extract the name of the storage volume
// that we are supposed to attach. We assume that the only syntactically valid ways of specifying a
// storage volume are:
Expand All @@ -1221,7 +1236,7 @@ func (d *disk) mountPoolVolume() (func(), string, error) {
// Currently, <type> must either be empty or "custom".
// We do not yet support instance mounts.
if filepath.IsAbs(d.config["source"]) {
return nil, "", fmt.Errorf(`When the "pool" property is set "source" must specify the name of a volume, not a path`)
return nil, "", nil, fmt.Errorf(`When the "pool" property is set "source" must specify the name of a volume, not a path`)
}

volumeTypeName := ""
Expand All @@ -1239,31 +1254,31 @@ func (d *disk) mountPoolVolume() (func(), string, error) {
// Check volume type name is custom.
switch volumeTypeName {
case db.StoragePoolVolumeTypeNameContainer:
return nil, "", fmt.Errorf("Using instance storage volumes is not supported")
return nil, "", nil, fmt.Errorf("Using instance storage volumes is not supported")
case "":
// We simply received the name of a storage volume.
volumeTypeName = db.StoragePoolVolumeTypeNameCustom
fallthrough
case db.StoragePoolVolumeTypeNameCustom:
break
case db.StoragePoolVolumeTypeNameImage:
return nil, "", fmt.Errorf("Using image storage volumes is not supported")
return nil, "", nil, fmt.Errorf("Using image storage volumes is not supported")
default:
return nil, "", fmt.Errorf("Unknown storage type prefix %q found", volumeTypeName)
return nil, "", nil, fmt.Errorf("Unknown storage type prefix %q found", volumeTypeName)
}

// Only custom volumes can be attached currently.
storageProjectName, err := project.StorageVolumeProject(d.state.DB.Cluster, d.inst.Project().Name, db.StoragePoolVolumeTypeCustom)
if err != nil {
return nil, "", err
return nil, "", nil, err
}

volStorageName := project.StorageVolume(storageProjectName, volumeName)
srcPath = storageDrivers.GetVolumeMountPath(d.config["pool"], storageDrivers.VolumeTypeCustom, volStorageName)

err = d.pool.MountCustomVolume(storageProjectName, volumeName, nil)
mountInfo, err = d.pool.MountCustomVolume(storageProjectName, volumeName, nil)
if err != nil {
return nil, "", fmt.Errorf("Failed mounting storage volume %q of type %q on storage pool %q: %w", volumeName, volumeTypeName, d.pool.Name(), err)
return nil, "", nil, fmt.Errorf("Failed mounting storage volume %q of type %q on storage pool %q: %w", volumeName, volumeTypeName, d.pool.Name(), err)
}

revert.Add(func() { _, _ = d.pool.UnmountCustomVolume(storageProjectName, volumeName, nil) })
Expand All @@ -1274,30 +1289,30 @@ func (d *disk) mountPoolVolume() (func(), string, error) {
return err
})
if err != nil {
return nil, "", fmt.Errorf("Failed to fetch local storage volume record: %w", err)
return nil, "", nil, fmt.Errorf("Failed to fetch local storage volume record: %w", err)
}

if d.inst.Type() == instancetype.Container {
if dbVolume.ContentType == db.StoragePoolVolumeContentTypeNameFS {
err = d.storagePoolVolumeAttachShift(storageProjectName, d.pool.Name(), volumeName, db.StoragePoolVolumeTypeCustom, srcPath)
if err != nil {
return nil, "", fmt.Errorf("Failed shifting storage volume %q of type %q on storage pool %q: %w", volumeName, volumeTypeName, d.pool.Name(), err)
return nil, "", nil, fmt.Errorf("Failed shifting storage volume %q of type %q on storage pool %q: %w", volumeName, volumeTypeName, d.pool.Name(), err)
}
} else {
return nil, "", fmt.Errorf("Only filesystem volumes are supported for containers")
return nil, "", nil, fmt.Errorf("Only filesystem volumes are supported for containers")
}
}

if dbVolume.ContentType == db.StoragePoolVolumeContentTypeNameBlock || dbVolume.ContentType == db.StoragePoolVolumeContentTypeNameISO {
srcPath, err = d.pool.GetCustomVolumeDisk(storageProjectName, volumeName)
if err != nil {
return nil, "", fmt.Errorf("Failed to get disk path: %w", err)
return nil, "", nil, fmt.Errorf("Failed to get disk path: %w", err)
}
}

cleanup := revert.Clone().Fail // Clone before calling revert.Success() so we can return the Fail func.
revert.Success()
return cleanup, srcPath, err
return cleanup, srcPath, mountInfo, err
}

// createDevice creates a disk device mount on host.
Expand Down
26 changes: 24 additions & 2 deletions lxd/instance/drivers/driver_lxc.go
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,11 @@ func (d *lxc) initLXC(config bool) (*liblxc.Container, error) {
"/sys/kernel/tracing",
}

// Pass in /dev/zfs to the container if delegation is supported on the system.
if storageDrivers.ZFSSupportsDelegation() && shared.PathExists("/dev/zfs") {
bindMounts = append(bindMounts, "/dev/zfs")
}

if d.IsPrivileged() && !d.state.OS.RunningInUserNS {
err = lxcSetConfigItem(cc, "lxc.mount.entry", "mqueue dev/mqueue mqueue rw,relatime,create=dir,optional 0 0")
if err != nil {
Expand Down Expand Up @@ -867,6 +872,10 @@ func (d *lxc) initLXC(config bool) (*liblxc.Container, error) {
"c 10:200 rwm", // /dev/net/tun
}

if storageDrivers.ZFSSupportsDelegation() {
devices = append(devices, "c 10:249 rwm")
}

for _, dev := range devices {
if d.state.OS.CGInfo.Layout == cgroup.CgroupsUnified {
err = lxcSetConfigItem(cc, "lxc.cgroup2.devices.allow", dev)
Expand Down Expand Up @@ -1877,6 +1886,8 @@ func (d *lxc) handleIdmappedStorage() (idmap.IdmapStorageType, *idmap.IdmapSet,

// Start functions.
func (d *lxc) startCommon() (string, []func() error, error) {
postStartHooks := []func() error{}

revert := revert.New()
defer revert.Fail()

Expand Down Expand Up @@ -1919,11 +1930,23 @@ func (d *lxc) startCommon() (string, []func() error, error) {
d.stopForkfile(false)

// Mount instance root volume.
_, err = d.mount()
mountInfo, err := d.mount()
if err != nil {
return "", nil, err
}

// Handle post hooks.
postStartHooks = append(postStartHooks, func() error {
for _, hook := range mountInfo.PostHooks {
err := hook(d)
if err != nil {
return err
}
}

return nil
})

revert.Add(func() { _ = d.unmount() })

idmapType, nextIdmap, err := d.handleIdmappedStorage()
Expand Down Expand Up @@ -1997,7 +2020,6 @@ func (d *lxc) startCommon() (string, []func() error, error) {
}

// Create the devices
postStartHooks := []func() error{}
nicID := -1
nvidiaDevices := []string{}

Expand Down
44 changes: 40 additions & 4 deletions lxd/storage/backend_lxd.go
Original file line number Diff line number Diff line change
Expand Up @@ -2674,6 +2674,21 @@ func (b *lxdBackend) MountInstance(inst instance.Instance, op *operations.Operat
}

revert.Success() // From here on it is up to caller to call UnmountInstance() when done.

// Handle delegation.
if b.driver.CanDelegateVolume(vol) {
mountInfo.PostHooks = append(mountInfo.PostHooks, func(inst instance.Instance) error {
pid := inst.InitPID()

// Only apply to running instances.
if pid < 1 {
return nil
}

return b.driver.DelegateVolume(vol, pid)
})
}

return mountInfo, nil
}

Expand Down Expand Up @@ -5243,26 +5258,47 @@ func (b *lxdBackend) GetCustomVolumeUsage(projectName, volName string) (*VolumeU
}

// MountCustomVolume mounts a custom volume.
// It looks up the volume's database record in the given project, mounts it through the storage driver
// and returns a MountInfo. When the driver reports that the volume can be delegated, the returned
// MountInfo carries a post hook that delegates the volume to the instance's init process.
// The hook is not run here; it is only appended to MountInfo.PostHooks for the caller to invoke.
func (b *lxdBackend) MountCustomVolume(projectName, volName string, op *operations.Operation) error {
func (b *lxdBackend) MountCustomVolume(projectName, volName string, op *operations.Operation) (*MountInfo, error) {
l := b.logger.AddContext(logger.Ctx{"project": projectName, "volName": volName})
l.Debug("MountCustomVolume started")
defer l.Debug("MountCustomVolume finished")

// Bail out early if the backend status check fails.
err := b.isStatusReady()
if err != nil {
return err
return nil, err
}

// Load the volume's database record; its content type and config are needed to build the driver volume.
volume, err := VolumeDBGet(b, projectName, volName, drivers.VolumeTypeCustom)
if err != nil {
return err
return nil, err
}

// Get the volume name on storage.
volStorageName := project.StorageVolume(projectName, volName)
vol := b.GetVolume(drivers.VolumeTypeCustom, drivers.ContentType(volume.ContentType), volStorageName, volume.Config)

return b.driver.MountVolume(vol, op)
// Perform the mount.
mountInfo := &MountInfo{}
err = b.driver.MountVolume(vol, op)
if err != nil {
return nil, err
}

// Handle delegation.
// The hook receives the instance at call time, so the init PID is read when the hook runs, not at mount time.
if b.driver.CanDelegateVolume(vol) {
mountInfo.PostHooks = append(mountInfo.PostHooks, func(inst instance.Instance) error {
pid := inst.InitPID()

// Only apply to running instances.
if pid < 1 {
return nil
}

return b.driver.DelegateVolume(vol, pid)
})
}

return mountInfo, nil
}

// UnmountCustomVolume unmounts a custom volume.
Expand Down
4 changes: 2 additions & 2 deletions lxd/storage/backend_mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,8 @@ func (b *mockBackend) GetCustomVolumeUsage(projectName string, volName string) (
return nil, nil
}

func (b *mockBackend) MountCustomVolume(projectName string, volName string, op *operations.Operation) error {
return nil
// MountCustomVolume is the mock implementation of custom volume mounting.
// It performs no work and returns an empty, non-nil MountInfo together with a nil error, so that
// callers which read MountInfo.PostHooks after a successful mount do not dereference a nil pointer.
func (b *mockBackend) MountCustomVolume(projectName string, volName string, op *operations.Operation) (*MountInfo, error) {
	return &MountInfo{}, nil
}

func (b *mockBackend) UnmountCustomVolume(projectName string, volName string, op *operations.Operation) (bool, error) {
Expand Down
2 changes: 1 addition & 1 deletion lxd/storage/drivers/driver_ceph_volumes.go
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,7 @@ func (d *ceph) CreateVolumeFromMigration(vol Volume, conn io.ReadWriteCloser, vo
return err
}

// Handle zfs send/receive migration.
// Handle rbd migration.
if len(volTargetArgs.Snapshots) > 0 {
// Create the parent directory.
err := createParentSnapshotDirIfMissing(d.name, vol.volType, vol.name)
Expand Down
10 changes: 10 additions & 0 deletions lxd/storage/drivers/driver_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,16 @@ func (d *common) UnmountVolume(vol Volume, keepBlockDev bool, op *operations.Ope
return false, ErrNotSupported
}

// CanDelegateVolume checks whether the volume can be delegated.
// This default implementation ignores the volume and always reports false.
// NOTE(review): presumably drivers that support delegation (e.g. ZFS) provide their own version — confirm against the driver.
func (d *common) CanDelegateVolume(vol Volume) bool {
return false
}

// DelegateVolume delegates a volume.
// This default implementation is a no-op: it ignores both the volume and the pid and returns nil.
// The storage backend passes the instance's init PID as pid, and only calls this when CanDelegateVolume
// reports true for the volume.
func (d *common) DelegateVolume(vol Volume, pid int) error {
return nil
}

// RenameVolume renames the volume and all related filesystem entries.
func (d *common) RenameVolume(vol Volume, newVolName string, op *operations.Operation) error {
return ErrNotSupported
Expand Down
Loading

0 comments on commit 6fd175c

Please sign in to comment.