Skip to content

Commit bc28f1c

Browse files
committed
Add EROFS flatten device support (fsmerge feature)
Merge hundreds of sub-blobs (container image layers) into a single block device, so that a VM does not need a separate block device plugged in for every layer. Closes: #30 Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
1 parent bd41ef6 commit bc28f1c

File tree

5 files changed

+183
-7
lines changed

5 files changed

+183
-7
lines changed

internal/erofs/vmdk.go

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package erofs
18+
19+
import (
20+
"fmt"
21+
"io"
22+
"os"
23+
)
24+
25+
// Fixed parameters of the generated VMDK descriptor.
const (
	// max2GbExtentSectors caps a single extent line at 2 GiB (0x80000000
	// bytes), counted in 512-byte sectors.
	max2GbExtentSectors = (2 << 30) / 512

	// sectorsPerTrack and numberHeads are the geometry values from which
	// the cylinder count is derived.
	sectorsPerTrack = 63
	numberHeads     = 16

	// subformat is written as the descriptor's createType.
	subformat = "twoGbMaxExtentFlat"
	// adapterType is written as ddb.adapterType.
	adapterType = "ide"
	// hwVersion is written as ddb.virtualHWVersion.
	hwVersion = "4"
)
33+
34+
// vmdkDescAddExtent writes extent lines to the writer.
35+
// Each extent line follows the format: RW <count> FLAT "<filename>" <offset>
36+
func vmdkDescAddExtent(w io.Writer, sectors uint64, filename string, offset uint64) error {
37+
for sectors > 0 {
38+
count := min(sectors, max2GbExtentSectors)
39+
40+
_, err := fmt.Fprintf(w, "RW %d FLAT \"%s\" %d\n", count, filename, offset)
41+
if err != nil {
42+
return err
43+
}
44+
offset += count
45+
sectors -= count
46+
}
47+
return nil
48+
}
49+
50+
func DumpVMDKDescriptor(w io.Writer, cid uint32, devices []string) error {
51+
parentCID := uint32(0xffffffff)
52+
53+
_, err := fmt.Fprintf(w, `# Disk DescriptorFile
54+
version=1
55+
CID=%08x
56+
parentCID=%08x
57+
createType="%s"
58+
59+
# Extent description
60+
`, cid, parentCID, subformat)
61+
if err != nil {
62+
return err
63+
}
64+
65+
totalSectors := uint64(0)
66+
67+
for _, d := range devices {
68+
fi, err := os.Stat(d)
69+
if err != nil {
70+
return err
71+
}
72+
sectors := uint64(fi.Size()) >> 9
73+
err = vmdkDescAddExtent(w, sectors, d, 0)
74+
if err != nil {
75+
return err
76+
}
77+
totalSectors += sectors
78+
}
79+
80+
cylinders := (totalSectors + sectorsPerTrack*numberHeads - 1) / (sectorsPerTrack * numberHeads)
81+
_, err = fmt.Fprintf(w, `
82+
83+
# The Disk Data Base
84+
#DDB
85+
86+
ddb.virtualHWVersion = "%s"
87+
ddb.geometry.cylinders = "%d"
88+
ddb.geometry.heads = "%d"
89+
ddb.geometry.sectors = "63"
90+
ddb.adapterType = "%s"
91+
`, hwVersion, cylinders, numberHeads, adapterType)
92+
if err != nil {
93+
return err
94+
}
95+
return nil
96+
}
97+
98+
func DumpVMDKDescriptorToFile(vmdkdesc string, cid uint32, devices []string) error {
99+
f, err := os.Create(vmdkdesc)
100+
if err != nil {
101+
return err
102+
}
103+
err = DumpVMDKDescriptor(f, cid, devices)
104+
f.Close()
105+
return err
106+
}

internal/shim/task/mount.go

Lines changed: 53 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,23 @@ package task
1919
import (
2020
"context"
2121
"fmt"
22+
"os"
23+
"path/filepath"
2224
"strings"
2325

2426
"github.com/containerd/containerd/api/types"
2527
"github.com/containerd/errdefs"
2628
"github.com/containerd/log"
2729

30+
"github.com/containerd/nerdbox/internal/erofs"
2831
"github.com/containerd/nerdbox/internal/vm"
2932
)
3033

3134
// diskOptions describes one disk that transformMounts asks the VM instance
// to attach.
//
// name and source are handed to AddDisk as the disk name and backing path.
// readOnly and vmdk select the vm.WithReadOnly and vm.WithVmdk mount options
// respectively.
type diskOptions struct {
	name, source   string
	readOnly, vmdk bool
}
3640

3741
// transformMounts does not perform any local mounts but transforms
@@ -44,25 +48,63 @@ func transformMounts(ctx context.Context, vmi vm.Instance, id string, ms []*type
4448
err error
4549
)
4650

51+
log.G(ctx).Trace("transformMounts", ms)
4752
for _, m := range ms {
4853
switch m.Type {
4954
case "erofs":
55+
5056
disk := fmt.Sprintf("disk-%d-%s", disks, id)
5157
// virtiofs implementation has a limit of 36 characters for the tag
5258
if len(disk) > 36 {
5359
disk = disk[:36]
5460
}
55-
addDisks = append(addDisks, diskOptions{
56-
name: disk,
57-
source: m.Source,
58-
readOnly: true,
59-
})
61+
62+
var Options []string
63+
64+
devices := []string{m.Source}
65+
for _, o := range m.Options {
66+
if d, f := strings.CutPrefix(o, "device="); f {
67+
devices = append(devices, d)
68+
continue
69+
}
70+
Options = append(Options, o)
71+
}
72+
73+
if len(devices) > 1 {
74+
// generate VMDK desc for the EROFS flattened fs if it does not exist
75+
mergedfsPath := filepath.Dir(m.Source) + "/merged_fs.vmdk"
76+
if _, err := os.Stat(mergedfsPath); err != nil {
77+
if !os.IsNotExist(err) {
78+
log.G(ctx).Warnf("failed to stat %v: %v", mergedfsPath, err)
79+
return nil, errdefs.ErrNotImplemented
80+
}
81+
err = erofs.DumpVMDKDescriptorToFile(mergedfsPath, "fffffffe", devices)
82+
if err != nil {
83+
log.G(ctx).Warnf("failed to generate %v: %v", mergedfsPath, err)
84+
return nil, errdefs.ErrNotImplemented
85+
}
86+
}
87+
addDisks = append(addDisks, diskOptions{
88+
name: disk,
89+
source: mergedfsPath,
90+
readOnly: true,
91+
vmdk: true,
92+
})
93+
} else {
94+
addDisks = append(addDisks, diskOptions{
95+
name: disk,
96+
source: m.Source,
97+
readOnly: true,
98+
vmdk: false,
99+
})
100+
}
60101
am = append(am, &types.Mount{
61102
Type: "erofs",
62103
Source: fmt.Sprintf("/dev/vd%c", disks),
63104
Target: m.Target,
64-
Options: filterOptions(m.Options),
105+
Options: filterOptions(Options),
65106
})
107+
66108
disks++
67109
case "ext4":
68110
disk := fmt.Sprintf("disk-%d-%s", disks, id)
@@ -75,6 +117,7 @@ func transformMounts(ctx context.Context, vmi vm.Instance, id string, ms []*type
75117
name: disk,
76118
source: m.Source,
77119
readOnly: false,
120+
vmdk: false,
78121
})
79122
am = append(am, &types.Mount{
80123
Type: "ext4",
@@ -127,6 +170,10 @@ func transformMounts(ctx context.Context, vmi vm.Instance, id string, ms []*type
127170
if do.readOnly {
128171
opts = append(opts, vm.WithReadOnly())
129172
}
173+
if do.vmdk {
174+
opts = append(opts, vm.WithVmdk())
175+
}
176+
130177
if err := vmi.AddDisk(ctx, do.name, do.source, opts...); err != nil {
131178
return nil, err
132179
}

internal/vm/libkrun/instance.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,11 @@ func (v *vmInstance) AddDisk(ctx context.Context, blockID, mountPath string, opt
172172
o(&mc)
173173
}
174174

175-
if err := v.vmc.AddDisk(blockID, mountPath, mc.Readonly); err != nil {
175+
var dskFmt uint32 = 0
176+
if mc.Vmdk {
177+
dskFmt = 2
178+
}
179+
if err := v.vmc.AddDisk2(blockID, mountPath, dskFmt, mc.Readonly); err != nil {
176180
return fmt.Errorf("failed to add disk at '%s': %w", mountPath, err)
177181
}
178182

internal/vm/libkrun/krun.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,17 @@ func (vmc *vmcontext) AddDisk(blockID, path string, readonly bool) error {
156156
return nil
157157
}
158158

159+
func (vmc *vmcontext) AddDisk2(blockID, path string, diskFmt uint32, readonly bool) error {
160+
if vmc.lib.AddDisk2 == nil {
161+
return fmt.Errorf("libkrun not loaded")
162+
}
163+
ret := vmc.lib.AddDisk2(vmc.ctxID, blockID, path, diskFmt, readonly)
164+
if ret != 0 {
165+
return fmt.Errorf("krun_add_disk2 failed: %d", ret)
166+
}
167+
return nil
168+
}
169+
159170
func (vmc *vmcontext) AddNIC(endpoint string, mac net.HardwareAddr, mode vm.NetworkMode, features, flags uint32) error {
160171
if vmc.lib.AddNetUnixgram == nil || vmc.lib.AddNetUnixstream == nil {
161172
return fmt.Errorf("libkrun not loaded")
@@ -243,6 +254,7 @@ type libkrun struct {
243254
SetGvproxyPath func(ctxID uint32, path string) int32 `C:"krun_set_gvproxy_path"`
244255
SetNetMac func(ctxID uint32, mac []uint8) int32 `C:"krun_set_net_mac"`
245256
AddDisk func(ctxID uint32, blockId, path string, readonly bool) int32 `C:"krun_add_disk"`
257+
AddDisk2 func(ctxID uint32, blockId, path string, diskFmt uint32, readonly bool) int32 `C:"krun_add_disk2"`
246258
AddNetUnixstream func(ctxID uint32, path string, fd int, mac []uint8, features, flags uint32) int32 `C:"krun_add_net_unixstream"`
247259
AddNetUnixgram func(ctxID uint32, path string, fd int, mac []uint8, features, flags uint32) int32 `C:"krun_add_net_unixgram"`
248260

internal/vm/vm.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ func WithInitArgs(args ...string) StartOpt {
4949

5050
// MountConfig collects the settings that MountOpt functions apply to a disk
// being added. Readonly is set by WithReadOnly and Vmdk by WithVmdk; both
// default to false.
type MountConfig struct {
	Readonly, Vmdk bool
}
5354

5455
type MountOpt func(*MountConfig)
@@ -75,3 +76,9 @@ func WithReadOnly() MountOpt {
7576
o.Readonly = true
7677
}
7778
}
79+
80+
func WithVmdk() MountOpt {
81+
return func(o *MountConfig) {
82+
o.Vmdk = true
83+
}
84+
}

0 commit comments

Comments
 (0)