Skip to content

Commit 46d281b

Browse files
hiroTamadaclaudehiroTamadacursoragent
authored
feat: channel-based image notifications + erofs default (#105)
* feat: replace polling with channel-based notifications and default to erofs Replace the 500ms polling loop in waitForImageReady() with a channel-based pub/sub notification system on the image manager, reducing build-to-SSE lag. Switch the default image format from ext4 to erofs (LZ4-compressed read-only filesystem) for faster, smaller rootfs images. The VM init mounts erofs first with an ext4 fallback for backward compatibility with legacy images. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * feat: replace polling with channel-based notifications and default to erofs Replace the 500ms polling loop in waitForImageReady() with a channel-based pub/sub notification system on the image manager, reducing build-to-SSE lag. Switch the default image format from ext4 to erofs (LZ4-compressed read-only filesystem) for faster, smaller rootfs images. The VM init mounts erofs first with an ext4 fallback for backward compatibility with legacy images. Log which filesystem type (erofs or ext4) was actually mounted so operators can verify erofs is being used and diagnose fallback scenarios. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: use ext4 on Darwin where VZ kernel lacks erofs support Make DefaultImageFormat platform-aware: - Linux: erofs (compressed, smaller images) - Darwin: ext4 (VZ kernel doesn't have erofs support) This fixes the Darwin CI failure where VMs couldn't mount erofs rootfs. Co-authored-by: Cursor <cursoragent@cursor.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: hiroTamada <hiro@kernel.sh> Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent eb40457 commit 46d281b

9 files changed

Lines changed: 232 additions & 55 deletions

File tree

lib/builds/manager.go

Lines changed: 5 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -948,42 +948,20 @@ func (m *manager) updateBuildComplete(id string, status string, digest *string,
948948
m.notifyStatusChange(id, status)
949949
}
950950

951-
// waitForImageReady polls the image manager until the build's image is ready.
951+
// waitForImageReady blocks until the build's image reaches a terminal state.
952952
// imageRef should be the short repo name (e.g., "builds/abc123" or "myapp")
953953
// matching what triggerConversion stores in the image manager.
954954
// This ensures that when a build reports "ready", the image is actually usable
955955
// for instance creation (fixes KERNEL-863 race condition).
956956
func (m *manager) waitForImageReady(ctx context.Context, imageRef string) error {
957-
// Poll for up to 60 seconds (image conversion is typically fast)
958-
const maxAttempts = 120
959-
const pollInterval = 500 * time.Millisecond
960-
961957
m.logger.Debug("waiting for image to be ready", "image_ref", imageRef)
962958

963-
for attempt := 0; attempt < maxAttempts; attempt++ {
964-
select {
965-
case <-ctx.Done():
966-
return ctx.Err()
967-
default:
968-
}
969-
970-
img, err := m.imageManager.GetImage(ctx, imageRef)
971-
if err == nil {
972-
switch img.Status {
973-
case images.StatusReady:
974-
m.logger.Debug("image is ready", "image_ref", imageRef, "attempts", attempt+1)
975-
return nil
976-
case images.StatusFailed:
977-
return fmt.Errorf("image conversion failed")
978-
case images.StatusPending, images.StatusPulling, images.StatusConverting:
979-
// Still processing, continue polling
980-
}
981-
}
982-
// Image not found or still processing, wait and retry
983-
time.Sleep(pollInterval)
959+
if err := m.imageManager.WaitForReady(ctx, imageRef); err != nil {
960+
return err
984961
}
985962

986-
return fmt.Errorf("timeout waiting for image to be ready after %v", time.Duration(maxAttempts)*pollInterval)
963+
m.logger.Debug("image is ready", "image_ref", imageRef)
964+
return nil
987965
}
988966

989967
// subscribeToStatus adds a subscriber channel for status updates on a build

lib/builds/manager_test.go

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@ package builds
33
import (
44
"context"
55
"encoding/json"
6+
"fmt"
67
"io"
78
"log/slog"
89
"os"
910
"path/filepath"
11+
"sync"
1012
"testing"
1113
"time"
1214

@@ -237,6 +239,7 @@ func (m *mockSecretProvider) GetSecrets(ctx context.Context, secretIDs []string)
237239

238240
// mockImageManager implements images.Manager for testing
239241
type mockImageManager struct {
242+
mu sync.RWMutex
240243
images map[string]*images.Image
241244
getImageErr error
242245
}
@@ -274,11 +277,15 @@ func (m *mockImageManager) ImportLocalImage(ctx context.Context, repo, reference
274277
}
275278

276279
func (m *mockImageManager) GetImage(ctx context.Context, name string) (*images.Image, error) {
280+
m.mu.RLock()
281+
defer m.mu.RUnlock()
277282
if m.getImageErr != nil {
278283
return nil, m.getImageErr
279284
}
280285
if img, ok := m.images[name]; ok {
281-
return img, nil
286+
// Return a copy to avoid races on the Status field
287+
imgCopy := *img
288+
return &imgCopy, nil
282289
}
283290
return nil, images.ErrNotFound
284291
}
@@ -298,14 +305,49 @@ func (m *mockImageManager) TotalOCICacheBytes(ctx context.Context) (int64, error
298305
return 0, nil
299306
}
300307

308+
func (m *mockImageManager) WaitForReady(ctx context.Context, name string) error {
309+
for {
310+
select {
311+
case <-ctx.Done():
312+
return ctx.Err()
313+
default:
314+
}
315+
m.mu.RLock()
316+
img, ok := m.images[name]
317+
var status string
318+
if ok {
319+
status = img.Status
320+
}
321+
m.mu.RUnlock()
322+
switch status {
323+
case images.StatusReady:
324+
return nil
325+
case images.StatusFailed:
326+
return fmt.Errorf("image conversion failed")
327+
}
328+
time.Sleep(50 * time.Millisecond)
329+
}
330+
}
331+
301332
// SetImageReady sets an image to ready status for testing
302333
func (m *mockImageManager) SetImageReady(name string) {
334+
m.mu.Lock()
335+
defer m.mu.Unlock()
303336
m.images[name] = &images.Image{
304337
Name: name,
305338
Status: images.StatusReady,
306339
}
307340
}
308341

342+
// SetImageStatus sets an image's status in a thread-safe way for testing
343+
func (m *mockImageManager) SetImageStatus(name, status string) {
344+
m.mu.Lock()
345+
defer m.mu.Unlock()
346+
if img, ok := m.images[name]; ok {
347+
img.Status = status
348+
}
349+
}
350+
309351
// Test helper to create a manager with test paths and mocks
310352
func setupTestManager(t *testing.T) (*manager, *mockInstanceManager, *mockVolumeManager, string) {
311353
mgr, instanceMgr, volumeMgr, _, tempDir := setupTestManagerWithImageMgr(t)

lib/builds/race_test.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,17 +97,19 @@ func TestWaitForImageReady_WaitsForConversion(t *testing.T) {
9797
imageRef := "builds/" + buildID
9898

9999
// Start with image in pending status
100+
imageMgr.mu.Lock()
100101
imageMgr.images[imageRef] = &images.Image{
101102
Name: imageRef,
102103
Status: images.StatusPending,
103104
}
105+
imageMgr.mu.Unlock()
104106

105107
// Simulate conversion completing after a short delay
106108
go func() {
107109
time.Sleep(100 * time.Millisecond)
108-
imageMgr.images[imageRef].Status = images.StatusConverting
110+
imageMgr.SetImageStatus(imageRef, images.StatusConverting)
109111
time.Sleep(100 * time.Millisecond)
110-
imageMgr.images[imageRef].Status = images.StatusReady
112+
imageMgr.SetImageStatus(imageRef, images.StatusReady)
111113
}()
112114

113115
// waitForImageReady should poll and eventually succeed
@@ -131,10 +133,12 @@ func TestWaitForImageReady_ContextCancelled(t *testing.T) {
131133
imageRef := "builds/" + buildID
132134

133135
// Image stays in pending status forever
136+
imageMgr.mu.Lock()
134137
imageMgr.images[imageRef] = &images.Image{
135138
Name: imageRef,
136139
Status: images.StatusPending,
137140
}
141+
imageMgr.mu.Unlock()
138142

139143
// waitForImageReady should return context error
140144
err := mgr.waitForImageReady(ctx, imageRef)
@@ -152,10 +156,12 @@ func TestWaitForImageReady_Failed(t *testing.T) {
152156
imageRef := "builds/" + buildID
153157

154158
// Image is in failed status
159+
imageMgr.mu.Lock()
155160
imageMgr.images[imageRef] = &images.Image{
156161
Name: imageRef,
157162
Status: images.StatusFailed,
158163
}
164+
imageMgr.mu.Unlock()
159165

160166
// waitForImageReady should return error immediately
161167
err := mgr.waitForImageReady(ctx, imageRef)

lib/images/disk.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"os"
66
"os/exec"
77
"path/filepath"
8+
"runtime"
89

910
"github.com/u-root/u-root/pkg/cpio"
1011
)
@@ -13,13 +14,20 @@ import (
1314
type ExportFormat string
1415

1516
const (
16-
FormatExt4 ExportFormat = "ext4" // Read-only ext4 (app images, default)
17-
FormatErofs ExportFormat = "erofs" // Read-only compressed (future: when kernel supports it)
17+
FormatExt4 ExportFormat = "ext4" // Read-only ext4 (legacy, used on Darwin)
18+
FormatErofs ExportFormat = "erofs" // Read-only compressed with LZ4 (default on Linux)
1819
FormatCpio ExportFormat = "cpio" // Uncompressed archive (initrd, fast boot)
1920
)
2021

21-
// DefaultImageFormat is the default export format for OCI images
22-
const DefaultImageFormat = FormatExt4
22+
// DefaultImageFormat is the default export format for OCI images.
23+
// On Linux, we use erofs (compressed, read-only) for smaller images.
24+
// On Darwin, we use ext4 because the VZ kernel doesn't have erofs support.
25+
var DefaultImageFormat = func() ExportFormat {
26+
if runtime.GOOS == "darwin" {
27+
return FormatExt4
28+
}
29+
return FormatErofs
30+
}()
2331

2432
// ExportRootfs exports rootfs directory in specified format (public for system manager)
2533
func ExportRootfs(rootfsDir, outputPath string, format ExportFormat) (int64, error) {

0 commit comments

Comments
 (0)