Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lxd-agent: Fixes intermittent exec EOF closure when vsock listener is restarted just after boot #12405

Merged
merged 2 commits into from
Oct 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 7 additions & 31 deletions lxd-agent/api_1.0.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
package main

import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"os"
"time"

"github.com/mdlayher/vsock"

Expand Down Expand Up @@ -193,41 +191,19 @@ func getClient(CID uint32, port int, serverCertificate string) (*http.Client, er
return client, nil
}

// waitVsockContextID checks for valid local context ID and returns it.
// If no valid context ID has been ascertained when the context is cancelled, the last error is returned.
func waitVsockContextID(ctx context.Context) (uint32, error) {
func startHTTPServer(d *Daemon, debug bool) error {
const CIDAny uint32 = 4294967295 // Equivalent to VMADDR_CID_ANY.

for {
cid, err := vsock.ContextID()
if cid == CIDAny {
// Ignore VMADDR_CID_ANY as this seems to indicate the vsock module is still initialising.
err = fmt.Errorf("Invalid context ID %d", cid)
} else if err == nil {
return cid, nil
}

ctxErr := ctx.Err()
if ctxErr != nil {
if err != nil {
return 0, err
}

return 0, ctxErr
}

time.Sleep(time.Second)
}
}

func startHTTPServer(d *Daemon, debug bool) error {
// Set up the listener on the VM's context ID for inbound connections from LXD.
l, err := vsock.ListenContextID(d.localCID, shared.HTTPSDefaultPort, nil)
// Set up the listener on the wildcard CID for inbound connections from LXD.
// We use the VMADDR_CID_ANY CID so that if the VM's CID changes in the future the listener still works.
// A CID change can occur when restoring a stateful VM that was previously using one CID but is
// subsequently restored using a different one.
l, err := vsock.ListenContextID(CIDAny, shared.HTTPSDefaultPort, nil)
if err != nil {
return fmt.Errorf("Failed to listen on vsock: %w", err)
}

logger.Info("Started vsock listener", logger.Ctx{"contextID": d.localCID})
logger.Info("Started vsock listener")

// Load the expected server certificate.
cert, err := shared.ReadCert("server.crt")
Expand Down
2 changes: 0 additions & 2 deletions lxd-agent/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ type Daemon struct {
serverPort uint32
serverCertificate string

localCID uint32

// The channel which is used to indicate that the lxd-agent was able to connect to LXD.
chConnected chan struct{}

Expand Down
33 changes: 0 additions & 33 deletions lxd-agent/main_agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,45 +129,12 @@ func (c *cmdAgent) Run(cmd *cobra.Command, args []string) error {

d := newDaemon(c.global.flagLogDebug, c.global.flagLogVerbose)

// Wait up to 30s to get a valid local vsock context ID.
ctx, cancel := context.WithTimeout(context.Background(), time.Second*30)
d.localCID, err = waitVsockContextID(ctx)
if err != nil {
cancel()
return fmt.Errorf("Failed getting vsock context ID: %w", err)
}

cancel()

// Start the server.
err = startHTTPServer(d, c.global.flagLogDebug)
if err != nil {
return fmt.Errorf("Failed to start HTTP server: %w", err)
}

// Check context ID periodically, and restart the HTTP server if needed.
go func() {
for {
time.Sleep(30 * time.Second)
cid, err := waitVsockContextID(context.Background())
if err != nil || d.localCID == cid {
continue
}

// Restart server
logger.Warn("Restarting the vsock server due to context ID change", logger.Ctx{"oldID": d.localCID, "newID": cid})
servers["http"].Close()

// Update context ID.
d.localCID = cid

err = startHTTPServer(d, c.global.flagLogDebug)
if err != nil {
errChan <- err
}
}
}()

// Check whether we should start the devlxd server in the early setup. This way, /dev/lxd/sock
// will be available for any systemd services starting after the lxd-agent.
if shared.PathExists("agent.conf") {
Expand Down
5 changes: 0 additions & 5 deletions lxd/vsock/vsock.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,6 @@ import (
"github.com/canonical/lxd/shared"
)

// ContextID reports the context ID of the local VM socket.
// It forwards the call to vsock.ContextID and hands back that call's
// value and error unchanged.
func ContextID() (uint32, error) {
	cid, err := vsock.ContextID()
	return cid, err
}

// Dial connects to a remote vsock.
func Dial(cid, port uint32) (net.Conn, error) {
return vsock.Dial(cid, port, nil)
Expand Down
Loading