Skip to content

Commit

Permalink
Merge pull request #12405 from tomponline/tp-vm-vsockid
Browse files Browse the repository at this point in the history
lxd-agent: Fixes intermittent exec EOF closure when vsock listener is restarted just after boot
  • Loading branch information
tomponline authored Oct 18, 2023
2 parents bd9abdf + 4a98234 commit b0099f3
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 71 deletions.
38 changes: 7 additions & 31 deletions lxd-agent/api_1.0.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
package main

import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"os"
"time"

"github.com/mdlayher/vsock"

Expand Down Expand Up @@ -193,41 +191,19 @@ func getClient(CID uint32, port int, serverCertificate string) (*http.Client, er
return client, nil
}

// waitVsockContextID polls (once per second) for a valid local context ID and returns it.
// If no valid context ID has been obtained by the time the context is cancelled, the last error is returned.
func waitVsockContextID(ctx context.Context) (uint32, error) {
func startHTTPServer(d *Daemon, debug bool) error {
const CIDAny uint32 = 4294967295 // Equivalent to VMADDR_CID_ANY.

for {
cid, err := vsock.ContextID()
if cid == CIDAny {
// Ignore VMADDR_CID_ANY as this seems to indicate the vsock module is still initialising.
err = fmt.Errorf("Invalid context ID %d", cid)
} else if err == nil {
return cid, nil
}

ctxErr := ctx.Err()
if ctxErr != nil {
if err != nil {
return 0, err
}

return 0, ctxErr
}

time.Sleep(time.Second)
}
}

func startHTTPServer(d *Daemon, debug bool) error {
// Set up the listener on the VM's context ID for inbound connections from LXD.
l, err := vsock.ListenContextID(d.localCID, shared.HTTPSDefaultPort, nil)
// Set up the listener on the wildcard CID for inbound connections from LXD.
// We use VMADDR_CID_ANY so that the listener keeps working if the VM's CID changes in the future.
// A CID change can occur when restoring a stateful VM that was previously using one CID but is
// subsequently restored using a different one.
l, err := vsock.ListenContextID(CIDAny, shared.HTTPSDefaultPort, nil)
if err != nil {
return fmt.Errorf("Failed to listen on vsock: %w", err)
}

logger.Info("Started vsock listener", logger.Ctx{"contextID": d.localCID})
logger.Info("Started vsock listener")

// Load the expected server certificate.
cert, err := shared.ReadCert("server.crt")
Expand Down
2 changes: 0 additions & 2 deletions lxd-agent/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ type Daemon struct {
serverPort uint32
serverCertificate string

localCID uint32

// The channel which is used to indicate that the lxd-agent was able to connect to LXD.
chConnected chan struct{}

Expand Down
33 changes: 0 additions & 33 deletions lxd-agent/main_agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,45 +129,12 @@ func (c *cmdAgent) Run(cmd *cobra.Command, args []string) error {

d := newDaemon(c.global.flagLogDebug, c.global.flagLogVerbose)

// Wait up to 30s to get a valid local vsock context ID.
ctx, cancel := context.WithTimeout(context.Background(), time.Second*30)
d.localCID, err = waitVsockContextID(ctx)
if err != nil {
cancel()
return fmt.Errorf("Failed getting vsock context ID: %w", err)
}

cancel()

// Start the server.
err = startHTTPServer(d, c.global.flagLogDebug)
if err != nil {
return fmt.Errorf("Failed to start HTTP server: %w", err)
}

// Check context ID periodically, and restart the HTTP server if needed.
go func() {
for {
time.Sleep(30 * time.Second)
cid, err := waitVsockContextID(context.Background())
if err != nil || d.localCID == cid {
continue
}

// Restart server
logger.Warn("Restarting the vsock server due to context ID change", logger.Ctx{"oldID": d.localCID, "newID": cid})
servers["http"].Close()

// Update context ID.
d.localCID = cid

err = startHTTPServer(d, c.global.flagLogDebug)
if err != nil {
errChan <- err
}
}
}()

// Check whether we should start the devlxd server in the early setup. This way, /dev/lxd/sock
// will be available for any systemd services starting after the lxd-agent.
if shared.PathExists("agent.conf") {
Expand Down
5 changes: 0 additions & 5 deletions lxd/vsock/vsock.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,6 @@ import (
"github.com/canonical/lxd/shared"
)

// ContextID returns the local VM sockets context ID.
// It is a thin wrapper: the call is forwarded to vsock.ContextID and both the
// resulting ID and any error are handed back to the caller unchanged.
func ContextID() (uint32, error) {
return vsock.ContextID()
}

// Dial connects to a remote vsock.
func Dial(cid, port uint32) (net.Conn, error) {
return vsock.Dial(cid, port, nil)
Expand Down

0 comments on commit b0099f3

Please sign in to comment.