Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add host preflights for all needed ports #1205

Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 3 additions & 14 deletions cmd/embedded-cluster/reset.go
Original file line number Diff line number Diff line change
Expand Up @@ -365,11 +365,6 @@ var resetCommand = &cli.Command{
Usage: "Disable interactive prompts",
Value: false,
},
&cli.BoolFlag{
ricardomaraschini marked this conversation as resolved.
Show resolved Hide resolved
Name: "reboot",
Usage: "Reboot system after resetting the node",
Value: false,
},
},
Usage: fmt.Sprintf("Remove %s from the current node", binName),
Action: func(c *cli.Context) error {
Expand All @@ -378,7 +373,7 @@ var resetCommand = &cli.Command{
}

logrus.Info("This will remove this node from the cluster and completely reset it, removing all data stored on the node.")
logrus.Info("Do not reset another node until this is complete.")
logrus.Info("This action will cause the node to reboot. Do not reset another node until this is complete.")
if !c.Bool("force") && !c.Bool("no-prompt") && !prompts.New().Confirm("Do you want to continue?", false) {
return fmt.Errorf("Aborting")
}
Expand Down Expand Up @@ -449,10 +444,6 @@ var resetCommand = &cli.Command{
return err
}

if !c.Bool("reboot") {
logrus.Infof("Node has been reset. Please reboot to ensure transient configuration is also reset.")
}

if err := helpers.RemoveAll(defaults.PathToK0sConfig()); err != nil {
return fmt.Errorf("failed to remove k0s config: %w", err)
}
Expand Down Expand Up @@ -501,10 +492,8 @@ var resetCommand = &cli.Command{
return fmt.Errorf("failed to remove k0s binary: %w", err)
}

if c.Bool("reboot") {
if _, err := exec.Command("reboot").Output(); err != nil {
return err
}
if _, err := exec.Command("reboot").Output(); err != nil {
return err
}

return nil
Expand Down
21 changes: 21 additions & 0 deletions e2e/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
lxd "github.com/canonical/lxd/client"
"github.com/canonical/lxd/shared/api"
"github.com/google/uuid"
"github.com/stretchr/testify/require"
)

var networkaddr chan string
Expand Down Expand Up @@ -131,6 +132,26 @@ func (o *Output) Destroy() {
networkaddr <- o.network
}

// WaitForNodeRunning waits until LXD reports the node at index `node` (an
// index into o.Nodes) with status "Running".
//
// The state is polled every 5 seconds, at most 10 times. The test is failed
// immediately (through require) if the connection to LXD cannot be
// established, if reading the instance state returns an error, or if the node
// is not reported as "Running" within the polling budget.
func (o *Output) WaitForNodeRunning(node int) {
	const (
		maxAttempts  = 10
		pollInterval = 5 * time.Second
	)

	client, err := lxd.ConnectLXDUnix(lxdSocket, nil)
	require.NoError(o.T, err, "failed to connect to lxd")

	for attempt := 0; attempt < maxAttempts; attempt++ {
		// Sleep before every poll, including the first one, so the state is
		// never sampled right after the call (presumably to give a pending
		// reboot time to kick in — confirm with callers).
		time.Sleep(pollInterval)
		o.T.Logf("waiting for node %d to start (running)", node)

		state, _, err := client.GetInstanceState(o.Nodes[node])
		require.NoError(o.T, err, "failed to get node state %d", node)
		o.T.Logf("node %d is in state %q", node, state.Status)

		if state.Status == "Running" {
			return
		}
	}

	// Budget exhausted: fail once here instead of sleeping and polling one
	// more time only to discard the result.
	require.FailNow(o.T, "node failed to start", "node %d failed to start", node)
}

// Command is a command to be run in a node.
type Command struct {
Node string
Expand Down
8 changes: 8 additions & 0 deletions e2e/install_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,10 @@ func TestResetAndReinstall(t *testing.T) {
t.Fatalf("fail to reset the installation: %v", err)
}

// reset will reboot the node, this waits until the node reports back
// as running.
tc.WaitForNodeRunning(0)

t.Logf("%s: installing embedded-cluster on node 0 after reset", time.Now().Format(time.RFC3339))
line = []string{"single-node-install.sh", "ui"}
if _, _, err := RunCommandOnNode(t, tc, 0, line); err != nil {
Expand Down Expand Up @@ -698,6 +702,10 @@ func TestResetAndReinstallAirgap(t *testing.T) {
t.Fatalf("fail to reset the installation: %v", err)
}

// reset will reboot the node, this waits until the node reports back
// as running.
tc.WaitForNodeRunning(0)

t.Logf("%s: installing embedded-cluster on node 0", time.Now().Format(time.RFC3339))
line = []string{"single-node-airgap-install.sh"}
if _, _, err := RunCommandOnNode(t, tc, 0, line); err != nil {
Expand Down
45 changes: 40 additions & 5 deletions e2e/preflights_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,24 @@ func TestPreflights(t *testing.T) {
})

_, stderr, err := container.Exec(cli,
"apt-get update && apt-get install -y apt-utils kmod",
"apt-get update && apt-get install -y apt-utils kmod netcat-traditional",
)
if err != nil {
t.Fatalf("failed to install deps: err=%v, stderr=%s", err, stderr)
}

if _, stderr, err = container.Exec(cli, "nohup netcat -l -p 10250 &"); err != nil {
t.Fatalf("failed to start netcat: err=%v, stderr=%s", err, stderr)
}

if _, stderr, err = container.Exec(cli, "nohup netcat -l 127.0.0.1 -p 50000 &"); err != nil {
t.Fatalf("failed to start netcat: err=%v, stderr=%s", err, stderr)
}

if _, stderr, err = container.Exec(cli, "nohup netcat -l -u -p 4789 &"); err != nil {
t.Fatalf("failed to start netcat: err=%v, stderr=%s", err, stderr)
}

runCmd := fmt.Sprintf("%s install run-preflights --no-prompt", container.GetECBinaryPath())
if os.Getenv("LICENSE_PATH") != "" {
runCmd = fmt.Sprintf("%s --license %s", runCmd, container.GetLicensePath())
Expand Down Expand Up @@ -93,10 +105,13 @@ func TestPreflights(t *testing.T) {
assert: func(t *testing.T, results *preflights.Output) {
expected := map[string]bool{
// TODO: work to remove these
"System Clock": true,
"'devices' Cgroup Controller": true,
"API Access": true,
"Proxy Registry Access": true,
"System Clock": true,
"'devices' Cgroup Controller": true,
"API Access": true,
"Proxy Registry Access": true,
"Kubelet Port Availability": true,
"Calico Communication Port Availability": true,
"Local Artifact Mirror Port Availability": true,
// as long as fio ran successfully, we're good
"Filesystem Write Latency": true,
}
Expand Down Expand Up @@ -124,6 +139,26 @@ func TestPreflights(t *testing.T) {
}
},
},
{
name: "Should contain port failures",
assert: func(t *testing.T, results *preflights.Output) {
expected := map[string]bool{
"Kubelet Port Availability": false,
"Calico Communication Port Availability": false,
"Local Artifact Mirror Port Availability": false,
}
for _, res := range results.Fail {
if _, ok := expected[res.Title]; ok {
expected[res.Title] = true
}
}
for title, found := range expected {
if !found {
t.Errorf("expected port failure not found: %q", title)
}
}
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down
Loading
Loading