Skip to content

Commit

Permalink
Check new controllers against etcd member-list to detect replaced hosts
Browse files Browse the repository at this point in the history
Signed-off-by: Kimmo Lehto <klehto@mirantis.com>
  • Loading branch information
kke committed May 16, 2024
1 parent 0fb74be commit d2a2a6b
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 7 deletions.
46 changes: 46 additions & 0 deletions phase/gather_k0s_facts.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"context"
"encoding/json"
"fmt"
"net"
"net/url"
"path"
"strings"

Expand Down Expand Up @@ -83,6 +85,50 @@ func (p *GatherK0sFacts) Run() error {
return err
}

if p.leader.Metadata.K0sRunningVersion != nil {
if err := p.listEtcdMembers(p.leader); err != nil {
return err
}
}

return nil
}

// listEtcdMembers runs "k0s etcd member-list" on host h, extracts the host
// part of every member peer URL from the JSON output and stores the resulting
// list in p.Config.Metadata.EtcdMembers.
//
// An error is returned when the command fails, when its output is not valid
// JSON, or when a member URL cannot be parsed or has no host:port form.
// Member entries whose value is not a string are skipped, and a missing or
// malformed "members" object yields an empty list.
func (p *GatherK0sFacts) listEtcdMembers(h *cluster.Host) error {
	log.Infof("%s: listing etcd members", h)

	// The command prints JSON such as:
	//   {"members":{"controller0":"https://172.17.0.2:2380","controller1":"https://172.17.0.3:2380"}}
	// On versions like ~1.21.x the output goes to stderr, hence the 2>&1.
	memberListCmd := h.Configurer.K0sCmdf("etcd member-list --data-dir=%s 2>&1", h.K0sDataDir())
	raw, err := h.ExecOutput(memberListCmd, exec.Sudo(h))
	if err != nil {
		return fmt.Errorf("failed to run list etcd members command: %w", err)
	}

	decoded := make(map[string]any)
	if err := json.Unmarshal([]byte(raw), &decoded); err != nil {
		return fmt.Errorf("failed to decode etcd member-list output: %w", err)
	}

	hosts := make([]string, 0)

	// A failed assertion leaves peers nil; ranging over a nil map is a no-op.
	peers, _ := decoded["members"].(map[string]any)
	for _, value := range peers {
		peerURL, isString := value.(string)
		if !isString {
			continue
		}

		parsed, err := url.Parse(peerURL)
		if err != nil {
			return fmt.Errorf("failed to parse etcd member URL: %w", err)
		}

		// Only the host is kept; the peer port is discarded.
		host, _, err := net.SplitHostPort(parsed.Host)
		if err != nil {
			return fmt.Errorf("failed to split etcd member URL: %w", err)
		}

		log.Debugf("%s: detected etcd member %s", h, host)
		hosts = append(hosts, host)
	}

	p.Config.Metadata.EtcdMembers = hosts
	return nil
}

Expand Down
6 changes: 1 addition & 5 deletions phase/reset_controllers.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,7 @@ func (p *ResetControllers) Run() error {
if !p.NoLeave {
log.Debugf("%s: leaving etcd...", h)

etcdAddress := h.SSH.Address
if h.PrivateAddress != "" {
etcdAddress = h.PrivateAddress
}
if err := h.Exec(h.Configurer.K0sCmdf("etcd leave --peer-address %s --datadir %s", etcdAddress, h.K0sDataDir()), exec.Sudo(h)); err != nil {
if err := h.Exec(h.Configurer.K0sCmdf("etcd leave --peer-address %s --datadir %s", h.PrivateAddress, h.K0sDataDir()), exec.Sudo(h)); err != nil {
log.Warnf("%s: failed to leave etcd: %s", h, err.Error())
}
log.Debugf("%s: leaving etcd completed", h)
Expand Down
34 changes: 34 additions & 0 deletions phase/validate_facts.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package phase

import (
"fmt"
"slices"

log "github.com/sirupsen/logrus"
)
Expand All @@ -27,6 +28,10 @@ func (p *ValidateFacts) Run() error {
return err
}

if err := p.validateControllerSwap(); err != nil {
return err
}

return nil
}

Expand Down Expand Up @@ -69,3 +74,32 @@ func (p *ValidateFacts) validateDefaultVersion() error {

return nil
}

// validateControllerSwap compares the controllers listed in the configuration
// against the etcd member list stored in p.Config.Metadata.EtcdMembers.
//
// It returns an error when a controller that is not running k0s has a
// PrivateAddress that is already present in the etcd member list, since that
// indicates the host may have been replaced without leaving the cluster.
// The check is skipped (nil is returned) when the leader is not running k0s
// or when the leader's role is "single".
func (p *ValidateFacts) validateControllerSwap() error {
	log.Debugf("validating controller list vs etcd member list")

	leader := p.Config.Spec.K0sLeader()
	if leader.Metadata.K0sRunningVersion == nil {
		log.Debugf("%s: leader has no k0s running, assuming a fresh cluster", leader)
		return nil
	}

	if leader.Role == "single" {
		log.Debugf("%s: leader is a single node, assuming no etcd", leader)
		// Without this return the function fell through and still compared
		// controllers against an etcd member list it just assumed not to exist.
		return nil
	}

	controllers := p.Config.Spec.Hosts.Controllers()
	etcdMembers := p.Config.Metadata.EtcdMembers

	// Extra etcd members are only reported; they do not fail validation.
	if len(etcdMembers) > len(controllers) {
		log.Warnf("there are more etcd members in the cluster than controllers listed in the k0sctl configuration")
	}

	for _, h := range controllers {
		// Hosts already running k0s are existing members, nothing to verify.
		if h.Metadata.K0sRunningVersion != nil {
			continue
		}
		log.Debugf("%s: host is new, checking if etcd members list %+v already contains %s", h, etcdMembers, h.PrivateAddress)
		if slices.Contains(etcdMembers, h.PrivateAddress) {
			return fmt.Errorf("controller %s is listed as an etcd member but k0s is not running on it, host may have been replaced", h)
		}
		log.Debugf("%s: no match, assuming its safe to install", h)
	}

	return nil
}
5 changes: 3 additions & 2 deletions pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ const APIVersion = "k0sctl.k0sproject.io/v1beta1"

// ClusterMetadata defines cluster metadata: the cluster name plus values that
// are kept only in memory and excluded from YAML via the `yaml:"-"` tag.
type ClusterMetadata struct {
Name string `yaml:"name" validate:"required" default:"k0s-cluster"`
Kubeconfig string `yaml:"-"`
// Name is the cluster name; required, defaulting to "k0s-cluster".
Name string `yaml:"name" validate:"required" default:"k0s-cluster"`
// Kubeconfig is excluded from YAML.
Kubeconfig string `yaml:"-"`
// EtcdMembers is excluded from YAML. Presumably holds the host part of each
// etcd member peer URL gathered from the leader — confirm against the
// GatherK0sFacts phase.
EtcdMembers []string `yaml:"-"`
}

// Cluster describes the k0sctl cluster configuration
Expand Down

0 comments on commit d2a2a6b

Please sign in to comment.