From 4ed54066b734b924fe0b5f3d23390cd996a60746 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=94=A6=E5=8D=97=E8=B7=AF=E4=B9=8B=E8=8A=B1?= Date: Fri, 31 May 2024 23:01:36 +0200 Subject: [PATCH] feat: unified minikube cluster status query --- cmd/minikube/cmd/config/profile_list.go | 79 +--- cmd/minikube/cmd/status.go | 447 +-------------------- cmd/minikube/cmd/status_test.go | 34 +- pkg/minikube/cluster/status.go | 510 ++++++++++++++++++++++++ test/integration/ha_test.go | 4 +- test/integration/multinode_test.go | 4 +- test/integration/pause_test.go | 6 +- test/integration/status_test.go | 8 +- 8 files changed, 558 insertions(+), 534 deletions(-) create mode 100644 pkg/minikube/cluster/status.go diff --git a/cmd/minikube/cmd/config/profile_list.go b/cmd/minikube/cmd/config/profile_list.go index 2c9faac95c64..f5c12b594800 100644 --- a/cmd/minikube/cmd/config/profile_list.go +++ b/cmd/minikube/cmd/config/profile_list.go @@ -23,10 +23,9 @@ import ( "strconv" "strings" - "k8s.io/minikube/pkg/minikube/bootstrapper/bsutil/kverify" + "k8s.io/minikube/pkg/minikube/cluster" "k8s.io/minikube/pkg/minikube/config" "k8s.io/minikube/pkg/minikube/constants" - "k8s.io/minikube/pkg/minikube/driver" "k8s.io/minikube/pkg/minikube/exit" "k8s.io/minikube/pkg/minikube/machine" "k8s.io/minikube/pkg/minikube/notify" @@ -35,7 +34,6 @@ import ( "k8s.io/minikube/pkg/minikube/style" "github.com/docker/machine/libmachine" - "github.com/docker/machine/libmachine/state" "github.com/olekukonko/tablewriter" "github.com/spf13/cobra" @@ -115,77 +113,14 @@ func profileStatus(p *config.Profile, api libmachine.API) string { if len(cps) == 0 { exit.Message(reason.GuestCpConfig, "No control-plane nodes found.") } - - status := "Unknown" - healthyCPs := 0 - for _, cp := range cps { - machineName := config.MachineName(*p.Config, cp) - - ms, err := machine.Status(api, machineName) - if err != nil { - klog.Warningf("error loading profile (will continue): machine status for %s: %v", machineName, err) - continue 
- } - if ms != state.Running.String() { - klog.Warningf("error loading profile (will continue): machine %s is not running: %q", machineName, ms) - status = ms - continue - } - - host, err := machine.LoadHost(api, machineName) - if err != nil { - klog.Warningf("error loading profile (will continue): load host for %s: %v", machineName, err) - continue - } - - hs, err := host.Driver.GetState() - if err != nil { - klog.Warningf("error loading profile (will continue): host state for %s: %v", machineName, err) - continue - } - if hs != state.Running { - klog.Warningf("error loading profile (will continue): host %s is not running: %q", machineName, hs) - status = hs.String() - continue - } - - cr, err := machine.CommandRunner(host) - if err != nil { - klog.Warningf("error loading profile (will continue): command runner for %s: %v", machineName, err) - continue - } - - hostname, _, port, err := driver.ControlPlaneEndpoint(p.Config, &cp, host.DriverName) - if err != nil { - klog.Warningf("error loading profile (will continue): control-plane endpoint for %s: %v", machineName, err) - continue - } - - as, err := kverify.APIServerStatus(cr, hostname, port) - if err != nil { - klog.Warningf("error loading profile (will continue): apiserver status for %s: %v", machineName, err) - continue - } - status = as.String() - if as != state.Running { - klog.Warningf("error loading profile (will continue): apiserver %s is not running: %q", machineName, hs) - continue - } - - healthyCPs++ + statuses, err := cluster.GetStatus(api, p.Config) + if err != nil { + klog.Errorf("error getting statuses: %v", err) + return "Unknown" } + clusterStatus := cluster.GetClusterState(statuses, ClusterFlagValue(), p.Config) - if config.IsHA(*p.Config) { - switch { - case healthyCPs < 2: - return state.Stopped.String() - case healthyCPs == 2: - return "Degraded" - default: - return "HAppy" - } - } - return status + return clusterStatus.StatusName } func renderProfilesTable(ps [][]string) { diff --git 
a/cmd/minikube/cmd/status.go b/cmd/minikube/cmd/status.go index 0a3a3fa303a5..8ee641edf25a 100644 --- a/cmd/minikube/cmd/status.go +++ b/cmd/minikube/cmd/status.go @@ -17,39 +17,27 @@ limitations under the License. package cmd import ( - "bufio" "encoding/json" "fmt" "io" "os" - "strconv" "strings" "text/template" "time" - cloudevents "github.com/cloudevents/sdk-go/v2" - "github.com/docker/machine/libmachine" "github.com/docker/machine/libmachine/state" "github.com/pkg/errors" "github.com/spf13/cobra" "k8s.io/klog/v2" - "k8s.io/minikube/pkg/minikube/bootstrapper/bsutil/kverify" "k8s.io/minikube/pkg/minikube/cluster" "k8s.io/minikube/pkg/minikube/config" - "k8s.io/minikube/pkg/minikube/constants" - "k8s.io/minikube/pkg/minikube/driver" "k8s.io/minikube/pkg/minikube/exit" - "k8s.io/minikube/pkg/minikube/kubeconfig" - "k8s.io/minikube/pkg/minikube/localpath" - "k8s.io/minikube/pkg/minikube/machine" "k8s.io/minikube/pkg/minikube/mustload" "k8s.io/minikube/pkg/minikube/node" "k8s.io/minikube/pkg/minikube/notify" "k8s.io/minikube/pkg/minikube/out" - "k8s.io/minikube/pkg/minikube/out/register" "k8s.io/minikube/pkg/minikube/reason" - "k8s.io/minikube/pkg/version" ) var ( @@ -71,112 +59,6 @@ const ( Irrelevant = "Irrelevant" ) -// New status modes, based roughly on HTTP/SMTP standards -const ( - - // 1xx signifies a transitional state. If retried, it will soon return a 2xx, 4xx, or 5xx - - Starting = 100 - Pausing = 101 - Unpausing = 102 - Stopping = 110 - Deleting = 120 - - // 2xx signifies that the API Server is able to service requests - - OK = 200 - Warning = 203 - - // 4xx signifies an error that requires help from the client to resolve - - NotFound = 404 - Stopped = 405 - Paused = 418 // I'm a teapot! 
- - // 5xx signifies a server-side error (that may be retryable) - - Error = 500 - InsufficientStorage = 507 - Unknown = 520 -) - -var ( - exitCodeToHTTPCode = map[int]int{ - // exit code 26 corresponds to insufficient storage - 26: 507, - } - - codeNames = map[int]string{ - 100: "Starting", - 101: "Pausing", - 102: "Unpausing", - 110: "Stopping", - 103: "Deleting", - - 200: "OK", - 203: "Warning", - - 404: "NotFound", - 405: "Stopped", - 418: "Paused", - - 500: "Error", - 507: "InsufficientStorage", - 520: "Unknown", - } - - codeDetails = map[int]string{ - 507: "/var is almost out of disk space", - } -) - -// Status holds string representations of component states -type Status struct { - Name string - Host string - Kubelet string - APIServer string - Kubeconfig string - Worker bool - TimeToStop string `json:",omitempty"` - DockerEnv string `json:",omitempty"` - PodManEnv string `json:",omitempty"` -} - -// ClusterState holds a cluster state representation -type ClusterState struct { - BaseState - - BinaryVersion string - TimeToStop string `json:",omitempty"` - Components map[string]BaseState - Nodes []NodeState -} - -// NodeState holds a node state representation -type NodeState struct { - BaseState - Components map[string]BaseState `json:",omitempty"` -} - -// BaseState holds a component state representation, such as "apiserver" or "kubeconfig" -type BaseState struct { - // Name is the name of the object - Name string - - // StatusCode is an HTTP-like status code for this object - StatusCode int - // Name is a human-readable name for the status code - StatusName string - // StatusDetail is long human-readable string describing why this particular status code was chosen - StatusDetail string `json:",omitempty"` // Not yet implemented - - // Step is which workflow step the object is at. 
- Step string `json:",omitempty"` - // StepDetail is a long human-readable string describing the step - StepDetail string `json:",omitempty"` -} - const ( minikubeNotRunningStatusFlag = 1 << 0 clusterNotRunningStatusFlag = 1 << 1 @@ -236,7 +118,7 @@ var statusCmd = &cobra.Command{ // writeStatusesAtInterval writes statuses in a given output format - at intervals defined by duration func writeStatusesAtInterval(duration time.Duration, api libmachine.API, cc *config.ClusterConfig) { for { - var statuses []*Status + var statuses []*cluster.Status if nodeName != "" || statusFormat != defaultStatusFormat && len(cc.Nodes) > 1 { n, _, err := node.Retrieve(*cc, nodeName) @@ -244,25 +126,16 @@ func writeStatusesAtInterval(duration time.Duration, api libmachine.API, cc *con exit.Error(reason.GuestNodeRetrieve, "retrieving node", err) } - st, err := nodeStatus(api, *cc, *n) + st, err := cluster.NodeStatus(api, *cc, *n) if err != nil { klog.Errorf("status error: %v", err) } statuses = append(statuses, st) } else { - for _, n := range cc.Nodes { - machineName := config.MachineName(*cc, n) - klog.Infof("checking status of %s ...", machineName) - st, err := nodeStatus(api, *cc, n) - klog.Infof("%s status: %+v", machineName, st) - - if err != nil { - klog.Errorf("status error: %v", err) - } - if st.Host == Nonexistent { - klog.Errorf("The %q host does not exist!", machineName) - } - statuses = append(statuses, st) + var err error + statuses, err = cluster.GetStatus(api, cc) + if err != nil { + klog.Errorf("status error: %v", err) } } @@ -276,7 +149,7 @@ func writeStatusesAtInterval(duration time.Duration, api libmachine.API, cc *con case "json": // Layout is currently only supported for JSON mode if layout == "cluster" { - if err := clusterStatusJSON(statuses, os.Stdout); err != nil { + if err := clusterStatusJSON(statuses, os.Stdout, cc); err != nil { exit.Error(reason.InternalStatusJSON, "status json failure", err) } } else { @@ -296,7 +169,7 @@ func 
writeStatusesAtInterval(duration time.Duration, api libmachine.API, cc *con } // exitCode calculates the appropriate exit code given a set of status messages -func exitCode(statuses []*Status) int { +func exitCode(statuses []*cluster.Status) int { c := 0 for _, st := range statuses { if st.Host != state.Running.String() { @@ -312,125 +185,6 @@ func exitCode(statuses []*Status) int { return c } -// nodeStatus looks up the status of a node -func nodeStatus(api libmachine.API, cc config.ClusterConfig, n config.Node) (*Status, error) { - controlPlane := n.ControlPlane - name := config.MachineName(cc, n) - - st := &Status{ - Name: name, - Host: Nonexistent, - APIServer: Nonexistent, - Kubelet: Nonexistent, - Kubeconfig: Nonexistent, - Worker: !controlPlane, - } - - hs, err := machine.Status(api, name) - klog.Infof("%s host status = %q (err=%v)", name, hs, err) - if err != nil { - return st, errors.Wrap(err, "host") - } - - // We have no record of this host. Return nonexistent struct - if hs == state.None.String() { - return st, nil - } - st.Host = hs - - // If it's not running, quickly bail out rather than delivering conflicting messages - if st.Host != state.Running.String() { - klog.Infof("host is not running, skipping remaining checks") - st.APIServer = st.Host - st.Kubelet = st.Host - st.Kubeconfig = st.Host - return st, nil - } - - // We have a fully operational host, now we can check for details - if _, err := cluster.DriverIP(api, name); err != nil { - klog.Errorf("failed to get driver ip: %v", err) - st.Host = state.Error.String() - return st, err - } - - st.Kubeconfig = Configured - if !controlPlane { - st.Kubeconfig = Irrelevant - st.APIServer = Irrelevant - } - - host, err := machine.LoadHost(api, name) - if err != nil { - return st, err - } - - cr, err := machine.CommandRunner(host) - if err != nil { - return st, err - } - - // Check storage - p, err := machine.DiskUsed(cr, "/var") - if err != nil { - klog.Errorf("failed to get storage capacity of /var: %v", 
err) - st.Host = state.Error.String() - return st, err - } - if p >= 99 { - st.Host = codeNames[InsufficientStorage] - } - - stk := kverify.ServiceStatus(cr, "kubelet") - st.Kubelet = stk.String() - if cc.ScheduledStop != nil { - initiationTime := time.Unix(cc.ScheduledStop.InitiationTime, 0) - st.TimeToStop = time.Until(initiationTime.Add(cc.ScheduledStop.Duration)).String() - } - if os.Getenv(constants.MinikubeActiveDockerdEnv) != "" { - st.DockerEnv = "in-use" - } - if os.Getenv(constants.MinikubeActivePodmanEnv) != "" { - st.PodManEnv = "in-use" - } - // Early exit for worker nodes - if !controlPlane { - return st, nil - } - - var hostname string - var port int - if cc.Addons["auto-pause"] { - hostname, _, port, err = driver.AutoPauseProxyEndpoint(&cc, &n, host.DriverName) - } else { - hostname = cc.KubernetesConfig.APIServerHAVIP - port = cc.APIServerPort - if !config.IsHA(cc) || driver.NeedsPortForward(cc.Driver) { - hostname, _, port, err = driver.ControlPlaneEndpoint(&cc, &n, host.DriverName) - } - } - - if err != nil { - klog.Errorf("forwarded endpoint: %v", err) - st.Kubeconfig = Misconfigured - } else if err := kubeconfig.VerifyEndpoint(cc.Name, hostname, port, ""); err != nil && st.Host != state.Starting.String() { - klog.Errorf("kubeconfig endpoint: %v", err) - st.Kubeconfig = Misconfigured - } - - sta, err := kverify.APIServerStatus(cr, hostname, port) - klog.Infof("%s apiserver status = %s (err=%v)", name, stk, err) - - if err != nil { - klog.Errorln("Error apiserver status:", err) - st.APIServer = state.Error.String() - } else { - st.APIServer = sta.String() - } - - return st, nil -} - func init() { statusCmd.Flags().StringVarP(&statusFormat, "format", "f", defaultStatusFormat, `Go template format string for the status output. 
The format for Go templates can be found here: https://pkg.go.dev/text/template @@ -444,7 +198,7 @@ For the list accessible variables for the template, see the struct values here: statusCmd.Flags().Lookup("watch").NoOptDefVal = "1s" } -func statusText(st *Status, w io.Writer) error { +func statusText(st *cluster.Status, w io.Writer) error { tmpl, err := template.New("status").Parse(statusFormat) if st.Worker && statusFormat == defaultStatusFormat { tmpl, err = template.New("worker-status").Parse(workerStatusFormat) @@ -462,7 +216,7 @@ func statusText(st *Status, w io.Writer) error { return nil } -func statusJSON(st []*Status, w io.Writer) error { +func statusJSON(st []*cluster.Status, w io.Writer) error { var js []byte var err error // Keep backwards compat with single node clusters to not break anyone @@ -478,185 +232,8 @@ func statusJSON(st []*Status, w io.Writer) error { return err } -// readEventLog reads cloudevent logs from $MINIKUBE_HOME/profiles//events.json -func readEventLog(name string) ([]cloudevents.Event, time.Time, error) { - path := localpath.EventLog(name) - - st, err := os.Stat(path) - if err != nil { - return nil, time.Time{}, errors.Wrap(err, "stat") - } - - f, err := os.Open(path) - if err != nil { - return nil, st.ModTime(), errors.Wrap(err, "open") - } - var events []cloudevents.Event - - scanner := bufio.NewScanner(f) - for scanner.Scan() { - ev := cloudevents.NewEvent() - if err = json.Unmarshal(scanner.Bytes(), &ev); err != nil { - return events, st.ModTime(), err - } - events = append(events, ev) - } - - return events, st.ModTime(), scanner.Err() -} - -// clusterState converts Status structs into a ClusterState struct -func clusterState(sts []*Status) ClusterState { - statusName := sts[0].APIServer - if sts[0].Host == codeNames[InsufficientStorage] { - statusName = sts[0].Host - } - sc := statusCode(statusName) - - cs := ClusterState{ - BinaryVersion: version.GetVersion(), - - BaseState: BaseState{ - Name: ClusterFlagValue(), - 
StatusCode: sc, - StatusName: statusName, - StatusDetail: codeDetails[sc], - }, - - TimeToStop: sts[0].TimeToStop, - - Components: map[string]BaseState{ - "kubeconfig": {Name: "kubeconfig", StatusCode: statusCode(sts[0].Kubeconfig), StatusName: codeNames[statusCode(sts[0].Kubeconfig)]}, - }, - } - - for _, st := range sts { - ns := NodeState{ - BaseState: BaseState{ - Name: st.Name, - StatusCode: statusCode(st.Host), - }, - Components: map[string]BaseState{ - "kubelet": {Name: "kubelet", StatusCode: statusCode(st.Kubelet)}, - }, - } - - if st.APIServer != Irrelevant { - ns.Components["apiserver"] = BaseState{Name: "apiserver", StatusCode: statusCode(st.APIServer)} - } - - // Convert status codes to status names - ns.StatusName = codeNames[ns.StatusCode] - for k, v := range ns.Components { - v.StatusName = codeNames[v.StatusCode] - ns.Components[k] = v - } - - cs.Nodes = append(cs.Nodes, ns) - } - - evs, mtime, err := readEventLog(sts[0].Name) - if err != nil { - klog.Errorf("unable to read event log: %v", err) - return cs - } - - transientCode := 0 - var finalStep map[string]string - - for _, ev := range evs { - // klog.Infof("read event: %+v", ev) - if ev.Type() == "io.k8s.sigs.minikube.step" { - var data map[string]string - err := ev.DataAs(&data) - if err != nil { - klog.Errorf("unable to parse data: %v\nraw data: %s", err, ev.Data()) - continue - } - - switch data["name"] { - case string(register.InitialSetup): - transientCode = Starting - case string(register.Done): - transientCode = 0 - case string(register.Stopping): - klog.Infof("%q == %q", data["name"], register.Stopping) - transientCode = Stopping - case string(register.Deleting): - transientCode = Deleting - case string(register.Pausing): - transientCode = Pausing - case string(register.Unpausing): - transientCode = Unpausing - } - - finalStep = data - klog.Infof("transient code %d (%q) for step: %+v", transientCode, codeNames[transientCode], data) - } - if ev.Type() == "io.k8s.sigs.minikube.error" { - 
var data map[string]string - err := ev.DataAs(&data) - if err != nil { - klog.Errorf("unable to parse data: %v\nraw data: %s", err, ev.Data()) - continue - } - exitCode, err := strconv.Atoi(data["exitcode"]) - if err != nil { - klog.Errorf("exit code not found: %v", err) - continue - } - if val, ok := exitCodeToHTTPCode[exitCode]; ok { - exitCode = val - } - transientCode = exitCode - for _, n := range cs.Nodes { - n.StatusCode = transientCode - n.StatusName = codeNames[n.StatusCode] - } - - klog.Infof("transient code %d (%q) for step: %+v", transientCode, codeNames[transientCode], data) - } - } - - if finalStep != nil { - if mtime.Before(time.Now().Add(-10 * time.Minute)) { - klog.Warningf("event stream is too old (%s) to be considered a transient state", mtime) - } else { - cs.Step = strings.TrimSpace(finalStep["name"]) - cs.StepDetail = strings.TrimSpace(finalStep["message"]) - if transientCode != 0 { - cs.StatusCode = transientCode - } - } - } - - cs.StatusName = codeNames[cs.StatusCode] - cs.StatusDetail = codeDetails[cs.StatusCode] - return cs -} - -// statusCode returns a status code number given a name -func statusCode(st string) int { - // legacy names - switch st { - case "Running", "Configured": - return OK - case "Misconfigured": - return Error - } - - // new names - for code, name := range codeNames { - if name == st { - return code - } - } - - return Unknown -} - -func clusterStatusJSON(statuses []*Status, w io.Writer) error { - cs := clusterState(statuses) +func clusterStatusJSON(statuses []*cluster.Status, w io.Writer, cc *config.ClusterConfig) error { + cs := cluster.GetClusterState(statuses, ClusterFlagValue(), cc) bs, err := json.Marshal(cs) if err != nil { diff --git a/cmd/minikube/cmd/status_test.go b/cmd/minikube/cmd/status_test.go index bd794cbe32d0..f507fd9b3574 100644 --- a/cmd/minikube/cmd/status_test.go +++ b/cmd/minikube/cmd/status_test.go @@ -20,22 +20,24 @@ import ( "bytes" "encoding/json" "testing" + + 
"k8s.io/minikube/pkg/minikube/cluster" ) func TestExitCode(t *testing.T) { var tests = []struct { name string want int - state *Status + state *cluster.Status }{ - {"ok", 0, &Status{Host: "Running", Kubelet: "Running", APIServer: "Running", Kubeconfig: Configured}}, - {"paused", 2, &Status{Host: "Running", Kubelet: "Stopped", APIServer: "Paused", Kubeconfig: Configured}}, - {"down", 7, &Status{Host: "Stopped", Kubelet: "Stopped", APIServer: "Stopped", Kubeconfig: Misconfigured}}, - {"missing", 7, &Status{Host: "Nonexistent", Kubelet: "Nonexistent", APIServer: "Nonexistent", Kubeconfig: "Nonexistent"}}, + {"ok", 0, &cluster.Status{Host: "Running", Kubelet: "Running", APIServer: "Running", Kubeconfig: Configured}}, + {"paused", 2, &cluster.Status{Host: "Running", Kubelet: "Stopped", APIServer: "Paused", Kubeconfig: Configured}}, + {"down", 7, &cluster.Status{Host: "Stopped", Kubelet: "Stopped", APIServer: "Stopped", Kubeconfig: Misconfigured}}, + {"missing", 7, &cluster.Status{Host: "Nonexistent", Kubelet: "Nonexistent", APIServer: "Nonexistent", Kubeconfig: "Nonexistent"}}, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - got := exitCode([]*Status{tc.state}) + got := exitCode([]*cluster.Status{tc.state}) if got != tc.want { t.Errorf("exitcode(%+v) = %d, want: %d", tc.state, got, tc.want) } @@ -46,22 +48,22 @@ func TestExitCode(t *testing.T) { func TestStatusText(t *testing.T) { var tests = []struct { name string - state *Status + state *cluster.Status want string }{ { name: "ok", - state: &Status{Name: "minikube", Host: "Running", Kubelet: "Running", APIServer: "Running", Kubeconfig: Configured, TimeToStop: "10m"}, + state: &cluster.Status{Name: "minikube", Host: "Running", Kubelet: "Running", APIServer: "Running", Kubeconfig: Configured, TimeToStop: "10m"}, want: "minikube\ntype: Control Plane\nhost: Running\nkubelet: Running\napiserver: Running\nkubeconfig: Configured\ntimeToStop: 10m\n\n", }, { name: "paused", - state: &Status{Name: "minikube", 
Host: "Running", Kubelet: "Stopped", APIServer: "Paused", Kubeconfig: Configured}, + state: &cluster.Status{Name: "minikube", Host: "Running", Kubelet: "Stopped", APIServer: "Paused", Kubeconfig: Configured}, want: "minikube\ntype: Control Plane\nhost: Running\nkubelet: Stopped\napiserver: Paused\nkubeconfig: Configured\n\n", }, { name: "down", - state: &Status{Name: "minikube", Host: "Stopped", Kubelet: "Stopped", APIServer: "Stopped", Kubeconfig: Misconfigured}, + state: &cluster.Status{Name: "minikube", Host: "Stopped", Kubelet: "Stopped", APIServer: "Stopped", Kubeconfig: Misconfigured}, want: "minikube\ntype: Control Plane\nhost: Stopped\nkubelet: Stopped\napiserver: Stopped\nkubeconfig: Misconfigured\n\n\nWARNING: Your kubectl is pointing to stale minikube-vm.\nTo fix the kubectl context, run `minikube update-context`\n", }, } @@ -84,21 +86,21 @@ func TestStatusText(t *testing.T) { func TestStatusJSON(t *testing.T) { var tests = []struct { name string - state *Status + state *cluster.Status }{ - {"ok", &Status{Host: "Running", Kubelet: "Running", APIServer: "Running", Kubeconfig: Configured, TimeToStop: "10m"}}, - {"paused", &Status{Host: "Running", Kubelet: "Stopped", APIServer: "Paused", Kubeconfig: Configured}}, - {"down", &Status{Host: "Stopped", Kubelet: "Stopped", APIServer: "Stopped", Kubeconfig: Misconfigured}}, + {"ok", &cluster.Status{Host: "Running", Kubelet: "Running", APIServer: "Running", Kubeconfig: Configured, TimeToStop: "10m"}}, + {"paused", &cluster.Status{Host: "Running", Kubelet: "Stopped", APIServer: "Paused", Kubeconfig: Configured}}, + {"down", &cluster.Status{Host: "Stopped", Kubelet: "Stopped", APIServer: "Stopped", Kubeconfig: Misconfigured}}, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { var b bytes.Buffer - err := statusJSON([]*Status{tc.state}, &b) + err := statusJSON([]*cluster.Status{tc.state}, &b) if err != nil { t.Errorf("json(%+v) error: %v", tc.state, err) } - st := &Status{} + st := &cluster.Status{} if 
err := json.Unmarshal(b.Bytes(), st); err != nil { t.Errorf("json(%+v) unmarshal error: %v", tc.state, err) } diff --git a/pkg/minikube/cluster/status.go b/pkg/minikube/cluster/status.go new file mode 100644 index 000000000000..34d1700a94e9 --- /dev/null +++ b/pkg/minikube/cluster/status.go @@ -0,0 +1,510 @@ +/* +Copyright 2024 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cluster + +import ( + "bufio" + "encoding/json" + "fmt" + "os" + "strconv" + "strings" + "time" + + cloudevents "github.com/cloudevents/sdk-go/v2" + "github.com/docker/machine/libmachine" + "github.com/docker/machine/libmachine/state" + "github.com/pkg/errors" + "k8s.io/klog/v2" + "k8s.io/minikube/pkg/minikube/bootstrapper/bsutil/kverify" + "k8s.io/minikube/pkg/minikube/config" + "k8s.io/minikube/pkg/minikube/constants" + "k8s.io/minikube/pkg/minikube/driver" + "k8s.io/minikube/pkg/minikube/kubeconfig" + "k8s.io/minikube/pkg/minikube/localpath" + "k8s.io/minikube/pkg/minikube/machine" + "k8s.io/minikube/pkg/minikube/out/register" + "k8s.io/minikube/pkg/version" +) + +// Additional legacy states +const ( + // Configured means configured + Configured = "Configured" // ~state.Saved + // Misconfigured means misconfigured + Misconfigured = "Misconfigured" // ~state.Error + // Nonexistent means the resource does not exist + Nonexistent = "Nonexistent" // ~state.None + // Irrelevant is used for statuses that aren't meaningful for worker nodes + Irrelevant = 
"Irrelevant" +) + +// New status modes, based roughly on HTTP/SMTP standards +const ( + + // 1xx signifies a transitional state. If retried, it will soon return a 2xx, 4xx, or 5xx + + Starting = 100 + Pausing = 101 + Unpausing = 102 + Stopping = 110 + Deleting = 120 + + // 2xx signifies that the API Server is able to service requests + + OK = 200 + OKHAppy = 201 + Warning = 203 + Degraded = 204 + + // 4xx signifies an error that requires help from the client to resolve + + NotFound = 404 + Stopped = 405 + Paused = 418 // I'm a teapot! + + // 5xx signifies a server-side error (that may be retryable) + + Error = 500 + InsufficientStorage = 507 + Unknown = 520 +) + +var ( + exitCodeToHTTPCode = map[int]int{ + // exit code 26 corresponds to insufficient storage + 26: 507, + } + + codeNames = map[int]string{ + 100: "Starting", + 101: "Pausing", + 102: "Unpausing", + 110: "Stopping", + 103: "Deleting", + + 200: "OK", + 201: "OKHAppy", + 203: "Warning", + 204: "Degraded", + + 404: "NotFound", + 405: "Stopped", + 418: "Paused", + + 500: "Error", + 507: "InsufficientStorage", + 520: "Unknown", + } + + codeDetails = map[int]string{ + 507: "/var is almost out of disk space", + } +) + +// Status holds string representations of component states +type Status struct { + Name string + Host string + Kubelet string + APIServer string + Kubeconfig string + Worker bool + TimeToStop string `json:",omitempty"` + DockerEnv string `json:",omitempty"` + PodManEnv string `json:",omitempty"` +} + +// ClusterState holds a cluster state representation +// +//nolint:revive +type ClusterState struct { + BaseState + + BinaryVersion string + TimeToStop string `json:",omitempty"` + Components map[string]BaseState + Nodes []NodeState +} + +// NodeState holds a node state representation +type NodeState struct { + BaseState + Components map[string]BaseState `json:",omitempty"` +} + +// BaseState holds a component state representation, such as "apiserver" or "kubeconfig" +type BaseState struct { + // 
Name is the name of the object + Name string + + // StatusCode is an HTTP-like status code for this object + StatusCode int + // Name is a human-readable name for the status code + StatusName string + // StatusDetail is long human-readable string describing why this particular status code was chosen + StatusDetail string `json:",omitempty"` // Not yet implemented + + // Step is which workflow step the object is at. + Step string `json:",omitempty"` + // StepDetail is a long human-readable string describing the step + StepDetail string `json:",omitempty"` +} + +func GetStatus(api libmachine.API, cc *config.ClusterConfig) ([]*Status, error) { + var statuses []*Status + for _, n := range cc.Nodes { + machineName := config.MachineName(*cc, n) + klog.Infof("checking status of %s ...", machineName) + st, err := NodeStatus(api, *cc, n) + klog.Infof("%s status: %+v", machineName, st) + + if err != nil { + klog.Errorf("status error: %v", err) + return nil, err + } + if st.Host == Nonexistent { + err := fmt.Errorf("the %q host does not exist", machineName) + klog.Errorf("%v", err) + return nil, err + } + statuses = append(statuses, st) + } + return statuses, nil +} + +// clusterState converts Status structs into a ClusterState struct +// +//nolint:gocyclo +func GetClusterState(sts []*Status, profile string, cc *config.ClusterConfig) ClusterState { + statusName := sts[0].APIServer + if sts[0].Host == codeNames[InsufficientStorage] { + statusName = sts[0].Host + } + sc := statusCode(statusName) + + cs := ClusterState{ + BinaryVersion: version.GetVersion(), + + BaseState: BaseState{ + Name: profile, + StatusCode: sc, + StatusName: statusName, + StatusDetail: codeDetails[sc], + }, + + TimeToStop: sts[0].TimeToStop, + + Components: map[string]BaseState{ + "kubeconfig": {Name: "kubeconfig", StatusCode: statusCode(sts[0].Kubeconfig), StatusName: codeNames[statusCode(sts[0].Kubeconfig)]}, + }, + } + healthyCPs := 0 + for _, st := range sts { + ns := NodeState{ + BaseState: 
BaseState{ + Name: st.Name, + StatusCode: statusCode(st.Host), + }, + Components: map[string]BaseState{ + "kubelet": {Name: "kubelet", StatusCode: statusCode(st.Kubelet)}, + }, + } + + if st.APIServer != Irrelevant { + ns.Components["apiserver"] = BaseState{Name: "apiserver", StatusCode: statusCode(st.APIServer)} + } + + // Convert status codes to status names + ns.StatusName = codeNames[ns.StatusCode] + for k, v := range ns.Components { + v.StatusName = codeNames[v.StatusCode] + ns.Components[k] = v + } + + cs.Nodes = append(cs.Nodes, ns) + + // we also need to calculate how many control plane node is healthy + if !st.Worker && + st.Host == state.Running.String() && + st.Kubeconfig == Configured && + st.Kubelet == state.Running.String() && + st.APIServer == state.Running.String() { + healthyCPs++ + } + } + + evs, mtime, err := readEventLog(sts[0].Name) + if err != nil { + klog.Errorf("unable to read event log: %v", err) + return cs + } + + transientCode := 0 + started := false + var finalStep map[string]string + + for _, ev := range evs { + // klog.Infof("read event: %+v", ev) + if ev.Type() == "io.k8s.sigs.minikube.step" { + var data map[string]string + err := ev.DataAs(&data) + if err != nil { + klog.Errorf("unable to parse data: %v\nraw data: %s", err, ev.Data()) + continue + } + + switch data["name"] { + case string(register.InitialSetup): + transientCode = Starting + case string(register.Done): + transientCode = 0 + started = true + case string(register.Stopping): + klog.Infof("%q == %q", data["name"], register.Stopping) + transientCode = Stopping + case string(register.Deleting): + transientCode = Deleting + case string(register.Pausing): + transientCode = Pausing + case string(register.Unpausing): + transientCode = Unpausing + } + + finalStep = data + klog.Infof("transient code %d (%q) for step: %+v", transientCode, codeNames[transientCode], data) + } + if ev.Type() == "io.k8s.sigs.minikube.error" { + var data map[string]string + err := ev.DataAs(&data) + 
if err != nil { + klog.Errorf("unable to parse data: %v\nraw data: %s", err, ev.Data()) + continue + } + exitCode, err := strconv.Atoi(data["exitcode"]) + if err != nil { + klog.Errorf("exit code not found: %v", err) + continue + } + if val, ok := exitCodeToHTTPCode[exitCode]; ok { + exitCode = val + } + transientCode = exitCode + for _, n := range cs.Nodes { + n.StatusCode = transientCode + n.StatusName = codeNames[n.StatusCode] + } + + klog.Infof("transient code %d (%q) for step: %+v", transientCode, codeNames[transientCode], data) + } + } + + if finalStep != nil { + if mtime.Before(time.Now().Add(-10 * time.Minute)) { + klog.Warningf("event stream is too old (%s) to be considered a transient state", mtime) + } else { + cs.Step = strings.TrimSpace(finalStep["name"]) + cs.StepDetail = strings.TrimSpace(finalStep["message"]) + if transientCode != 0 { + cs.StatusCode = transientCode + } + } + } + + // if it is + if config.IsHA(*cc) && started { + switch { + case healthyCPs < 2: + cs.StatusCode = Stopped + case healthyCPs == 2: + cs.StatusCode = Degraded + default: + cs.StatusCode = OKHAppy + } + } + + cs.StatusName = codeNames[cs.StatusCode] + cs.StatusDetail = codeDetails[cs.StatusCode] + + return cs +}
+
+// NodeStatus looks up the status of a single node.
+//
+// It always returns a non-nil *Status. Every field starts as Nonexistent and is
+// filled in as far as the checks get, so the value is only partially populated
+// when it is returned together with an error.
+//
+// The checks stop early when there is no record of the host (state.None), when
+// the host is not running (its state is then copied to the apiserver, kubelet
+// and kubeconfig fields), and for worker nodes once the kubelet was inspected.
+func NodeStatus(api libmachine.API, cc config.ClusterConfig, n config.Node) (*Status, error) {
+	controlPlane := n.ControlPlane
+	name := config.MachineName(cc, n)
+
+	st := &Status{
+		Name:       name,
+		Host:       Nonexistent,
+		APIServer:  Nonexistent,
+		Kubelet:    Nonexistent,
+		Kubeconfig: Nonexistent,
+		Worker:     !controlPlane,
+	}
+
+	hs, err := machine.Status(api, name)
+	klog.Infof("%s host status = %q (err=%v)", name, hs, err)
+	if err != nil {
+		return st, errors.Wrap(err, "host")
+	}
+
+	// We have no record of this host. Return nonexistent struct
+	if hs == state.None.String() {
+		return st, nil
+	}
+	st.Host = hs
+
+	// If it's not running, quickly bail out rather than delivering conflicting messages
+	if st.Host != state.Running.String() {
+		klog.Infof("host is not running, skipping remaining checks")
+		st.APIServer = st.Host
+		st.Kubelet = st.Host
+		st.Kubeconfig = st.Host
+		return st, nil
+	}
+
+	// We have a fully operational host, now we can check for details
+	if _, err := DriverIP(api, name); err != nil {
+		klog.Errorf("failed to get driver ip: %v", err)
+		st.Host = state.Error.String()
+		return st, err
+	}
+
+	// Workers carry neither a kubeconfig entry nor an apiserver of their own.
+	st.Kubeconfig = Configured
+	if !controlPlane {
+		st.Kubeconfig = Irrelevant
+		st.APIServer = Irrelevant
+	}
+
+	host, err := machine.LoadHost(api, name)
+	if err != nil {
+		return st, err
+	}
+
+	cr, err := machine.CommandRunner(host)
+	if err != nil {
+		return st, err
+	}
+
+	// Check storage
+	p, err := machine.DiskUsed(cr, "/var")
+	if err != nil {
+		klog.Errorf("failed to get storage capacity of /var: %v", err)
+		st.Host = state.Error.String()
+		return st, err
+	}
+	if p >= 99 {
+		st.Host = codeNames[InsufficientStorage]
+	}
+
+	stk := kverify.ServiceStatus(cr, "kubelet")
+	st.Kubelet = stk.String()
+	if cc.ScheduledStop != nil {
+		initiationTime := time.Unix(cc.ScheduledStop.InitiationTime, 0)
+		st.TimeToStop = time.Until(initiationTime.Add(cc.ScheduledStop.Duration)).String()
+	}
+	if os.Getenv(constants.MinikubeActiveDockerdEnv) != "" {
+		st.DockerEnv = "in-use"
+	}
+	if os.Getenv(constants.MinikubeActivePodmanEnv) != "" {
+		st.PodManEnv = "in-use"
+	}
+	// Early exit for worker nodes
+	if !controlPlane {
+		return st, nil
+	}
+
+	// Pick the endpoint the apiserver is reached through: the auto-pause proxy
+	// when that addon is enabled, otherwise the HA VIP, replaced by the
+	// control-plane endpoint for non-HA clusters or drivers needing port forwarding.
+	var hostname string
+	var port int
+	if cc.Addons["auto-pause"] {
+		hostname, _, port, err = driver.AutoPauseProxyEndpoint(&cc, &n, host.DriverName)
+	} else {
+		hostname = cc.KubernetesConfig.APIServerHAVIP
+		port = cc.APIServerPort
+		if !config.IsHA(cc) || driver.NeedsPortForward(cc.Driver) {
+			hostname, _, port, err = driver.ControlPlaneEndpoint(&cc, &n, host.DriverName)
+		}
+	}
+
+	// A kubeconfig endpoint mismatch is not reported while the host state is Starting.
+	if err != nil {
+		klog.Errorf("forwarded endpoint: %v", err)
+		st.Kubeconfig = Misconfigured
+	} else if err := kubeconfig.VerifyEndpoint(cc.Name, hostname, port, ""); err != nil && st.Host != state.Starting.String() {
+		klog.Errorf("kubeconfig endpoint: %v", err)
+		st.Kubeconfig = Misconfigured
+	}
+
+	sta, err := kverify.APIServerStatus(cr, hostname, port)
+	// Log the apiserver status that was just queried (sta), not the kubelet status (stk).
+	klog.Infof("%s apiserver status = %s (err=%v)", name, sta, err)
+
+	if err != nil {
+		klog.Errorln("Error apiserver status:", err)
+		st.APIServer = state.Error.String()
+	} else {
+		st.APIServer = sta.String()
+	}
+
+	return st, nil
+}
+
+// readEventLog reads cloudevent logs from $MINIKUBE_HOME/profiles/<name>/events.json
+//
+// It returns the events decoded so far, the modification time of the log file
+// (the zero time when the file cannot be stat'ed), and the first error hit.
+// Each line of the file is decoded as one JSON cloudevent; reading stops at the
+// first line that fails to decode.
+func readEventLog(name string) ([]cloudevents.Event, time.Time, error) {
+	path := localpath.EventLog(name)
+
+	st, err := os.Stat(path)
+	if err != nil {
+		return nil, time.Time{}, errors.Wrap(err, "stat")
+	}
+
+	f, err := os.Open(path)
+	if err != nil {
+		return nil, st.ModTime(), errors.Wrap(err, "open")
+	}
+	// Release the descriptor on every return path; the file is only read here.
+	defer f.Close()
+
+	var events []cloudevents.Event
+
+	scanner := bufio.NewScanner(f)
+	for scanner.Scan() {
+		ev := cloudevents.NewEvent()
+		if err = json.Unmarshal(scanner.Bytes(), &ev); err != nil {
+			return events, st.ModTime(), err
+		}
+		events = append(events, ev)
+	}
+
+	return events, st.ModTime(), scanner.Err()
+}
+
+// statusCode returns a status code number given a name
+//
+// The legacy names are mapped first; any other name is looked up among the
+// values of codeNames, and Unknown is returned when nothing matches.
+func statusCode(st string) int {
+	// legacy names
+	switch st {
+	case "Running", "Configured":
+		return OK
+	case "Misconfigured":
+		return Error
+	}
+
+	// new names
+	for code, name := range codeNames {
+		if name == st {
+			return code
+		}
+	}
+
+	return Unknown
+}
diff --git a/test/integration/ha_test.go b/test/integration/ha_test.go index 258c4be88fd8..cd20b967efc1 100644 --- a/test/integration/ha_test.go +++ b/test/integration/ha_test.go @@ -30,7 +30,7 @@ import ( "testing" "time" - "k8s.io/minikube/cmd/minikube/cmd" + "k8s.io/minikube/pkg/minikube/cluster" 
"k8s.io/minikube/pkg/minikube/config" "k8s.io/minikube/pkg/util/retry" ) @@ -328,7 +328,7 @@ func validateHACopyFile(ctx context.Context, t *testing.T, profile string) { t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err) } - var statuses []cmd.Status + var statuses []cluster.Status if err = json.Unmarshal(rr.Stdout.Bytes(), &statuses); err != nil { t.Errorf("failed to decode json from status: args %q: %v", rr.Command(), err) } diff --git a/test/integration/multinode_test.go b/test/integration/multinode_test.go index 3800f4b4f9b4..b4140fce8abf 100644 --- a/test/integration/multinode_test.go +++ b/test/integration/multinode_test.go @@ -30,7 +30,7 @@ import ( "testing" "time" - "k8s.io/minikube/cmd/minikube/cmd" + "k8s.io/minikube/pkg/minikube/cluster" "k8s.io/minikube/pkg/minikube/config" "k8s.io/minikube/pkg/util/retry" ) @@ -186,7 +186,7 @@ func validateCopyFileWithMultiNode(ctx context.Context, t *testing.T, profile st t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err) } - var statuses []cmd.Status + var statuses []cluster.Status if err = json.Unmarshal(rr.Stdout.Bytes(), &statuses); err != nil { t.Errorf("failed to decode json from status: args %q: %v", rr.Command(), err) } diff --git a/test/integration/pause_test.go b/test/integration/pause_test.go index ffc952252565..72357e88c8c6 100644 --- a/test/integration/pause_test.go +++ b/test/integration/pause_test.go @@ -25,7 +25,7 @@ import ( "strings" "testing" - "k8s.io/minikube/cmd/minikube/cmd" + "k8s.io/minikube/pkg/minikube/cluster" ) // TestPause tests minikube pause functionality @@ -191,12 +191,12 @@ func validateStatus(ctx context.Context, t *testing.T, profile string) { defer PostMortemLogs(t, profile) statusOutput := runStatusCmd(ctx, t, profile, false) - var cs cmd.ClusterState + var cs cluster.ClusterState if err := json.Unmarshal(statusOutput, &cs); err != nil { t.Fatalf("unmarshalling: %v", err) } // verify the status looks as we expect - if cs.StatusCode 
!= cmd.Paused { + if cs.StatusCode != cluster.Paused { t.Fatalf("incorrect status code: %v", cs.StatusCode) } if cs.StatusName != "Paused" { diff --git a/test/integration/status_test.go b/test/integration/status_test.go index 67c8250d349a..a1d8783b35ad 100644 --- a/test/integration/status_test.go +++ b/test/integration/status_test.go @@ -27,7 +27,7 @@ import ( "path" "testing" - "k8s.io/minikube/cmd/minikube/cmd" + "k8s.io/minikube/pkg/minikube/cluster" "k8s.io/minikube/pkg/minikube/constants" "k8s.io/minikube/pkg/minikube/localpath" ) @@ -82,19 +82,19 @@ func runStatusCmd(ctx context.Context, t *testing.T, profile string, increaseEnv } func verifyClusterState(t *testing.T, contents []byte) { - var cs cmd.ClusterState + var cs cluster.ClusterState if err := json.Unmarshal(contents, &cs); err != nil { t.Fatalf("unmarshalling: %v", err) } // verify the status looks as we expect - if cs.StatusCode != cmd.InsufficientStorage { + if cs.StatusCode != cluster.InsufficientStorage { t.Fatalf("incorrect status code: %v", cs.StatusCode) } if cs.StatusName != "InsufficientStorage" { t.Fatalf("incorrect status name: %v", cs.StatusName) } for _, n := range cs.Nodes { - if n.StatusCode != cmd.InsufficientStorage { + if n.StatusCode != cluster.InsufficientStorage { t.Fatalf("incorrect node status code: %v", cs.StatusCode) } }