Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

--wait, wait for more components #7375

Merged
merged 20 commits into from
Apr 4, 2020
48 changes: 46 additions & 2 deletions cmd/minikube/cmd/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import (
cmdcfg "k8s.io/minikube/cmd/minikube/cmd/config"
"k8s.io/minikube/pkg/drivers/kic/oci"
"k8s.io/minikube/pkg/minikube/bootstrapper/bsutil"
"k8s.io/minikube/pkg/minikube/bootstrapper/bsutil/kverify"
"k8s.io/minikube/pkg/minikube/bootstrapper/images"
"k8s.io/minikube/pkg/minikube/config"
"k8s.io/minikube/pkg/minikube/constants"
Expand Down Expand Up @@ -109,7 +110,7 @@ const (
downloadOnly = "download-only"
dnsProxy = "dns-proxy"
hostDNSResolver = "host-dns-resolver"
waitUntilHealthy = "wait"
waitComponents = "wait"
force = "force"
dryRun = "dry-run"
interactive = "interactive"
Expand Down Expand Up @@ -171,7 +172,7 @@ func initMinikubeFlags() {
startCmd.Flags().String(criSocket, "", "The cri socket path to be used.")
startCmd.Flags().String(networkPlugin, "", "The name of the network plugin.")
startCmd.Flags().Bool(enableDefaultCNI, false, "Enable the default CNI plugin (/etc/cni/net.d/k8s.conf). Used in conjunction with \"--network-plugin=cni\".")
startCmd.Flags().Bool(waitUntilHealthy, true, "Block until the apiserver is servicing API requests")
startCmd.Flags().StringSlice(waitComponents, kverify.DefaultWaitList, fmt.Sprintf("comma separated list of kubernetes components to verify and wait for after starting a cluster. defaults to %q, available options: %q . other acceptable values are 'all' or 'none', 'true' and 'false'", strings.Join(kverify.DefaultWaitList, ","), strings.Join(kverify.AllComponentsList, ",")))
startCmd.Flags().Duration(waitTimeout, 6*time.Minute, "max time to wait per Kubernetes core services to be healthy.")
startCmd.Flags().Bool(nativeSSH, true, "Use native Golang SSH client (default true). Set to 'false' to use the command line 'ssh' command when accessing the docker machine. Useful for the machine drivers when they will not start with 'Waiting for SSH'.")
startCmd.Flags().Bool(autoUpdate, true, "If set, automatically updates drivers to the latest version. Defaults to true.")
Expand Down Expand Up @@ -1068,6 +1069,7 @@ func createNode(cmd *cobra.Command, k8sVersion, kubeNodeName, drvName, repositor
},
Nodes: []config.Node{cp},
}
cfg.WaitForCompos = interpretWaitFlag(*cmd)
return cfg, cp, nil
}

Expand Down Expand Up @@ -1200,3 +1202,45 @@ func getKubernetesVersion(old *config.ClusterConfig) string {
}
return nv
}

// interpretWaitFlag translates the --wait flag into the set of components to
// wait for after the cluster has started.
//
// When the flag was not set on the command line, or its value cannot be read,
// kverify.DefaultComponents is returned. A single value of "false"/"none"
// selects kverify.NoComponents and a single value of "true"/"all" selects
// kverify.AllComponents; the boolean spellings are honoured because --wait was
// a boolean flag before minikube 1.9.0. Any other input is treated as a list of
// component names: names found in kverify.AllComponentsList are enabled in the
// result, unknown names are logged as a warning and skipped.
func interpretWaitFlag(cmd cobra.Command) map[string]bool {
	if !cmd.Flags().Changed(waitComponents) {
		glog.Infof("Wait Components : %+v", kverify.DefaultComponents)
		return kverify.DefaultComponents
	}

	waitFlags, err := cmd.Flags().GetStringSlice(waitComponents)
	if err != nil {
		glog.Infof("failed to get wait from flags, will use default wait components : %+v", kverify.DefaultComponents)
		return kverify.DefaultComponents
	}

	if len(waitFlags) == 1 {
		// respecting legacy flag before minikube 1.9.0, wait flag was boolean
		switch waitFlags[0] {
		case "false", "none":
			return kverify.NoComponents
		case "true", "all":
			return kverify.AllComponents
		}
	}

	// Build the result in a private copy. Assigning kverify.NoComponents
	// directly would alias the package-level map, so enabling a component here
	// would silently change what every later use of NoComponents sees.
	selected := make(map[string]bool, len(kverify.NoComponents))
	for name, enabled := range kverify.NoComponents {
		selected[name] = enabled
	}

	for _, wc := range waitFlags {
		seen := false
		for _, valid := range kverify.AllComponentsList {
			if wc == valid {
				selected[wc] = true
				seen = true
				break
			}
		}
		if !seen {
			glog.Warningf("The value %q is invalid for --wait flag. valid options are %q", wc, strings.Join(kverify.AllComponentsList, ","))
		}
	}
	return selected
}
206 changes: 206 additions & 0 deletions pkg/minikube/bootstrapper/bsutil/kverify/api_server.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
/*
Copyright 2020 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package kverify verifies a running kubernetes cluster is healthy
package kverify

import (
"crypto/tls"
"fmt"
"net"
"net/http"
"os/exec"
"path"
"strconv"
"strings"
"time"

"github.com/docker/machine/libmachine/state"
"github.com/golang/glog"
"github.com/pkg/errors"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apimachinery/pkg/version"
"k8s.io/client-go/kubernetes"
kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants"
"k8s.io/minikube/pkg/minikube/bootstrapper"
"k8s.io/minikube/pkg/minikube/command"
"k8s.io/minikube/pkg/minikube/config"
"k8s.io/minikube/pkg/minikube/cruntime"
)

// WaitForAPIServerProcess polls every 500ms until apiServerPID can find an
// apiserver process through cr.
//
// start is the reference time for the wait: polling is abandoned once more than
// timeout has elapsed since start, and once more than minLogCheckTime has
// elapsed since start every attempt first calls announceProblems (with r, bs,
// cfg and cr) and then sleeps for five API retry intervals.
//
// It returns nil once the process is found, and an error that includes the
// reason polling stopped otherwise.
func WaitForAPIServerProcess(r cruntime.Manager, bs bootstrapper.Bootstrapper, cfg config.ClusterConfig, cr command.Runner, start time.Time, timeout time.Duration) error {
	glog.Infof("waiting for apiserver process to appear ...")
	err := wait.PollImmediate(time.Millisecond*500, timeout, func() (bool, error) {
		if time.Since(start) > timeout {
			return false, fmt.Errorf("cluster wait timed out during process check")
		}

		if time.Since(start) > minLogCheckTime {
			announceProblems(r, bs, cfg, cr)
			time.Sleep(kconst.APICallRetryInterval * 5)
		}

		// A lookup failure only means the process is not there yet; keep polling.
		if _, ierr := apiServerPID(cr); ierr != nil {
			return false, nil
		}

		return true, nil
	})
	if err != nil {
		// Keep the cause so the caller can tell which deadline was hit.
		return fmt.Errorf("apiserver process never appeared: %v", err)
	}
	glog.Infof("duration metric: took %s to wait for apiserver process to appear ...", time.Since(start))
	return nil
}

// apiServerPID makes a best-effort guess at the apiserver pid by running pgrep
// through cr and parsing its trimmed standard output as an integer.
//
// It returns an error when the command fails or when the output is not a
// single integer.
func apiServerPID(cr command.Runner) (int, error) {
	pgrep := exec.Command("sudo", "pgrep", "-xnf", "kube-apiserver.*minikube.*")
	result, err := cr.RunCmd(pgrep)
	if err != nil {
		return 0, err
	}
	return strconv.Atoi(strings.TrimSpace(result.Stdout.String()))
}

// WaitForHealthyAPIServer waits for the apiserver to become healthy in two
// phases: first until apiServerHealthz(hostname, port) reports state.Running,
// then until the version served to client matches
// cfg.KubernetesConfig.KubernetesVersion.
//
// start is the reference time for the wait: either phase gives up once more
// than timeout has elapsed since start. During the healthz phase, once more
// than minLogCheckTime has elapsed since start, every attempt first calls
// announceProblems (with r, bs, cfg and cr) and then sleeps for five API retry
// intervals.
//
// It returns nil when both phases succeed, and an error that includes the
// reason polling stopped otherwise.
func WaitForHealthyAPIServer(r cruntime.Manager, bs bootstrapper.Bootstrapper, cfg config.ClusterConfig, cr command.Runner, client *kubernetes.Clientset, start time.Time, hostname string, port int, timeout time.Duration) error {
	glog.Infof("waiting for apiserver healthz status ...")
	hStart := time.Now()

	healthz := func() (bool, error) {
		if time.Since(start) > timeout {
			return false, fmt.Errorf("cluster wait timed out during healthz check")
		}

		if time.Since(start) > minLogCheckTime {
			announceProblems(r, bs, cfg, cr)
			time.Sleep(kconst.APICallRetryInterval * 5)
		}

		status, err := apiServerHealthz(hostname, port)
		if err != nil {
			glog.Warningf("status: %v", err)
			return false, nil
		}
		if status != state.Running {
			return false, nil
		}
		return true, nil
	}

	// NOTE(review): each poll is capped by kconst.DefaultControlPlaneTimeout as
	// well as by the start+timeout check inside the closure, so a caller-supplied
	// timeout larger than that constant is not fully honoured — confirm intent.
	if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, healthz); err != nil {
		return fmt.Errorf("apiserver healthz never reported healthy: %v", err)
	}

	vcheck := func() (bool, error) {
		if time.Since(start) > timeout {
			return false, fmt.Errorf("cluster wait timed out during version check")
		}
		if err := APIServerVersionMatch(client, cfg.KubernetesConfig.KubernetesVersion); err != nil {
			glog.Warningf("api server version match failed: %v", err)
			return false, nil
		}
		return true, nil
	}

	if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, vcheck); err != nil {
		return fmt.Errorf("controlPlane never updated to %s: %v", cfg.KubernetesConfig.KubernetesVersion, err)
	}

	glog.Infof("duration metric: took %s to wait for apiserver health ...", time.Since(hStart))
	return nil
}

// APIServerVersionMatch asks the apiserver behind client for its version and
// compares it with expected.
//
// It returns nil when the two compare equal, a wrapped error when the version
// cannot be fetched, and an error naming both versions when they differ.
func APIServerVersionMatch(client *kubernetes.Clientset, expected string) error {
	vi, err := client.ServerVersion()
	if err != nil {
		return errors.Wrap(err, "server version")
	}
	glog.Infof("control plane version: %s", vi)
	actual := vi.String()
	if version.CompareKubeAwareVersionStrings(actual, expected) != 0 {
		return fmt.Errorf("controlPlane = %q, expected: %q", actual, expected)
	}
	return nil
}

// APIServerStatus returns the apiserver status as a libmachine style
// state.State.
//
// The checks run in order:
//  1. If no apiserver pid can be found through cr, the result is state.Stopped.
//  2. If the freezer cgroup of that pid reports FREEZING or FROZEN, the result
//     is state.Paused.
//  3. In every other case, including any failure to read or parse the freezer
//     information, the result is whatever apiServerHealthz(hostname, port)
//     reports.
func APIServerStatus(cr command.Runner, hostname string, port int) (state.State, error) {
	glog.Infof("Checking apiserver status ...")

	pid, err := apiServerPID(cr)
	if err != nil {
		glog.Warningf("stopped: unable to get apiserver pid: %v", err)
		return state.Stopped, nil
	}

	// Get the freezer cgroup entry for this pid
	rr, err := cr.RunCmd(exec.Command("sudo", "egrep", "^[0-9]+:freezer:", fmt.Sprintf("/proc/%d/cgroup", pid)))
	if err != nil {
		glog.Warningf("unable to find freezer cgroup: %v", err)
		return apiServerHealthz(hostname, port)
	}
	freezer := strings.TrimSpace(rr.Stdout.String())
	glog.Infof("apiserver freezer: %q", freezer)
	// The entry is expected to have three colon-separated fields; the third is
	// the cgroup path used below.
	fparts := strings.Split(freezer, ":")
	if len(fparts) != 3 {
		glog.Warningf("unable to parse freezer - found %d parts: %s", len(fparts), freezer)
		return apiServerHealthz(hostname, port)
	}

	// path.Join (not filepath.Join): the file is read via cr under /sys, so the
	// separator must stay "/" regardless of the OS minikube itself runs on.
	rr, err = cr.RunCmd(exec.Command("sudo", "cat", path.Join("/sys/fs/cgroup/freezer", fparts[2], "freezer.state")))
	if err != nil {
		// Log err, as the other branches do, instead of reading rr on the failure
		// path. NOTE(review): rr may not be populated when RunCmd fails — confirm
		// against the command.Runner contract.
		glog.Errorf("unable to get freezer state: %v", err)
		return apiServerHealthz(hostname, port)
	}

	fs := strings.TrimSpace(rr.Stdout.String())
	glog.Infof("freezer state: %q", fs)
	if fs == "FREEZING" || fs == "FROZEN" {
		return state.Paused, nil
	}
	return apiServerHealthz(hostname, port)
}

// apiServerHealthz sends a GET to https://hostname:port/healthz and maps the
// outcome to a libmachine style state.State:
//   - the request fails (usually connection refused): state.Stopped
//   - HTTP 401, or any other status that is not 200: state.Error
//   - HTTP 200: state.Running
//
// The returned error is always nil; failures are logged and expressed through
// the returned state.
func apiServerHealthz(hostname string, port int) (state.State, error) {
	url := fmt.Sprintf("https://%s/healthz", net.JoinHostPort(hostname, fmt.Sprint(port)))
	glog.Infof("Checking apiserver healthz at %s ...", url)
	// To avoid: x509: certificate signed by unknown authority
	tr := &http.Transport{
		Proxy:           nil, // To avoid connectivity issues if http(s)_proxy is set.
		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
	}
	// NOTE(review): the client has no Timeout, so an apiserver that accepts the
	// connection but never answers blocks the caller — consider adding one.
	client := &http.Client{Transport: tr}
	resp, err := client.Get(url)
	// Connection refused, usually.
	if err != nil {
		glog.Infof("stopped: %s: %v", url, err)
		return state.Stopped, nil
	}
	// Only the status code is used, but the body must still be closed or every
	// health probe leaks its connection.
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusUnauthorized {
		glog.Errorf("%s returned code %d (unauthorized). Please ensure that your apiserver authorization settings make sense!", url, resp.StatusCode)
		return state.Error, nil
	}
	if resp.StatusCode != http.StatusOK {
		// err is known to be nil here, so only the response is worth logging.
		glog.Warningf("%s response: %+v", url, resp)
		return state.Error, nil
	}
	return state.Running, nil
}
56 changes: 56 additions & 0 deletions pkg/minikube/bootstrapper/bsutil/kverify/default_sa.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
Copyright 2020 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package kverify verifies a running kubernetes cluster is healthy
package kverify

import (
"fmt"
"time"

"github.com/golang/glog"
"github.com/pkg/errors"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/minikube/pkg/util/retry"
)

// WaitForDefaultSA waits until a service account named "default" exists in the
// "default" namespace.
//
// The check lists the namespace's service accounts through cs and is retried
// via retry.Expo, starting at 500ms, for at most timeout. It returns nil once
// the account is found, and otherwise the last failure wrapped with the time
// spent waiting.
func WaitForDefaultSA(cs *kubernetes.Clientset, timeout time.Duration) error {
	glog.Info("waiting for default service account to be created ...")
	start := time.Now()
	saReady := func() error {
		// equivalent to manual check of 'kubectl --context profile get serviceaccount default'
		sas, err := cs.CoreV1().ServiceAccounts("default").List(meta.ListOptions{})
		if err != nil {
			glog.Infof("temporary error waiting for default SA: %v", err)
			return err
		}
		for _, sa := range sas.Items {
			if sa.Name == "default" {
				glog.Infof("found service account: %q", sa.Name)
				return nil
			}
		}
		// Returning an error makes retry.Expo try again.
		return fmt.Errorf("couldn't find default service account")
	}
	if err := retry.Expo(saReady, 500*time.Millisecond, timeout); err != nil {
		return errors.Wrapf(err, "waited %s for SA", time.Since(start))
	}

	glog.Infof("duration metric: took %s for default service account to be created ...", time.Since(start))
	return nil
}
Loading