diff --git a/Dockerfile.arm64 b/Dockerfile.arm64 index 602146e6..c1f56f81 100644 --- a/Dockerfile.arm64 +++ b/Dockerfile.arm64 @@ -14,6 +14,6 @@ FROM arm64v8/alpine:3.7 LABEL maintainer "Casey Davenport " -ADD bin/kube-controllers-linux-amd64 /usr/bin/kube-controllers -ADD bin/check-status-linux-amd64 /usr/bin/check-status +ADD bin/kube-controllers-linux-arm64 /usr/bin/kube-controllers +ADD bin/check-status-linux-arm64 /usr/bin/check-status ENTRYPOINT ["/usr/bin/kube-controllers"] diff --git a/Makefile b/Makefile index 794dd61f..6bc60074 100644 --- a/Makefile +++ b/Makefile @@ -138,7 +138,7 @@ image-all: $(addprefix sub-image-,$(ARCHES)) sub-image-%: $(MAKE) image ARCH=$* -image.created-$(ARCH): bin/kube-controllers-linux-$(ARCH) +image.created-$(ARCH): bin/kube-controllers-linux-$(ARCH) bin/check-status-linux-$(ARCH) # Build the docker image for the policy controller. docker build -t $(CONTAINER_NAME):latest-$(ARCH) -f Dockerfile.$(ARCH) . ifeq ($(ARCH),amd64) diff --git a/cmd/check-status/main.go b/cmd/check-status/main.go index 5eaa284d..0426822a 100644 --- a/cmd/check-status/main.go +++ b/cmd/check-status/main.go @@ -18,7 +18,6 @@ import ( "flag" "fmt" "os" - "time" "github.com/projectcalico/kube-controllers/pkg/status" ) @@ -49,15 +48,8 @@ func main() { if *checkReady { var st *status.Status var err error - // Try reading the status file up to 3 times. - for i := 0; i < 3; i++ { - st, err = status.ReadStatusFile(*file) - if err != nil { - time.Sleep(1 * time.Second) - continue - } - break - } + + st, err = status.ReadStatusFile(*file) if err != nil { fmt.Printf("Failed to read status file %s: %v\n", *file, err) os.Exit(1) diff --git a/cmd/kube-controllers/main.go b/cmd/kube-controllers/main.go index 13796d87..e0311e93 100644 --- a/cmd/kube-controllers/main.go +++ b/cmd/kube-controllers/main.go @@ -18,6 +18,7 @@ import ( "context" "flag" "fmt" + "net/http" "os" "strings" "time" @@ -30,6 +31,7 @@ import ( "github.com/projectcalico/kube-controllers/pkg/controllers/node" "github.com/projectcalico/kube-controllers/pkg/controllers/pod" "github.com/projectcalico/kube-controllers/pkg/controllers/serviceaccount" + "github.com/projectcalico/kube-controllers/pkg/status" "github.com/projectcalico/libcalico-go/lib/apiconfig" client "github.com/projectcalico/libcalico-go/lib/clientv3" "github.com/projectcalico/libcalico-go/lib/logutils" @@ -95,6 +97,12 @@ func main() { if err != nil { log.WithError(err).Fatal("Failed to initialize Calico datastore") } + // Initialize readiness to false if enabled + s := status.New(status.DefaultStatusFile) + if config.HealthEnabled { + s.SetReady("CalicoDatastore", false, "initialized to false") + s.SetReady("KubeAPIServer", false, "initialized to false") + } for _, controllerType := range strings.Split(config.EnabledControllers, ",") { switch controllerType { @@ -121,8 +129,43 @@ func main() { // If configured to do so, start an etcdv3 compaction. startCompactor(ctx, config) - // Wait forever. - select {} + // Wait forever and perform healthchecks. + for { + // skip healthchecks if configured + if !config.HealthEnabled { + select {} + } + // Datastore HealthCheck + healthCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + err = calicoClient.EnsureInitialized(healthCtx, "", "k8s") + if err != nil { + log.WithError(err).Errorf("Failed to verify datastore") + s.SetReady( + "CalicoDatastore", + false, + fmt.Sprintf("Error verifying datastore: %v", err), + ) + } else { + s.SetReady("CalicoDatastore", true, "") + } + cancel() + + // Kube-apiserver HealthCheck + healthStatus := 0 + k8sClientset.Discovery().RESTClient().Get().AbsPath("/healthz").Do().StatusCode(&healthStatus) + if healthStatus != http.StatusOK { + log.WithError(err).Errorf("Failed to reach apiserver") + s.SetReady( + "KubeAPIServer", + false, + fmt.Sprintf("Error reaching apiserver: %v with http status code: %d", err, healthStatus), + ) + } else { + s.SetReady("KubeAPIServer", true, "") + } + + time.Sleep(10 * time.Second) + } } // Starts an etcdv3 compaction goroutine with the given config. diff --git a/pkg/config/config.go b/pkg/config/config.go index 61b8cf70..d7273138 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -39,6 +39,9 @@ type Config struct { // Path to a kubeconfig file to use for accessing the k8s API. Kubeconfig string `default:"" split_words:"false"` + + // Enable healthchecks + HealthEnabled bool `default:"true"` } // Parse parses envconfig and stores in Config struct diff --git a/pkg/status/status.go b/pkg/status/status.go index fcf2c573..9b1af0c0 100644 --- a/pkg/status/status.go +++ b/pkg/status/status.go @@ -111,7 +111,7 @@ func (s *Status) GetNotReadyConditions() string { // WriteStatus writes out the status in json format. func (c *Status) WriteStatus() error { - b, err := json.Marshal(*c) + b, err := json.Marshal(c) if err != nil { logrus.Errorf("Failed to marshal readiness: %s", err) return err diff --git a/tests/fv/fv_test.go b/tests/fv/fv_test.go index b5aaaaa0..36d7d08a 100644 --- a/tests/fv/fv_test.go +++ b/tests/fv/fv_test.go @@ -20,6 +20,7 @@ import ( "io/ioutil" "net" "os" + "os/exec" "time" "k8s.io/api/core/v1" @@ -108,6 +109,60 @@ var _ = Describe("kube-controllers FV tests", func() { Expect(*info.Spec.DatastoreReady).To(BeTrue()) }) + Context("Healthcheck FV tests", func() { + It("should pass health check", func() { + // wait for a health check cycle to pass + Eventually(func() []byte { + cmd := exec.Command("docker", "exec", policyController.Name, "/usr/bin/check-status", "-r") + stdoutStderr, _ := cmd.CombinedOutput() + + return stdoutStderr + }, 20*time.Second, 500*time.Millisecond).ShouldNot(ContainSubstring("initialized to false")) + Eventually(func() []byte { + cmd := exec.Command("docker", "exec", policyController.Name, "/usr/bin/check-status", "-r") + stdoutStderr, _ := cmd.CombinedOutput() + + return stdoutStderr + }, 20*time.Second, 500*time.Millisecond).ShouldNot(ContainSubstring("Error")) + }) + + It("should fail health check if apiserver is not running", func() { + // wait for a health check cycle to pass + Eventually(func() []byte { + cmd := exec.Command("docker", "exec", policyController.Name, "/usr/bin/check-status", "-r") + stdoutStderr, _ := cmd.CombinedOutput() + + return stdoutStderr + }, 20*time.Second, 500*time.Millisecond).ShouldNot(ContainSubstring("initialized to false")) + + apiserver.Stop() + Eventually(func() []byte { + cmd := exec.Command("docker", "exec", policyController.Name, "/usr/bin/check-status", "-r") + stdoutStderr, _ := cmd.CombinedOutput() + + return stdoutStderr + }, 20*time.Second, 500*time.Millisecond).Should(ContainSubstring("Error reaching apiserver")) + }) + + It("should fail health check if etcd not running", func() { + // wait for a health check cycle to pass + Eventually(func() []byte { + cmd := exec.Command("docker", "exec", policyController.Name, "/usr/bin/check-status", "-r") + stdoutStderr, _ := cmd.CombinedOutput() + + return stdoutStderr + }, 20*time.Second, 500*time.Millisecond).ShouldNot(ContainSubstring("initialized to false")) + + etcd.Stop() + Eventually(func() []byte { + cmd := exec.Command("docker", "exec", policyController.Name, "/usr/bin/check-status", "-r") + stdoutStderr, _ := cmd.CombinedOutput() + + return stdoutStderr + }, 20*time.Second, 500*time.Millisecond).Should(ContainSubstring("Error verifying datastore")) + }) + }) + Context("Node FV tests", func() { It("should be removed in response to a k8s node delete [Release]", func() { kn := &v1.Node{