Skip to content

Commit

Permalink
add healthchecks to kube-controller
Browse files Browse the repository at this point in the history
checks datastore and apiserver. Includes fv test for healthcheck

Signed-off-by: derek mcquay <derek@tigera.io>
  • Loading branch information
dmmcquay committed Jul 12, 2018
1 parent 6feb62d commit 5bb0edc
Show file tree
Hide file tree
Showing 7 changed files with 109 additions and 16 deletions.
4 changes: 2 additions & 2 deletions Dockerfile.arm64
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@
FROM arm64v8/alpine:3.7
LABEL maintainer "Casey Davenport <casey@tigera.io>"

ADD bin/kube-controllers-linux-amd64 /usr/bin/kube-controllers
ADD bin/check-status-linux-amd64 /usr/bin/check-status
ADD bin/kube-controllers-linux-arm64 /usr/bin/kube-controllers
ADD bin/check-status-linux-arm64 /usr/bin/check-status
ENTRYPOINT ["/usr/bin/kube-controllers"]
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ image-all: $(addprefix sub-image-,$(ARCHES))
sub-image-%:
$(MAKE) image ARCH=$*

image.created-$(ARCH): bin/kube-controllers-linux-$(ARCH)
image.created-$(ARCH): bin/kube-controllers-linux-$(ARCH) bin/check-status-linux-$(ARCH)
# Build the docker image for the policy controller.
docker build -t $(CONTAINER_NAME):latest-$(ARCH) -f Dockerfile.$(ARCH) .
ifeq ($(ARCH),amd64)
Expand Down
12 changes: 2 additions & 10 deletions cmd/check-status/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
"flag"
"fmt"
"os"
"time"

"github.com/projectcalico/kube-controllers/pkg/status"
)
Expand Down Expand Up @@ -49,15 +48,8 @@ func main() {
if *checkReady {
var st *status.Status
var err error
// Try reading the status file up to 3 times.
for i := 0; i < 3; i++ {
st, err = status.ReadStatusFile(*file)
if err != nil {
time.Sleep(1 * time.Second)
continue
}
break
}

st, err = status.ReadStatusFile(*file)
if err != nil {
fmt.Printf("Failed to read status file %s: %v\n", *file, err)
os.Exit(1)
Expand Down
47 changes: 45 additions & 2 deletions cmd/kube-controllers/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"context"
"flag"
"fmt"
"net/http"
"os"
"strings"
"time"
Expand All @@ -30,6 +31,7 @@ import (
"github.com/projectcalico/kube-controllers/pkg/controllers/node"
"github.com/projectcalico/kube-controllers/pkg/controllers/pod"
"github.com/projectcalico/kube-controllers/pkg/controllers/serviceaccount"
"github.com/projectcalico/kube-controllers/pkg/status"
"github.com/projectcalico/libcalico-go/lib/apiconfig"
client "github.com/projectcalico/libcalico-go/lib/clientv3"
"github.com/projectcalico/libcalico-go/lib/logutils"
Expand Down Expand Up @@ -95,6 +97,12 @@ func main() {
if err != nil {
log.WithError(err).Fatal("Failed to initialize Calico datastore")
}
// Create the status tracker; if healthchecks are enabled, start with both conditions marked not ready.
s := status.New(status.DefaultStatusFile)
if config.HealthEnabled {
s.SetReady("CalicoDatastore", false, "initialized to false")
s.SetReady("KubeAPIServer", false, "initialized to false")
}

for _, controllerType := range strings.Split(config.EnabledControllers, ",") {
switch controllerType {
Expand All @@ -121,8 +129,43 @@ func main() {
// If configured to do so, start an etcdv3 compaction.
startCompactor(ctx, config)

// Wait forever.
select {}
// Wait forever and perform healthchecks.
for {
// If healthchecks are disabled, block forever here instead of running them.
if !config.HealthEnabled {
select {}
}
// Datastore HealthCheck
healthCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
err = calicoClient.EnsureInitialized(healthCtx, "", "k8s")
if err != nil {
log.WithError(err).Errorf("Failed to verify datastore")
s.SetReady(
"CalicoDatastore",
false,
fmt.Sprintf("Error verifying datastore: %v", err),
)
} else {
s.SetReady("CalicoDatastore", true, "")
}
cancel()

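// Kube-apiserver HealthCheck
// NOTE(review): the request below never assigns err, so the err logged and
// formatted in the failure branch is left over from the datastore check above
// (and may be nil). Consider capturing the request's own error from the result.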
healthStatus := 0
k8sClientset.Discovery().RESTClient().Get().AbsPath("/healthz").Do().StatusCode(&healthStatus)
if healthStatus != http.StatusOK {
log.WithError(err).Errorf("Failed to reach apiserver")
s.SetReady(
"KubeAPIServer",
false,
fmt.Sprintf("Error reaching apiserver: %v with http status code: %d", err, healthStatus),
)
} else {
s.SetReady("KubeAPIServer", true, "")
}

time.Sleep(10 * time.Second)
}
}

// Starts an etcdv3 compaction goroutine with the given config.
Expand Down
3 changes: 3 additions & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ type Config struct {

// Path to a kubeconfig file to use for accessing the k8s API.
Kubeconfig string `default:"" split_words:"false"`

// HealthEnabled turns on the periodic datastore and API server healthchecks (default: true).
HealthEnabled bool `default:"true"`
}

// Parse parses envconfig and stores in Config struct
Expand Down
2 changes: 1 addition & 1 deletion pkg/status/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ func (s *Status) GetNotReadyConditions() string {

// WriteStatus writes out the status in json format.
func (c *Status) WriteStatus() error {
b, err := json.Marshal(*c)
b, err := json.Marshal(c)
if err != nil {
logrus.Errorf("Failed to marshal readiness: %s", err)
return err
Expand Down
55 changes: 55 additions & 0 deletions tests/fv/fv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"io/ioutil"
"net"
"os"
"os/exec"
"time"

"k8s.io/api/core/v1"
Expand Down Expand Up @@ -108,6 +109,60 @@ var _ = Describe("kube-controllers FV tests", func() {
Expect(*info.Spec.DatastoreReady).To(BeTrue())
})

Context("Healthcheck FV tests", func() {
It("should pass health check", func() {
// Wait for the first health check cycle to complete, i.e. until the initial "initialized to false" status is gone.
Eventually(func() []byte {
cmd := exec.Command("docker", "exec", policyController.Name, "/usr/bin/check-status", "-r")
stdoutStderr, _ := cmd.CombinedOutput()

return stdoutStderr
}, 20*time.Second, 500*time.Millisecond).ShouldNot(ContainSubstring("initialized to false"))
Eventually(func() []byte {
cmd := exec.Command("docker", "exec", policyController.Name, "/usr/bin/check-status", "-r")
stdoutStderr, _ := cmd.CombinedOutput()

return stdoutStderr
}, 20*time.Second, 500*time.Millisecond).ShouldNot(ContainSubstring("Error"))
})

It("should fail health check if apiserver is not running", func() {
// Wait for the first health check cycle to complete, i.e. until the initial "initialized to false" status is gone.
Eventually(func() []byte {
cmd := exec.Command("docker", "exec", policyController.Name, "/usr/bin/check-status", "-r")
stdoutStderr, _ := cmd.CombinedOutput()

return stdoutStderr
}, 20*time.Second, 500*time.Millisecond).ShouldNot(ContainSubstring("initialized to false"))

apiserver.Stop()
Eventually(func() []byte {
cmd := exec.Command("docker", "exec", policyController.Name, "/usr/bin/check-status", "-r")
stdoutStderr, _ := cmd.CombinedOutput()

return stdoutStderr
}, 20*time.Second, 500*time.Millisecond).Should(ContainSubstring("Error reaching apiserver"))
})

It("should fail health check if etcd not running", func() {
// Wait for the first health check cycle to complete, i.e. until the initial "initialized to false" status is gone.
Eventually(func() []byte {
cmd := exec.Command("docker", "exec", policyController.Name, "/usr/bin/check-status", "-r")
stdoutStderr, _ := cmd.CombinedOutput()

return stdoutStderr
}, 20*time.Second, 500*time.Millisecond).ShouldNot(ContainSubstring("initialized to false"))

etcd.Stop()
Eventually(func() []byte {
cmd := exec.Command("docker", "exec", policyController.Name, "/usr/bin/check-status", "-r")
stdoutStderr, _ := cmd.CombinedOutput()

return stdoutStderr
}, 20*time.Second, 500*time.Millisecond).Should(ContainSubstring("Error verifying datastore"))
})
})

Context("Node FV tests", func() {
It("should be removed in response to a k8s node delete [Release]", func() {
kn := &v1.Node{
Expand Down

0 comments on commit 5bb0edc

Please sign in to comment.