Skip to content
This repository has been archived by the owner on Feb 19, 2021. It is now read-only.

Commit

Permalink
Manage RRs per zone on healthy nodes only
Browse files Browse the repository at this point in the history
  • Loading branch information
mhmxs committed May 21, 2020
1 parent e1ba572 commit 8b41978
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 102 deletions.
11 changes: 5 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@
### Proposal documentation found here: https://github.com/mhmxs/calico-route-reflector-operator-proposal. Please feel free to share your ideas!!!

This Kubernetes operator can monitor and scale Calico route reflector pods based on cluster size. The operator has a few environment variables:
* `ROUTE_REFLECTOR_MIN` Minimum number of route reflector pods, default `3`
* `ROUTE_REFLECTOR_MAX` Maximum number of route reflector pods, default `10`
* `ROUTE_REFLECTOR_RATIO` Node / route reflector pod ratio, default `0.2` (`100 * 0.2 = 20`)
* `ROUTE_REFLECTOR_MIN` Minimum number of route reflector pods per zone, default `3`
* `ROUTE_REFLECTOR_MAX` Maximum number of route reflector pods per zone, default `25`
* `ROUTE_REFLECTOR_RATIO` Node / route reflector pod ratio, default `0.005` (`1000 * 0.005 = 5`)
* `ROUTE_REFLECTOR_NODE_LABEL` Node label of the route reflector nodes, default `calico-route-reflector=`
* `ROUTE_REFLECTOR_ZONE_LABEL` Node label of the zone, default ``

During the `api/core/v1/Node` reconcile phases it calculates the right number of route reflector pods by multiplying the number of nodes by the given ratio.
It updates the route reflector replicas to the expected number.
Expand All @@ -32,11 +33,9 @@ Build your own image:
## Roadmap

* Use custom resource instead of environment variables
* Auto balancing of route reflector pods between zones
* Take care on Node status, current POC checks only number of nodes
* Dedicated or preferred node label
* Disallow node label
* More sophisticated ratio calculation
* Handle taints and tolerations

# Contributing

Expand Down
184 changes: 109 additions & 75 deletions controllers/routereflectorconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package controllers
import (
"context"
"math"
"strings"

"github.com/go-logr/logr"
"github.com/prometheus/common/log"
Expand All @@ -32,19 +31,23 @@ import (
)

var (
nodeUnderDelete = ctrl.Result{}
finished = ctrl.Result{}

nodeGetError = ctrl.Result{}
nodeListError = ctrl.Result{}
nodeUpdateError = ctrl.Result{}
nodeNotFound = ctrl.Result{}
nodeCleaned = ctrl.Result{Requeue: true}
finished = ctrl.Result{}

nodeGetError = ctrl.Result{}
nodeCleanupError = ctrl.Result{}
nodeListError = ctrl.Result{}
nodeUpdateError = ctrl.Result{}
)

type RouteReflectorConfig struct {
Min int
Max int
Ration float64
NodeLabel string
Min int
Max int
Ration float64
NodeLabelKey string
NodeLabelValue string
ZoneLabel string
}

// RouteReflectorConfigReconciler reconciles a RouteReflectorConfig object
Expand All @@ -61,95 +64,126 @@ type reconcileImplClient interface {
List(context.Context, runtime.Object, ...client.ListOption) error
}

type reconcileImplParams struct {
request ctrl.Request
client reconcileImplClient
config RouteReflectorConfig
}

// +kubebuilder:rbac:groups=route-reflector.calico-route-reflector-operator.mhmxs.github.com,resources=routereflectorconfigs,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=route-reflector.calico-route-reflector-operator.mhmxs.github.com,resources=routereflectorconfigs/status,verbs=get;update;patch
// +kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;update;watch

func (r *RouteReflectorConfigReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
_ = r.Log.WithValues("routereflectorconfig", req.NamespacedName)

return reconcileImpl(&reconcileImplParams{
request: req,
client: r.Client,
config: r.config,
})
}

func reconcileImpl(params *reconcileImplParams) (ctrl.Result, error) {
node := &corev1.Node{}
err := params.client.Get(context.Background(), params.request.NamespacedName, node)

node := corev1.Node{}
err := r.Client.Get(context.Background(), req.NamespacedName, &node)
if err != nil && !errors.IsNotFound(err) {
log.Errorf("Unable to fetch node %s reason %s", params.request.NamespacedName, err.Error())
log.Errorf("Unable to fetch node %s reason %s", req.NamespacedName, err.Error())
return nodeGetError, err
} else if err == nil && node.GetDeletionTimestamp() != nil {
return nodeUnderDelete, nil
} else if errors.IsNotFound(err) {
log.Debugf("Node not found %s", req.NamespacedName)
return nodeNotFound, nil
} else if err == nil && node.GetDeletionTimestamp() != nil || !isNodeReady(&node) {
// Node is being deleted right now or has some issues, better to remove it from RRs
if err := r.cleanupLabel(req, &node, r.config.NodeLabelKey); err != nil {
log.Errorf("Unable to cleanup label on %s because of %s", req.NamespacedName, err.Error())
return nodeCleanupError, err
}

return nodeCleaned, nil
}

nodes := &corev1.NodeList{}
if err := params.client.List(context.Background(), nodes, &client.ListOptions{}); err != nil {
listOptions := client.ListOptions{}
if r.config.ZoneLabel != "" {
if nodeZone, ok := node.GetLabels()[r.config.ZoneLabel]; ok {
labels := client.MatchingLabels{r.config.ZoneLabel: nodeZone}
labels.ApplyToList(&listOptions)
}
}
log.Debugf("List options are %v", listOptions)
nodeList := corev1.NodeList{}
if err := r.Client.List(context.Background(), &nodeList, &listOptions); err != nil {
log.Errorf("Unable to list nodes ,reason %s", err.Error())
return nodeListError, err
}

expectedNumber := int(math.Round(float64(len(nodes.Items)) * params.config.Ration))
if expectedNumber < params.config.Min {
expectedNumber = params.config.Min
} else if expectedNumber > params.config.Max {
expectedNumber = params.config.Max
readyNodes := 0
actualReadyNumber := 0
nodes := map[*corev1.Node]bool{}
for _, n := range nodeList.Items {
nodes[&n] = isNodeReady(&n)
if nodes[&n] {
readyNodes++
if isLabeled(n.GetLabels(), r.config.NodeLabelKey, r.config.NodeLabelValue) {
actualReadyNumber++
}
}
}
log.Infof("Nodes are ready %d", readyNodes)
log.Infof("Actual number of healthy route reflector nodes are %d", actualReadyNumber)

expectedNumber := int(math.Round(float64(readyNodes) * r.config.Ration))
if expectedNumber < r.config.Min {
expectedNumber = r.config.Min
} else if expectedNumber > r.config.Max {
expectedNumber = r.config.Max
}
log.Infof("Expected number of route reflector pods are %d", expectedNumber)
log.Infof("Expected number of route reflector nodes are %d", expectedNumber)

for n, isReady := range nodes {
if !isReady {
// Node has some issues, better to remove it from RRs
if err := r.cleanupLabel(req, n, r.config.NodeLabelKey); err != nil {
log.Errorf("Unable to cleanup label on %s because of %s", req.NamespacedName, err.Error())
return nodeCleanupError, err
}

key, value := getKeyValue(params.config.NodeLabel)
actualNumber := 0
for _, n := range nodes.Items {
if isLabeled(n.GetLabels(), key, value) {
actualNumber++
continue
} else if expectedNumber == actualReadyNumber {
continue
}
}
log.Infof("Actual number of route reflector pods are %d", actualNumber)

if expectedNumber != actualNumber {
for _, n := range nodes.Items {
if expectedNumber == actualNumber {
break
}
labeled := isLabeled(n.GetLabels(), key, value)
if expectedNumber > actualNumber && !labeled {
log.Infof("Label node %s as route reflector", n.GetName())
n.Labels[key] = value
actualNumber++
} else if expectedNumber < actualNumber && labeled {
log.Infof("Remove node %s role route reflector", n.GetName())
delete(n.Labels, key)
actualNumber--
} else {
continue
}
labeled := isLabeled(n.GetLabels(), r.config.NodeLabelKey, r.config.NodeLabelValue)
if !labeled && expectedNumber > actualReadyNumber {
log.Infof("Label node %s as route reflector", n.GetName())
n.Labels[r.config.NodeLabelKey] = r.config.NodeLabelValue
actualReadyNumber++
} else if labeled && expectedNumber < actualReadyNumber {
log.Infof("Remove node %s role route reflector", n.GetName())
delete(n.Labels, r.config.NodeLabelKey)
actualReadyNumber--
} else {
continue
}

if err = params.client.Update(context.Background(), &n); err != nil {
log.Errorf("Unable to update node %s, reason %s", params.request.NamespacedName, err.Error())
return nodeUpdateError, err
}
log.Infof("Updating labels on node %s to %v", req.NamespacedName, n.Labels)
if err = r.Client.Update(context.Background(), n); err != nil {
log.Errorf("Unable to update node %s, reason %s", req.NamespacedName, err.Error())
return nodeUpdateError, err
}
}

return finished, nil
}

func getKeyValue(label string) (string, string) {
keyValue := strings.Split(label, "=")
if len(keyValue) == 1 {
keyValue[1] = ""
// cleanupLabel removes the label identified by labelKey from node and persists
// the change with r.Client.Update. When the node does not carry the label,
// nothing is written and nil is returned.
//
// The req parameter is kept for signature compatibility. Log messages name the
// node that is actually modified rather than the request, because Reconcile
// also calls this helper for nodes other than the one that triggered the
// request.
func (r *RouteReflectorConfigReconciler) cleanupLabel(req ctrl.Request, node *corev1.Node, labelKey string) error {
	if _, ok := node.GetLabels()[labelKey]; !ok {
		// Label is absent: avoid a needless API write.
		return nil
	}
	delete(node.Labels, labelKey)

	log.Infof("Removing route reflector label from %s", node.GetName())
	if err := r.Client.Update(context.Background(), node); err != nil {
		log.Errorf("Unable to cleanup node %s, reason %s", node.GetName(), err.Error())
		return err
	}

	return nil
}

// isNodeReady reports whether node carries a NodeReady condition whose status
// is ConditionTrue.
//
// The mere presence of a NodeReady condition is not enough: its status may be
// False or Unknown, and such nodes must not be counted as healthy. A node with
// no NodeReady condition at all is treated as not ready.
func isNodeReady(node *corev1.Node) bool {
	for _, c := range node.Status.Conditions {
		if c.Type == corev1.NodeReady {
			return c.Status == corev1.ConditionTrue
		}
	}

	return false
}

func isLabeled(labels map[string]string, key, value string) bool {
Expand All @@ -160,19 +194,19 @@ func isLabeled(labels map[string]string, key, value string) bool {
type eventFilter struct{}

func (ef eventFilter) Create(event.CreateEvent) bool {
return true
return false
}

// Delete accepts every delete event, so each one is passed on for reconciliation.
func (eventFilter) Delete(event.DeleteEvent) bool {
	const accept = true
	return accept
}

func (ef eventFilter) Update(event.UpdateEvent) bool {
return false
return true
}

func (ef eventFilter) Generic(event.GenericEvent) bool {
return false
return true
}

func (r *RouteReflectorConfigReconciler) SetupWithManager(mgr ctrl.Manager, config RouteReflectorConfig) error {
Expand Down
56 changes: 35 additions & 21 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"fmt"
"os"
"strconv"
"strings"

"k8s.io/apimachinery/pkg/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
Expand All @@ -36,11 +37,10 @@ import (
)

const (
routeReflectorMin = 3
routeReflectorMax = 10
routeReflectorRatio = 0.2
routeReflectorMaxRatio = 0.5
routeReflectorLabel = "calico-route-reflector="
routeReflectorMin = 3
routeReflectorMax = 10
routeReflectorRatio = 0.005
routeReflectorLabel = "calico-route-reflector"
)

var (
Expand Down Expand Up @@ -85,17 +85,19 @@ func main() {
panic(err)
}

min, max, ratio, nodeLabel := parseEnv()
min, max, ratio, nodeLabelKey, nodeLabelValue, zoneLabel := parseEnv()

if err = (&controllers.RouteReflectorConfigReconciler{
Client: mgr.GetClient(),
Log: ctrl.Log.WithName("controllers").WithName("RouteReflectorConfig"),
Scheme: mgr.GetScheme(),
}).SetupWithManager(mgr, controllers.RouteReflectorConfig{
Min: min,
Max: max,
Ration: ratio,
NodeLabel: nodeLabel,
Min: min,
Max: max,
Ration: ratio,
NodeLabelKey: nodeLabelKey,
NodeLabelValue: nodeLabelValue,
ZoneLabel: zoneLabel,
}); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "RouteReflectorConfig")
panic(err)
Expand All @@ -109,16 +111,16 @@ func main() {
}
}

func parseEnv() (int, int, float64, string) {
func parseEnv() (int, int, float64, string, string, string) {
var err error
min := routeReflectorMin
if v, ok := os.LookupEnv("ROUTE_REFLECTOR_MIN"); ok {
min, err = strconv.Atoi(v)
if err != nil {
setupLog.Error(err, "ROUTE_REFLECTOR_MIN is not an integer")
panic(err)
} else if min < 3 || min > 999 {
err = errors.New("ROUTE_REFLECTOR_MIN must be positive number between 3 and 999")
} else if min < 3 || min > 50 {
err = errors.New("ROUTE_REFLECTOR_MIN must be positive number between 3 and 50")
setupLog.Error(err, err.Error())
panic(err)
}
Expand All @@ -129,8 +131,8 @@ func parseEnv() (int, int, float64, string) {
if err != nil {
setupLog.Error(err, "ROUTE_REFLECTOR_MAX is not an integer")
panic(err)
} else if max < 5 || max > 2500 {
err = errors.New("ROUTE_REFLECTOR_MIN must be positive number between 5 and 2500")
} else if max < 5 || max > 50 {
err = errors.New("ROUTE_REFLECTOR_MIN must be positive number between 5 and 50")
setupLog.Error(err, err.Error())
panic(err)
}
Expand All @@ -139,18 +141,30 @@ func parseEnv() (int, int, float64, string) {
if v, ok := os.LookupEnv("ROUTE_REFLECTOR_RATIO"); ok {
ratio, err = strconv.ParseFloat(v, 32)
if err != nil {
setupLog.Error(err, "ROUTE_REFLECTOR_RATIO is not a float")
setupLog.Error(err, "ROUTE_REFLECTOR_RATIO is not a valid number")
panic(err)
} else if ratio > routeReflectorMaxRatio {
err = fmt.Errorf("ROUTE_REFLECTOR_RATIO is bigger than %f", routeReflectorMaxRatio)
} else if ratio < 0.001 || ratio > 0.05 {
err = errors.New("ROUTE_REFLECTOR_MIN must be a number between 0.001 and 0.05")
setupLog.Error(err, err.Error())
panic(err)
}
}
nodeLabel := routeReflectorLabel
nodeLabelKey := routeReflectorLabel
nodeLabelValue := ""
if v, ok := os.LookupEnv("ROUTE_REFLECTOR_NODE_LABEL"); ok {
nodeLabel = v
nodeLabelKey, nodeLabelValue = getKeyValue(v)
}

return min, max, ratio, nodeLabel
zoneLable := os.Getenv("ROUTE_REFLECTOR_ZONE_LABEL")

return min, max, ratio, nodeLabelKey, nodeLabelValue, zoneLable
}

// getKeyValue splits a "key=value" label expression into its key and value.
//
// A label without "=" (for example "calico-route-reflector") yields the whole
// string as key and an empty value; previously this case panicked with an
// index-out-of-range error. A trailing "=" ("key=") likewise yields an empty
// value. Anything after a second "=" is ignored, as before.
func getKeyValue(label string) (string, string) {
	// strings.Split with a non-empty separator always returns at least one
	// element, so parts[0] is safe to read.
	parts := strings.Split(label, "=")

	value := ""
	if len(parts) > 1 {
		value = parts[1]
	}

	return parts[0], value
}

0 comments on commit 8b41978

Please sign in to comment.