Pod readiness gates #955

Merged
Set pod condition for pod readiness gate in order to only mark pod `Ready` when it's healthy and taking traffic from ALB
alfredkrohmer committed Feb 19, 2020
commit b134750214101e069334afbdc9eb38dcfe736bc5
41 changes: 41 additions & 0 deletions docs/guide/ingress/pod-conditions.md
@@ -0,0 +1,41 @@
# Using pod conditions / pod readiness gates

One can add so-called [»Pod readiness gates«](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-readiness-gate) to Kubernetes pods. A readiness gate can be used, for example by a controller, to mark a pod as ready or unready by setting a custom condition on the pod.

The AWS ALB ingress controller can set such a condition on your pods. This is needed under certain circumstances to achieve full zero-downtime rolling deployments. Consider the following example:
* low number of replicas in a deployment (e.g. one to three)
* start a rolling update of the deployment
* rollout of new pods takes less time than it takes the ALB ingress controller to register the new pods and for their health state to turn »Healthy« in the target group
* at some point during this rolling update, the target group might only have registered targets that are in »Initial« or »Draining« state; this results in downtime for your service

In order to avoid this situation, the AWS ALB ingress controller can set the aforementioned condition on the pods that constitute your ingress backend services. The condition status on a pod will only be set to `true` when the corresponding target in the ALB target group shows a health state of »Healthy«. This prevents the rolling update of a deployment from terminating old pods until the newly created pods are »Healthy« in the ALB target group and ready to take traffic.


## Pod configuration

Add a readiness gate with `conditionType: target-health.alb.ingress.kubernetes.io/<ingress name>` to your pod:

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deployment
spec:
  selector:
    matchLabels:
      app: nginx
  replicas: 2
  template:
    metadata:
      labels:
        app: nginx
    spec:
      readinessGates:
      - conditionType: target-health.alb.ingress.kubernetes.io/nginx-ingress_nginx-service_80
      containers:
      - name: nginx
        image: nginx
        ports:
        - containerPort: 80
```

When the pods of this deployment are selected by a service which is a backend for the ingress `nginx-ingress`, the ALB ingress controller will set the condition on the pods according to their health state in the ALB target group, making sure the pods do not appear as »Ready« in Kubernetes unless the corresponding target in the ALB target group is considered »Healthy«.
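
Once the corresponding target is healthy, the condition shows up under the pod's `status.conditions`. The following is an illustrative sketch only (the timestamp is a placeholder, and the condition layout is the standard Kubernetes `PodCondition` shape, not verbatim controller output):

```yaml
status:
  conditions:
  - type: target-health.alb.ingress.kubernetes.io/nginx-ingress_nginx-service_80
    status: "True"
    lastProbeTime: null
    lastTransitionTime: "2020-02-19T12:00:00Z"
```

You can inspect the conditions with `kubectl get pod <pod name> -o yaml`, or see the readiness gate status in the `READINESS GATES` column of `kubectl get pods -o wide`.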
10 changes: 8 additions & 2 deletions internal/alb/tg/targetgroup.go
```diff
@@ -19,6 +19,7 @@ import (
 	extensions "k8s.io/api/extensions/v1beta1"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/intstr"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 )

 // The port used when creating targetGroup serves as a default value for targets registered without port specified.
@@ -32,11 +33,12 @@ const targetGroupDefaultPort = 1
 type Controller interface {
 	// Reconcile ensures an targetGroup exists for specified backend of ingress.
 	Reconcile(ctx context.Context, ingress *extensions.Ingress, backend extensions.IngressBackend) (TargetGroup, error)
+	StopReconcilingPodConditionStatus(tgArn string)
 }

-func NewController(cloud aws.CloudAPI, store store.Storer, nameTagGen NameTagGenerator, tagsController tags.Controller, endpointResolver backend.EndpointResolver) Controller {
+func NewController(cloud aws.CloudAPI, store store.Storer, nameTagGen NameTagGenerator, tagsController tags.Controller, endpointResolver backend.EndpointResolver, client client.Client) Controller {
 	attrsController := NewAttributesController(cloud)
-	targetsController := NewTargetsController(cloud, endpointResolver)
+	targetsController := NewTargetsController(cloud, endpointResolver, client)
 	return &defaultController{
 		cloud: cloud,
 		store: store,
@@ -115,6 +117,10 @@ func (controller *defaultController) Reconcile(ctx context.Context, ingress *ext
 	}, nil
 }

+func (controller *defaultController) StopReconcilingPodConditionStatus(tgArn string) {
+	controller.targetsController.StopReconcilingPodConditionStatus(tgArn)
+}
+
 func (controller *defaultController) newTGInstance(ctx context.Context, name string, serviceAnnos *annotations.Service, healthCheckPort string) (*elbv2.TargetGroup, error) {
 	albctx.GetLogger(ctx).Infof("creating target group %v", name)
 	resp, err := controller.cloud.CreateTargetGroupWithContext(ctx, &elbv2.CreateTargetGroupInput{
```
7 changes: 5 additions & 2 deletions internal/alb/tg/targetgroup_group.go
```diff
@@ -17,6 +17,7 @@ import (
 	extensions "k8s.io/api/extensions/v1beta1"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/sets"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 )

 // GroupController manages all target groups for one ingress.
@@ -37,8 +38,9 @@ func NewGroupController(
 	store store.Storer,
 	nameTagGen NameTagGenerator,
 	tagsController tags.Controller,
-	endpointResolver backend.EndpointResolver) GroupController {
-	tgController := NewController(cloud, store, nameTagGen, tagsController, endpointResolver)
+	endpointResolver backend.EndpointResolver,
+	client client.Client) GroupController {
+	tgController := NewController(cloud, store, nameTagGen, tagsController, endpointResolver, client)
 	return &defaultGroupController{
 		cloud: cloud,
 		store: store,
@@ -97,6 +99,7 @@ func (controller *defaultGroupController) GC(ctx context.Context, tgGroup Target
 	unusedTgArns := currentTgArns.Difference(usedTgArns)
 	for arn := range unusedTgArns {
 		albctx.GetLogger(ctx).Infof("deleting target group %v", arn)
+		controller.tgController.StopReconcilingPodConditionStatus(arn)
 		if err := controller.cloud.DeleteTargetGroupByArn(ctx, arn); err != nil {
 			return fmt.Errorf("failed to delete targetGroup due to %v", err)
 		}
```