Skip to content

Commit

Permalink
Reduce kubelet http2 timeouts. (gardener#10223)
Browse files Browse the repository at this point in the history
* Reduce kubelet http2 timeouts.

In the event that the connection from kubelet to kube-apiserver is silently dropped, it might take 45s to establish a new connection due to the default http2 timeouts.
During this period, node leases are not renewed. On the other hand, the default for node-monitor-grace-period in kube-controller-manager is 40s.
After this period, the node is marked as not ready, so the endpoints in EndpointSlices that point to pods on that node become unready and no longer receive any traffic.
This could result in service disruption of user workload following short connection issues from kubelet to kube-apiserver.
By reducing the sum of http timeouts to below 40s, this scenario can be prevented.

* Calculate http2 timeouts depending on nodeMonitorGracePeriod.

* Address review feedback.

Co-authored-by: Rafael Franzke <rafael.franzke@sap.com>

---------

Co-authored-by: Johannes Scheerer <johannes.scheerer@sap.com>
Co-authored-by: Rafael Franzke <rafael.franzke@sap.com>
  • Loading branch information
3 people authored Aug 12, 2024
1 parent 2f5ad80 commit de12dd6
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ type OriginalValues struct {
ValitailEnabled bool
// ValiIngressHostName is the ingress host name of the shoot's Vali.
ValiIngressHostName string
// NodeMonitorGracePeriod defines the grace period before an unresponsive node is marked unhealthy.
NodeMonitorGracePeriod metav1.Duration
// NodeLocalDNSEnabled indicates whether node local dns is enabled or not.
NodeLocalDNSEnabled bool
// PrimaryIPFamily represents the preferred IP family (IPv4 or IPv6) to be used.
Expand Down Expand Up @@ -738,6 +740,7 @@ func (o *operatingSystemConfig) newDeployer(version int, osc *extensionsv1alpha1
sshAccessEnabled: o.values.SSHAccessEnabled,
valiIngressHostName: o.values.ValiIngressHostName,
valitailEnabled: o.values.ValitailEnabled,
nodeMonitorGracePeriod: o.values.NodeMonitorGracePeriod,
nodeLocalDNSEnabled: o.values.NodeLocalDNSEnabled,
primaryIPFamily: o.values.PrimaryIPFamily,
taints: worker.Taints,
Expand Down Expand Up @@ -803,6 +806,7 @@ type deployer struct {
valiIngressHostName string
valitailEnabled bool
nodeLocalDNSEnabled bool
nodeMonitorGracePeriod metav1.Duration
primaryIPFamily gardencorev1beta1.IPFamily
taints []corev1.Taint
}
Expand Down Expand Up @@ -830,6 +834,7 @@ func (d *deployer) deploy(ctx context.Context, operation string) (extensionsv1al
CRIName: d.criName,
Images: d.images,
NodeLabels: gardenerutils.NodeLabelsForWorkerPool(d.worker, d.nodeLocalDNSEnabled, d.key),
NodeMonitorGracePeriod: d.nodeMonitorGracePeriod,
KubeletCABundle: d.kubeletCABundle,
KubeletConfigParameters: d.kubeletConfigParameters,
KubeletCLIFlags: d.kubeletCLIFlags,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package components
import (
"github.com/Masterminds/semver/v3"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

extensionsv1alpha1 "github.com/gardener/gardener/pkg/apis/extensions/v1alpha1"
"github.com/gardener/gardener/pkg/utils/imagevector"
Expand All @@ -29,6 +30,7 @@ type Context struct {
CRIName extensionsv1alpha1.CRIName
Images map[string]*imagevector.Image
NodeLabels map[string]string
NodeMonitorGracePeriod metav1.Duration
KubeletCABundle []byte
KubeletCLIFlags ConfigurableKubeletCLIFlags
KubeletConfigParameters ConfigurableKubeletConfigParameters
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ package kubelet

import (
_ "embed"
"strconv"
"strings"

"github.com/Masterminds/semver/v3"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/ptr"

"github.com/gardener/gardener/imagevector"
Expand Down Expand Up @@ -90,6 +92,8 @@ func (component) Config(ctx components.Context) ([]extensionsv1alpha1.Unit, []ex

cliFlags := CLIFlags(ctx.KubernetesVersion, ctx.NodeLabels, ctx.CRIName, ctx.KubeletCLIFlags, ctx.PreferIPv6)

http2ReadIdleTimeSeconds, http2PingTimeSeconds := calcKubeletHTTP2Timeouts(ctx.NodeMonitorGracePeriod)

kubeletUnit := extensionsv1alpha1.Unit{
Name: UnitName,
Command: ptr.To(extensionsv1alpha1.CommandStart),
Expand All @@ -103,6 +107,7 @@ WantedBy=multi-user.target
[Service]
Restart=always
RestartSec=5
Environment="HTTP2_READ_IDLE_TIMEOUT_SECONDS=` + strconv.Itoa(http2ReadIdleTimeSeconds) + `" "HTTP2_PING_TIMEOUT_SECONDS=` + strconv.Itoa(http2PingTimeSeconds) + `"
EnvironmentFile=/etc/environment
EnvironmentFile=-/var/lib/kubelet/extra_args
ExecStart=` + v1beta1constants.OperatingSystemConfigFilePathBinaries + `/kubelet \
Expand All @@ -122,3 +127,25 @@ func getFileContentKubeletConfig(kubernetesVersion *semver.Version, clusterDNSAd

return kcCodec.Encode(kubeletConfig, configFCI.Encoding)
}

// calcKubeletHTTP2Timeouts returns the values, in whole seconds, for the kubelet's
// HTTP2_READ_IDLE_TIMEOUT_SECONDS and HTTP2_PING_TIMEOUT_SECONDS environment variables (in that order).
//
// The client defaults are 30s for HTTP2_READ_IDLE_TIMEOUT_SECONDS and 15s for HTTP2_PING_TIMEOUT_SECONDS.
// This results in issues if the TCP connection to kube-apiserver is silently dropped, as the default
// node-monitor-grace-period is only 40s. Hence, HTTP2_READ_IDLE_TIMEOUT_SECONDS + HTTP2_PING_TIMEOUT_SECONDS
// should be less than node-monitor-grace-period.
//
// A grace period of 46s or more already leaves room for the defaults, so they are returned unchanged.
// A shorter grace period is reduced by a safety margin of 2s and the remainder is split 2:1 between the read idle
// timeout and the ping timeout, truncating fractions (e.g. 40s yields 25s and 12s).
//
// Computed timeouts are never smaller than 1s, because 0 is not a "very short timeout" for these variables:
// a read idle timeout of 0 disables the HTTP/2 connection health check altogether and a ping timeout of 0 makes
// the HTTP/2 transport fall back to its 15s default. A non-positive grace period is treated as "not configured"
// and yields the defaults as well.
func calcKubeletHTTP2Timeouts(nodeMonitorGracePeriod metav1.Duration) (int, int) {
	const (
		defaultReadIdleTimeoutSeconds = 30
		defaultPingTimeoutSeconds     = 15
		safetyMarginSeconds           = 2
		minTimeoutSeconds             = 1
	)

	gracePeriodSeconds := nodeMonitorGracePeriod.Duration.Seconds()
	if gracePeriodSeconds <= 0 || gracePeriodSeconds >= defaultReadIdleTimeoutSeconds+defaultPingTimeoutSeconds+1 {
		return defaultReadIdleTimeoutSeconds, defaultPingTimeoutSeconds
	}

	// atLeastMinimum truncates the given seconds to an int and raises the result to minTimeoutSeconds if it is
	// smaller, i.e. it computes max(int(seconds), minTimeoutSeconds).
	atLeastMinimum := func(seconds float64) int {
		return minTimeoutSeconds + positiveOrZero(int(seconds)-minTimeoutSeconds)
	}

	budgetSeconds := gracePeriodSeconds - safetyMarginSeconds
	return atLeastMinimum(budgetSeconds * 2 / 3), atLeastMinimum(budgetSeconds / 3)
}

// positiveOrZero clamps v at zero: negative inputs yield 0, every other input is returned unchanged.
func positiveOrZero(v int) int {
	if v <= 0 {
		return 0
	}
	return v
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package kubelet_test

import (
"strings"
"time"

"github.com/Masterminds/semver/v3"
. "github.com/onsi/ginkgo/v2"
Expand Down Expand Up @@ -57,6 +58,7 @@ var _ = Describe("Component", func() {
}
ctx.PreferIPv6 = preferIPv6
ctx.ClusterDNSAddresses = []string{"2001::db8:1", "2001::db8:2"}
ctx.NodeMonitorGracePeriod.Duration = time.Duration(40) * time.Second

cliFlags := CLIFlags(ctx.KubernetesVersion, ctx.NodeLabels, ctx.CRIName, ctx.KubeletCLIFlags, ctx.PreferIPv6)
units, files, err := component.Config(ctx)
Expand Down Expand Up @@ -230,6 +232,7 @@ WantedBy=multi-user.target
[Service]
Restart=always
RestartSec=5
Environment="HTTP2_READ_IDLE_TIMEOUT_SECONDS=25" "HTTP2_PING_TIMEOUT_SECONDS=12"
EnvironmentFile=/etc/environment
EnvironmentFile=-/var/lib/kubelet/extra_args` + kubeletStartPre + `
ExecStart=/opt/bin/kubelet \
Expand Down
19 changes: 10 additions & 9 deletions pkg/gardenlet/operation/botanist/operatingsystemconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,16 @@ func (b *Botanist) DefaultOperatingSystemConfig() (operatingsystemconfig.Interfa
KubernetesVersion: b.Shoot.KubernetesVersion,
Workers: b.Shoot.GetInfo().Spec.Provider.Workers,
OriginalValues: operatingsystemconfig.OriginalValues{
ClusterDomain: gardencorev1beta1.DefaultDomain,
Images: oscImages,
KubeletConfig: b.Shoot.GetInfo().Spec.Kubernetes.Kubelet,
MachineTypes: b.Shoot.CloudProfile.Spec.MachineTypes,
SSHAccessEnabled: v1beta1helper.ShootEnablesSSHAccess(b.Shoot.GetInfo()),
ValitailEnabled: valitailEnabled,
ValiIngressHostName: valiIngressHost,
NodeLocalDNSEnabled: v1beta1helper.IsNodeLocalDNSEnabled(b.Shoot.GetInfo().Spec.SystemComponents),
PrimaryIPFamily: b.Shoot.GetInfo().Spec.Networking.IPFamilies[0],
ClusterDomain: gardencorev1beta1.DefaultDomain,
Images: oscImages,
KubeletConfig: b.Shoot.GetInfo().Spec.Kubernetes.Kubelet,
MachineTypes: b.Shoot.CloudProfile.Spec.MachineTypes,
SSHAccessEnabled: v1beta1helper.ShootEnablesSSHAccess(b.Shoot.GetInfo()),
ValitailEnabled: valitailEnabled,
ValiIngressHostName: valiIngressHost,
NodeLocalDNSEnabled: v1beta1helper.IsNodeLocalDNSEnabled(b.Shoot.GetInfo().Spec.SystemComponents),
NodeMonitorGracePeriod: *b.Shoot.GetInfo().Spec.Kubernetes.KubeControllerManager.NodeMonitorGracePeriod,
PrimaryIPFamily: b.Shoot.GetInfo().Spec.Networking.IPFamilies[0],
},
},
operatingsystemconfig.DefaultInterval,
Expand Down

0 comments on commit de12dd6

Please sign in to comment.