From de12dd677d37e7072f1bcf5bb90e85996e3649f2 Mon Sep 17 00:00:00 2001 From: Axel Siebenborn Date: Mon, 12 Aug 2024 17:53:15 +0200 Subject: [PATCH] Reduce kubelet http2 timeouts. (#10223) * Reduce kubelet http2 timeouts. In the event that the connection from kubelet to kube-apiserver is silently dropped, it might take 45s to establish a new connection due to the default http2 timeouts. During this period, node leases are not renewed. On the other hand, the default for node-monitor-grace-period in kube-controller-manager is 40s. After this period, the endpoints in endpointslices will be marked unready and won't receive any traffic. This could result in service disruption of user workload following short connection issues from kubelet to kube-apiserver. By reducing the sum of the http2 timeouts to below 40s, this scenario can be prevented. * Calculate http2 timeouts depending on nodeMonitorGracePeriod. * Address review feedback. Co-authored-by: Rafael Franzke --------- Co-authored-by: Johannes Scheerer Co-authored-by: Rafael Franzke --- .../operatingsystemconfig.go | 5 ++++ .../original/components/components.go | 2 ++ .../original/components/kubelet/component.go | 27 +++++++++++++++++++ .../components/kubelet/component_test.go | 3 +++ .../botanist/operatingsystemconfig.go | 19 ++++++------- 5 files changed, 47 insertions(+), 9 deletions(-) diff --git a/pkg/component/extensions/operatingsystemconfig/operatingsystemconfig.go b/pkg/component/extensions/operatingsystemconfig/operatingsystemconfig.go index 64e1abd955c..9762ed51052 100644 --- a/pkg/component/extensions/operatingsystemconfig/operatingsystemconfig.go +++ b/pkg/component/extensions/operatingsystemconfig/operatingsystemconfig.go @@ -141,6 +141,8 @@ type OriginalValues struct { ValitailEnabled bool // ValiIngressHostName is the ingress host name of the shoot's Vali. ValiIngressHostName string + // NodeMonitorGracePeriod defines the grace period before an unresponsive node is marked unhealthy. 
+ NodeMonitorGracePeriod metav1.Duration // NodeLocalDNSEnabled indicates whether node local dns is enabled or not. NodeLocalDNSEnabled bool // PrimaryIPFamily represents the preferred IP family (IPv4 or IPv6) to be used. @@ -738,6 +740,7 @@ func (o *operatingSystemConfig) newDeployer(version int, osc *extensionsv1alpha1 sshAccessEnabled: o.values.SSHAccessEnabled, valiIngressHostName: o.values.ValiIngressHostName, valitailEnabled: o.values.ValitailEnabled, + nodeMonitorGracePeriod: o.values.NodeMonitorGracePeriod, nodeLocalDNSEnabled: o.values.NodeLocalDNSEnabled, primaryIPFamily: o.values.PrimaryIPFamily, taints: worker.Taints, @@ -803,6 +806,7 @@ type deployer struct { valiIngressHostName string valitailEnabled bool nodeLocalDNSEnabled bool + nodeMonitorGracePeriod metav1.Duration primaryIPFamily gardencorev1beta1.IPFamily taints []corev1.Taint } @@ -830,6 +834,7 @@ func (d *deployer) deploy(ctx context.Context, operation string) (extensionsv1al CRIName: d.criName, Images: d.images, NodeLabels: gardenerutils.NodeLabelsForWorkerPool(d.worker, d.nodeLocalDNSEnabled, d.key), + NodeMonitorGracePeriod: d.nodeMonitorGracePeriod, KubeletCABundle: d.kubeletCABundle, KubeletConfigParameters: d.kubeletConfigParameters, KubeletCLIFlags: d.kubeletCLIFlags, diff --git a/pkg/component/extensions/operatingsystemconfig/original/components/components.go b/pkg/component/extensions/operatingsystemconfig/original/components/components.go index 247c8414992..58a70983c13 100644 --- a/pkg/component/extensions/operatingsystemconfig/original/components/components.go +++ b/pkg/component/extensions/operatingsystemconfig/original/components/components.go @@ -7,6 +7,7 @@ package components import ( "github.com/Masterminds/semver/v3" corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" extensionsv1alpha1 "github.com/gardener/gardener/pkg/apis/extensions/v1alpha1" "github.com/gardener/gardener/pkg/utils/imagevector" @@ -29,6 +30,7 @@ type Context struct { CRIName 
extensionsv1alpha1.CRIName Images map[string]*imagevector.Image NodeLabels map[string]string + NodeMonitorGracePeriod metav1.Duration KubeletCABundle []byte KubeletCLIFlags ConfigurableKubeletCLIFlags KubeletConfigParameters ConfigurableKubeletConfigParameters diff --git a/pkg/component/extensions/operatingsystemconfig/original/components/kubelet/component.go b/pkg/component/extensions/operatingsystemconfig/original/components/kubelet/component.go index bb27f77104c..c57f7959165 100644 --- a/pkg/component/extensions/operatingsystemconfig/original/components/kubelet/component.go +++ b/pkg/component/extensions/operatingsystemconfig/original/components/kubelet/component.go @@ -6,10 +6,12 @@ package kubelet import ( _ "embed" + "strconv" "strings" "github.com/Masterminds/semver/v3" corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/ptr" "github.com/gardener/gardener/imagevector" @@ -90,6 +92,8 @@ func (component) Config(ctx components.Context) ([]extensionsv1alpha1.Unit, []ex cliFlags := CLIFlags(ctx.KubernetesVersion, ctx.NodeLabels, ctx.CRIName, ctx.KubeletCLIFlags, ctx.PreferIPv6) + http2ReadIdleTimeSeconds, http2PingTimeSeconds := calcKubeletHTTP2Timeouts(ctx.NodeMonitorGracePeriod) + kubeletUnit := extensionsv1alpha1.Unit{ Name: UnitName, Command: ptr.To(extensionsv1alpha1.CommandStart), @@ -103,6 +107,7 @@ WantedBy=multi-user.target [Service] Restart=always RestartSec=5 +Environment="HTTP2_READ_IDLE_TIMEOUT_SECONDS=` + strconv.Itoa(http2ReadIdleTimeSeconds) + `" "HTTP2_PING_TIMEOUT_SECONDS=` + strconv.Itoa(http2PingTimeSeconds) + `" EnvironmentFile=/etc/environment EnvironmentFile=-/var/lib/kubelet/extra_args ExecStart=` + v1beta1constants.OperatingSystemConfigFilePathBinaries + `/kubelet \ @@ -122,3 +127,25 @@ func getFileContentKubeletConfig(kubernetesVersion *semver.Version, clusterDNSAd return kcCodec.Encode(kubeletConfig, configFCI.Encoding) } + +// The default for HTTP2_READ_IDLE_TIMEOUT_SECONDS is 30 and for 
HTTP2_PING_TIMEOUT_SECONDS 15. +// This results in issues if the tcp connection to kube-apiserver is silently dropped, +// as node-monitor-grace-period is only 40s. +// HTTP2_READ_IDLE_TIMEOUT_SECONDS + HTTP2_PING_TIMEOUT_SECONDS should be less than node-monitor-grace-period. +func calcKubeletHTTP2Timeouts(nodeMonitorGracePeriod metav1.Duration) (int, int) { + http2ReadIdleTimeSeconds := int(30) + http2PingTimeSeconds := int(15) + + if nodeMonitorGracePeriod.Duration.Seconds() < 46 { + http2ReadIdleTimeSeconds = positiveOrZero(int((nodeMonitorGracePeriod.Duration.Seconds() - 2) * 2 / 3)) + http2PingTimeSeconds = positiveOrZero(int((nodeMonitorGracePeriod.Duration.Seconds() - 2) * 1 / 3)) + } + return http2ReadIdleTimeSeconds, http2PingTimeSeconds +} + +func positiveOrZero(v int) int { + if v > 0 { + return v + } + return 0 +} diff --git a/pkg/component/extensions/operatingsystemconfig/original/components/kubelet/component_test.go b/pkg/component/extensions/operatingsystemconfig/original/components/kubelet/component_test.go index 320b721f5fe..2003cbc8925 100644 --- a/pkg/component/extensions/operatingsystemconfig/original/components/kubelet/component_test.go +++ b/pkg/component/extensions/operatingsystemconfig/original/components/kubelet/component_test.go @@ -6,6 +6,7 @@ package kubelet_test import ( "strings" + "time" "github.com/Masterminds/semver/v3" . 
"github.com/onsi/ginkgo/v2" @@ -57,6 +58,7 @@ var _ = Describe("Component", func() { } ctx.PreferIPv6 = preferIPv6 ctx.ClusterDNSAddresses = []string{"2001::db8:1", "2001::db8:2"} + ctx.NodeMonitorGracePeriod.Duration = time.Duration(40) * time.Second cliFlags := CLIFlags(ctx.KubernetesVersion, ctx.NodeLabels, ctx.CRIName, ctx.KubeletCLIFlags, ctx.PreferIPv6) units, files, err := component.Config(ctx) @@ -230,6 +232,7 @@ WantedBy=multi-user.target [Service] Restart=always RestartSec=5 +Environment="HTTP2_READ_IDLE_TIMEOUT_SECONDS=25" "HTTP2_PING_TIMEOUT_SECONDS=12" EnvironmentFile=/etc/environment EnvironmentFile=-/var/lib/kubelet/extra_args` + kubeletStartPre + ` ExecStart=/opt/bin/kubelet \ diff --git a/pkg/gardenlet/operation/botanist/operatingsystemconfig.go b/pkg/gardenlet/operation/botanist/operatingsystemconfig.go index a55703d9a76..d2303f875d2 100644 --- a/pkg/gardenlet/operation/botanist/operatingsystemconfig.go +++ b/pkg/gardenlet/operation/botanist/operatingsystemconfig.go @@ -66,15 +66,16 @@ func (b *Botanist) DefaultOperatingSystemConfig() (operatingsystemconfig.Interfa KubernetesVersion: b.Shoot.KubernetesVersion, Workers: b.Shoot.GetInfo().Spec.Provider.Workers, OriginalValues: operatingsystemconfig.OriginalValues{ - ClusterDomain: gardencorev1beta1.DefaultDomain, - Images: oscImages, - KubeletConfig: b.Shoot.GetInfo().Spec.Kubernetes.Kubelet, - MachineTypes: b.Shoot.CloudProfile.Spec.MachineTypes, - SSHAccessEnabled: v1beta1helper.ShootEnablesSSHAccess(b.Shoot.GetInfo()), - ValitailEnabled: valitailEnabled, - ValiIngressHostName: valiIngressHost, - NodeLocalDNSEnabled: v1beta1helper.IsNodeLocalDNSEnabled(b.Shoot.GetInfo().Spec.SystemComponents), - PrimaryIPFamily: b.Shoot.GetInfo().Spec.Networking.IPFamilies[0], + ClusterDomain: gardencorev1beta1.DefaultDomain, + Images: oscImages, + KubeletConfig: b.Shoot.GetInfo().Spec.Kubernetes.Kubelet, + MachineTypes: b.Shoot.CloudProfile.Spec.MachineTypes, + SSHAccessEnabled: 
v1beta1helper.ShootEnablesSSHAccess(b.Shoot.GetInfo()), + ValitailEnabled: valitailEnabled, + ValiIngressHostName: valiIngressHost, + NodeLocalDNSEnabled: v1beta1helper.IsNodeLocalDNSEnabled(b.Shoot.GetInfo().Spec.SystemComponents), + NodeMonitorGracePeriod: *b.Shoot.GetInfo().Spec.Kubernetes.KubeControllerManager.NodeMonitorGracePeriod, + PrimaryIPFamily: b.Shoot.GetInfo().Spec.Networking.IPFamilies[0], }, }, operatingsystemconfig.DefaultInterval,