Skip to content

Commit

Permalink
Add support for Service Traffic Distribution in Antrea Proxy (#6604)
Browse files Browse the repository at this point in the history
`ServiceTrafficDistribution` enables Traffic Distribution for Services in Antrea Proxy. This
feature allows for more flexible and intelligent routing decisions by considering both
topology and non-topology factors. For more details, refer to:
https://kubernetes.io/docs/reference/networking/virtual-ips/#traffic-distribution

Note: To activate this feature in Antrea Proxy, Kubernetes must be version 1.30 or higher, with
the `ServiceTrafficDistribution` feature gate (a Kubernetes-specific feature gate) enabled to add
Endpoint zone hints in the Kubernetes EndpointSlice controller. Without these prerequisites,
the feature may not function as intended in Antrea Proxy.

Signed-off-by: Hongliang Liu <lhongliang@vmware.com>
  • Loading branch information
hongliangl authored Aug 22, 2024
1 parent c76b38a commit b352435
Show file tree
Hide file tree
Showing 12 changed files with 166 additions and 79 deletions.
4 changes: 4 additions & 0 deletions build/charts/antrea/conf/antrea-agent.conf
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ featureGates:
# enabled, otherwise this flag will not take effect.
{{- include "featureGate" (dict "featureGates" .Values.featureGates "name" "TopologyAwareHints" "default" true) }}

# Enable ServiceTrafficDistribution in AntreaProxy. This requires AntreaProxy and EndpointSlice to be
# enabled, otherwise this flag will not take effect.
{{- include "featureGate" (dict "featureGates" .Values.featureGates "name" "ServiceTrafficDistribution" "default" true) }}

# Enable support for cleaning up stale UDP Service conntrack connections in AntreaProxy. This requires AntreaProxy to
# be enabled, otherwise this flag will not take effect.
{{- include "featureGate" (dict "featureGates" .Values.featureGates "name" "CleanupStaleUDPSvcConntrack" "default" true) }}
Expand Down
8 changes: 6 additions & 2 deletions build/yamls/antrea-aks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3732,6 +3732,10 @@ data:
# enabled, otherwise this flag will not take effect.
# TopologyAwareHints: true
# Enable ServiceTrafficDistribution in AntreaProxy. This requires AntreaProxy and EndpointSlice to be
# enabled, otherwise this flag will not take effect.
# ServiceTrafficDistribution: true
# Enable support for cleaning up stale UDP Service conntrack connections in AntreaProxy. This requires AntreaProxy to
# be enabled, otherwise this flag will not take effect.
# CleanupStaleUDPSvcConntrack: true
Expand Down Expand Up @@ -5126,7 +5130,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: f950d38c3e5f05b4e6290aae92fc46eeda9126a68a0ed6b88eee7f5c4c6fb491
checksum/config: 452b01033d20173605ec14cd64eea22a52287f84998c2d4fc7f9c3ce19c907b9
labels:
app: antrea
component: antrea-agent
Expand Down Expand Up @@ -5364,7 +5368,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: f950d38c3e5f05b4e6290aae92fc46eeda9126a68a0ed6b88eee7f5c4c6fb491
checksum/config: 452b01033d20173605ec14cd64eea22a52287f84998c2d4fc7f9c3ce19c907b9
labels:
app: antrea
component: antrea-controller
Expand Down
8 changes: 6 additions & 2 deletions build/yamls/antrea-eks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3732,6 +3732,10 @@ data:
# enabled, otherwise this flag will not take effect.
# TopologyAwareHints: true
# Enable ServiceTrafficDistribution in AntreaProxy. This requires AntreaProxy and EndpointSlice to be
# enabled, otherwise this flag will not take effect.
# ServiceTrafficDistribution: true
# Enable support for cleaning up stale UDP Service conntrack connections in AntreaProxy. This requires AntreaProxy to
# be enabled, otherwise this flag will not take effect.
# CleanupStaleUDPSvcConntrack: true
Expand Down Expand Up @@ -5126,7 +5130,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: f950d38c3e5f05b4e6290aae92fc46eeda9126a68a0ed6b88eee7f5c4c6fb491
checksum/config: 452b01033d20173605ec14cd64eea22a52287f84998c2d4fc7f9c3ce19c907b9
labels:
app: antrea
component: antrea-agent
Expand Down Expand Up @@ -5365,7 +5369,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: f950d38c3e5f05b4e6290aae92fc46eeda9126a68a0ed6b88eee7f5c4c6fb491
checksum/config: 452b01033d20173605ec14cd64eea22a52287f84998c2d4fc7f9c3ce19c907b9
labels:
app: antrea
component: antrea-controller
Expand Down
8 changes: 6 additions & 2 deletions build/yamls/antrea-gke.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3732,6 +3732,10 @@ data:
# enabled, otherwise this flag will not take effect.
# TopologyAwareHints: true
# Enable ServiceTrafficDistribution in AntreaProxy. This requires AntreaProxy and EndpointSlice to be
# enabled, otherwise this flag will not take effect.
# ServiceTrafficDistribution: true
# Enable support for cleaning up stale UDP Service conntrack connections in AntreaProxy. This requires AntreaProxy to
# be enabled, otherwise this flag will not take effect.
# CleanupStaleUDPSvcConntrack: true
Expand Down Expand Up @@ -5126,7 +5130,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 5bab13c466e83f8a14191bfb9aad49229945c442808ea135f80cafe5e21be5f3
checksum/config: a72df8ca7bc976062ae8b3091bccd5aa2c5ee69f4ac3c7f3dc7d58e6765c4ebf
labels:
app: antrea
component: antrea-agent
Expand Down Expand Up @@ -5362,7 +5366,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 5bab13c466e83f8a14191bfb9aad49229945c442808ea135f80cafe5e21be5f3
checksum/config: a72df8ca7bc976062ae8b3091bccd5aa2c5ee69f4ac3c7f3dc7d58e6765c4ebf
labels:
app: antrea
component: antrea-controller
Expand Down
8 changes: 6 additions & 2 deletions build/yamls/antrea-ipsec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3745,6 +3745,10 @@ data:
# enabled, otherwise this flag will not take effect.
# TopologyAwareHints: true
# Enable ServiceTrafficDistribution in AntreaProxy. This requires AntreaProxy and EndpointSlice to be
# enabled, otherwise this flag will not take effect.
# ServiceTrafficDistribution: true
# Enable support for cleaning up stale UDP Service conntrack connections in AntreaProxy. This requires AntreaProxy to
# be enabled, otherwise this flag will not take effect.
# CleanupStaleUDPSvcConntrack: true
Expand Down Expand Up @@ -5139,7 +5143,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 7212fbcdde8fe4be00f31ebbbcd7b03a7335666b4af245eed7dac1ba9e99118c
checksum/config: 524a77a636d0093e1c1a41cc39402cee561cdd4479d9866f573082f905050ce5
checksum/ipsec-secret: d0eb9c52d0cd4311b6d252a951126bf9bea27ec05590bed8a394f0f792dcb2a4
labels:
app: antrea
Expand Down Expand Up @@ -5421,7 +5425,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 7212fbcdde8fe4be00f31ebbbcd7b03a7335666b4af245eed7dac1ba9e99118c
checksum/config: 524a77a636d0093e1c1a41cc39402cee561cdd4479d9866f573082f905050ce5
labels:
app: antrea
component: antrea-controller
Expand Down
8 changes: 6 additions & 2 deletions build/yamls/antrea.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3732,6 +3732,10 @@ data:
# enabled, otherwise this flag will not take effect.
# TopologyAwareHints: true
# Enable ServiceTrafficDistribution in AntreaProxy. This requires AntreaProxy and EndpointSlice to be
# enabled, otherwise this flag will not take effect.
# ServiceTrafficDistribution: true
# Enable support for cleaning up stale UDP Service conntrack connections in AntreaProxy. This requires AntreaProxy to
# be enabled, otherwise this flag will not take effect.
# CleanupStaleUDPSvcConntrack: true
Expand Down Expand Up @@ -5126,7 +5130,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 616b79b8deedba740ff992ca870b346c64c1dde5e3381436dc2cb24c0bd98ead
checksum/config: 2276e135e2f7d4d7d1ab070a94a28cf4207f2f9febe427012f41733b7e963c8e
labels:
app: antrea
component: antrea-agent
Expand Down Expand Up @@ -5362,7 +5366,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 616b79b8deedba740ff992ca870b346c64c1dde5e3381436dc2cb24c0bd98ead
checksum/config: 2276e135e2f7d4d7d1ab070a94a28cf4207f2f9febe427012f41733b7e963c8e
labels:
app: antrea
component: antrea-controller
Expand Down
14 changes: 14 additions & 0 deletions docs/feature-gates.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ edit the Agent configuration in the
| `AntreaProxy` | Agent | `true` | GA | v0.8 | v0.11 | v1.14 | Yes | Must be enabled for Windows. |
| `EndpointSlice` | Agent | `true` | GA | v0.13.0 | v1.11 | v1.14 | Yes | |
| `TopologyAwareHints` | Agent | `true` | Beta | v1.8 | v1.12 | N/A | Yes | |
| `ServiceTrafficDistribution` | Agent | `true` | Beta | N/A | v2.2 | N/A | Yes | |
| `CleanupStaleUDPSvcConntrack` | Agent | `true` | Beta | v1.13 | v2.1 | N/A | Yes | |
| `LoadBalancerModeDSR` | Agent | `false` | Alpha | v1.13 | N/A | N/A | Yes | |
| `AntreaPolicy` | Agent + Controller | `true` | Beta | v0.8 | v1.0 | N/A | No | Agent side config required from v0.9.0+. |
Expand Down Expand Up @@ -104,6 +105,19 @@ Refer to this [link](https://kubernetes.io/docs/concepts/services-networking/top
- Option `antreaProxy.enable` is set to true.
- EndpointSlice API version v1 is available in Kubernetes.

### ServiceTrafficDistribution

`ServiceTrafficDistribution` enables Traffic Distribution for Services in Antrea Proxy. This feature allows for more
flexible and intelligent routing decisions by considering both topology and non-topology factors. For more details,
refer to this [link](https://github.com/kubernetes/enhancements/tree/master/keps/sig-network/4444-service-traffic-distribution).

#### Requirements for this Feature

- Option `antreaProxy.enable` is set to true.
- EndpointSlice API version v1 is available in Kubernetes.
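- Kubernetes must be version 1.30 or higher, with the `ServiceTrafficDistribution` feature gate (a Kubernetes-specific
feature gate) enabled, so that the Kubernetes EndpointSlice controller adds the Endpoint zone hints this feature relies on.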

### LoadBalancerModeDSR

`LoadBalancerModeDSR` allows users to specify the load balancer mode as DSR (Direct Server Return). The load balancer
Expand Down
85 changes: 44 additions & 41 deletions pkg/agent/proxy/proxier.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,19 +124,20 @@ type proxier struct {
syncedOnce bool
syncedOnceMutex sync.RWMutex

runner *k8sproxy.BoundedFrequencyRunner
stopChan <-chan struct{}
ofClient openflow.Client
routeClient route.Interface
nodePortAddresses []net.IP
hostname string
isIPv6 bool
proxyAll bool
endpointSliceEnabled bool
proxyLoadBalancerIPs bool
topologyAwareHintsEnabled bool
supportNestedService bool
cleanupStaleUDPSvcConntrack bool
runner *k8sproxy.BoundedFrequencyRunner
stopChan <-chan struct{}
ofClient openflow.Client
routeClient route.Interface
nodePortAddresses []net.IP
hostname string
isIPv6 bool
proxyAll bool
endpointSliceEnabled bool
proxyLoadBalancerIPs bool
topologyAwareHintsEnabled bool
serviceTrafficDistributionEnabled bool
supportNestedService bool
cleanupStaleUDPSvcConntrack bool

// When a Service's LoadBalancerMode is DSR, the following changes will be applied to the OpenFlow flows and groups:
// 1. ClusterGroup will be used by traffic working in DSR mode on ingress Node.
Expand Down Expand Up @@ -1221,7 +1222,7 @@ func (p *proxier) Run(stopCh <-chan struct{}) {
go p.serviceConfig.Run(stopCh)
if p.endpointSliceEnabled {
go p.endpointSliceConfig.Run(stopCh)
if p.topologyAwareHintsEnabled {
if p.topologyAwareHintsEnabled || p.serviceTrafficDistributionEnabled {
go p.nodeConfig.Run(stopCh)
}
} else {
Expand Down Expand Up @@ -1372,6 +1373,7 @@ func newProxier(
}
}
topologyAwareHintsEnabled := endpointSliceEnabled && features.DefaultFeatureGate.Enabled(features.TopologyAwareHints)
serviceTrafficDistributionEnabled := endpointSliceEnabled && features.DefaultFeatureGate.Enabled(features.ServiceTrafficDistribution)
ipFamily := corev1.IPv4Protocol
if isIPv6 {
ipFamily = corev1.IPv6Protocol
Expand Down Expand Up @@ -1401,40 +1403,41 @@ func newProxier(
serviceLabelSelector = serviceLabelSelector.Add(*serviceProxyNameSelector, *nonHeadlessServiceSelector)

p := &proxier{
nodeIPChecker: nodeIPChecker,
serviceConfig: config.NewServiceConfig(serviceInformer, resyncPeriod),
endpointsChanges: newEndpointsChangesTracker(hostname, endpointSliceEnabled, isIPv6),
serviceChanges: newServiceChangesTracker(recorder, ipFamily, serviceLabelSelector, skipServices),
serviceMap: k8sproxy.ServiceMap{},
serviceInstalledMap: k8sproxy.ServiceMap{},
endpointsInstalledMap: types.EndpointsMap{},
endpointsMap: types.EndpointsMap{},
endpointReferenceCounter: map[string]int{},
nodeLabels: map[string]string{},
serviceStringMap: map[string]k8sproxy.ServicePortName{},
groupCounter: groupCounter,
ofClient: ofClient,
routeClient: routeClient,
nodePortAddresses: nodePortAddresses,
isIPv6: isIPv6,
proxyAll: proxyAllEnabled,
endpointSliceEnabled: endpointSliceEnabled,
topologyAwareHintsEnabled: topologyAwareHintsEnabled,
cleanupStaleUDPSvcConntrack: features.DefaultFeatureGate.Enabled(features.CleanupStaleUDPSvcConntrack),
proxyLoadBalancerIPs: proxyLoadBalancerIPs,
hostname: hostname,
serviceHealthServer: serviceHealthServer,
numLocalEndpoints: map[apimachinerytypes.NamespacedName]int{},
supportNestedService: supportNestedService,
defaultLoadBalancerMode: defaultLoadBalancerMode,
nodeIPChecker: nodeIPChecker,
serviceConfig: config.NewServiceConfig(serviceInformer, resyncPeriod),
endpointsChanges: newEndpointsChangesTracker(hostname, endpointSliceEnabled, isIPv6),
serviceChanges: newServiceChangesTracker(recorder, ipFamily, serviceLabelSelector, skipServices),
serviceMap: k8sproxy.ServiceMap{},
serviceInstalledMap: k8sproxy.ServiceMap{},
endpointsInstalledMap: types.EndpointsMap{},
endpointsMap: types.EndpointsMap{},
endpointReferenceCounter: map[string]int{},
nodeLabels: map[string]string{},
serviceStringMap: map[string]k8sproxy.ServicePortName{},
groupCounter: groupCounter,
ofClient: ofClient,
routeClient: routeClient,
nodePortAddresses: nodePortAddresses,
isIPv6: isIPv6,
proxyAll: proxyAllEnabled,
endpointSliceEnabled: endpointSliceEnabled,
topologyAwareHintsEnabled: topologyAwareHintsEnabled,
serviceTrafficDistributionEnabled: serviceTrafficDistributionEnabled,
cleanupStaleUDPSvcConntrack: features.DefaultFeatureGate.Enabled(features.CleanupStaleUDPSvcConntrack),
proxyLoadBalancerIPs: proxyLoadBalancerIPs,
hostname: hostname,
serviceHealthServer: serviceHealthServer,
numLocalEndpoints: map[apimachinerytypes.NamespacedName]int{},
supportNestedService: supportNestedService,
defaultLoadBalancerMode: defaultLoadBalancerMode,
}

p.serviceConfig.RegisterEventHandler(p)
p.runner = k8sproxy.NewBoundedFrequencyRunner(componentName, p.syncProxyRules, time.Second, 30*time.Second, 2)
if endpointSliceEnabled {
p.endpointSliceConfig = config.NewEndpointSliceConfig(endpointSliceInformer, resyncPeriod)
p.endpointSliceConfig.RegisterEventHandler(p)
if p.topologyAwareHintsEnabled {
if p.topologyAwareHintsEnabled || p.serviceTrafficDistributionEnabled {
p.nodeConfig = config.NewNodeConfig(nodeInformer, resyncPeriod)
p.nodeConfig.RegisterEventHandler(p)
}
Expand Down
23 changes: 13 additions & 10 deletions pkg/agent/proxy/topology.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,19 +115,22 @@ func (p *proxier) categorizeEndpoints(endpoints map[string]k8sproxy.Endpoint, sv

// canUseTopology returns true if topology aware routing is enabled and properly configured in this cluster. That is,
// it checks that:
// - The TopologyAwareHints feature is enabled.
// - The "service.kubernetes.io/topology-aware-hints" annotation on this Service is set to "Auto".
// - The node's labels include "topology.kubernetes.io/zone".
// - All of the Endpoints for this Service have a topology hint.
// - At least one Endpoint for this Service is hinted for this Node's zone.
// - The TopologyAwareHints or ServiceTrafficDistribution feature is enabled.
// - If ServiceTrafficDistribution feature is not enabled, then the "service.kubernetes.io/topology-aware-hints"
// annotation on this Service must be set to a non-empty value other than "disabled" or "Disabled" (e.g. "Auto").
// - The node's labels include "topology.kubernetes.io/zone".
// - All of the Endpoints for this Service have a topology hint.
// - At least one Endpoint for this Service is hinted for this Node's zone.
func (p *proxier) canUseTopology(endpoints map[string]k8sproxy.Endpoint, svcInfo k8sproxy.ServicePort) bool {
if !p.topologyAwareHintsEnabled {
if !p.topologyAwareHintsEnabled && !p.serviceTrafficDistributionEnabled {
return false
}
// Any non-empty and non-disabled values for the hints annotation are acceptable.
hintsAnnotation := svcInfo.HintsAnnotation()
if hintsAnnotation == "" || hintsAnnotation == "disabled" || hintsAnnotation == "Disabled" {
return false
if !p.serviceTrafficDistributionEnabled {
// Any non-empty and non-disabled values for the hints annotation are acceptable.
hintsAnnotation := svcInfo.HintsAnnotation()
if hintsAnnotation == "" || hintsAnnotation == "disabled" || hintsAnnotation == "Disabled" {
return false
}
}

zone, ok := p.nodeLabels[v1.LabelTopologyZone]
Expand Down
Loading

0 comments on commit b352435

Please sign in to comment.