Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automated cherry pick of #5880: Fix incorrect MTU configurations (#5880) #5926: Ensure MTU is set correctly when WireGuard interface already #5997: Use 65000 MTU upper bound for interfaces in encap mode #6145

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cmd/antrea-agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ func run(o *Options) error {
enableNodePortLocal := features.DefaultFeatureGate.Enabled(features.NodePortLocal) && o.config.NodePortLocal.Enable
l7NetworkPolicyEnabled := features.DefaultFeatureGate.Enabled(features.L7NetworkPolicy)
enableMulticlusterGW := features.DefaultFeatureGate.Enabled(features.Multicluster) && o.config.Multicluster.EnableGateway
_, multiclusterEncryptionMode := config.GetTrafficEncryptionModeFromStr(o.config.Multicluster.TrafficEncryptionMode)
enableMulticlusterNP := features.DefaultFeatureGate.Enabled(features.Multicluster) && o.config.Multicluster.EnableStretchedNetworkPolicy
enableFLowExporter := features.DefaultFeatureGate.Enabled(features.FlowExporter) && o.config.FlowExporter.Enable

Expand Down Expand Up @@ -199,7 +200,8 @@ func run(o *Options) error {
IPsecConfig: config.IPsecConfig{
AuthenticationMode: ipsecAuthenticationMode,
},
EnableMulticlusterGW: enableMulticlusterGW,
EnableMulticlusterGW: enableMulticlusterGW,
MulticlusterEncryptionMode: multiclusterEncryptionMode,
}

wireguardConfig := &config.WireGuardConfig{
Expand Down
19 changes: 15 additions & 4 deletions pkg/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ const (
roundNumKey = "roundNum" // round number key in externalIDs.
initialRoundNum = 1
maxRetryForRoundNumSave = 5
// On Linux, OVS configures the MTU for tunnel interfaces to 65000.
// See https://github.com/openvswitch/ovs/blame/3e666ba000b5eff58da8abb4e8c694ac3f7b08d6/lib/dpif-netlink-rtnl.c#L348-L360
// There are some edge cases (e.g., Kind clusters) where the transport Node's MTU may be
// larger than that (e.g., 65535), and packets may be dropped. To account for this, we use
// 65000 as an upper bound for the MTU calculated in getInterfaceMTU, when encap is
// supported. For simplicity's sake, we also use this upper bound for Windows, even if it
// does not apply.
ovsTunnelMaxMTU = 65000
)

var (
Expand Down Expand Up @@ -1092,7 +1100,7 @@ func (i *Initializer) waitForIPsecMonitorDaemon() error {

// initializeWireGuard checks if preconditions are met for using WireGuard and initializes WireGuard client or cleans up.
func (i *Initializer) initializeWireGuard() error {
i.wireGuardConfig.MTU = i.nodeConfig.NodeTransportInterfaceMTU - config.WireGuardOverhead
i.wireGuardConfig.MTU = i.nodeConfig.NodeTransportInterfaceMTU - i.networkConfig.WireGuardMTUDeduction
wgClient, err := wireguard.New(i.nodeConfig, i.wireGuardConfig)
if err != nil {
return err
Expand Down Expand Up @@ -1195,10 +1203,13 @@ func (i *Initializer) getInterfaceMTU(transportInterface *net.Interface) (int, e

isIPv6 := i.nodeConfig.NodeIPv6Addr != nil
mtu -= i.networkConfig.CalculateMTUDeduction(isIPv6)

if i.networkConfig.TrafficEncryptionMode == config.TrafficEncryptionModeIPSec {
mtu -= config.IPSecESPOverhead
if i.networkConfig.TrafficEncapMode.SupportsEncap() {
// See comment for ovsTunnelMaxMTU constant above.
if mtu > ovsTunnelMaxMTU {
mtu = ovsTunnelMaxMTU
}
}

return mtu, nil
}

Expand Down
76 changes: 55 additions & 21 deletions pkg/agent/config/node_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,15 @@ const (
)

const (
vxlanOverhead = 50
geneveOverhead = 50
greOverhead = 38
vxlanOverhead = 50
geneveOverhead = 50
// GRE overhead: 14-byte outer MAC, 20-byte outer IPv4, 8-byte GRE header (4-byte standard header + 4-byte key field)
greOverhead = 42

ipv6ExtraOverhead = 20

WireGuardOverhead = 80
// WireGuard overhead: 20-byte outer IPv4, 8-byte UDP header, 4-byte type, 4-byte key index, 8-byte nonce, 16-byte authentication tag
WireGuardOverhead = 60
// IPsec ESP can add a maximum of 38 bytes to the packet including the ESP
// header and trailer.
IPSecESPOverhead = 38
Expand Down Expand Up @@ -201,14 +204,19 @@ type NetworkConfig struct {
TransportIfaceCIDRs []string
IPv4Enabled bool
IPv6Enabled bool
// MTUDeduction only counts IPv4 tunnel overhead, no IPsec and WireGuard overhead.
// MTUDeduction is the MTU deduction for encapsulation and encryption in cluster.
MTUDeduction int
// WireGuardMTUDeduction is the MTU deduction for WireGuard encryption.
// It is calculated based on whether IPv6 is used.
WireGuardMTUDeduction int
// Set by the defaultMTU config option or auto discovered.
// Auto discovery will use MTU value of the Node's transport interface.
// For Encap and Hybrid mode, InterfaceMTU will be adjusted to account for
// encap header.
InterfaceMTU int
EnableMulticlusterGW bool
InterfaceMTU int

EnableMulticlusterGW bool
MulticlusterEncryptionMode TrafficEncryptionModeType
}

// IsIPv4Enabled returns true if the cluster network supports IPv4. Legal cases are:
Expand Down Expand Up @@ -264,24 +272,50 @@ func (nc *NetworkConfig) NeedsDirectRoutingToPeer(peerIP net.IP, localIP *net.IP
return (nc.TrafficEncapMode == TrafficEncapModeNoEncap || nc.TrafficEncapMode == TrafficEncapModeHybrid) && localIP.Contains(peerIP)
}

// getEncapMTUDeduction returns the encapsulation overhead, in bytes, for the
// configured tunnel type: vxlanOverhead, geneveOverhead or greOverhead for
// VXLAN, Geneve and GRE respectively, plus ipv6ExtraOverhead when isIPv6 is
// true. Any other tunnel type yields 0, regardless of isIPv6.
func (nc *NetworkConfig) getEncapMTUDeduction(isIPv6 bool) int {
	var overhead int
	switch nc.TunnelType {
	case ovsconfig.VXLANTunnel:
		overhead = vxlanOverhead
	case ovsconfig.GeneveTunnel:
		overhead = geneveOverhead
	case ovsconfig.GRETunnel:
		overhead = greOverhead
	default:
		// Unrecognized tunnel type: report no overhead, and do not add the
		// IPv6 extra either.
		return 0
	}
	if isIPv6 {
		overhead += ipv6ExtraOverhead
	}
	return overhead
}

func (nc *NetworkConfig) CalculateMTUDeduction(isIPv6 bool) int {
var mtuDeduction int
// When Multi-cluster Gateway is enabled, we need to reduce MTU for potential cross-cluster traffic.
if nc.TrafficEncapMode.SupportsEncap() || nc.EnableMulticlusterGW {
if nc.TunnelType == ovsconfig.VXLANTunnel {
mtuDeduction = vxlanOverhead
} else if nc.TunnelType == ovsconfig.GeneveTunnel {
mtuDeduction = geneveOverhead
} else if nc.TunnelType == ovsconfig.GRETunnel {
mtuDeduction = greOverhead
}
nc.WireGuardMTUDeduction = WireGuardOverhead
if isIPv6 {
nc.WireGuardMTUDeduction += ipv6ExtraOverhead
}

if nc.TrafficEncapMode.SupportsEncap() && isIPv6 {
mtuDeduction += ipv6ExtraOverhead
if nc.EnableMulticlusterGW {
nc.MTUDeduction = nc.getEncapMTUDeduction(isIPv6)
// When multi-cluster WireGuard is enabled, cross-cluster traffic will be encapsulated and encrypted, we need to
// reduce MTU for both encapsulation and encryption.
if nc.MulticlusterEncryptionMode == TrafficEncryptionModeWireGuard {
nc.MTUDeduction += nc.WireGuardMTUDeduction
}
return nc.MTUDeduction
}
if nc.TrafficEncapMode.SupportsEncap() {
nc.MTUDeduction = nc.getEncapMTUDeduction(isIPv6)
}
if nc.TrafficEncryptionMode == TrafficEncryptionModeWireGuard {
// When WireGuard is enabled, cross-node traffic will only be encrypted, just reduce MTU for encryption.
nc.MTUDeduction = nc.WireGuardMTUDeduction
} else if nc.TrafficEncryptionMode == TrafficEncryptionModeIPSec {
// When IPsec is enabled, cross-node traffic will be encapsulated and encrypted, we need to reduce MTU for both
// encapsulation and encryption.
nc.MTUDeduction += IPSecESPOverhead
}
nc.MTUDeduction = mtuDeduction
return mtuDeduction
return nc.MTUDeduction
}

// ServiceConfig includes K8s Service CIDR and available IP addresses for NodePort.
Expand Down
48 changes: 47 additions & 1 deletion pkg/agent/config/node_config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -298,14 +298,60 @@ func TestCalculateMTUDeduction(t *testing.T) {
{
name: "GRE encap without IPv6",
nc: &NetworkConfig{TunnelType: ovsconfig.GRETunnel},
expectedMTUDeduction: 38,
expectedMTUDeduction: 42,
},
{
name: "Default encap with IPv6",
nc: &NetworkConfig{TunnelType: ovsconfig.GeneveTunnel},
isIPv6: true,
expectedMTUDeduction: 70,
},
{
name: "WireGuard enabled",
nc: &NetworkConfig{TrafficEncryptionMode: TrafficEncryptionModeWireGuard},
expectedMTUDeduction: 60,
},
{
name: "IPv6 with WireGuard enabled",
nc: &NetworkConfig{TrafficEncryptionMode: TrafficEncryptionModeWireGuard},
isIPv6: true,
expectedMTUDeduction: 80,
},
{
name: "Multicluster enabled with Geneve encap",
nc: &NetworkConfig{TunnelType: ovsconfig.GeneveTunnel, EnableMulticlusterGW: true},
expectedMTUDeduction: 50,
},
{
name: "Geneve encap with Multicluster WireGuard enabled",
nc: &NetworkConfig{
TunnelType: ovsconfig.GeneveTunnel,
EnableMulticlusterGW: true,
MulticlusterEncryptionMode: TrafficEncryptionModeWireGuard,
},
expectedMTUDeduction: 110,
},
{
name: "Geneve encap with IPSec enabled",
nc: &NetworkConfig{TunnelType: ovsconfig.GeneveTunnel, TrafficEncryptionMode: TrafficEncryptionModeIPSec},
expectedMTUDeduction: 88,
},
{
name: "Geneve encap with IPSec enabled and IPv6",
nc: &NetworkConfig{TunnelType: ovsconfig.GeneveTunnel, TrafficEncryptionMode: TrafficEncryptionModeIPSec},
isIPv6: true,
expectedMTUDeduction: 108,
},
{
name: "VXLan encap with IPSec enabled",
nc: &NetworkConfig{TunnelType: ovsconfig.VXLANTunnel, TrafficEncryptionMode: TrafficEncryptionModeIPSec},
expectedMTUDeduction: 88,
},
{
name: "GRE encap with IPSec enabled",
nc: &NetworkConfig{TunnelType: ovsconfig.GRETunnel, TrafficEncryptionMode: TrafficEncryptionModeIPSec},
expectedMTUDeduction: 80,
},
}

for _, tt := range tests {
Expand Down
4 changes: 3 additions & 1 deletion pkg/agent/multicluster/mc_route_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,9 @@ func NewMCDefaultRouteController(
controller.wireGuardConfig = &config.WireGuardConfig{
Port: multiclusterConfig.WireGuard.Port,
Name: multiclusterWireGuardInterface,
MTU: controller.nodeConfig.NodeTransportInterfaceMTU - controller.networkConfig.MTUDeduction - config.WireGuardOverhead,
// Regardless of the tunnel type, the WireGuard device must only reduce MTU for encryption because the
// packets it transmits have been encapsulated.
MTU: nodeConfig.NodeTransportInterfaceMTU - networkConfig.WireGuardMTUDeduction,
}
}
controller.gwInformer.Informer().AddEventHandlerWithResyncPeriod(
Expand Down
15 changes: 11 additions & 4 deletions pkg/agent/wireguard/client_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ var _ Interface = (*client)(nil)
// Function indirections for the netlink and util calls made by the WireGuard
// client. They are package-level variables rather than direct calls so that
// they can be reassigned; the unit tests replace them with stubs that return
// canned errors.
var (
linkAdd = netlink.LinkAdd
linkSetUp = netlink.LinkSetUp
// linkSetMTU is used by Init to correct the MTU of a WireGuard link that
// already exists (linkAdd returned EEXIST).
linkSetMTU = netlink.LinkSetMTU
utilConfigureLinkAddresses = util.ConfigureLinkAddresses
)

Expand Down Expand Up @@ -85,12 +86,18 @@ func New(nodeConfig *config.NodeConfig, wireGuardConfig *config.WireGuardConfig)
func (client *client) Init(ipv4 net.IP, ipv6 net.IP) (string, error) {
link := &netlink.Wireguard{LinkAttrs: netlink.LinkAttrs{Name: client.wireGuardConfig.Name, MTU: client.wireGuardConfig.MTU}}
err := linkAdd(link)
// Ignore existing link as it may have already been created or managed by userspace process.
if err != nil && !errors.Is(err, unix.EEXIST) {
if errors.Is(err, unix.EOPNOTSUPP) {
if err != nil {
// Ignore existing link as it may have already been created or managed by userspace process, just ensure the MTU
// is set correctly.
if errors.Is(err, unix.EEXIST) {
if err := linkSetMTU(link, client.wireGuardConfig.MTU); err != nil {
return "", fmt.Errorf("failed to change WireGuard link MTU to %d: %w", client.wireGuardConfig.MTU, err)
}
} else if errors.Is(err, unix.EOPNOTSUPP) {
return "", fmt.Errorf("WireGuard not supported by the Linux kernel (netlink: %w), make sure the WireGuard kernel module is loaded", err)
} else {
return "", err
}
return "", err
}
if err := linkSetUp(link); err != nil {
return "", err
Expand Down
20 changes: 17 additions & 3 deletions pkg/agent/wireguard/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,8 @@ func Test_Init(t *testing.T) {
tests := []struct {
name string
linkAddErr error
lindSetupErr error
linkSetUpErr error
linkSetMTUErr error
utilConfigErr error
expectedErr string
extraIPv4 net.IP
Expand All @@ -404,14 +405,24 @@ func Test_Init(t *testing.T) {
linkAddErr: unix.EOPNOTSUPP,
expectedErr: "WireGuard not supported by the Linux kernel (netlink: operation not supported), make sure the WireGuard kernel module is loaded",
},
{
name: "init successfully with unix.EEXIST error",
linkAddErr: unix.EEXIST,
},
{
name: "failed to init due to linkSetMTU error",
linkAddErr: unix.EEXIST,
linkSetMTUErr: errors.New("link set mtu failed"),
expectedErr: "failed to change WireGuard link MTU to 1420: link set mtu failed",
},
{
name: "failed to init due to link add error",
linkAddErr: errors.New("link add failed"),
expectedErr: "link add failed",
},
{
name: "failed to init due to link setup error",
lindSetupErr: errors.New("link setup failed"),
linkSetUpErr: errors.New("link setup failed"),
expectedErr: "link setup failed",
},
{
Expand Down Expand Up @@ -441,7 +452,10 @@ func Test_Init(t *testing.T) {
return tt.linkAddErr
}
linkSetUp = func(link netlink.Link) error {
return tt.lindSetupErr
return tt.linkSetUpErr
}
linkSetMTU = func(link netlink.Link, mtu int) error {
return tt.linkSetMTUErr
}
utilConfigureLinkAddresses = func(idx int, ipNets []*net.IPNet) error {
return tt.utilConfigErr
Expand Down
10 changes: 5 additions & 5 deletions test/e2e/antreaipam_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,16 +267,16 @@ func testAntreaIPAMPodConnectivitySameNode(t *testing.T, data *TestData) {
})
workerNode := workerNodeName(1)

t.Logf("Creating %d agnhost Pods on '%s'", numPods+1, workerNode)
t.Logf("Creating %d toolbox Pods on '%s'", numPods+1, workerNode)
for i := range podInfos {
podInfos[i].os = clusterInfo.nodesOS[workerNode]
if err := data.createAgnhostPodOnNodeWithAnnotations(podInfos[i].name, podInfos[i].namespace, workerNode, nil); err != nil {
t.Fatalf("Error when creating agnhost test Pod '%s': %v", podInfos[i], err)
if err := data.createToolboxPodOnNode(podInfos[i].name, podInfos[i].namespace, workerNode, false); err != nil {
t.Fatalf("Error when creating toolbox test Pod '%s': %v", podInfos[i], err)
}
defer deletePodWrapper(t, data, podInfos[i].namespace, podInfos[i].name)
}

data.runPingMesh(t, podInfos, agnhostContainerName)
data.runPingMesh(t, podInfos, toolboxContainerName, true)
}

func testAntreaIPAMPodConnectivityDifferentNodes(t *testing.T, data *TestData) {
Expand All @@ -290,7 +290,7 @@ func testAntreaIPAMPodConnectivityDifferentNodes(t *testing.T, data *TestData) {
}
podInfos = append(podInfos, createdPodInfos...)
}
data.runPingMesh(t, podInfos, agnhostContainerName)
data.runPingMesh(t, podInfos, toolboxContainerName, true)
}

func testAntreaIPAMStatefulSet(t *testing.T, data *TestData, dedicatedIPPoolKey *string) {
Expand Down
Loading
Loading