Skip to content

Commit

Permalink
Fix service traffic requiring SNAT (#2703)
Browse files Browse the repository at this point in the history
The service traffic requiring SNAT couldn't be transferred to peer Node
when the endpoint Pod is on another Node. This was because we didn't
set any address on WireGuard device antrea-wg0. Therefore, when
iptables MASQUERADE action took effect, it chose one IP from other
interfaces, which might not be the gateway address on antrea-gw0. This
caused two problems:

1. Peer wireguard didn't accept the packet as its source address was not
in its "allowed ips"

2. Peer Node wouldn't route the response back via the encrypted tunnel
as the destination IP was not in its "allowed ips"

This patch fixes it by assigning the gateway IPs on the WireGuard
device. But it uses "/32" mask for IPv4 address and "/128" mask for
IPv6 address to avoid impacting routes on Antrea gateway.

Signed-off-by: Quan Tian <qtian@vmware.com>
  • Loading branch information
tnqn authored Sep 2, 2021
1 parent aa6218f commit b8a6983
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 10 deletions.
17 changes: 9 additions & 8 deletions pkg/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,15 @@ func (i *Initializer) Initialize() error {
return err
}

if err := i.prepareHostNetwork(); err != nil {
return err
}

if err := i.setupOVSBridge(); err != nil {
return err
}

// initializeWireGuard must be executed after setupOVSBridge as it requires gateway addresses on the OVS bridge.
switch i.networkConfig.TrafficEncryptionMode {
case config.TrafficEncryptionModeIPSec:
if err := i.initializeIPSec(); err != nil {
Expand All @@ -280,14 +289,6 @@ func (i *Initializer) Initialize() error {
}
}

if err := i.prepareHostNetwork(); err != nil {
return err
}

if err := i.setupOVSBridge(); err != nil {
return err
}

wg.Add(1)
// routeClient.Initialize() should be after i.setupOVSBridge() which
// creates the host gateway interface.
Expand Down
24 changes: 24 additions & 0 deletions pkg/agent/wireguard/client_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (

"antrea.io/antrea/pkg/agent/config"
"antrea.io/antrea/pkg/agent/types"
"antrea.io/antrea/pkg/agent/util"
)

const defaultWireGuardInterfaceName = "antrea-wg0"
Expand All @@ -61,6 +62,7 @@ type client struct {
privateKey wgtypes.Key
peerPublicKeyByNodeName *sync.Map
wireGuardConfig *config.WireGuardConfig
gatewayConfig *config.GatewayConfig
}

func New(clientSet clientset.Interface, nodeConfig *config.NodeConfig, wireGuardConfig *config.WireGuardConfig) (Interface, error) {
Expand All @@ -77,6 +79,7 @@ func New(clientSet clientset.Interface, nodeConfig *config.NodeConfig, wireGuard
k8sClient: clientSet,
wireGuardConfig: wireGuardConfig,
peerPublicKeyByNodeName: &sync.Map{},
gatewayConfig: nodeConfig.GatewayConfig,
}
return c, nil
}
Expand All @@ -94,6 +97,27 @@ func (client *client) Init() error {
if err := netlink.LinkSetUp(link); err != nil {
return err
}
// Configure the IP addresses same as Antrea gateway so iptables MASQUERADE target will select it as source address.
// It's necessary to make Service traffic requiring SNAT (e.g. host to ClusterIP, external to NodePort) accepted by
// peer Node and to make their response routed back correctly.
// It uses "/32" mask for IPv4 address and "/128" mask for IPv6 address to avoid impacting routes on Antrea gateway.
var gatewayIPs []*net.IPNet
if client.gatewayConfig.IPv4 != nil {
gatewayIPs = append(gatewayIPs, &net.IPNet{
IP: client.gatewayConfig.IPv4,
Mask: net.CIDRMask(32, 32),
})
}
if client.gatewayConfig.IPv6 != nil {
gatewayIPs = append(gatewayIPs, &net.IPNet{
IP: client.gatewayConfig.IPv6,
Mask: net.CIDRMask(128, 128),
})
}
// This must be executed after netlink.LinkSetUp as the latter ensures link.Attrs().Index is set.
if err := util.ConfigureLinkAddresses(link.Attrs().Index, gatewayIPs); err != nil {
return err
}
client.wireGuardConfig.LinkIndex = link.Attrs().Index
wgDev, err := client.wgClient.Device(client.wireGuardConfig.Name)
if err != nil {
Expand Down
1 change: 1 addition & 0 deletions pkg/agent/wireguard/client_windows.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//go:build windows
// +build windows

// Copyright 2021 Antrea Authors
Expand Down
29 changes: 27 additions & 2 deletions test/e2e/wireguard_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@ package e2e

import (
"fmt"
"net"
"strconv"
"strings"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"

"antrea.io/antrea/pkg/agent/config"
"antrea.io/antrea/pkg/apis"
Expand Down Expand Up @@ -66,7 +69,8 @@ func TestWireGuard(t *testing.T) {
defer data.redeployAntrea(t, deployAntreaDefault)
}

t.Run("testWireGuardTunnelConnectivity", func(t *testing.T) { testWireGuardTunnelConnectivity(t, data) })
t.Run("testPodConnectivity", func(t *testing.T) { testPodConnectivity(t, data) })
t.Run("testServiceConnectivity", func(t *testing.T) { testServiceConnectivity(t, data) })
}

func (data *TestData) getWireGuardPeerEndpointsWithHandshake(nodeName string) ([]string, error) {
Expand Down Expand Up @@ -101,7 +105,7 @@ func (data *TestData) getWireGuardPeerEndpointsWithHandshake(nodeName string) ([
return peerEndpoints, nil
}

func testWireGuardTunnelConnectivity(t *testing.T, data *TestData) {
func testPodConnectivity(t *testing.T, data *TestData) {
podInfos, deletePods := createPodsOnDifferentNodes(t, data, "differentnodes")
defer deletePods()
numPods := 2
Expand All @@ -123,3 +127,24 @@ func testWireGuardTunnelConnectivity(t *testing.T, data *TestData) {
assert.Contains(t, endpoints, fmt.Sprintf("%s:%d", nodeIP, apis.WireGuardListenPort))
}
}

// testServiceConnectivity verifies host-to-service can be transferred through the encrypted tunnel correctly.
func testServiceConnectivity(t *testing.T, data *TestData) {
clientPodName := "hostnetwork-pod"
svcName := "agnhost"
clientPodNode := nodeName(0)
serverPodNode := nodeName(1)
svc, cleanup := data.createAgnhostServiceAndBackendPods(t, svcName, serverPodNode, corev1.ServiceTypeNodePort)
defer cleanup()

// Create the a hostNetwork Pod on a Node different from the service's backend Pod, so the service traffic will be transferred across the tunnel.
require.NoError(t, data.createPodOnNode(clientPodName, testNamespace, clientPodNode, busyboxImage, []string{"sleep", strconv.Itoa(3600)}, nil, nil, nil, true, nil))
defer data.deletePodAndWait(defaultTimeout, clientPodName, testNamespace)
require.NoError(t, data.podWaitForRunning(defaultTimeout, clientPodName, testNamespace))

err := data.runNetcatCommandFromTestPod(clientPodName, testNamespace, svc.Spec.ClusterIP, 80)
require.NoError(t, err, "Pod %s should be able to connect the service's ClusterIP %s, but was not able to connect", clientPodName, net.JoinHostPort(svc.Spec.ClusterIP, fmt.Sprint(80)))

err = data.runNetcatCommandFromTestPod(clientPodName, testNamespace, "127.0.0.1", svc.Spec.Ports[0].NodePort)
require.NoError(t, err, "Pod %s should be able to connect the service's NodePort 127.0.0.1:%s, but was not able to connect", clientPodName, svc.Spec.Ports[0].NodePort)
}

0 comments on commit b8a6983

Please sign in to comment.