Skip to content

Commit

Permalink
Drop eth0 packets in PREROUTING on Kind Nodes (#2143)
Browse files Browse the repository at this point in the history
According to the OVS documentation:
On Linux, when a physical interface is in use by the userspace datapath,
packets received on the interface still also pass into the kernel TCP/IP
stack. This can cause surprising and incorrect behavior. You can use
"iptables" to avoid this behavior, by using it to drop received packets.

The OVS documentation suggests dropping packets in the INPUT and FORWARD
chains. However, this is not sufficient for some edge cases. For
example, when receiving a TCP RST packet, the packet will clear the
conntrack entry for the TCP connection before it can be dropped, which
can cause the "second" TCP RST packet (the one processed by OVS
userspace) to be marked as invalid when going through conntrack.

So instead we drop the packet in PREROUTING:
iptables -t raw -A PREROUTING -i eth0 -j DROP
This rule is added to the start_ovs_netdev script.

By adding this rule, we no longer need to skip TCP e2e tests for the
Reject NetworkPolicy Action in Kind clusters.

It's possible that this is also going to help with various connectivity
issues we observed with Antrea in Kind over time. For example, I believe
we are also able to remove the hack which reduces the value of the
tcp_retries2 sysctl parameter.

Fixes #2025

Signed-off-by: Antonin Bas <abas@vmware.com>
  • Loading branch information
antoninbas authored Jun 16, 2021
1 parent eeb89f6 commit 297bced
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 26 deletions.
24 changes: 18 additions & 6 deletions build/images/scripts/start_ovs_netdev
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,10 @@ source daemon_status
CONTAINER_NAME="antrea-ovs"
OVS_DB_FILE="/var/run/openvswitch/conf.db"

set -euo pipefail

hwaddr=$(ip link show eth0 | grep link/ether | awk '{print $2}')
inet=$(ip addr show eth0 | grep "inet " | awk '{ print $2 }')
gw=$(ip route | grep default | awk '{ print $3 }')
if ! ip addr show eth0 > /dev/null 2>&1; then
log_error $CONTAINER_NAME "No eth0 uplink found, exiting"
exit 1
fi

# Modify ovs-ctl so that the kernel module is no longer loaded since it is not
# needed when using OVS in userspace mode. It also enables running OVS with the
Expand All @@ -23,7 +22,10 @@ function fix_ovs_ctl {
# See http://docs.openvswitch.org/en/latest/howto/userspace-tunneling/
function add_br_phy {
log_info $CONTAINER_NAME "Creating OVS br-phy bridge for netdev datapath type"
ovs-vsctl --may-exist add-br br-phy \
hwaddr=$(ip link show eth0 | grep link/ether | awk '{print $2}')
inet=$(ip addr show eth0 | grep "inet " | awk '{ print $2 }')
gw=$(ip route | grep default | awk '{ print $3 }')
ovs-vsctl add-br br-phy \
-- set Bridge br-phy datapath_type=netdev \
-- br-set-external-id br-phy bridge-id br-phy \
-- set bridge br-phy fail-mode=standalone \
Expand All @@ -35,15 +37,19 @@ function add_br_phy {
ip addr flush dev eth0 2>/dev/null
ip link set eth0 up
ip route add default via "$gw" dev br-phy
iptables -t raw -A PREROUTING -i eth0 -j DROP
}

function del_br_phy {
inet=$(ip addr show br-phy | grep "inet " | awk '{ print $2 }')
gw=$(ip route | grep default | awk '{ print $3 }')
log_info $CONTAINER_NAME "Deleting OVS br-phy bridge"
ovs-vsctl del-port br-phy eth0
ovs-vsctl del-br br-phy
ip addr add "$inet" dev eth0
ip link set eth0 up
ip route add default via "$gw" dev eth0
iptables -t raw -D PREROUTING -i eth0 -j DROP
}

function start_ovs {
Expand Down Expand Up @@ -73,6 +79,8 @@ function quit {
exit 0
}

set -euo pipefail

# Do not trap EXIT as it would then ignore the "exit 0" statement in quit and
# exit with code 128 + SIGNAL
trap "quit" INT TERM
Expand All @@ -87,6 +95,10 @@ chmod 0640 $OVS_DB_FILE
if [[ "$#" -ge 1 ]] && [[ "$1" == "--start-ovs-only" ]]; then
exit 0
fi
if ip addr show br-phy > /dev/null 2>&1; then
log_info $CONTAINER_NAME "OVS bridge br-phy already exists, attempting clean-up first"
del_br_phy || true
fi
add_br_phy

log_info $CONTAINER_NAME "Started the loop that checks OVS status every 30 seconds"
Expand Down
2 changes: 0 additions & 2 deletions ci/kind/kind-setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,6 @@ function modify {
peerName=$(docker run --net=host antrea/ethtool:latest ip link | grep ^"$peerIdx": | awk -F[:@] '{ print $2 }' | cut -c 2-)
echo "Disabling TX checksum offload for node $node ($peerName)"
docker run --net=host --privileged antrea/ethtool:latest ethtool -K "$peerName" tx off
# Workaround for https://github.com/antrea-io/antrea/issues/324
docker exec "$node" sysctl -w net.ipv4.tcp_retries2=4
}

function configure_networks {
Expand Down
2 changes: 0 additions & 2 deletions hack/kind-fix-networking.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,4 @@ for node in "$@"; do
peerName=$(docker run --net=host antrea/ethtool:latest ip link | grep ^"$peerIdx": | awk -F[:@] '{ print $2 }' | cut -c 2-)
echo "Disabling TX checksum offload for node $node ($peerName)"
docker run --net=host --privileged antrea/ethtool:latest ethtool -K "$peerName" tx off
# Workaround for https://github.com/antrea-io/antrea/issues/324
docker exec "$node" sysctl -w net.ipv4.tcp_retries2=4
done
6 changes: 0 additions & 6 deletions test/e2e/antreapolicy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ import (
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"

"antrea.io/antrea/pkg/agent/config"
crdv1alpha1 "antrea.io/antrea/pkg/apis/crd/v1alpha1"
crdv1alpha2 "antrea.io/antrea/pkg/apis/crd/v1alpha2"
crdv1alpha3 "antrea.io/antrea/pkg/apis/crd/v1alpha3"
Expand Down Expand Up @@ -1799,11 +1798,6 @@ func testACNPRejectEgress(t *testing.T) {

// testACNPRejectIngress tests that a ACNP is able to reject egress traffic from pods labelled A to namespace Z.
func testACNPRejectIngress(t *testing.T, data *TestData, protocol v1.Protocol) {
// TCP rejection can't work on Kind when the traffic mode is noEncap. Skip it.
// https://github.com/antrea-io/antrea/issues/2025
if protocol == v1.ProtocolTCP {
skipIfEncapModeIsNotAndProviderIs(t, data, config.TrafficEncapModeEncap, "kind")
}
builder := &ClusterNetworkPolicySpecBuilder{}
builder = builder.SetName("acnp-reject-a-from-z-ingress").
SetPriority(1.0).
Expand Down
10 changes: 0 additions & 10 deletions test/e2e/fixtures.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,16 +93,6 @@ func skipIfEncapModeIsNot(tb testing.TB, data *TestData, encapMode config.Traffi
}
}

func skipIfEncapModeIsNotAndProviderIs(tb testing.TB, data *TestData, encapMode config.TrafficEncapModeType, name string) {
currentEncapMode, err := data.GetEncapMode()
if err != nil {
tb.Fatalf("Failed to get encap mode: %v", err)
}
if currentEncapMode != encapMode && testOptions.providerName == name {
tb.Skipf("Skipping test when encap mode is '%s' and provider is '%s', test requires '%s'", currentEncapMode.String(), name, encapMode.String())
}
}

func skipIfHasWindowsNodes(tb testing.TB) {
if len(clusterInfo.windowsNodes) != 0 {
tb.Skipf("Skipping test as the cluster has Windows Nodes")
Expand Down

0 comments on commit 297bced

Please sign in to comment.