From 09a97d9be9ae87857577b18e5fd202ae71ca54d6 Mon Sep 17 00:00:00 2001 From: Jianjun Shen Date: Mon, 22 Feb 2021 01:37:32 -0500 Subject: [PATCH] Refactor Windows SNAT flows for SNAT policy implementation Separate common flows and Windows only flows for SNAT. Add a new snatTable for looking up the SNAT IPs of external traffic. --- pkg/agent/agent.go | 6 +- pkg/agent/agent_linux.go | 5 - pkg/agent/agent_windows.go | 13 -- pkg/agent/openflow/client.go | 33 +-- pkg/agent/openflow/pipeline.go | 222 +++---------------- pkg/agent/openflow/pipeline_other.go | 45 ++++ pkg/agent/openflow/pipeline_windows.go | 272 ++++++++++++++++++++++++ test/integration/agent/openflow_test.go | 16 +- 8 files changed, 366 insertions(+), 246 deletions(-) create mode 100644 pkg/agent/openflow/pipeline_other.go create mode 100644 pkg/agent/openflow/pipeline_windows.go diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index 62ab7501895..6a69f6ba32f 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -294,9 +294,9 @@ func (i *Initializer) initOpenFlowPipeline() error { return err } - // On Windows platform, extra flows are needed to perform SNAT for the - // traffic to external network. - if err := i.initExternalConnectivityFlows(); err != nil { + // Install OpenFlow entries to enable Pod traffic to external IP + // addresses. + if err := i.ofClient.InstallExternalFlows(); err != nil { klog.Errorf("Failed to install openflow entries for external connectivity: %v", err) return err } diff --git a/pkg/agent/agent_linux.go b/pkg/agent/agent_linux.go index d6b899ee3c4..e9158572e92 100644 --- a/pkg/agent/agent_linux.go +++ b/pkg/agent/agent_linux.go @@ -35,11 +35,6 @@ func (i *Initializer) initHostNetworkFlows() error { return nil } -// initExternalConnectivityFlows returns immediately on Linux. The corresponding functions are provided in routeClient. -func (i *Initializer) initExternalConnectivityFlows() error { - return nil -} - // getTunnelLocalIP returns local_ip of tunnel port. // On linux platform, local_ip option is not needed. func (i *Initializer) getTunnelPortLocalIP() net.IP { diff --git a/pkg/agent/agent_windows.go b/pkg/agent/agent_windows.go index 799220d02ba..13a8ab745c5 100644 --- a/pkg/agent/agent_windows.go +++ b/pkg/agent/agent_windows.go @@ -188,19 +188,6 @@ func (i *Initializer) initHostNetworkFlows() error { return nil } -// initExternalConnectivityFlows installs OpenFlow entries to SNAT Pod traffic -// using Node IP, and then Pod could communicate to the external IP addresses. -func (i *Initializer) initExternalConnectivityFlows() error { - if i.nodeConfig.PodIPv4CIDR == nil { - return fmt.Errorf("Failed to find valid IPv4 PodCIDR") - } - // Install OpenFlow entries on the OVS to enable Pod traffic to communicate to external IP addresses. - if err := i.ofClient.InstallExternalFlows(); err != nil { - return err - } - return nil -} - // getTunnelLocalIP returns local_ip of tunnel port func (i *Initializer) getTunnelPortLocalIP() net.IP { return i.nodeConfig.NodeIPAddr.IP diff --git a/pkg/agent/openflow/client.go b/pkg/agent/openflow/client.go index 4848b8682b8..0b63c8c2a8c 100644 --- a/pkg/agent/openflow/client.go +++ b/pkg/agent/openflow/client.go @@ -150,10 +150,11 @@ type Client interface { // This function is only used for Windows platform. InstallBridgeUplinkFlows() error - // InstallExternalFlows sets up flows to enable Pods to communicate to the external IP addresses. The corresponding - // OpenFlow entries include: 1) identify the packets from local Pods to the external IP address, 2) mark the traffic - // in the connection tracking context, and 3) SNAT the packets with Node IP. - // This function is only used for Windows platform. + // InstallExternalFlows sets up flows to enable Pods to communicate to + // the external IP addresses. The flows identify the packets from local + // Pods to the external IP address, and mark the packets to be SNAT'd + // with the configured SNAT IPs. On Windows Node, the flows also perform + // SNAT with the Openflow NAT action. InstallExternalFlows() error // Disconnect disconnects the connection between client and OFSwitch. @@ -465,19 +466,11 @@ func (c *client) UninstallServiceFlows(svcIP net.IP, svcPort uint16, protocol bi } func (c *client) InstallLoadBalancerServiceFromOutsideFlows(svcIP net.IP, svcPort uint16, protocol binding.Protocol) error { - c.replayMutex.RLock() - defer c.replayMutex.RUnlock() - var flows []binding.Flow - flows = append(flows, c.loadBalancerServiceFromOutsideFlow(svcIP, svcPort, protocol)) - cacheKey := fmt.Sprintf("LoadBalancerService_%s_%d_%s", svcIP, svcPort, protocol) - return c.addFlows(c.serviceFlowCache, cacheKey, flows) + return c.installLoadBalancerServiceFromOutsideFlows(svcIP, svcPort, protocol) } func (c *client) UninstallLoadBalancerServiceFromOutsideFlows(svcIP net.IP, svcPort uint16, protocol binding.Protocol) error { - c.replayMutex.RLock() - defer c.replayMutex.RUnlock() - cacheKey := fmt.Sprintf("LoadBalancerService_%s_%d_%s", svcIP, svcPort, protocol) - return c.deleteFlows(c.serviceFlowCache, cacheKey) + return c.uninstallLoadBalancerServiceFromOutsideFlows(svcIP, svcPort, protocol) } func (c *client) InstallClusterServiceFlows() error { @@ -557,13 +550,7 @@ func (c *client) InstallDefaultTunnelFlows() error { } func (c *client) InstallBridgeUplinkFlows() error { - flows := c.hostBridgeUplinkFlows(*c.nodeConfig.PodIPv4CIDR, cookie.Default) - c.hostNetworkingFlows = flows - if err := c.ofEntryOperations.AddAll(flows); err != nil { - return err - } - c.hostNetworkingFlows = flows - return nil + return c.installBridgeUplinkFlows() } func (c *client) initialize() error { @@ -633,8 +620,8 @@ func (c *client) Initialize(roundInfo types.RoundInfo, nodeConfig *config.NodeCo func (c *client) InstallExternalFlows() error { nodeIP := c.nodeConfig.NodeIPAddr.IP podSubnet := c.nodeConfig.PodIPv4CIDR - flows := c.uplinkSNATFlows(cookie.SNAT) - flows = append(flows, c.snatFlows(nodeIP, *podSubnet, cookie.SNAT)...) + localGatewayMAC := c.nodeConfig.GatewayConfig.MAC + flows := c.externalFlows(nodeIP, *podSubnet, localGatewayMAC) if err := c.ofEntryOperations.AddAll(flows); err != nil { return fmt.Errorf("failed to install flows for external communication: %v", err) } diff --git a/pkg/agent/openflow/pipeline.go b/pkg/agent/openflow/pipeline.go index e71c5af2b97..b64eabb8a77 100644 --- a/pkg/agent/openflow/pipeline.go +++ b/pkg/agent/openflow/pipeline.go @@ -57,7 +57,8 @@ const ( EgressDefaultTable binding.TableIDType = 60 EgressMetricTable binding.TableIDType = 61 l3ForwardingTable binding.TableIDType = 70 - l3DecTTLTable binding.TableIDType = 71 + snatTable binding.TableIDType = 71 + l3DecTTLTable binding.TableIDType = 72 l2ForwardingCalcTable binding.TableIDType = 80 AntreaPolicyIngressRuleTable binding.TableIDType = 85 DefaultTierIngressRuleTable binding.TableIDType = 89 @@ -83,7 +84,6 @@ const ( markTrafficFromGateway = 1 markTrafficFromLocal = 2 markTrafficFromUplink = 4 - markTrafficFromBridge = 5 // IPv6 multicast prefix ipv6MulticastAddr = "FF00::/8" @@ -120,7 +120,9 @@ var ( {EgressRuleTable, "EgressRule"}, {EgressDefaultTable, "EgressDefaultRule"}, {EgressMetricTable, "EgressMetric"}, - {l3ForwardingTable, "l3Forwarding"}, + {l3ForwardingTable, "L3Forwarding"}, + {snatTable, "SNAT"}, + {l3DecTTLTable, "IPTTLDec"}, {l2ForwardingCalcTable, "L2Forwarding"}, {AntreaPolicyIngressRuleTable, "AntreaPolicyIngressRule"}, {IngressRuleTable, "IngressRule"}, @@ -223,23 +225,9 @@ const ( CtZone = 0xfff0 CtZoneV6 = 0xffe6 - // CtZoneSNAT is only used on Windows and only when AntreaProxy is enabled. - // When a Pod access a ClusterIP Service, and the IP of the selected endpoint - // is not in "cluster-cidr". The request packets need to be SNAT'd(set src IP to local Node IP) - // after have been DNAT'd(set dst IP to endpoint IP). - // For example, the endpoint Pod may run in hostNetwork mode and the IP of the endpoint - // will is the current Node IP. - // We need to use a different ct_zone to track the SNAT'd connection because OVS - // does not support doing both DNAT and SNAT in the same ct_zone. - // - // An example of the connection is a Pod accesses kubernetes API service: - // Pod --> DNAT(CtZone) --> SNAT(CtZoneSNAT) --> Endpoint(API server NodeIP) - // Pod <-- unDNAT(CtZone) <-- unSNAT(CtZoneSNAT) <-- Endpoint(API server NodeIP) - CtZoneSNAT = 0xffdc - - portFoundMark = 0b1 - snatRequiredMark = 0b1 - hairpinMark = 0b1 + + portFoundMark = 0b1 + hairpinMark = 0b1 // macRewriteMark indicates the destination and source MACs of the // packet should be rewritten in the l3ForwardingTable. macRewriteMark = 0b1 @@ -248,7 +236,6 @@ const ( // gatewayCTMark is used to to mark connections initiated through the host gateway interface // (i.e. for which the first packet of the connection was received through the gateway). gatewayCTMark = 0x20 - snatCTMark = 0x40 ServiceCTMark = 0x21 // disposition is loaded in marksReg [21] @@ -269,11 +256,8 @@ var ( // ofPortMarkRange takes the 16th bit of register marksReg to indicate if the ofPort number of an interface // is found or not. Its value is 0x1 if yes. ofPortMarkRange = binding.Range{16, 16} - // ofPortRegRange takes a 32-bit range of register PortCacheReg to cache the ofPort number of the interface. + // ofPortRegRange takes a 31-bit range of register PortCacheReg to cache the ofPort number of the interface. ofPortRegRange = binding.Range{0, 31} - // snatMarkRange takes the 17th bit of register marksReg to indicate if the packet needs to be SNATed with Node's IP - // or not. Its value is 0x1 if yes. - snatMarkRange = binding.Range{17, 17} // hairpinMarkRange takes the 18th bit of register marksReg to indicate // if the packet needs DNAT to virtual IP or not. Its value is 0x1 if yes. hairpinMarkRange = binding.Range{18, 18} @@ -335,6 +319,7 @@ func portToUint16(port int) uint16 { type client struct { enableProxy bool enableAntreaPolicy bool + enableSNATPolicy bool roundInfo types.RoundInfo cookieAllocator cookie.Allocator bridge binding.Bridge @@ -419,6 +404,9 @@ func (c *client) Delete(flow binding.Flow) error { } func (c *client) AddAll(flows []binding.Flow) error { + if len(flows) == 0 { + return nil + } startTime := time.Now() defer func() { d := time.Since(startTime) @@ -1504,125 +1492,13 @@ func (c *client) localProbeFlow(localGatewayIPs []net.IP, category cookie.Catego return flows } -// hostBridgeUplinkFlows generates the flows that forward traffic between the -// bridge local port and the uplink port to support the host traffic with -// outside. These flows are needed only on Windows Nodes. -func (c *client) hostBridgeUplinkFlows(localSubnet net.IPNet, category cookie.Category) (flows []binding.Flow) { - bridgeOFPort := uint32(config.BridgeOFPort) - flows = []binding.Flow{ - c.pipeline[ClassifierTable].BuildFlow(priorityNormal). - MatchInPort(config.UplinkOFPort). - Action().LoadRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}). - Action().GotoTable(uplinkTable). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - c.pipeline[ClassifierTable].BuildFlow(priorityNormal). - MatchInPort(config.BridgeOFPort). - Action().LoadRegRange(int(marksReg), markTrafficFromBridge, binding.Range{0, 15}). - Action().GotoTable(uplinkTable). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - // Output non-IP packets to the bridge port directly. IP packets - // are redirected to conntrackTable in uplinkSNATFlows() (in - // case they need unSNAT). - c.pipeline[uplinkTable].BuildFlow(priorityLow). - MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}). - Action().Output(int(bridgeOFPort)). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - // Forward the packet to conntrackTable if it enters the OVS - // pipeline from the bridge interface and is sent to the local - // Pod subnet. Mark the packet to indicate its destination MAC - // should be rewritten to the real MAC in the L3Frowarding - // table. This is for the case a Pod accesses a NodePort Service - // using the local Node's IP, and then the return traffic after - // the kube-proxy processing will enter the bridge from the - // bridge interface (but not the gateway interface. This is - // probably because we do not enable IP forwarding on the bridge - // interface). - c.pipeline[uplinkTable].BuildFlow(priorityHigh). - MatchProtocol(binding.ProtocolIP). - MatchRegRange(int(marksReg), markTrafficFromBridge, binding.Range{0, 15}). - MatchDstIPNet(localSubnet). - Action().LoadRegRange(int(marksReg), macRewriteMark, macRewriteMarkRange). - Action().GotoTable(conntrackTable). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - // Output other packets from the bridge port to the uplink port - // directly. - c.pipeline[uplinkTable].BuildFlow(priorityLow). - MatchRegRange(int(marksReg), markTrafficFromBridge, binding.Range{0, 15}). - Action().Output(config.UplinkOFPort). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - } - return flows -} - -// uplinkSNATFlows installs flows for traffic from the uplink port that help -// the SNAT implementation of the external traffic. It is for the Windows Nodes -// only. -func (c *client) uplinkSNATFlows(category cookie.Category) []binding.Flow { - ctStateNext := dnatTable - if c.enableProxy { - ctStateNext = endpointDNATTable - } - bridgeOFPort := uint32(config.BridgeOFPort) - flows := []binding.Flow{ - // Mark the packet to indicate its destination MAC should be rewritten to the real MAC in the L3Forwarding - // table, if the packet is a reply to a Pod from an external address. - c.pipeline[conntrackStateTable].BuildFlow(priorityHigh). - MatchProtocol(binding.ProtocolIP). - MatchCTStateNew(false).MatchCTStateTrk(true). - MatchCTMark(snatCTMark, nil). - MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}). - Action().LoadRegRange(int(marksReg), macRewriteMark, macRewriteMarkRange). - Action().GotoTable(ctStateNext). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - // Output the non-SNAT packet to the bridge interface directly if it is received from the uplink interface. - c.pipeline[conntrackStateTable].BuildFlow(priorityNormal). - MatchProtocol(binding.ProtocolIP). - MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}). - Action().Output(int(bridgeOFPort)). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - } - // Forward the IP packets from the uplink interface to - // conntrackTable. This is for unSNAT the traffic from the local - // Pod subnet to the external network. Non-SNAT packets will be - // output to the bridge port in conntrackStateTable. - if c.enableProxy { - // For the connection which is both applied DNAT and SNAT, the reply packtets - // are received from uplink and need to enter CTZoneSNAT first to do unSNAT. - // Pod --> DNAT(CtZone) --> SNAT(CtZoneSNAT) --> ExternalServer - // Pod <-- unDNAT(CtZone) <-- unSNAT(CtZoneSNAT) <-- ExternalServer - flows = append(flows, c.pipeline[uplinkTable].BuildFlow(priorityNormal). - MatchProtocol(binding.ProtocolIP). - MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}). - Action().CT(false, conntrackTable, CtZoneSNAT).NAT().CTDone(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } else { - flows = append(flows, c.pipeline[uplinkTable].BuildFlow(priorityNormal). - MatchProtocol(binding.ProtocolIP). - MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}). - Action().GotoTable(conntrackTable). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } - return flows -} - -// snatFlows installs flows to perform SNAT for traffic to the external network. -// It is used on Windows Nodes only. -func (c *client) snatFlows(nodeIP net.IP, localSubnet net.IPNet, category cookie.Category) []binding.Flow { - snatIPRange := &binding.IPRange{StartIP: nodeIP, EndIP: nodeIP} +// snatCommonFlows installs flows to perform SNAT for traffic to the external +// network. +func (c *client) snatCommonFlows(nodeIP net.IP, localSubnet net.IPNet, localGatewayMAC net.HardwareAddr, category cookie.Category) []binding.Flow { l3FwdTable := c.pipeline[l3ForwardingTable] nextTable := l3FwdTable.GetNext() flows := []binding.Flow{ // First install flows for traffic that should bypass SNAT. - // This flow is for traffic to the local Pod subnet. l3FwdTable.BuildFlow(priorityNormal). MatchProtocol(binding.ProtocolIP). @@ -1656,69 +1532,23 @@ func (c *client) snatFlows(nodeIP net.IP, localSubnet net.IPNet, category cookie Cookie(c.cookieAllocator.Request(category).Raw()). Done(), - // Add the SNAT mark on the packet that is not filtered by other - // flow entries in the L3Forwarding table. + // Send the traffic to external to snatTable. l3FwdTable.BuildFlow(priorityLow). MatchProtocol(binding.ProtocolIP). - MatchCTStateNew(true).MatchCTStateTrk(true). MatchRegRange(int(marksReg), markTrafficFromLocal, binding.Range{0, 15}). - Action().LoadRegRange(int(marksReg), snatRequiredMark, snatMarkRange). - Action().GotoTable(nextTable). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), - // Force IP packet into the conntrack zone with SNAT. If the connection is SNATed, the reply packet should use - // Pod IP as the destination, and then is forwarded to conntrackStateTable. - c.pipeline[conntrackTable].BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP). - Action().CT(false, conntrackStateTable, CtZone).NAT().CTDone(). + Action().GotoTable(snatTable). Cookie(c.cookieAllocator.Request(category).Raw()). Done(), - // Redirect the packet into L2ForwardingOutput table after the packet is SNAT'd. A "SNAT" packet has these - // characteristics: 1) the ct_state is "+new+trk", 2) reg0[17] is set to 1; 3) Node IP is used as the target - // source IP in NAT action, 4) ct_mark is set to 0x40 in the conn_track context. - c.pipeline[conntrackCommitTable].BuildFlow(priorityNormal). + // For the traffic tunneled from a remote Node, rewrite the + // destination MAC to the gateway interface MAC. + l3FwdTable.BuildFlow(priorityLow). MatchProtocol(binding.ProtocolIP). - MatchCTStateNew(true).MatchCTStateTrk(true).MatchCTStateDNAT(false). - MatchRegRange(int(marksReg), snatRequiredMark, snatMarkRange). - Action().CT(true, L2ForwardingOutTable, CtZone). - SNAT(snatIPRange, nil). - LoadToMark(snatCTMark).CTDone(). + MatchRegRange(int(marksReg), macRewriteMark, macRewriteMarkRange). + Action().SetDstMAC(localGatewayMAC). + Action().GotoTable(snatTable). Cookie(c.cookieAllocator.Request(category).Raw()). Done(), } - // The following flows are for both apply DNAT + SNAT for packets. - // If AntreaProxy is disabled, no DNAT happens in OVS pipeline. - if c.enableProxy { - // If the SNAT is needed after DNAT, mark the snatRequiredMark even the connection is not new. - // Because this kind of packets need to enter CtZoneSNAT to make sure the SNAT can be applied - // before leaving the pipeline. - flows = append(flows, l3FwdTable.BuildFlow(priorityLow). - MatchProtocol(binding.ProtocolIP). - MatchCTStateNew(false).MatchCTStateTrk(true).MatchCTStateDNAT(true). - MatchRegRange(int(marksReg), markTrafficFromLocal, binding.Range{0, 15}). - Action().LoadRegRange(int(marksReg), snatRequiredMark, snatMarkRange). - Action().GotoTable(nextTable). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - // If SNAT is needed after DNAT: - // - For new connection: commit to CtZoneSNAT - // - For existing connection: enter CtZoneSNAT to apply SNAT - flows = append(flows, c.pipeline[conntrackCommitTable].BuildFlow(priorityNormal). - MatchProtocol(binding.ProtocolIP). - MatchCTStateNew(true).MatchCTStateTrk(true).MatchCTStateDNAT(true). - MatchRegRange(int(marksReg), snatRequiredMark, snatMarkRange). - Action().CT(true, L2ForwardingOutTable, CtZoneSNAT). - SNAT(snatIPRange, nil). - LoadToMark(snatCTMark).CTDone(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - flows = append(flows, c.pipeline[conntrackCommitTable].BuildFlow(priorityNormal). - MatchProtocol(binding.ProtocolIP). - MatchCTStateNew(false).MatchCTStateTrk(true).MatchCTStateDNAT(true). - MatchRegRange(int(marksReg), snatRequiredMark, snatMarkRange). - Action().CT(false, L2ForwardingOutTable, CtZoneSNAT).NAT().CTDone(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done()) - } return flows } @@ -1984,6 +1814,10 @@ func (c *client) generatePipeline() { c.pipeline[dnatTable] = bridge.CreateTable(dnatTable, c.egressEntryTable, binding.TableMissActionNext) c.pipeline[conntrackCommitTable] = bridge.CreateTable(conntrackCommitTable, L2ForwardingOutTable, binding.TableMissActionNext) } + // The default SNAT is implemented with OVS on Windows. + if c.enableSNATPolicy || runtime.IsWindowsPlatform() { + c.pipeline[snatTable] = bridge.CreateTable(snatTable, l2ForwardingCalcTable, binding.TableMissActionNext) + } if runtime.IsWindowsPlatform() { c.pipeline[uplinkTable] = bridge.CreateTable(uplinkTable, spoofGuardTable, binding.TableMissActionNone) } diff --git a/pkg/agent/openflow/pipeline_other.go b/pkg/agent/openflow/pipeline_other.go new file mode 100644 index 00000000000..eb7b2303db0 --- /dev/null +++ b/pkg/agent/openflow/pipeline_other.go @@ -0,0 +1,45 @@ +// +build !windows +// package openflow is needed by antctl which is compiled for macOS too. + +// Copyright 2021 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package openflow + +import ( + "net" + + "github.com/vmware-tanzu/antrea/pkg/agent/openflow/cookie" + binding "github.com/vmware-tanzu/antrea/pkg/ovs/openflow" +) + +// externalFlows returns the flows needed to enable SNAT for external traffic. +func (c *client) externalFlows(nodeIP net.IP, localSubnet net.IPNet, localGatewayMAC net.HardwareAddr) []binding.Flow { + if !c.enableSNATPolicy { + return nil + } + return c.snatCommonFlows(nodeIP, localSubnet, localGatewayMAC, cookie.SNAT) +} + +func (c *client) installBridgeUplinkFlows() error { + return nil +} + +func (c *client) installLoadBalancerServiceFromOutsideFlows(svcIP net.IP, svcPort uint16, protocol binding.Protocol) error { + return nil +} + +func (c *client) uninstallLoadBalancerServiceFromOutsideFlows(svcIP net.IP, svcPort uint16, protocol binding.Protocol) error { + return nil +} diff --git a/pkg/agent/openflow/pipeline_windows.go b/pkg/agent/openflow/pipeline_windows.go new file mode 100644 index 00000000000..18805d1c8d5 --- /dev/null +++ b/pkg/agent/openflow/pipeline_windows.go @@ -0,0 +1,272 @@ +// +build windows + +// Copyright 2021 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package openflow + +import ( + "fmt" + "net" + + "github.com/vmware-tanzu/antrea/pkg/agent/config" + "github.com/vmware-tanzu/antrea/pkg/agent/openflow/cookie" + binding "github.com/vmware-tanzu/antrea/pkg/ovs/openflow" +) + +const ( + markTrafficFromBridge = 5 + + // ctZoneSNAT is only used on Windows and only when AntreaProxy is enabled. + // When a Pod access a ClusterIP Service, and the IP of the selected endpoint + // is not in "cluster-cidr". The request packets need to be SNAT'd(set src IP to local Node IP) + // after have been DNAT'd(set dst IP to endpoint IP). + // For example, the endpoint Pod may run in hostNetwork mode and the IP of the endpoint + // will is the current Node IP. + // We need to use a different ct_zone to track the SNAT'd connection because OVS + // does not support doing both DNAT and SNAT in the same ct_zone. + // + // An example of the connection is a Pod accesses kubernetes API service: + // Pod --> DNAT(CtZone) --> SNAT(ctZoneSNAT) --> Endpoint(API server NodeIP) + // Pod <-- unDNAT(CtZone) <-- unSNAT(ctZoneSNAT) <-- Endpoint(API server NodeIP) + ctZoneSNAT = 0xffdc + + snatRequiredMark = 0b1 + + snatCTMark = 0x40 +) + +var ( + // snatMarkRange takes the 17th bit of register marksReg to indicate if the packet needs to be SNATed with Node's IP + // or not. Its value is 0x1 if yes. + snatMarkRange = binding.Range{17, 17} +) + +// uplinkSNATFlows installs flows for traffic from the uplink port that help +// the SNAT implementation of the external traffic. +func (c *client) uplinkSNATFlows(localSubnet net.IPNet, category cookie.Category) []binding.Flow { + ctStateNext := dnatTable + if c.enableProxy { + ctStateNext = endpointDNATTable + } + bridgeOFPort := uint32(config.BridgeOFPort) + flows := []binding.Flow{ + // Mark the packet to indicate its destination MAC should be + // rewritten to the real MAC in the L3Forwarding table, if the + // packet is a reply to a local Pod from an external address. + c.pipeline[conntrackStateTable].BuildFlow(priorityHigh). + MatchProtocol(binding.ProtocolIP). + MatchCTStateNew(false).MatchCTStateTrk(true). + MatchCTMark(snatCTMark, nil). + MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}). + // return packets to remote Pods will be handled by L3Forwarding flows. + MatchDstIPNet(localSubnet). + Action().LoadRegRange(int(marksReg), macRewriteMark, macRewriteMarkRange). + Cookie(c.cookieAllocator.Request(category).Raw()). + Action().GotoTable(ctStateNext). + Done(), + // Output the non-SNAT packet to the bridge interface directly + // if it is received from the uplink interface. + c.pipeline[conntrackStateTable].BuildFlow(priorityNormal). + MatchProtocol(binding.ProtocolIP). + MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}). + Action().Output(int(bridgeOFPort)). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done(), + } + // Forward the IP packets from the uplink interface to + // conntrackTable. This is for unSNAT the traffic from the local + // Pod subnet to the external network. Non-SNAT packets will be + // output to the bridge port in conntrackStateTable. + if c.enableProxy { + // For the connection which is both applied DNAT and SNAT, the reply packtets + // are received from uplink and need to enter ctZoneSNAT first to do unSNAT. + // Pod --> DNAT(CtZone) --> SNAT(ctZoneSNAT) --> ExternalServer + // Pod <-- unDNAT(CtZone) <-- unSNAT(ctZoneSNAT) <-- ExternalServer + flows = append(flows, c.pipeline[uplinkTable].BuildFlow(priorityNormal). + MatchProtocol(binding.ProtocolIP). + MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}). + Action().CT(false, conntrackTable, ctZoneSNAT).NAT().CTDone(). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done()) + } else { + flows = append(flows, c.pipeline[uplinkTable].BuildFlow(priorityNormal). + MatchProtocol(binding.ProtocolIP). + MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}). + Action().GotoTable(conntrackTable). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done()) + } + return flows +} + +// snatImplementationFlows installs flows that implement SNAT with OVS NAT. +func (c *client) snatImplementationFlows(nodeIP net.IP, category cookie.Category) []binding.Flow { + snatIPRange := &binding.IPRange{StartIP: nodeIP, EndIP: nodeIP} + l3FwdTable := c.pipeline[l3ForwardingTable] + nextTable := l3FwdTable.GetNext() + flows := []binding.Flow{ + // Default to using Node IP as the SNAT IP. + c.pipeline[snatTable].BuildFlow(priorityLow). + MatchProtocol(binding.ProtocolIP). + MatchCTStateNew(true).MatchCTStateTrk(true). + MatchRegRange(int(marksReg), markTrafficFromLocal, binding.Range{0, 15}). + Action().LoadRegRange(int(marksReg), snatRequiredMark, snatMarkRange). + Action().GotoTable(nextTable). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done(), + // Force IP packet into the conntrack zone with SNAT. If the connection is SNATed, the reply packet should use + // Pod IP as the destination, and then is forwarded to conntrackStateTable. + c.pipeline[conntrackTable].BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP). + Action().CT(false, conntrackStateTable, CtZone).NAT().CTDone(). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done(), + // Perform SNAT with the default SNAT IP (the Node IP). A SNAT + // packet has these characteristics: 1) the ct_state is + // "+new+trk", 2) reg0[17] is set to 1; 3) ct_mark is set to + // 0x40. + c.pipeline[conntrackCommitTable].BuildFlow(priorityNormal). + MatchProtocol(binding.ProtocolIP). + MatchCTStateNew(true).MatchCTStateTrk(true).MatchCTStateDNAT(false). + MatchRegRange(int(marksReg), snatRequiredMark, snatMarkRange). + Action().CT(true, L2ForwardingOutTable, CtZone). + SNAT(snatIPRange, nil). + LoadToMark(snatCTMark).CTDone(). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done(), + } + // The following flows are for both apply DNAT + SNAT for packets. + // If AntreaProxy is disabled, no DNAT happens in OVS pipeline. + if c.enableProxy { + // If the SNAT is needed after DNAT, mark the snatRequiredMark even the connection is not new. + // Because this kind of packets need to enter ctZoneSNAT to make sure the SNAT can be applied + // before leaving the pipeline. + flows = append(flows, l3FwdTable.BuildFlow(priorityLow). + MatchProtocol(binding.ProtocolIP). + MatchCTStateNew(false).MatchCTStateTrk(true).MatchCTStateDNAT(true). + MatchRegRange(int(marksReg), markTrafficFromLocal, binding.Range{0, 15}). + Action().LoadRegRange(int(marksReg), snatRequiredMark, snatMarkRange). + Action().GotoTable(nextTable). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done()) + // If SNAT is needed after DNAT: + // - For new connection: commit to ctZoneSNAT + // - For existing connection: enter ctZoneSNAT to apply SNAT + flows = append(flows, c.pipeline[conntrackCommitTable].BuildFlow(priorityNormal). + MatchProtocol(binding.ProtocolIP). + MatchCTStateNew(true).MatchCTStateTrk(true).MatchCTStateDNAT(true). + MatchRegRange(int(marksReg), snatRequiredMark, snatMarkRange). + Action().CT(true, L2ForwardingOutTable, ctZoneSNAT). + SNAT(snatIPRange, nil). + LoadToMark(snatCTMark).CTDone(). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done()) + flows = append(flows, c.pipeline[conntrackCommitTable].BuildFlow(priorityNormal). + MatchProtocol(binding.ProtocolIP). + MatchCTStateNew(false).MatchCTStateTrk(true).MatchCTStateDNAT(true). + MatchRegRange(int(marksReg), snatRequiredMark, snatMarkRange). + Action().CT(false, L2ForwardingOutTable, ctZoneSNAT).NAT().CTDone(). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done()) + } + return flows +} + +// externalFlows returns the flows needed to enable SNAT for external traffic. +func (c *client) externalFlows(nodeIP net.IP, localSubnet net.IPNet, localGatewayMAC net.HardwareAddr) []binding.Flow { + flows := c.uplinkSNATFlows(localSubnet, cookie.SNAT) + flows = append(flows, c.snatCommonFlows(nodeIP, localSubnet, localGatewayMAC, cookie.SNAT)...) + flows = append(flows, c.snatImplementationFlows(nodeIP, cookie.SNAT)...) + return flows +} + +// hostBridgeUplinkFlows generates the flows that forward traffic between the +// bridge local port and the uplink port to support the host traffic with +// outside. +func (c *client) hostBridgeUplinkFlows(localSubnet net.IPNet, category cookie.Category) (flows []binding.Flow) { + bridgeOFPort := uint32(config.BridgeOFPort) + flows = []binding.Flow{ + c.pipeline[ClassifierTable].BuildFlow(priorityNormal). + MatchInPort(config.UplinkOFPort). + Action().LoadRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}). + Action().GotoTable(uplinkTable). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done(), + c.pipeline[ClassifierTable].BuildFlow(priorityNormal). + MatchInPort(config.BridgeOFPort). + Action().LoadRegRange(int(marksReg), markTrafficFromBridge, binding.Range{0, 15}). + Action().GotoTable(uplinkTable). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done(), + // Output non-IP packets to the bridge port directly. IP packets + // are redirected to conntrackTable in uplinkSNATFlows() (in + // case they need unSNAT). + c.pipeline[uplinkTable].BuildFlow(priorityLow). + MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}). + Action().Output(int(bridgeOFPort)). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done(), + // Forward the packet to conntrackTable if it enters the OVS + // pipeline from the bridge interface and is sent to the local + // Pod subnet. Mark the packet to indicate its destination MAC + // should be rewritten to the real MAC in the L3Frowarding + // table. This is for the case a Pod accesses a NodePort Service + // using the local Node's IP, and then the return traffic after + // the kube-proxy processing will enter the bridge from the + // bridge interface (but not the gateway interface. This is + // probably because we do not enable IP forwarding on the bridge + // interface). + c.pipeline[uplinkTable].BuildFlow(priorityHigh). + MatchProtocol(binding.ProtocolIP). + MatchRegRange(int(marksReg), markTrafficFromBridge, binding.Range{0, 15}). + MatchDstIPNet(localSubnet). + Action().LoadRegRange(int(marksReg), macRewriteMark, macRewriteMarkRange). + Action().GotoTable(conntrackTable). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done(), + // Output other packets from the bridge port to the uplink port + // directly. + c.pipeline[uplinkTable].BuildFlow(priorityLow). + MatchRegRange(int(marksReg), markTrafficFromBridge, binding.Range{0, 15}). + Action().Output(config.UplinkOFPort). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done(), + } + return flows +} + +func (c *client) installBridgeUplinkFlows() error { + flows := c.hostBridgeUplinkFlows(*c.nodeConfig.PodIPv4CIDR, cookie.Default) + if err := c.ofEntryOperations.AddAll(flows); err != nil { + return err + } + c.hostNetworkingFlows = flows + return nil +} + +func (c *client) installLoadBalancerServiceFromOutsideFlows(svcIP net.IP, svcPort uint16, protocol binding.Protocol) error { + c.replayMutex.RLock() + defer c.replayMutex.RUnlock() + var flows []binding.Flow + flows = append(flows, c.loadBalancerServiceFromOutsideFlow(svcIP, svcPort, protocol)) + cacheKey := fmt.Sprintf("LoadBalancerService_%s_%d_%s", svcIP, svcPort, protocol) + return c.addFlows(c.serviceFlowCache, cacheKey, flows) +} + +func (c *client) uninstallLoadBalancerServiceFromOutsideFlows(svcIP net.IP, svcPort uint16, protocol binding.Protocol) error { + c.replayMutex.RLock() + defer c.replayMutex.RUnlock() + cacheKey := fmt.Sprintf("LoadBalancerService_%s_%d_%s", svcIP, svcPort, protocol) + return c.deleteFlows(c.serviceFlowCache, cacheKey) +} diff --git a/test/integration/agent/openflow_test.go b/test/integration/agent/openflow_test.go index e5c1e890ab3..8ef2a447c02 100644 --- a/test/integration/agent/openflow_test.go +++ b/test/integration/agent/openflow_test.go @@ -128,7 +128,6 @@ func TestConnectivityFlows(t *testing.T) { testInstallPodFlows, testUninstallPodFlows, testUninstallNodeFlows, - testExternalFlows, } { f(t, config) } @@ -213,6 +212,7 @@ func TestReplayFlowsNetworkPolicyFlows(t *testing.T) { testReplayFlows(t) } +// TODO: change to test SNAT flows. func testExternalFlows(t *testing.T, config *testConfig) { nodeIP := config.nodeConfig.NodeIPAddr.IP localSubnet := config.nodeConfig.PodIPv4CIDR @@ -978,7 +978,7 @@ func preparePodFlows(podIPs []net.IP, podMAC net.HardwareAddr, podOFPort uint32, []*ofTestUtils.ExpectFlow{ { MatchStr: fmt.Sprintf("priority=200,%s,reg0=0x80000/0x80000,%s=%s", ipProto, nwDstField, podIP.String()), - ActStr: fmt.Sprintf("set_field:%s->eth_src,set_field:%s->eth_dst,goto_table:71", gwMAC.String(), podMAC.String()), + ActStr: fmt.Sprintf("set_field:%s->eth_src,set_field:%s->eth_dst,goto_table:72", gwMAC.String(), podMAC.String()), }, }, }, @@ -1116,7 +1116,7 @@ func prepareNodeFlows(peerSubnet net.IPNet, peerGwIP, peerNodeIP net.IP, vMAC, l []*ofTestUtils.ExpectFlow{ { MatchStr: fmt.Sprintf("priority=200,%s,%s=%s", ipProtoStr, nwDstFieldName, peerSubnet.String()), - ActStr: fmt.Sprintf("set_field:%s->eth_src,set_field:%s->eth_dst,set_field:%s->tun_dst,goto_table:71", localGwMAC.String(), vMAC.String(), peerNodeIP.String()), + ActStr: fmt.Sprintf("set_field:%s->eth_src,set_field:%s->eth_dst,set_field:%s->tun_dst,goto_table:72", localGwMAC.String(), vMAC.String(), peerNodeIP.String()), }, }, }) @@ -1147,8 +1147,8 @@ func prepareDefaultFlows(config *testConfig) []expectTableFlows { tableID: 105, flows: []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "goto_table:106"}}, } - table71Flows := expectTableFlows{ - tableID: 71, + table72Flows := expectTableFlows{ + tableID: 72, flows: []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "goto_table:80"}}, } if config.enableIPv4 { @@ -1160,7 +1160,7 @@ func prepareDefaultFlows(config *testConfig) []expectTableFlows { &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ct_state=+new+trk,ip,reg0=0x1/0xffff", ActStr: "ct(commit,table=106,zone=65520,exec(load:0x20->NXM_NX_CT_MARK[])"}, &ofTestUtils.ExpectFlow{MatchStr: "priority=190,ct_state=+new+trk,ip", ActStr: "ct(commit,table=106,zone=65520)"}, ) - table71Flows.flows = append(table71Flows.flows, + table72Flows.flows = append(table72Flows.flows, &ofTestUtils.ExpectFlow{MatchStr: "priority=210,ip,reg0=0x1/0xffff", ActStr: "goto_table:80"}, &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ip", ActStr: "dec_ttl,goto_table:80"}, ) @@ -1174,13 +1174,13 @@ func prepareDefaultFlows(config *testConfig) []expectTableFlows { &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ct_state=+new+trk,ipv6,reg0=0x1/0xffff", ActStr: "ct(commit,table=106,zone=65510,exec(load:0x20->NXM_NX_CT_MARK[])"}, &ofTestUtils.ExpectFlow{MatchStr: "priority=190,ct_state=+new+trk,ipv6", ActStr: "ct(commit,table=106,zone=65510)"}, ) - table71Flows.flows = append(table71Flows.flows, + table72Flows.flows = append(table72Flows.flows, &ofTestUtils.ExpectFlow{MatchStr: "priority=210,ipv6,reg0=0x1/0xffff", ActStr: "goto_table:80"}, &ofTestUtils.ExpectFlow{MatchStr: "priority=200,ipv6", ActStr: "dec_ttl,goto_table:80"}, ) } return []expectTableFlows{ - table31Flows, table105Flows, table71Flows, + table31Flows, table105Flows, table72Flows, { uint8(0), []*ofTestUtils.ExpectFlow{{MatchStr: "priority=0", ActStr: "drop"}},