Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix tunnel interface name issue #2486

Merged
merged 1 commit into from
Aug 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 18 additions & 8 deletions pkg/agent/controller/noderoute/node_route_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,10 +222,8 @@ func (c *Controller) removeStaleTunnelPorts() error {
}

ifaceID := util.GenerateNodeTunnelInterfaceKey(node.Name)
validConfiguration := interfaceConfig.PSK == c.networkConfig.IPSecPSK &&
interfaceConfig.RemoteIP.Equal(peerNodeIP) &&
interfaceConfig.TunnelInterfaceConfig.Type == c.networkConfig.TunnelType
if validConfiguration {
ifaceName := util.GenerateNodeTunnelInterfaceName(node.Name)
if c.compareInterfaceConfig(interfaceConfig, peerNodeIP, ifaceName) {
desiredInterfaces[ifaceID] = true
}
}
Expand Down Expand Up @@ -258,6 +256,14 @@ func (c *Controller) removeStaleTunnelPorts() error {
return nil
}

func (c *Controller) compareInterfaceConfig(interfaceConfig *interfacestore.InterfaceConfig,
peerNodeIP net.IP, interfaceName string) bool {
return interfaceConfig.InterfaceName == interfaceName &&
interfaceConfig.PSK == c.networkConfig.IPSecPSK &&
interfaceConfig.RemoteIP.Equal(peerNodeIP) &&
interfaceConfig.TunnelInterfaceConfig.Type == c.networkConfig.TunnelType
}

func (c *Controller) reconcile() error {
klog.Infof("Reconciliation for %s", controllerName)
// reconciliation consists of removing stale routes and stale / invalid tunnel ports:
Expand Down Expand Up @@ -527,16 +533,20 @@ func getPodCIDRsOnNode(node *corev1.Node) []string {
// createIPSecTunnelPort creates an IPSec tunnel port for the remote Node if the
// tunnel does not exist, and returns the ofport number.
func (c *Controller) createIPSecTunnelPort(nodeName string, nodeIP net.IP) (int32, error) {
portName := util.GenerateNodeTunnelInterfaceName(nodeName)
interfaceConfig, ok := c.interfaceStore.GetNodeTunnelInterface(nodeName)
// check if Node IP, PSK, or tunnel type changes. This can
// happen if removeStaleTunnelPorts fails to remove a "stale"
// tunnel port for which the configuration has changed, return error to requeue the Node.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since you removed the TODO, and according to the commit message, I suppose this is fixed. However, how requeuing the Node helps? I think the next attemp will still get here and requeue again?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @tnqn, I think the TODO is highlighting it doesn't check the configuration change here but return directly. it's an assumption that it's a temporary issue if last remove action failed in removeStaleTunnelPorts, so re-queue will help because removeStaleTunnelPorts also checks configuration difference. but it indeed will go back here if failed again, is there any case in your mind port can't be removed?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My understanding was that we would call c.ovsBridgeClient.DeletePort and c.interfaceStore.DeleteInterface here, and in case of error during port removal, return an error and let the Node be requeued. removeStaleTunnelPorts is only called on controller start up and transient errors are always possible.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the issue is not really fixed. @luolanzone would you have a follow-up PR to fix it? and maybe have a test covering this scenario.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure, I will fix this.

if ok {
// TODO: check if Node IP, PSK, or tunnel type changes. This can
// happen if removeStaleTunnelPorts fails to remove a "stale"
// tunnel port for which the configuration has changed.
if !c.compareInterfaceConfig(interfaceConfig, nodeIP, portName) {
return 0, fmt.Errorf("IPSec tunnel interface config doesn't match cached one, stale IPSec tunnel port %s", interfaceConfig.InterfaceName)
}
klog.V(2).InfoS("Found cached IPSec tunnel interface", "interfaceName", interfaceConfig.InterfaceName, "port", interfaceConfig.OFPort, "nodeName", nodeName)
if interfaceConfig.OFPort != 0 {
return interfaceConfig.OFPort, nil
}
} else {
portName := util.GenerateNodeTunnelInterfaceName(nodeName)
ovsExternalIDs := map[string]interface{}{ovsExternalIDNodeName: nodeName}
portUUID, err := c.ovsBridgeClient.CreateTunnelPortExt(
portName,
Expand Down
156 changes: 152 additions & 4 deletions pkg/agent/controller/noderoute/node_route_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ import (
"antrea.io/antrea/pkg/agent/interfacestore"
oftest "antrea.io/antrea/pkg/agent/openflow/testing"
routetest "antrea.io/antrea/pkg/agent/route/testing"
"antrea.io/antrea/pkg/agent/util"
"antrea.io/antrea/pkg/ovs/ovsconfig"
ovsconfigtest "antrea.io/antrea/pkg/ovs/ovsconfig/testing"
)

Expand All @@ -55,15 +57,16 @@ type fakeController struct {
interfaceStore interfacestore.InterfaceStore
}

func newController(t *testing.T) (*fakeController, func()) {
func newController(t *testing.T, networkConfig *config.NetworkConfig) (*fakeController, func()) {
clientset := fake.NewSimpleClientset()
informerFactory := informers.NewSharedInformerFactory(clientset, 12*time.Hour)
ctrl := gomock.NewController(t)
ofClient := oftest.NewMockClient(ctrl)
ovsClient := ovsconfigtest.NewMockOVSBridgeClient(ctrl)
routeClient := routetest.NewMockInterface(ctrl)
interfaceStore := interfacestore.NewInterfaceStore()
c := NewNodeRouteController(clientset, informerFactory, ofClient, ovsClient, routeClient, interfaceStore, &config.NetworkConfig{}, &config.NodeConfig{GatewayConfig: &config.GatewayConfig{

c := NewNodeRouteController(clientset, informerFactory, ofClient, ovsClient, routeClient, interfaceStore, networkConfig, &config.NodeConfig{GatewayConfig: &config.GatewayConfig{
IPv4: nil,
MAC: gatewayMAC,
}})
Expand All @@ -79,7 +82,7 @@ func newController(t *testing.T) (*fakeController, func()) {
}

func TestControllerWithDuplicatePodCIDR(t *testing.T) {
c, closeFn := newController(t)
c, closeFn := newController(t, &config.NetworkConfig{})
defer closeFn()
defer c.queue.ShutDown()

Expand Down Expand Up @@ -163,7 +166,7 @@ func TestControllerWithDuplicatePodCIDR(t *testing.T) {
}

func TestIPInPodSubnets(t *testing.T) {
c, closeFn := newController(t)
c, closeFn := newController(t, &config.NetworkConfig{})
defer closeFn()
defer c.queue.ShutDown()

Expand Down Expand Up @@ -228,3 +231,148 @@ func TestIPInPodSubnets(t *testing.T) {
assert.Equal(t, false, c.Controller.IPInPodSubnets(net.ParseIP("10.10.10.10")))
assert.Equal(t, false, c.Controller.IPInPodSubnets(net.ParseIP("8.8.8.8")))
}

func setup(t *testing.T, ifaces []*interfacestore.InterfaceConfig) (*fakeController, func()) {
c, closeFn := newController(t, &config.NetworkConfig{
TrafficEncapMode: 0,
TunnelType: ovsconfig.TunnelType("vxlan"),
EnableIPSecTunnel: true,
IPSecPSK: "changeme",
})
for _, i := range ifaces {
c.interfaceStore.AddInterface(i)
}
return c, closeFn
}

func TestRemoveStaleTunnelPorts(t *testing.T) {
c, closeFn := setup(t, []*interfacestore.InterfaceConfig{
{
Type: interfacestore.TunnelInterface,
InterfaceName: util.GenerateNodeTunnelInterfaceName("xyz-k8s-0-1"),
TunnelInterfaceConfig: &interfacestore.TunnelInterfaceConfig{
NodeName: "xyz-k8s-0-1",
Type: ovsconfig.TunnelType("vxlan"),
PSK: "mismatchpsk",
RemoteIP: nodeIP1,
},
OVSPortConfig: &interfacestore.OVSPortConfig{
PortUUID: "123",
},
},
})

defer closeFn()
defer c.queue.ShutDown()
stopCh := make(chan struct{})
defer close(stopCh)
c.informerFactory.Start(stopCh)
c.informerFactory.WaitForCacheSync(stopCh)
node1 := &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "xyz-k8s-0-1",
},
Spec: corev1.NodeSpec{
PodCIDR: podCIDR.String(),
PodCIDRs: []string{podCIDR.String()},
},
Status: corev1.NodeStatus{
Addresses: []corev1.NodeAddress{
{
Type: corev1.NodeInternalIP,
Address: nodeIP1.String(),
},
},
},
}

c.clientset.CoreV1().Nodes().Create(context.TODO(), node1, metav1.CreateOptions{})
c.ovsClient.EXPECT().DeletePort("123").Times(1)

err := c.removeStaleTunnelPorts()
assert.NoError(t, err)
}

func TestCreateIPSecTunnelPort(t *testing.T) {
c, closeFn := setup(t, []*interfacestore.InterfaceConfig{
{
Type: interfacestore.TunnelInterface,
InterfaceName: "mismatchedname",
TunnelInterfaceConfig: &interfacestore.TunnelInterfaceConfig{
NodeName: "xyz-k8s-0-2",
Type: "vxlan",
PSK: "changeme",
RemoteIP: nodeIP2,
},
OVSPortConfig: &interfacestore.OVSPortConfig{
PortUUID: "123",
},
},
{
Type: interfacestore.TunnelInterface,
InterfaceName: util.GenerateNodeTunnelInterfaceName("xyz-k8s-0-3"),
TunnelInterfaceConfig: &interfacestore.TunnelInterfaceConfig{
NodeName: "xyz-k8s-0-3",
Type: "vxlan",
PSK: "changeme",
RemoteIP: net.ParseIP("10.10.10.1"),
},
OVSPortConfig: &interfacestore.OVSPortConfig{
PortUUID: "abc",
OFPort: int32(5),
},
},
})

defer closeFn()
defer c.queue.ShutDown()
stopCh := make(chan struct{})
defer close(stopCh)
c.informerFactory.Start(stopCh)
c.informerFactory.WaitForCacheSync(stopCh)

node1PortName := util.GenerateNodeTunnelInterfaceName("xyz-k8s-0-1")
c.ovsClient.EXPECT().CreateTunnelPortExt(
node1PortName, ovsconfig.TunnelType("vxlan"), int32(0),
false, "", nodeIP1.String(), "changeme",
map[string]interface{}{ovsExternalIDNodeName: "xyz-k8s-0-1"}).Times(1)
c.ovsClient.EXPECT().GetOFPort(node1PortName).Return(int32(1), nil)

tests := []struct {
name string
nodeName string
peerNodeIP net.IP
wantErr bool
want int32
}{
{
name: "create new port",
nodeName: "xyz-k8s-0-1",
peerNodeIP: nodeIP1,
wantErr: false,
want: 1,
},
{
name: "hit cache but interface name changed for the same node",
nodeName: "xyz-k8s-0-2",
peerNodeIP: nodeIP2,
wantErr: true,
},
{
name: "hit cache and return directly",
nodeName: "xyz-k8s-0-3",
peerNodeIP: net.ParseIP("10.10.10.1"),
wantErr: false,
want: 5,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := c.createIPSecTunnelPort(tt.nodeName, tt.peerNodeIP)
hasErr := err != nil
assert.Equal(t, tt.wantErr, hasErr)
assert.Equal(t, tt.want, got)
})
}
}
10 changes: 8 additions & 2 deletions pkg/agent/util/net.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"fmt"
"io"
"net"
"strings"
)

const (
Expand All @@ -38,10 +39,15 @@ func generateInterfaceName(key string, name string, useHead bool) string {
interfaceKey := hex.EncodeToString(hash.Sum(nil))
prefix := name
if len(name) > interfacePrefixLength {
// We use Node/Pod name to generate the interface name,
// valid chars for Node/Pod name are ASCII letters from a to z,
// the digits from 0 to 9, and the hyphen (-).
// Hyphen (-) is the only char which will impact command-line interpretation
// if the interface name starts with one, so we remove it here.
if useHead {
prefix = name[:interfacePrefixLength]
prefix = strings.TrimLeft(name[:interfacePrefixLength], "-")
} else {
prefix = name[len(name)-interfacePrefixLength:]
prefix = strings.TrimLeft(name[len(name)-interfacePrefixLength:], "-")
abhiraut marked this conversation as resolved.
Show resolved Hide resolved
}
}
return fmt.Sprintf("%s-%s", prefix, interfaceKey[:interfaceKeyLength])
Expand Down