diff --git a/cmd/antrea-agent/agent.go b/cmd/antrea-agent/agent.go index 6c0c79811ec..c7143927df1 100644 --- a/cmd/antrea-agent/agent.go +++ b/cmd/antrea-agent/agent.go @@ -697,8 +697,6 @@ func run(o *Options) error { if err := secondarynetwork.Initialize( o.config.ClientConnection, o.config.KubeAPIServerOverride, k8sClient, localPodInformer, nodeConfig.Name, cniPodInfoStore, - // safe to call given that cniServer.Initialize has been called already. - cniServer.GetPodConfigurator(), stopCh, &o.config.SecondaryNetwork, ovsdbConnection); err != nil { return fmt.Errorf("failed to initialize secondary network: %v", err) diff --git a/pkg/agent/cniserver/interface_configuration_linux.go b/pkg/agent/cniserver/interface_configuration_linux.go index 9ce9ec62c5e..0927249e4b7 100644 --- a/pkg/agent/cniserver/interface_configuration_linux.go +++ b/pkg/agent/cniserver/interface_configuration_linux.go @@ -249,7 +249,10 @@ func (ic *ifConfigurator) configureContainerLinkVeth( mtu int, result *current.Result, ) error { - hostIfaceName := util.GenerateContainerInterfaceName(podName, podNamespace, containerID) + // Include the container veth interface name in the name generation, as one Pod can have more + // than one interfaces inc. secondary interfaces, while the host interface name must be + // be unique. + hostIfaceName := util.GenerateContainerHostVethName(podName, podNamespace, containerID, containerIfaceName) hostIface := ¤t.Interface{Name: hostIfaceName} containerIface := ¤t.Interface{Name: containerIfaceName, Sandbox: containerNetNS} diff --git a/pkg/agent/cniserver/pod_configuration.go b/pkg/agent/cniserver/pod_configuration.go index 1e0782dfe88..ac4e183c91a 100644 --- a/pkg/agent/cniserver/pod_configuration.go +++ b/pkg/agent/cniserver/pod_configuration.go @@ -84,9 +84,9 @@ func newPodConfigurator( gatewayMAC net.HardwareAddr, ovsDatapathType ovsconfig.OVSDatapathType, isOvsHardwareOffloadEnabled bool, + disableTXChecksumOffload bool, podUpdateNotifier channel.Notifier, podInfoStore cnipodcache.CNIPodInfoStore, - disableTXChecksumOffload bool, ) (*podConfigurator, error) { ifConfigurator, err := newInterfaceConfigurator(ovsDatapathType, isOvsHardwareOffloadEnabled, disableTXChecksumOffload) if err != nil { @@ -259,7 +259,8 @@ func (pc *podConfigurator) configureInterfaces( // Note that the IP address should be advertised after Pod OpenFlow entries are installed, otherwise the packet might // be dropped by OVS. if err = pc.ifConfigurator.advertiseContainerAddr(containerNetNS, containerIface.Name, &result.Result); err != nil { - klog.Errorf("Failed to advertise IP address for container %s: %v", containerID, err) + // Do not return an error and fail the interface creation. + klog.ErrorS(err, "Failed to advertise IP address for container", "container ID", containerID) } // Mark the manipulation as success to cancel deferred operations. success = true diff --git a/pkg/agent/cniserver/pod_configuration_linux_test.go b/pkg/agent/cniserver/pod_configuration_linux_test.go index 66b94f7f6e9..aedcdaae97f 100644 --- a/pkg/agent/cniserver/pod_configuration_linux_test.go +++ b/pkg/agent/cniserver/pod_configuration_linux_test.go @@ -476,7 +476,9 @@ func TestConfigureSriovSecondaryInterface(t *testing.T) { name: "advertise-failure", podSriovVFDeviceID: "vf2", advertiseErr: fmt.Errorf("unable to advertise on the sriov link"), - expectedErr: fmt.Errorf("failed to advertise IP address for container %s: unable to advertise on the sriov link", containerID), + // When advertiseContainerAddr returns an error, it is logged, but does not + // cause ConfigureSriovSecondaryInterface to also return an error. + }, { name: "success", podSriovVFDeviceID: "vf3", @@ -499,7 +501,7 @@ func createPodConfigurator(controller *gomock.Controller, testIfaceConfigurator mockOFClient = openflowtest.NewMockClient(controller) ifaceStore = interfacestore.NewInterfaceStore() mockRoute = routetest.NewMockInterface(controller) - configurator, _ := newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, channel.NewSubscribableChannel("PodUpdate", 100), nil, false) + configurator, _ := newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, false, channel.NewSubscribableChannel("PodUpdate", 100), nil) configurator.ifConfigurator = testIfaceConfigurator return configurator } diff --git a/pkg/agent/cniserver/secondary.go b/pkg/agent/cniserver/secondary.go new file mode 100644 index 00000000000..11fc559f893 --- /dev/null +++ b/pkg/agent/cniserver/secondary.go @@ -0,0 +1,103 @@ +// Copyright 2021 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cniserver + +import ( + "fmt" + + current "github.com/containernetworking/cni/pkg/types/100" + "k8s.io/klog/v2" + + "antrea.io/antrea/pkg/ovs/ovsconfig" +) + +func NewSecondaryInterfaceConfigurator(ovsBridgeClient ovsconfig.OVSBridgeClient) (*podConfigurator, error) { + return newPodConfigurator(ovsBridgeClient, nil, nil, nil, nil, ovsconfig.OVSDatapathSystem, false, false, nil, nil) +} + +// ConfigureSriovSecondaryInterface configures a SR-IOV secondary interface for a Pod. +func (pc *podConfigurator) ConfigureSriovSecondaryInterface( + podName, podNamespace string, + containerID, containerNetNS, containerInterfaceName string, + mtu int, + podSriovVFDeviceID string, + result *current.Result) error { + if podSriovVFDeviceID == "" { + return fmt.Errorf("error getting the Pod SR-IOV VF device ID") + } + + err := pc.ifConfigurator.configureContainerLink(podName, podNamespace, containerID, containerNetNS, containerInterfaceName, mtu, "", podSriovVFDeviceID, result, nil) + if err != nil { + return err + } + hostIface := result.Interfaces[0] + containerIface := result.Interfaces[1] + klog.InfoS("Configured SR-IOV interface", "Pod", klog.KRef(podNamespace, podName), "interface", containerInterfaceName, "hostInterface", hostIface) + + if err = pc.ifConfigurator.advertiseContainerAddr(containerNetNS, containerIface.Name, result); err != nil { + klog.ErrorS(err, "Failed to advertise IP address for SR-IOV interface", "container ID", containerID, "interface", containerInterfaceName) + } + return nil +} + +// ConfigureVLANSecondaryInterface configures a VLAN secondary interface on the secondary network +// OVS bridge, and returns the OVS port UUID. +func (pc *podConfigurator) ConfigureVLANSecondaryInterface( + podName, podNamespace string, + containerID, containerNetNS, containerInterfaceName string, + mtu int, vlanID uint16, + result *current.Result) (string, error) { + // TODO: revisit the possibility of reusing configureInterfaces(), connectInterfaceToOVS() + // removeInterfaces() code, and using InterfaceStore to store secondary interface info. + if err := pc.ifConfigurator.configureContainerLink(podName, podNamespace, containerID, containerNetNS, containerInterfaceName, mtu, "", "", result, nil); err != nil { + return "", err + } + hostIface := result.Interfaces[0] + containerIface := result.Interfaces[1] + + success := false + defer func() { + if !success { + if err := pc.ifConfigurator.removeContainerLink(containerID, hostIface.Name); err != nil { + klog.ErrorS(err, "failed to roll back veth creation", "container ID", containerID, "interface", containerInterfaceName) + } + } + }() + + // Use the outer veth interface name as the OVS port name. + ovsPortName := hostIface.Name + ovsPortUUID, err := pc.createOVSPort(ovsPortName, nil, vlanID) + if err != nil { + return "", fmt.Errorf("failed to add OVS port for container %s: %v", containerID, err) + } + klog.InfoS("Configured VLAN interface", "Pod", klog.KRef(podNamespace, podName), "interface", containerInterfaceName, "hostInterface", hostIface) + + if err := pc.ifConfigurator.advertiseContainerAddr(containerNetNS, containerIface.Name, result); err != nil { + klog.ErrorS(err, "Failed to advertise IP address for VLAN interface", "container ID", containerID, "interface", containerInterfaceName) + } + success = true + return ovsPortUUID, nil +} + +// DeleteVLANSecondaryInterface deletes a VLAN secondary interface. +func (pc *podConfigurator) DeleteVLANSecondaryInterface(containerID, hostInterfaceName, ovsPortUUID string) error { + if err := pc.ovsBridgeClient.DeletePort(ovsPortUUID); err != nil { + return fmt.Errorf("failed to delete OVS port for container %s: %v", containerID, err) + } + if err := pc.ifConfigurator.removeContainerLink(containerID, hostInterfaceName); err != nil { + return err + } + return nil +} diff --git a/pkg/agent/cniserver/server.go b/pkg/agent/cniserver/server.go index 995cc915852..41b1c4abaf7 100644 --- a/pkg/agent/cniserver/server.go +++ b/pkg/agent/cniserver/server.go @@ -387,10 +387,6 @@ func (s *CNIServer) validatePrevResult(cfgArgs *cnipb.CniCmdArgs, prevResult *cu return nil } -func (s *CNIServer) GetPodConfigurator() *podConfigurator { - return s.podConfigurator -} - // Declared variables for testing var ( ipamSecondaryNetworkAdd = ipam.SecondaryNetworkAdd @@ -531,8 +527,7 @@ func (s *CNIServer) CmdAdd(ctx context.Context, request *cnipb.CniCmdRequest) (* if s.secondaryNetworkEnabled { // Go cache the CNI server info at CNIConfigInfo cache, for podWatch usage cniInfo := &cnipodcache.CNIConfigInfo{CNIVersion: cniVersion, PodName: podName, PodNamespace: podNamespace, - ContainerID: cniConfig.ContainerId, ContainerNetNS: netNS, PodCNIDeleted: false, - MTU: cniConfig.MTU} + ContainerID: cniConfig.ContainerId, ContainerNetNS: netNS, PodCNIDeleted: false} s.podConfigurator.podInfoStore.AddCNIConfigInfo(cniInfo) } @@ -668,9 +663,9 @@ func (s *CNIServer) Initialize( s.podConfigurator, err = newPodConfigurator( ovsBridgeClient, ofClient, s.routeClient, ifaceStore, s.nodeConfig.GatewayConfig.MAC, - ovsBridgeClient.GetOVSDatapathType(), ovsBridgeClient.IsHardwareOffloadEnabled(), podUpdateNotifier, - podInfoStore, s.disableTXChecksumOffload, - ) + ovsBridgeClient.GetOVSDatapathType(), ovsBridgeClient.IsHardwareOffloadEnabled(), + s.disableTXChecksumOffload, + podUpdateNotifier, podInfoStore) if err != nil { return fmt.Errorf("error during initialize podConfigurator: %v", err) } diff --git a/pkg/agent/cniserver/server_linux_test.go b/pkg/agent/cniserver/server_linux_test.go index 9acabb19443..c0fd78cea88 100644 --- a/pkg/agent/cniserver/server_linux_test.go +++ b/pkg/agent/cniserver/server_linux_test.go @@ -98,7 +98,7 @@ func TestValidatePrevResult(t *testing.T) { cniConfig.Ifname = ifname cniConfig.Netns = "invalid_netns" sriovVFDeviceID := "" - cniServer.podConfigurator, _ = newPodConfigurator(nil, nil, nil, nil, nil, "", false, channel.NewSubscribableChannel("PodUpdate", 100), nil, false) + cniServer.podConfigurator, _ = newPodConfigurator(nil, nil, nil, nil, nil, "", false, false, channel.NewSubscribableChannel("PodUpdate", 100), nil) response := cniServer.validatePrevResult(cniConfig.CniCmdArgs, prevResult, sriovVFDeviceID) checkErrorResponse(t, response, cnipb.ErrorCode_CHECK_INTERFACE_FAILURE, "") }) @@ -109,7 +109,7 @@ func TestValidatePrevResult(t *testing.T) { cniConfig.Netns = "invalid_netns" sriovVFDeviceID := "0000:03:00.6" prevResult.Interfaces = []*current.Interface{hostIface, containerIface} - cniServer.podConfigurator, _ = newPodConfigurator(nil, nil, nil, nil, nil, "", true, channel.NewSubscribableChannel("PodUpdate", 100), nil, false) + cniServer.podConfigurator, _ = newPodConfigurator(nil, nil, nil, nil, nil, "", true, false, channel.NewSubscribableChannel("PodUpdate", 100), nil) response := cniServer.validatePrevResult(cniConfig.CniCmdArgs, prevResult, sriovVFDeviceID) checkErrorResponse(t, response, cnipb.ErrorCode_CHECK_INTERFACE_FAILURE, "") }) @@ -122,7 +122,7 @@ func TestRemoveInterface(t *testing.T) { ifaceStore = interfacestore.NewInterfaceStore() mockRoute = routetest.NewMockInterface(controller) gwMAC, _ := net.ParseMAC("00:00:11:11:11:11") - podConfigurator, err := newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, channel.NewSubscribableChannel("PodUpdate", 100), nil, false) + podConfigurator, err := newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, false, channel.NewSubscribableChannel("PodUpdate", 100), nil) require.Nil(t, err, "No error expected in podConfigurator constructor") containerMAC, _ := net.ParseMAC("aa:bb:cc:dd:ee:ff") @@ -203,7 +203,7 @@ func newMockCNIServer(t *testing.T, controller *gomock.Controller, ipamDriver ip gwMAC, _ := net.ParseMAC("00:00:11:11:11:11") gateway := &config.GatewayConfig{Name: "", IPv4: gwIPv4, MAC: gwMAC} cniServer.nodeConfig = &config.NodeConfig{Name: "node1", PodIPv4CIDR: nodePodCIDRv4, GatewayConfig: gateway} - cniServer.podConfigurator, _ = newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, channel.NewSubscribableChannel("PodUpdate", 100), nil, false) + cniServer.podConfigurator, _ = newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, false, channel.NewSubscribableChannel("PodUpdate", 100), nil) cniServer.enableSecondaryNetworkIPAM = enableSecondaryNetworkIPAM cniServer.isChaining = isChaining cniServer.secondaryNetworkEnabled = secondaryNetworkEnabled @@ -494,8 +494,7 @@ func TestCmdDel(t *testing.T) { cniserver.podConfigurator.ifConfigurator = testIfaceConfigurator if tc.secondaryNetworkEnabled { cniInfo := &cnipodcache.CNIConfigInfo{CNIVersion: supportedCNIVersion, PodName: tc.podName, PodNamespace: testPodNamespace, - ContainerID: containerID, ContainerNetNS: netns, PodCNIDeleted: false, - MTU: 1450} + ContainerID: containerID, ContainerNetNS: netns, PodCNIDeleted: false} cniserver.podConfigurator.podInfoStore.AddCNIConfigInfo(cniInfo) } if tc.ipamDel { @@ -639,7 +638,7 @@ func TestReconcile(t *testing.T) { cniServer := newCNIServer(t) cniServer.routeClient = mockRoute gwMAC, _ := net.ParseMAC("00:00:11:11:11:11") - cniServer.podConfigurator, _ = newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, channel.NewSubscribableChannel("PodUpdate", 100), nil, false) + cniServer.podConfigurator, _ = newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, false, channel.NewSubscribableChannel("PodUpdate", 100), nil) cniServer.podConfigurator.ifConfigurator = newTestInterfaceConfigurator() cniServer.nodeConfig = &config.NodeConfig{ Name: nodeName, diff --git a/pkg/agent/cniserver/server_windows_test.go b/pkg/agent/cniserver/server_windows_test.go index 27b3e68d180..f299161ff0d 100644 --- a/pkg/agent/cniserver/server_windows_test.go +++ b/pkg/agent/cniserver/server_windows_test.go @@ -293,7 +293,7 @@ func newMockCNIServer(t *testing.T, controller *gomock.Controller, podUpdateNoti gwMAC, _ := net.ParseMAC("00:00:11:11:11:11") gateway := &config.GatewayConfig{Name: "", IPv4: gwIPv4, MAC: gwMAC} cniServer.nodeConfig = &config.NodeConfig{Name: "node1", PodIPv4CIDR: nodePodCIDRv4, GatewayConfig: gateway} - cniServer.podConfigurator, _ = newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, podUpdateNotifier, nil, false) + cniServer.podConfigurator, _ = newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, false, podUpdateNotifier, nil) return cniServer } @@ -947,7 +947,7 @@ func TestReconcile(t *testing.T) { pod4IfaceName := "iface4" pod4Iface := containerIfaces["iface4"] waiter := newAsyncWaiter(pod4Iface.PodName, pod4Iface.ContainerID) - cniServer.podConfigurator, _ = newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, waiter.notifier, nil, false) + cniServer.podConfigurator, _ = newPodConfigurator(mockOVSBridgeClient, mockOFClient, mockRoute, ifaceStore, gwMAC, "system", false, false, waiter.notifier, nil) cniServer.nodeConfig = &config.NodeConfig{Name: nodeName} // Re-install Pod1 flows diff --git a/pkg/agent/cniserver/sriov.go b/pkg/agent/cniserver/sriov.go deleted file mode 100644 index 905a99b318b..00000000000 --- a/pkg/agent/cniserver/sriov.go +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2021 Antrea Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package cniserver - -import ( - "context" - "fmt" - "net" - "path" - "time" - - current "github.com/containernetworking/cni/pkg/types/100" - "google.golang.org/grpc" - grpcinsecure "google.golang.org/grpc/credentials/insecure" - "k8s.io/klog/v2" - - // Version v1 of the Kubelet API was introduced in K8s v1.20. - // Using version v1alpha1 instead to support older K8s versions. - podresourcesv1alpha1 "k8s.io/kubelet/pkg/apis/podresources/v1alpha1" - - "antrea.io/antrea/pkg/agent/util" -) - -const ( - kubeletPodResourcesPath = "/var/lib/kubelet/pod-resources" - kubeletSocket = "kubelet.sock" - connectionTimeout = 10 * time.Second -) - -type KubeletPodResources struct { - resources []*podresourcesv1alpha1.PodResources -} - -// GetPodContainerDeviceIDs returns the device IDs assigned to a Pod's containers. -func GetPodContainerDeviceIDs(podName string, podNamespace string) ([]string, error) { - ctx, cancel := context.WithTimeout(context.Background(), connectionTimeout) - defer cancel() - - conn, err := grpc.DialContext( - ctx, - path.Join(kubeletPodResourcesPath, kubeletSocket), - grpc.WithTransportCredentials(grpcinsecure.NewCredentials()), - grpc.WithContextDialer(func(ctx context.Context, addr string) (conn net.Conn, e error) { - return util.DialLocalSocket(addr) - }), - ) - if err != nil { - return []string{}, fmt.Errorf("error getting the gRPC client for Pod resources: %v", err) - } - - defer conn.Close() - - client := podresourcesv1alpha1.NewPodResourcesListerClient(conn) - - podResources, err := client.List(ctx, &podresourcesv1alpha1.ListPodResourcesRequest{}) - if err != nil { - return []string{}, fmt.Errorf("error getting the Pod resources: %v %v", podResources, err) - } - - var podDeviceIDs []string - var kpr KubeletPodResources - kpr.resources = podResources.GetPodResources() - for _, pr := range kpr.resources { - if pr.Name == podName && pr.Namespace == podNamespace { - for _, ctr := range pr.Containers { - for _, dev := range ctr.Devices { - podDeviceIDs = append(podDeviceIDs, dev.DeviceIds...) - } - } - } - } - klog.V(2).Infof("Pod container device IDs of %s/%s are: %v", podNamespace, podName, podDeviceIDs) - return podDeviceIDs, nil -} - -// ConfigureSriovSecondaryInterface adds Secondary Interface support. -// Limitation: only SR-IOV interface is supported as of now. -func (pc *podConfigurator) ConfigureSriovSecondaryInterface( - podName string, - podNamespace string, - containerID string, - containerNetNS string, - containerIFDev string, - mtu int, - podSriovVFDeviceID string, - result *current.Result, -) error { - if podSriovVFDeviceID == "" { - return fmt.Errorf("error getting the Pod SR-IOV VF device ID") - } - - err := pc.ifConfigurator.configureContainerLink(podName, podNamespace, containerID, containerNetNS, containerIFDev, mtu, "", podSriovVFDeviceID, result, nil) - if err != nil { - return err - } - hostIface := result.Interfaces[0] - containerIface := result.Interfaces[1] - - if err = pc.ifConfigurator.advertiseContainerAddr(containerNetNS, containerIface.Name, result); err != nil { - return fmt.Errorf("failed to advertise IP address for container %s: %v", containerID, err) - } - - klog.Infof("Configured interfaces for container %s; hostIface: %+v, containerIface: %+v", containerID, hostIface, containerIface) - return nil -} diff --git a/pkg/agent/secondarynetwork/cnipodcache/cache.go b/pkg/agent/secondarynetwork/cnipodcache/cache.go index cc0ec7b6356..0eba91af76e 100644 --- a/pkg/agent/secondarynetwork/cnipodcache/cache.go +++ b/pkg/agent/secondarynetwork/cnipodcache/cache.go @@ -35,19 +35,19 @@ type CNIPodInfoCache struct { } // Add CNIPodInfo to local cache store. -func (c *CNIPodInfoCache) AddCNIConfigInfo(CNIConfig *CNIConfigInfo) { - c.cache.Add(CNIConfig) +func (c *CNIPodInfoCache) AddCNIConfigInfo(cniConfig *CNIConfigInfo) { + c.cache.Add(cniConfig) } // Delete CNIPodInfo from local cache store. -func (c *CNIPodInfoCache) DeleteCNIConfigInfo(CNIConfig *CNIConfigInfo) { - c.cache.Delete(CNIConfig) +func (c *CNIPodInfoCache) DeleteCNIConfigInfo(cniConfig *CNIConfigInfo) { + c.cache.Delete(cniConfig) } -func (c *CNIPodInfoCache) SetPodCNIDeleted(CNIConfig *CNIConfigInfo) { +func (c *CNIPodInfoCache) SetPodCNIDeleted(cniConfig *CNIConfigInfo) { c.Lock() defer c.Unlock() - CNIConfig.PodCNIDeleted = true + cniConfig.PodCNIDeleted = true } // Retrieve a valid CNI cache (PodCNIDeleted is not true) entry for the given Pod name and namespace. diff --git a/pkg/agent/secondarynetwork/cnipodcache/types.go b/pkg/agent/secondarynetwork/cnipodcache/types.go index 1231cf1ebc2..16a58c656d2 100644 --- a/pkg/agent/secondarynetwork/cnipodcache/types.go +++ b/pkg/agent/secondarynetwork/cnipodcache/types.go @@ -20,11 +20,21 @@ type CNIConfigInfo struct { PodNamespace string ContainerID string ContainerNetNS string - MTU int PodCNIDeleted bool - // Uses interface name as a key and the network/CNI config (obtained from network-attachment-definition) as value. + // Interfaces is a map that stores the secondary interface information with interface + // name to be the key. + Interfaces map[string]*InterfaceInfo +} + +type NetworkType string + +type InterfaceInfo struct { + NetworkType NetworkType + HostInterfaceName string + // OVS port UUID for a VLAN interface. + OVSPortUUID string // NOTE: Interface specific network/CNI config required to be maintained for IPAM clean-up needs. - NetworkConfig map[string][]byte + CNIConfig []byte } type CNIPodInfoStore interface { diff --git a/pkg/agent/secondarynetwork/init.go b/pkg/agent/secondarynetwork/init.go index c943f7de40b..eda73d7a824 100644 --- a/pkg/agent/secondarynetwork/init.go +++ b/pkg/agent/secondarynetwork/init.go @@ -47,10 +47,11 @@ func Initialize( podInformer cache.SharedIndexInformer, nodeName string, podCache cnipodcache.CNIPodInfoStore, - interfaceConfigurator podwatch.InterfaceConfigurator, stopCh <-chan struct{}, config *agentconfig.SecondaryNetworkConfig, ovsdb *ovsdb.OVSDB) error { - if err := createOVSBridge(config.OVSBridges, ovsdb); err != nil { + + ovsBridgeClient, err := createOVSBridge(config.OVSBridges, ovsdb) + if err != nil { return err } @@ -63,51 +64,54 @@ func Initialize( // Create podController to handle secondary network configuration for Pods with // k8s.v1.cni.cncf.io/networks Annotation defined. - podWatchController := podwatch.NewPodController( - k8sClient, - netAttachDefClient, - podInformer, - nodeName, - podCache, - interfaceConfigurator) - go podWatchController.Run(stopCh) + if podWatchController, err := podwatch.NewPodController( + k8sClient, netAttachDefClient, podInformer, + nodeName, podCache, ovsBridgeClient); err != nil { + return err + } else { + go podWatchController.Run(stopCh) + } return nil } // TODO: check and update bridge configuration. -func createOVSBridge(bridges []agentconfig.OVSBridgeConfig, ovsdb *ovsdb.OVSDB) error { +func createOVSBridge(bridges []agentconfig.OVSBridgeConfig, ovsdb *ovsdb.OVSDB) (ovsconfig.OVSBridgeClient, error) { if len(bridges) == 0 { - return nil + return nil, nil } // Only one OVS bridge is supported. bridgeConfig := bridges[0] + phyInterface := "" + if len(bridgeConfig.PhysicalInterfaces) > 0 { + phyInterface = bridgeConfig.PhysicalInterfaces[0] + if _, err := interfaceByNameFn(phyInterface); err != nil { + return nil, fmt.Errorf("failed to get interface %s: %v", phyInterface, err) + } + } + ovsBridgeClient := newOVSBridgeFn(bridgeConfig.BridgeName, ovsconfig.OVSDatapathSystem, ovsdb) if err := ovsBridgeClient.Create(); err != nil { - return fmt.Errorf("failed to create OVS bridge %s: %v", bridgeConfig.BridgeName, err) + return nil, fmt.Errorf("failed to create OVS bridge %s: %v", bridgeConfig.BridgeName, err) } klog.InfoS("OVS bridge created", "bridge", bridgeConfig.BridgeName) - if len(bridgeConfig.PhysicalInterfaces) == 0 { - return nil - } - phyInterface := bridgeConfig.PhysicalInterfaces[0] - if _, err := interfaceByNameFn(phyInterface); err != nil { - return fmt.Errorf("failed to get interface %s: %v", phyInterface, err) + if phyInterface == "" { + return ovsBridgeClient, nil } if _, err := ovsBridgeClient.GetOFPort(phyInterface, false); err == nil { klog.V(2).InfoS("Physical interface already connected to OVS bridge, skip the configuration", "device", phyInterface, "bridge", bridgeConfig.BridgeName) - return nil + return ovsBridgeClient, nil } _, err := ovsBridgeClient.CreateUplinkPort(phyInterface, 0, map[string]interface{}{interfacestore.AntreaInterfaceTypeKey: interfacestore.AntreaUplink}) if err != nil { - return fmt.Errorf("failed to create OVS uplink port %s: %v", phyInterface, err) + return nil, fmt.Errorf("failed to create OVS uplink port %s: %v", phyInterface, err) } klog.InfoS("Physical interface added to OVS bridge", "device", phyInterface, "bridge", bridgeConfig.BridgeName) - return nil + return ovsBridgeClient, nil } // CreateNetworkAttachDefClient creates net-attach-def client handle from the given config. diff --git a/pkg/agent/secondarynetwork/init_test.go b/pkg/agent/secondarynetwork/init_test.go index b9538e055dc..a9452d28412 100644 --- a/pkg/agent/secondarynetwork/init_test.go +++ b/pkg/agent/secondarynetwork/init_test.go @@ -98,9 +98,6 @@ func TestCreateOVSBridge(t *testing.T) { ovsBridges: []string{"br1"}, physicalInterfaces: []string{nonExistingInterface, "eth2"}, expectedErr: "failed to get interface", - expectedCalls: func(m *ovsconfigtest.MockOVSBridgeClient) { - m.EXPECT().Create().Return(nil) - }, }, { name: "create port error", @@ -132,11 +129,15 @@ func TestCreateOVSBridge(t *testing.T) { tc.expectedCalls(mockOVSBridgeClient) } - err := createOVSBridge(bridges, nil) + brClient, err := createOVSBridge(bridges, nil) if tc.expectedErr != "" { assert.ErrorContains(t, err, tc.expectedErr) + assert.Nil(t, brClient) } else { require.NoError(t, err) + if tc.expectedCalls != nil { + assert.NotNil(t, brClient) + } } }) } diff --git a/pkg/agent/secondarynetwork/podwatch/controller.go b/pkg/agent/secondarynetwork/podwatch/controller.go index ff1f401e838..99f11475823 100644 --- a/pkg/agent/secondarynetwork/podwatch/controller.go +++ b/pkg/agent/secondarynetwork/podwatch/controller.go @@ -36,9 +36,10 @@ import ( netdefclient "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned/typed/k8s.cni.cncf.io/v1" netdefutils "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/utils" - cniserver "antrea.io/antrea/pkg/agent/cniserver" + "antrea.io/antrea/pkg/agent/cniserver" cnipodcache "antrea.io/antrea/pkg/agent/secondarynetwork/cnipodcache" - ipam "antrea.io/antrea/pkg/agent/secondarynetwork/ipam" + "antrea.io/antrea/pkg/agent/secondarynetwork/ipam" + "antrea.io/antrea/pkg/ovs/ovsconfig" ) const ( @@ -46,6 +47,8 @@ const ( minRetryDelay = 2 * time.Second maxRetryDelay = 120 * time.Second numWorkers = 4 + // Set resyncPeriod to 0 to disable resyncing. + resyncPeriod = 0 * time.Minute ) const ( @@ -54,28 +57,21 @@ const ( defaultSecondaryInterfaceName = "eth1" startIfaceIndex = 1 endIfaceIndex = 101 -) -// Set resyncPeriod to 0 to disable resyncing. -const resyncPeriod = 0 * time.Minute + interfaceDefaultMTU = 1500 + vlanIDMax = 4094 +) var ( // ipamDelegator is used to request IP addresses for secondary network // interfaces. It can be overridden by unit tests. ipamDelegator ipam.IPAMDelegator = ipam.NewIPAMDelegator() - // getPodContainerDeviceIDs is used to retrieve SRIOV device IDs - // assigned to a specific Pod. It can be overridden by unit tests. - getPodContainerDeviceIDs = cniserver.GetPodContainerDeviceIDs ) -// Structure to associate a unique VF's PCI Address to the Linux ethernet interface. -type podSriovVFDeviceIDInfo struct { - vfDeviceID string - ifName string -} - type InterfaceConfigurator interface { - ConfigureSriovSecondaryInterface(podName string, podNamespace string, containerID string, containerNetNS string, containerIFDev string, mtu int, podSriovVFDeviceID string, result *current.Result) error + ConfigureSriovSecondaryInterface(podName, podNamespace, containerID, containerNetNS, containerInterfaceName string, mtu int, podSriovVFDeviceID string, result *current.Result) error + ConfigureVLANSecondaryInterface(podName, podNamespace, containerID, containerNetNS, containerInterfaceName string, mtu int, vlanID uint16, result *current.Result) (string, error) + DeleteVLANSecondaryInterface(containerID, hostInterfaceName, ovsPortUUID string) error } type PodController struct { @@ -86,6 +82,7 @@ type PodController struct { nodeName string podCache cnipodcache.CNIPodInfoStore interfaceConfigurator InterfaceConfigurator + ovsBridgeClient ovsconfig.OVSBridgeClient vfDeviceIDUsageMap sync.Map } @@ -95,8 +92,12 @@ func NewPodController( podInformer cache.SharedIndexInformer, nodeName string, podCache cnipodcache.CNIPodInfoStore, - interfaceConfigurator InterfaceConfigurator, -) *PodController { + ovsBridgeClient ovsconfig.OVSBridgeClient, +) (*PodController, error) { + interfaceConfigurator, err := cniserver.NewSecondaryInterfaceConfigurator(ovsBridgeClient) + if err != nil { + return nil, fmt.Errorf("failed to create SecondaryInterfaceConfigurator: %v", err) + } pc := PodController{ kubeClient: kubeClient, netAttachDefClient: netAttachDefClient, @@ -105,6 +106,7 @@ func NewPodController( nodeName: nodeName, podCache: podCache, interfaceConfigurator: interfaceConfigurator, + ovsBridgeClient: ovsBridgeClient, } podInformer.AddEventHandlerWithResyncPeriod( cache.ResourceEventHandlerFuncs{ @@ -114,75 +116,22 @@ func NewPodController( }, resyncPeriod, ) - return &pc + return &pc, nil } func podKeyGet(pod *corev1.Pod) string { return pod.Namespace + "/" + pod.Name } -// buildVFDeviceIDListPerPod is a helper function to build a cache structure with the -// list of all the PCI addresses allocated per Pod based on their resource requests (in Pod spec). -// When there is a request for a VF resource (to associate it for a secondary network interface), -// getUnusedSriovVFDeviceIDPerPod will use this cache information to pick up a unique PCI address -// which is still not associated with a network device name. -// NOTE: buildVFDeviceIDListPerPod is called only if a Pod specific VF to Interface mapping cache -// was not build earlier. Sample initial entry per Pod: "{18:01.1,""},{18:01.2,""},{18:01.3,""}" -func (pc *PodController) buildVFDeviceIDListPerPod(podName, podNamespace string) ([]podSriovVFDeviceIDInfo, error) { - podKey := podNamespace + "/" + podName - deviceCache, cacheFound := pc.vfDeviceIDUsageMap.Load(podKey) - if cacheFound { - return deviceCache.([]podSriovVFDeviceIDInfo), nil - } - podSriovVFDeviceIDs, err := getPodContainerDeviceIDs(podName, podNamespace) - if err != nil { - return nil, fmt.Errorf("getPodContainerDeviceIDs failed: %v", err) - } - var vfDeviceIDInfoCache []podSriovVFDeviceIDInfo - for _, pciAddress := range podSriovVFDeviceIDs { - initSriovVfDeviceID := podSriovVFDeviceIDInfo{vfDeviceID: pciAddress, ifName: ""} - vfDeviceIDInfoCache = append(vfDeviceIDInfoCache, initSriovVfDeviceID) - } - pc.vfDeviceIDUsageMap.Store(podKey, vfDeviceIDInfoCache) - klog.V(2).InfoS("Pod specific SRIOV VF cache created", "Key", podKey) - return vfDeviceIDInfoCache, nil -} - -func (pc *PodController) deleteVFDeviceIDListPerPod(podName, podNamespace string) { - podKey := podNamespace + "/" + podName - _, cacheFound := pc.vfDeviceIDUsageMap.Load(podKey) - if cacheFound { - pc.vfDeviceIDUsageMap.Delete(podKey) - klog.V(2).InfoS("Pod specific SRIOV VF cache cleared", "Key", podKey) - } - return -} - -func (pc *PodController) assignUnusedSriovVFDeviceIDPerPod(podName, podNamespace, interfaceName string) (string, error) { - var cache []podSriovVFDeviceIDInfo - cache, err := pc.buildVFDeviceIDListPerPod(podName, podNamespace) - if err != nil { - return "", err - } - for idx := 0; idx < len(cache); idx++ { - if cache[idx].ifName == "" { - // Unused PCI address found. Associate PCI address to the interface. - cache[idx].ifName = interfaceName - return cache[idx].vfDeviceID, nil - } - } - return "", err -} - func generatePodSecondaryIfaceName(podCNIInfo *cnipodcache.CNIConfigInfo) (string, error) { - // Assign default interface name, if podCNIInfo.NetworkConfig is empty. - if count := len(podCNIInfo.NetworkConfig); count == 0 { + // Assign default interface name, if podCNIInfo.Interfaces is empty. + if len(podCNIInfo.Interfaces) == 0 { return defaultSecondaryInterfaceName, nil } else { // Generate new interface name (eth1,eth2..eth100) and return to caller. for ifaceIndex := startIfaceIndex; ifaceIndex < endIfaceIndex; ifaceIndex++ { ifName := fmt.Sprintf("%s%d", "eth", ifaceIndex) - _, exist := podCNIInfo.NetworkConfig[ifName] + _, exist := podCNIInfo.Interfaces[ifName] if !exist { return ifName, nil } @@ -203,22 +152,33 @@ func whereaboutsArgsBuilder(cmd string, interfaceName string, podCNIInfo *cnipod } -func removePodAllSecondaryNetwork(podCNIInfo *cnipodcache.CNIConfigInfo) error { +func (pc *PodController) deletePodSecondaryNetwork(podCNIInfo *cnipodcache.CNIConfigInfo) error { var cmdArgs *invoke.Args - // Clean-up IPAM at whereabouts db (etcd or kubernetes API server) for all the secondary networks of the Pod which is getting removed. - // PluginArgs added to provide additional arguments required for whereabouts v0.5.1 and above. - // NOTE: SR-IOV VF interface clean-up, upon Pod delete will be handled by SR-IOV device plugin. Not handled here. + // Clean-up IPAM at Whereabouts DB (etcd or Kubernetes API server) for all secondary + // networks of the Pod which is getting removed. + // NOTE: SR-IOV VF interface clean-up, upon Pod delete will be handled by SR-IOV device + // plugin. Not handled here. + // PluginArgs added to provide additional arguments required for Whereabouts v0.5.1 and + // above. cmdArgs = whereaboutsArgsBuilder("DEL", "", podCNIInfo) - // example: podCNIInfo.NetworkConfig = {"eth1": net1-cniconfig, "eth2": net2-cniconfig} - for secNetInstIface, secNetInstConfig := range podCNIInfo.NetworkConfig { + // example: podCNIInfo.Interfaces = {"eth1": net1-cniconfig, "eth2": net2-cniconfig} + for secNetInstIface, interfaceInfo := range podCNIInfo.Interfaces { + if interfaceInfo.NetworkType == vlanNetworkType { + if err := pc.interfaceConfigurator.DeleteVLANSecondaryInterface(podCNIInfo.ContainerID, + interfaceInfo.HostInterfaceName, interfaceInfo.OVSPortUUID); err != nil { + return err + } + } + cmdArgs.IfName = secNetInstIface // Do DelIPAMSubnetAddress on network config (secNetInstConfig) and command argument (updated with interface name). - err := ipamDelegator.DelIPAMSubnetAddress(secNetInstConfig, cmdArgs) + err := ipamDelegator.DelIPAMSubnetAddress(interfaceInfo.CNIConfig, cmdArgs) if err != nil { return fmt.Errorf("Failed to clean-up whereabouts IPAM %v", err) } + // Delete map entry for secNetInstIface, secNetInstConfig - delete(podCNIInfo.NetworkConfig, secNetInstIface) + delete(podCNIInfo.Interfaces, secNetInstIface) } return nil } @@ -252,6 +212,7 @@ func (pc *PodController) handleAddUpdatePod(obj interface{}) error { // Note: Return nil here to unqueue Pod add event. Secondary network configuration will be handled with Pod update event. return nil } + secondaryNetwork, ok := checkForPodSecondaryNetworkAttachement(pod) if !ok { // NOTE: We do not handle Pod annotation deletion/update scenario at present. @@ -265,10 +226,11 @@ func (pc *PodController) handleAddUpdatePod(obj interface{}) error { // Valid cache entry retrieved from cache and we received a Pod add or update event. // Avoid processing Pod annotation, if we already have at least one secondary network successfully configured on this Pod. // We do not support/handle Annotation updates yet. - if len(podCNIInfo.NetworkConfig) > 0 { + if len(podCNIInfo.Interfaces) > 0 { klog.InfoS("Secondary network already configured on this Pod and annotation update not supported, skipping update", "pod", klog.KObj(pod)) return nil } + // Parse Pod annotation and proceed with the secondary network configuration. networklist, err := netdefutils.ParseNetworkAnnotation(secondaryNetwork, pod.Namespace) if err != nil { @@ -278,9 +240,9 @@ func (pc *PodController) handleAddUpdatePod(obj interface{}) error { return nil } - err = pc.configureSecondaryNetwork(pod, networklist, podCNIInfo) + err = pc.configurePodSecondaryNetwork(pod, networklist, podCNIInfo) // We do not return error to retry, if at least one secondary network is configured. - if (err != nil) && (len(podCNIInfo.NetworkConfig) == 0) { + if (err != nil) && (len(podCNIInfo.Interfaces) == 0) { // Return error to requeue and retry. return err } @@ -293,16 +255,16 @@ func (pc *PodController) handleRemovePod(key string) error { // Read the CNI info (stored during Pod creation by cniserver) from cache. // Delete CNI info shared in cache for a specific Pod which is getting removed/deleted. podCNIInfo := pc.podCache.GetAllCNIConfigInfoPerPod(pod[1], pod[0]) - for _, containerInfo := range podCNIInfo { - // Release IPAM of all the secondary interfaces and delete CNI cache. - if err = removePodAllSecondaryNetwork(containerInfo); err != nil { - // Return error to requeue pod delete. + for _, info := range podCNIInfo { + // Delete all secondary interfaces and release IPAM. + if err = pc.deletePodSecondaryNetwork(info); err != nil { + // Return error to requeue Pod delete. return err } else { // Delete cache entry from podCNIInfo. - pc.podCache.DeleteCNIConfigInfo(containerInfo) + pc.podCache.DeleteCNIConfigInfo(info) // Delete Pod specific VF cache (if one exists) - pc.deleteVFDeviceIDListPerPod(containerInfo.PodName, containerInfo.PodNamespace) + pc.deleteVFDeviceIDListPerPod(info.PodName, info.PodNamespace) } } return nil @@ -340,31 +302,15 @@ func (pc *PodController) processNextWorkItem() bool { return true } -// Configure SRIOV VF as a Secondary Network Interface. -func (pc *PodController) configureSriovAsSecondaryInterface(pod *corev1.Pod, network *netdefv1.NetworkSelectionElement, containerInfo *cnipodcache.CNIConfigInfo, result *current.Result) error { - podSriovVFDeviceID, err := pc.assignUnusedSriovVFDeviceIDPerPod(pod.Name, pod.Namespace, network.InterfaceRequest) - if err != nil { - return fmt.Errorf("getPodContainerDeviceIDs failed: %v", err) - } - - if err = pc.interfaceConfigurator.ConfigureSriovSecondaryInterface( - containerInfo.PodName, - containerInfo.PodNamespace, - containerInfo.ContainerID, - containerInfo.ContainerNetNS, - network.InterfaceRequest, - containerInfo.MTU, - podSriovVFDeviceID, - result, - ); err != nil { - return fmt.Errorf("SRIOV Interface creation failed: %v", err) - } - return nil -} - // Configure Secondary Network Interface. -func (pc *PodController) configureSecondaryInterface(pod *corev1.Pod, network *netdefv1.NetworkSelectionElement, podCNIInfo *cnipodcache.CNIConfigInfo, cniConfig []byte) error { - // Generate and assign new interface name, If secondary interface name was not provided in Pod annotation. +func (pc *PodController) configureSecondaryInterface( + pod *corev1.Pod, + network *netdefv1.NetworkSelectionElement, + podCNIInfo *cnipodcache.CNIConfigInfo, + cniConfig []byte, + networkConfig *SecondaryNetworkConfig) error { + // Generate a new interface name, if the secondary interface name was not provided in the + // Pod annotation. if len(network.InterfaceRequest) == 0 { var err error if network.InterfaceRequest, err = generatePodSecondaryIfaceName(podCNIInfo); err != nil { @@ -386,29 +332,52 @@ func (pc *PodController) configureSecondaryInterface(pod *corev1.Pod, network *n for _, ip := range result.IPs { ip.Interface = current.Int(1) } - // Configure SRIOV as a secondary network interface - if err := pc.configureSriovAsSecondaryInterface(pod, network, podCNIInfo, result); err != nil { + + var ovsPortUUID string + var ifConfigErr error + switch networkConfig.NetworkType { + case sriovNetworkType: + ifConfigErr = pc.configureSriovAsSecondaryInterface(pod, network, podCNIInfo, int(networkConfig.MTU), result) + case vlanNetworkType: + ovsPortUUID, ifConfigErr = pc.interfaceConfigurator.ConfigureVLANSecondaryInterface( + podCNIInfo.PodName, podCNIInfo.PodNamespace, + podCNIInfo.ContainerID, podCNIInfo.ContainerNetNS, network.InterfaceRequest, + int(networkConfig.MTU), uint16(networkConfig.VLAN), + result) + } + + if ifConfigErr != nil { // SRIOV interface creation failed. Free allocated IP address if err := ipamDelegator.DelIPAMSubnetAddress(cniConfig, cmdArgs); err != nil { klog.ErrorS(err, "IPAM de-allocation failed: ", err) } - return err + return ifConfigErr } // Update Pod CNI cache with the network config which was successfully configured. - if podCNIInfo.NetworkConfig == nil { - podCNIInfo.NetworkConfig = make(map[string][]byte) - } - podCNIInfo.NetworkConfig[network.InterfaceRequest] = cniConfig + if podCNIInfo.Interfaces == nil { + podCNIInfo.Interfaces = make(map[string]*cnipodcache.InterfaceInfo) + } + hostInterfaceName := "" + if len(result.Interfaces) > 0 { + // In mock tests, result.Interfaces can be nil + hostInterfaceName = result.Interfaces[0].Name + } + interfaceInfo := cnipodcache.InterfaceInfo{ + NetworkType: networkConfig.NetworkType, + HostInterfaceName: hostInterfaceName, + OVSPortUUID: ovsPortUUID, + CNIConfig: cniConfig} + podCNIInfo.Interfaces[network.InterfaceRequest] = &interfaceInfo return nil } -func (pc *PodController) configureSecondaryNetwork(pod *corev1.Pod, networklist []*netdefv1.NetworkSelectionElement, podCNIInfo *cnipodcache.CNIConfigInfo) error { +func (pc *PodController) configurePodSecondaryNetwork(pod *corev1.Pod, networklist []*netdefv1.NetworkSelectionElement, podCNIInfo *cnipodcache.CNIConfigInfo) error { for _, network := range networklist { - klog.InfoS("Secondary Network attached to Pod", "network", network, "Pod", klog.KObj(pod)) + klog.V(2).InfoS("Secondary Network attached to Pod", "network", network, "Pod", klog.KObj(pod)) netDefCRD, err := pc.netAttachDefClient.NetworkAttachmentDefinitions(network.Namespace).Get(context.TODO(), network.Name, metav1.GetOptions{}) if err != nil { // NetworkAttachmentDefinition not found at this time. Return error to re-queue and re-try. - return fmt.Errorf("NetworkAttachmentDefinition Get failed: %v", err) + return fmt.Errorf("failed to get NetworkAttachmentDefinition: %v", err) } cniConfig, err := netdefutils.GetCNIConfig(netDefCRD, "") if err != nil { @@ -425,13 +394,28 @@ func (pc *PodController) configureSecondaryNetwork(pod *corev1.Pod, networklist klog.InfoS("NetworkAttachmentDefinition is not of type 'antrea', ignoring", "NetworkAttachmentDefinition", klog.KObj(netDefCRD)) continue } - if networkConfig.NetworkType != sriovNetworkType { + if networkConfig.NetworkType != sriovNetworkType && networkConfig.NetworkType != vlanNetworkType { // same as above, if updated, we will not process the request again. - klog.ErrorS(err, "NetworkType not supported for Pod", "NetworkAttachmentDefinition", klog.KObj(netDefCRD), "Pod", klog.KObj(pod)) + klog.ErrorS(err, "Secondary network type not supported", "NetworkAttachmentDefinition", klog.KObj(netDefCRD), "Pod", klog.KObj(pod)) continue } + if networkConfig.NetworkType == vlanNetworkType { + if networkConfig.VLAN > vlanIDMax || networkConfig.VLAN < 0 { + klog.ErrorS(nil, "Invalid VLAN ID", "NetworkAttachmentDefinition", klog.KObj(netDefCRD), "Pod", klog.KObj(pod), "VLAN", networkConfig.VLAN) + continue + } + } + if networkConfig.MTU < 0 { + klog.ErrorS(nil, "Invalid MTU", "NetworkAttachmentDefinition", klog.KObj(netDefCRD), "Pod", klog.KObj(pod), "MTU", networkConfig.MTU) + continue + } + if networkConfig.MTU == 0 { + // TODO: use the physical interface MTU as the default. + networkConfig.MTU = interfaceDefaultMTU + } // secondary network information retrieved from API server. Proceed to configure secondary interface now. - if err = pc.configureSecondaryInterface(pod, network, podCNIInfo, cniConfig); err != nil { + if err = pc.configureSecondaryInterface(pod, network, podCNIInfo, cniConfig, &networkConfig); err != nil { + klog.ErrorS(err, "Secondary interface configuration failed", "Pod", klog.KObj(pod), "networkType", networkConfig.NetworkType) // Secondary interface configuration failed. return error to re-queue and re-try. return fmt.Errorf("secondary interface configuration failed: %v", err) } diff --git a/pkg/agent/secondarynetwork/podwatch/controller_test.go b/pkg/agent/secondarynetwork/podwatch/controller_test.go index c265dc262e9..58e6602892c 100644 --- a/pkg/agent/secondarynetwork/podwatch/controller_test.go +++ b/pkg/agent/secondarynetwork/podwatch/controller_test.go @@ -12,22 +12,30 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !windows +// +build !windows + package podwatch import ( + "bytes" "context" "encoding/json" + "errors" "fmt" "net" + "strings" "sync" "sync/atomic" "testing" + "text/template" "time" current "github.com/containernetworking/cni/pkg/types/100" "github.com/golang/mock/gomock" netdefv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" netdefclientfake "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned/fake" + netdefutils "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/utils" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" @@ -46,14 +54,16 @@ const ( testNamespace = "nsA" testNode = "test-node" - // the ipam information is not actually used when testing, given that we + // The IPAM information is not actually used when testing, given that we // use a mock IPAMDelegator. But this is what the ipam information would // look like when using the actual IPAMDelegator implementation, which - // invokes the whereabouts plugin. - netAttachConfig = `{ + // invokes the Whereabouts plugin. + netAttachTemplate = `{ "cniVersion": "0.3.0", - "type": "antrea", - "networkType": "sriov", + "type": "{{.CNIType}}", + "networkType": "{{.NetworkType}}", + "mtu": {{.MTU}}, + "vlan": {{.VLAN}}, "ipam": { "type": "whereabouts", "datastore": "kubernetes", @@ -66,21 +76,37 @@ const ( defaultMTU = 1500 sriovDeviceID = "sriov-device-id" - podName = "pod1" containerID = "container1" podIP = "1.2.3.4" networkName = "net" interfaceName = "eth2" + ovsPortUUID = "12345678-e29b-41d4-a716-446655440000" ) -func testNetwork(name string) *netdefv1.NetworkAttachmentDefinition { +func testNetwork(name string, networkType cnipodcache.NetworkType) *netdefv1.NetworkAttachmentDefinition { + return testNetworkExt(name, "", networkType, 0, 0) +} + +func testNetworkExt(name, cniType string, networkType cnipodcache.NetworkType, mtu int, vlan int) *netdefv1.NetworkAttachmentDefinition { + if cniType == "" { + cniType = "antrea" + } + data := struct { + CNIType string + NetworkType string + MTU int + VLAN int + }{cniType, string(networkType), mtu, vlan} + tmpl := template.Must(template.New("test").Parse(netAttachTemplate)) + var b bytes.Buffer + tmpl.Execute(&b, &data) return &netdefv1.NetworkAttachmentDefinition{ ObjectMeta: metav1.ObjectMeta{ Name: name, }, Spec: netdefv1.NetworkAttachmentDefinitionSpec{ - Config: netAttachConfig, + Config: b.String(), }, } } @@ -127,7 +153,6 @@ func testPod(name string, container string, podIP string, networks ...netdefv1.N PodNamespace: testNamespace, ContainerID: container, ContainerNetNS: containerNetNs(container), - MTU: defaultMTU, PodCNIDeleted: false, } return pod, cniConfig @@ -145,7 +170,7 @@ func testIPAMResult(cidr string) *current.Result { } func init() { - getPodContainerDeviceIDs = func(name string, namespace string) ([]string, error) { + getPodContainerDeviceIDsFn = func(name string, namespace string) ([]string, error) { return []string{sriovDeviceID}, nil } } @@ -159,14 +184,14 @@ func TestPodControllerRun(t *testing.T) { interfaceConfigurator := podwatchtesting.NewMockInterfaceConfigurator(ctrl) mockIPAM := ipamtesting.NewMockIPAMDelegator(ctrl) ipamDelegator = mockIPAM - podController := NewPodController( + podController, _ := NewPodController( client, netdefclient, informerFactory.Core().V1().Pods().Informer(), testNode, podCache, - interfaceConfigurator, - ) + nil) + podController.interfaceConfigurator = interfaceConfigurator stopCh := make(chan struct{}) informerFactory.Start(stopCh) @@ -183,7 +208,7 @@ func TestPodControllerRun(t *testing.T) { Name: networkName, InterfaceRequest: interfaceName, }) - network := testNetwork(networkName) + network := testNetwork(networkName, sriovNetworkType) ipamResult := testIPAMResult("148.14.24.100/24") @@ -233,27 +258,187 @@ func TestPodControllerRun(t *testing.T) { wg.Wait() } -func TestPodControllerAddPod(t *testing.T) { - newPodController := func(ctrl *gomock.Controller) (*PodController, *ipamtesting.MockIPAMDelegator, *podwatchtesting.MockInterfaceConfigurator) { - client := fake.NewSimpleClientset() - netdefclient := netdefclientfake.NewSimpleClientset().K8sCniCncfIoV1() - informerFactory := informers.NewSharedInformerFactory(client, resyncPeriod) - podCache := cnipodcache.NewCNIPodInfoStore() - interfaceConfigurator := podwatchtesting.NewMockInterfaceConfigurator(ctrl) - mockIPAM := ipamtesting.NewMockIPAMDelegator(ctrl) - ipamDelegator = mockIPAM - // PodController object without event handlers - return &PodController{ - kubeClient: client, - netAttachDefClient: netdefclient, - queue: workqueue.NewNamedRateLimitingQueue(workqueue.NewItemExponentialFailureRateLimiter(minRetryDelay, maxRetryDelay), "podcontroller"), - podInformer: informerFactory.Core().V1().Pods().Informer(), - nodeName: testNode, - podCache: podCache, - interfaceConfigurator: interfaceConfigurator, - }, mockIPAM, interfaceConfigurator +func TestConfigurePodSecondaryNetwork(t *testing.T) { + element1 := netdefv1.NetworkSelectionElement{ + Name: networkName, + Namespace: testNamespace, + InterfaceRequest: interfaceName, + } + ctrl := gomock.NewController(t) + + tests := []struct { + name string + cniType string + networkType cnipodcache.NetworkType + mtu int + vlan int + doNotCreateNetwork bool + interfaceCreated bool + expectedErr string + expectedCalls func(mockIPAM *ipamtesting.MockIPAMDelegator, mockIC *podwatchtesting.MockInterfaceConfigurator) + }{ + { + name: "VLAN network", + networkType: vlanNetworkType, + mtu: 1600, + vlan: 101, + interfaceCreated: true, + expectedCalls: func(mockIPAM *ipamtesting.MockIPAMDelegator, mockIC *podwatchtesting.MockInterfaceConfigurator) { + mockIPAM.EXPECT().GetIPAMSubnetAddress(gomock.Any(), gomock.Any()).Return(testIPAMResult("148.14.24.100/24"), nil) + mockIC.EXPECT().ConfigureVLANSecondaryInterface( + podName, + testNamespace, + containerID, + containerNetNs(containerID), + interfaceName, + 1600, + uint16(101), + gomock.Any(), + ).Return(ovsPortUUID, nil) + }, + }, + { + name: "default MTU", + networkType: vlanNetworkType, + vlan: 0, + interfaceCreated: true, + expectedCalls: func(mockIPAM *ipamtesting.MockIPAMDelegator, mockIC *podwatchtesting.MockInterfaceConfigurator) { + mockIPAM.EXPECT().GetIPAMSubnetAddress(gomock.Any(), gomock.Any()).Return(testIPAMResult("148.14.24.100/24"), nil) + mockIC.EXPECT().ConfigureVLANSecondaryInterface( + podName, + testNamespace, + containerID, + containerNetNs(containerID), + interfaceName, + 1500, + uint16(0), + gomock.Any(), + ).Return(ovsPortUUID, nil) + }, + }, + { + name: "SRIOV network", + networkType: sriovNetworkType, + mtu: 1500, + interfaceCreated: true, + expectedCalls: func(mockIPAM *ipamtesting.MockIPAMDelegator, mockIC *podwatchtesting.MockInterfaceConfigurator) { + mockIPAM.EXPECT().GetIPAMSubnetAddress(gomock.Any(), gomock.Any()).Return(testIPAMResult("148.14.24.100/24"), nil) + mockIC.EXPECT().ConfigureSriovSecondaryInterface( + podName, + testNamespace, + containerID, + containerNetNs(containerID), + interfaceName, + 1500, + sriovDeviceID, + gomock.Any(), + ).Return(nil) + }, + }, + { + name: "network not found", + networkType: vlanNetworkType, + mtu: 1500, + vlan: 100, + doNotCreateNetwork: true, + expectedErr: "failed to get NetworkAttachmentDefinition:", + }, + { + name: "non-Antrea network", + cniType: "non-antrea", + networkType: vlanNetworkType, + mtu: 1500, + vlan: 100, + }, + { + name: "unsupported network", + networkType: "unsupported", + }, + { + name: "negative MTU", + networkType: sriovNetworkType, + mtu: -1, + }, + { + name: "invalid VLAN", + networkType: vlanNetworkType, + vlan: 4095, + }, + { + name: "negative VLAN", + networkType: vlanNetworkType, + vlan: -200, + }, + { + name: "IPAM failure", + networkType: sriovNetworkType, + mtu: 1500, + expectedCalls: func(mockIPAM *ipamtesting.MockIPAMDelegator, mockIC *podwatchtesting.MockInterfaceConfigurator) { + mockIPAM.EXPECT().GetIPAMSubnetAddress(gomock.Any(), gomock.Any()).Return(testIPAMResult("148.14.24.100/24"), errors.New("failure")) + }, + expectedErr: "secondary network IPAM failed", + }, + { + name: "interface failure", + networkType: vlanNetworkType, + mtu: 1600, + vlan: 101, + expectedCalls: func(mockIPAM *ipamtesting.MockIPAMDelegator, mockIC *podwatchtesting.MockInterfaceConfigurator) { + mockIPAM.EXPECT().GetIPAMSubnetAddress(gomock.Any(), gomock.Any()).Return(testIPAMResult("148.14.24.100/24"), nil) + mockIC.EXPECT().ConfigureVLANSecondaryInterface( + podName, + testNamespace, + containerID, + containerNetNs(containerID), + interfaceName, + 1600, + uint16(101), + gomock.Any(), + ).Return("", errors.New("interface creation failure")) + mockIPAM.EXPECT().DelIPAMSubnetAddress(gomock.Any(), gomock.Any()).Return(nil) + }, + expectedErr: "interface creation failure", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + pod, cniConfigInfo := testPod(podName, containerID, podIP, element1) + savedCNIConfig := *cniConfigInfo + + pc, mockIPAM, interfaceConfigurator := testPodController(ctrl) + network1 := testNetworkExt(networkName, tc.cniType, tc.networkType, tc.mtu, tc.vlan) + if !tc.doNotCreateNetwork { + pc.netAttachDefClient.NetworkAttachmentDefinitions(testNamespace).Create(context.Background(), network1, metav1.CreateOptions{}) + } + if tc.expectedCalls != nil { + tc.expectedCalls(mockIPAM, interfaceConfigurator) + } + err := pc.configurePodSecondaryNetwork(pod, []*netdefv1.NetworkSelectionElement{&element1}, cniConfigInfo) + if tc.expectedErr == "" { + assert.Nil(t, err) + } else { + assert.True(t, strings.Contains(err.Error(), tc.expectedErr)) + } + + if tc.interfaceCreated { + config1, _ := netdefutils.GetCNIConfig(network1, "") + info := cnipodcache.InterfaceInfo{ + NetworkType: tc.networkType, + CNIConfig: config1, + } + if tc.networkType == vlanNetworkType { + info.OVSPortUUID = ovsPortUUID + } + savedCNIConfig.Interfaces = map[string]*cnipodcache.InterfaceInfo{interfaceName: &info} + } + assert.Equal(t, &savedCNIConfig, cniConfigInfo) + }) } +} + +func TestPodControllerAddPod(t *testing.T) { pod, cniConfig := testPod(podName, containerID, podIP, netdefv1.NetworkSelectionElement{ Name: networkName, InterfaceRequest: interfaceName, @@ -261,7 +446,7 @@ func TestPodControllerAddPod(t *testing.T) { t.Run("missing network", func(t *testing.T) { ctrl := gomock.NewController(t) - podController, _, _ := newPodController(ctrl) + podController, _, _ := testPodController(ctrl) podController.podCache.AddCNIConfigInfo(cniConfig) _, err := podController.kubeClient.CoreV1().Pods(testNamespace).Create(context.Background(), pod, metav1.CreateOptions{}) require.NoError(t, err, "error when creating test Pod") @@ -270,7 +455,7 @@ func TestPodControllerAddPod(t *testing.T) { t.Run("multiple network interfaces", func(t *testing.T) { ctrl := gomock.NewController(t) - podController, mockIPAM, interfaceConfigurator := newPodController(ctrl) + podController, mockIPAM, interfaceConfigurator := testPodController(ctrl) pod, cniConfig := testPod( podName, @@ -285,8 +470,10 @@ func TestPodControllerAddPod(t *testing.T) { InterfaceRequest: "eth11", }, ) - network1 := testNetwork("net1") - network2 := testNetwork("net2") + savedCNIConfig := *cniConfig + network1 := testNetwork("net1", sriovNetworkType) + testVLAN := 100 + network2 := testNetworkExt("net2", "", vlanNetworkType, defaultMTU, testVLAN) interfaceConfigurator.EXPECT().ConfigureSriovSecondaryInterface( podName, @@ -294,20 +481,20 @@ func TestPodControllerAddPod(t *testing.T) { containerID, containerNetNs(containerID), "eth10", - defaultMTU, + interfaceDefaultMTU, gomock.Any(), gomock.Any(), ) - interfaceConfigurator.EXPECT().ConfigureSriovSecondaryInterface( + interfaceConfigurator.EXPECT().ConfigureVLANSecondaryInterface( podName, testNamespace, containerID, containerNetNs(containerID), "eth11", defaultMTU, + uint16(testVLAN), gomock.Any(), - gomock.Any(), - ) + ).Return(ovsPortUUID, nil) mockIPAM.EXPECT().GetIPAMSubnetAddress(gomock.Any(), gomock.Any()).Return(testIPAMResult("148.14.24.100/24"), nil) mockIPAM.EXPECT().GetIPAMSubnetAddress(gomock.Any(), gomock.Any()).Return(testIPAMResult("148.14.24.101/24"), nil) @@ -320,11 +507,37 @@ func TestPodControllerAddPod(t *testing.T) { _, err = podController.netAttachDefClient.NetworkAttachmentDefinitions(testNamespace).Create(context.Background(), network2, metav1.CreateOptions{}) require.NoError(t, err, "error when creating test NetworkAttachmentDefinition") assert.NoError(t, podController.handleAddUpdatePod(pod)) + + infos := podController.podCache.GetAllCNIConfigInfoPerPod(podName, testNamespace) + assert.Equal(t, 1, len(infos)) + config1, _ := netdefutils.GetCNIConfig(network1, "") + config2, _ := netdefutils.GetCNIConfig(network2, "") + savedCNIConfig.Interfaces = map[string]*cnipodcache.InterfaceInfo{ + "eth10": { + NetworkType: sriovNetworkType, + CNIConfig: config1, + }, + "eth11": { + OVSPortUUID: ovsPortUUID, + NetworkType: vlanNetworkType, + CNIConfig: config2, + }, + } + assert.Equal(t, &savedCNIConfig, infos[0]) + + mockIPAM.EXPECT().DelIPAMSubnetAddress(gomock.Any(), gomock.Any()).Return(nil).Times(2) + interfaceConfigurator.EXPECT().DeleteVLANSecondaryInterface( + containerID, + gomock.Any(), + ovsPortUUID).Return(nil) + assert.NoError(t, podController.handleRemovePod(testNamespace+"/"+podName)) + infos = podController.podCache.GetAllCNIConfigInfoPerPod(podName, testNamespace) + assert.Equal(t, 0, len(infos)) }) t.Run("no network interfaces", func(t *testing.T) { ctrl := gomock.NewController(t) - podController, _, _ := newPodController(ctrl) + podController, _, _ := testPodController(ctrl) pod, cniConfig := testPod(podName, containerID, podIP) @@ -336,9 +549,9 @@ func TestPodControllerAddPod(t *testing.T) { t.Run("missing podcache entry", func(t *testing.T) { ctrl := gomock.NewController(t) - podController, _, _ := newPodController(ctrl) + podController, _, _ := testPodController(ctrl) - network := testNetwork(networkName) + network := testNetwork(networkName, sriovNetworkType) _, err := podController.kubeClient.CoreV1().Pods(testNamespace).Create(context.Background(), pod, metav1.CreateOptions{}) require.NoError(t, err, "error when creating test Pod") @@ -349,10 +562,10 @@ func TestPodControllerAddPod(t *testing.T) { t.Run("missing Status.PodIPs", func(t *testing.T) { ctrl := gomock.NewController(t) - podController, _, _ := newPodController(ctrl) + podController, _, _ := testPodController(ctrl) pod, cniConfig := testPod(podName, containerID, "") - network := testNetwork(networkName) + network := testNetwork(networkName, sriovNetworkType) podController.podCache.AddCNIConfigInfo(cniConfig) _, err := podController.kubeClient.CoreV1().Pods(testNamespace).Create(context.Background(), pod, metav1.CreateOptions{}) @@ -364,18 +577,11 @@ func TestPodControllerAddPod(t *testing.T) { t.Run("different Namespace for Pod and NetworkAttachmentDefinition", func(t *testing.T) { ctrl := gomock.NewController(t) - podController, mockIPAM, interfaceConfigurator := newPodController(ctrl) + podController, mockIPAM, interfaceConfigurator := testPodController(ctrl) networkNamespace := "nsB" - network := &netdefv1.NetworkAttachmentDefinition{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: networkNamespace, - Name: networkName, - }, - Spec: netdefv1.NetworkAttachmentDefinitionSpec{ - Config: netAttachConfig, - }, - } + network := testNetwork(networkName, sriovNetworkType) + network.Namespace = networkNamespace pod, cniConfig := testPod(podName, containerID, podIP, netdefv1.NetworkSelectionElement{ Namespace: networkNamespace, @@ -405,7 +611,7 @@ func TestPodControllerAddPod(t *testing.T) { t.Run("no interface name", func(t *testing.T) { ctrl := gomock.NewController(t) - podController, mockIPAM, interfaceConfigurator := newPodController(ctrl) + podController, mockIPAM, interfaceConfigurator := testPodController(ctrl) pod, cniConfig := testPod( podName, @@ -420,7 +626,7 @@ func TestPodControllerAddPod(t *testing.T) { InterfaceRequest: "", }, ) - network := testNetwork(networkName) + network := testNetwork(networkName, sriovNetworkType) interfaceConfigurator.EXPECT().ConfigureSriovSecondaryInterface( podName, @@ -456,9 +662,9 @@ func TestPodControllerAddPod(t *testing.T) { t.Run("error when creating interface", func(t *testing.T) { ctrl := gomock.NewController(t) - podController, mockIPAM, interfaceConfigurator := newPodController(ctrl) + podController, mockIPAM, interfaceConfigurator := testPodController(ctrl) - network := testNetwork(networkName) + network := testNetwork(networkName, sriovNetworkType) interfaceConfigurator.EXPECT().ConfigureSriovSecondaryInterface( podName, @@ -484,13 +690,13 @@ func TestPodControllerAddPod(t *testing.T) { t.Run("invalid networks annotation", func(t *testing.T) { ctrl := gomock.NewController(t) - podController, _, _ := newPodController(ctrl) + podController, _, _ := testPodController(ctrl) pod, cniConfig := testPod(podName, containerID, podIP) pod.Annotations = map[string]string{ networkAttachDefAnnotationKey: "", } - network := testNetwork(networkName) + network := testNetwork(networkName, sriovNetworkType) podController.podCache.AddCNIConfigInfo(cniConfig) _, err := podController.kubeClient.CoreV1().Pods(testNamespace).Create(context.Background(), pod, metav1.CreateOptions{}) @@ -503,8 +709,8 @@ func TestPodControllerAddPod(t *testing.T) { t.Run("Error when adding VF deviceID cache per Pod", func(t *testing.T) { ctrl := gomock.NewController(t) - network := testNetwork(networkName) - podController, _, _ := newPodController(ctrl) + network := testNetwork(networkName, sriovNetworkType) + podController, _, _ := testPodController(ctrl) podController.podCache.AddCNIConfigInfo(cniConfig) _, err := podController.kubeClient.CoreV1().Pods(testNamespace).Create(context.Background(), pod, metav1.CreateOptions{}) require.NoError(t, err, "error when creating test Pod") @@ -519,3 +725,23 @@ func TestPodControllerAddPod(t *testing.T) { }) } + +func testPodController(ctrl *gomock.Controller) (*PodController, *ipamtesting.MockIPAMDelegator, *podwatchtesting.MockInterfaceConfigurator) { + client := fake.NewSimpleClientset() + netdefclient := netdefclientfake.NewSimpleClientset().K8sCniCncfIoV1() + informerFactory := informers.NewSharedInformerFactory(client, resyncPeriod) + podCache := cnipodcache.NewCNIPodInfoStore() + interfaceConfigurator := podwatchtesting.NewMockInterfaceConfigurator(ctrl) + mockIPAM := ipamtesting.NewMockIPAMDelegator(ctrl) + ipamDelegator = mockIPAM + // PodController object without event handlers + return &PodController{ + kubeClient: client, + netAttachDefClient: netdefclient, + queue: workqueue.NewNamedRateLimitingQueue(workqueue.NewItemExponentialFailureRateLimiter(minRetryDelay, maxRetryDelay), "podcontroller"), + podInformer: informerFactory.Core().V1().Pods().Informer(), + nodeName: testNode, + podCache: podCache, + interfaceConfigurator: interfaceConfigurator, + }, mockIPAM, interfaceConfigurator +} diff --git a/pkg/agent/secondarynetwork/podwatch/sriov.go b/pkg/agent/secondarynetwork/podwatch/sriov.go new file mode 100644 index 00000000000..adbb613a228 --- /dev/null +++ b/pkg/agent/secondarynetwork/podwatch/sriov.go @@ -0,0 +1,173 @@ +// Copyright 2023 Antrea Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package podwatch + +import ( + "context" + "fmt" + "net" + "path" + "time" + + corev1 "k8s.io/api/core/v1" + "k8s.io/klog/v2" + + // Version v1 of the kubelet API was introduced in K8s v1.20. + // Using version v1alpha1 instead to support older K8s versions. + current "github.com/containernetworking/cni/pkg/types/100" + netdefv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + "google.golang.org/grpc" + grpcinsecure "google.golang.org/grpc/credentials/insecure" + podresourcesv1alpha1 "k8s.io/kubelet/pkg/apis/podresources/v1alpha1" + + cnipodcache "antrea.io/antrea/pkg/agent/secondarynetwork/cnipodcache" + "antrea.io/antrea/pkg/agent/util" +) + +const ( + kubeletPodResourcesPath = "/var/lib/kubelet/pod-resources" + kubeletSocket = "kubelet.sock" + connectionTimeout = 10 * time.Second +) + +var ( + // getPodContainerDeviceIDsFn is used to retrieve SRIOV device IDs + // assigned to a specific Pod. It can be overridden by unit tests. + getPodContainerDeviceIDsFn = getPodContainerDeviceIDs +) + +type KubeletPodResources struct { + resources []*podresourcesv1alpha1.PodResources +} + +// Structure to associate a unique VF's PCI Address to the Linux ethernet interface. +type podSriovVFDeviceIDInfo struct { + vfDeviceID string + ifName string +} + +// getPodContainerDeviceIDs returns the device IDs assigned to a Pod's containers. +func getPodContainerDeviceIDs(podName string, podNamespace string) ([]string, error) { + ctx, cancel := context.WithTimeout(context.Background(), connectionTimeout) + defer cancel() + + conn, err := grpc.DialContext( + ctx, + path.Join(kubeletPodResourcesPath, kubeletSocket), + grpc.WithTransportCredentials(grpcinsecure.NewCredentials()), + grpc.WithContextDialer(func(ctx context.Context, addr string) (conn net.Conn, e error) { + return util.DialLocalSocket(addr) + }), + ) + if err != nil { + return []string{}, fmt.Errorf("error getting the gRPC client for Pod resources: %v", err) + } + + defer conn.Close() + + client := podresourcesv1alpha1.NewPodResourcesListerClient(conn) + if client == nil { + return []string{}, fmt.Errorf("error getting the lister client for Pod resources") + } + + podResources, err := client.List(ctx, &podresourcesv1alpha1.ListPodResourcesRequest{}) + if err != nil { + return []string{}, fmt.Errorf("error getting the Pod resources: %v %v", podResources, err) + } + + var podDeviceIDs []string + var kpr KubeletPodResources + kpr.resources = podResources.GetPodResources() + for _, pr := range kpr.resources { + if pr.Name == podName && pr.Namespace == podNamespace { + for _, ctr := range pr.Containers { + for _, dev := range ctr.Devices { + podDeviceIDs = append(podDeviceIDs, dev.DeviceIds...) + } + } + } + } + klog.V(2).Infof("Pod container device IDs of %s/%s are: %v", podNamespace, podName, podDeviceIDs) + return podDeviceIDs, nil +} + +// buildVFDeviceIDListPerPod is a helper function to build a cache structure with the +// list of all the PCI addresses allocated per Pod based on their resource requests (in Pod spec). +// When there is a request for a VF resource (to associate it for a secondary network interface), +// getUnusedSriovVFDeviceIDPerPod will use this cache information to pick up a unique PCI address +// which is still not associated with a network device name. +// NOTE: buildVFDeviceIDListPerPod is called only if a Pod specific VF to Interface mapping cache +// was not build earlier. Sample initial entry per Pod: "{18:01.1,""},{18:01.2,""},{18:01.3,""}" +func (pc *PodController) buildVFDeviceIDListPerPod(podName, podNamespace string) ([]podSriovVFDeviceIDInfo, error) { + podKey := podNamespace + "/" + podName + deviceCache, cacheFound := pc.vfDeviceIDUsageMap.Load(podKey) + if cacheFound { + return deviceCache.([]podSriovVFDeviceIDInfo), nil + } + podSriovVFDeviceIDs, err := getPodContainerDeviceIDsFn(podName, podNamespace) + if err != nil { + return nil, fmt.Errorf("getPodContainerDeviceIDs failed: %v", err) + } + var vfDeviceIDInfoCache []podSriovVFDeviceIDInfo + for _, pciAddress := range podSriovVFDeviceIDs { + initSriovVfDeviceID := podSriovVFDeviceIDInfo{vfDeviceID: pciAddress, ifName: ""} + vfDeviceIDInfoCache = append(vfDeviceIDInfoCache, initSriovVfDeviceID) + } + pc.vfDeviceIDUsageMap.Store(podKey, vfDeviceIDInfoCache) + klog.V(2).InfoS("Pod specific SRIOV VF cache created", "Key", podKey) + return vfDeviceIDInfoCache, nil +} + +func (pc *PodController) deleteVFDeviceIDListPerPod(podName, podNamespace string) { + podKey := podNamespace + "/" + podName + _, cacheFound := pc.vfDeviceIDUsageMap.Load(podKey) + if cacheFound { + pc.vfDeviceIDUsageMap.Delete(podKey) + klog.V(2).InfoS("Pod specific SRIOV VF cache cleared", "Key", podKey) + } + return +} + +func (pc *PodController) assignUnusedSriovVFDeviceIDPerPod(podName, podNamespace, interfaceName string) (string, error) { + var cache []podSriovVFDeviceIDInfo + cache, err := pc.buildVFDeviceIDListPerPod(podName, podNamespace) + if err != nil { + return "", err + } + for idx := 0; idx < len(cache); idx++ { + if cache[idx].ifName == "" { + // Unused PCI address found. Associate PCI address to the interface. + cache[idx].ifName = interfaceName + return cache[idx].vfDeviceID, nil + } + } + return "", err +} + +// Configure SRIOV VF as a Secondary Network Interface. +func (pc *PodController) configureSriovAsSecondaryInterface(pod *corev1.Pod, network *netdefv1.NetworkSelectionElement, containerInfo *cnipodcache.CNIConfigInfo, mtu int, result *current.Result) error { + podSriovVFDeviceID, err := pc.assignUnusedSriovVFDeviceIDPerPod(pod.Name, pod.Namespace, network.InterfaceRequest) + if err != nil { + return err + } + + if err = pc.interfaceConfigurator.ConfigureSriovSecondaryInterface( + containerInfo.PodName, containerInfo.PodNamespace, containerInfo.ContainerID, + containerInfo.ContainerNetNS, network.InterfaceRequest, + mtu, podSriovVFDeviceID, result); err != nil { + return fmt.Errorf("SRIOV Interface creation failed: %v", err) + } + return nil +} diff --git a/pkg/agent/secondarynetwork/podwatch/testing/mock_podwatch.go b/pkg/agent/secondarynetwork/podwatch/testing/mock_podwatch.go index bd25546f7e7..296a9cedca8 100644 --- a/pkg/agent/secondarynetwork/podwatch/testing/mock_podwatch.go +++ b/pkg/agent/secondarynetwork/podwatch/testing/mock_podwatch.go @@ -1,4 +1,4 @@ -// Copyright 2022 Antrea Authors +// Copyright 2023 Antrea Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -61,3 +61,32 @@ func (mr *MockInterfaceConfiguratorMockRecorder) ConfigureSriovSecondaryInterfac mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ConfigureSriovSecondaryInterface", reflect.TypeOf((*MockInterfaceConfigurator)(nil).ConfigureSriovSecondaryInterface), arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7) } + +// ConfigureVLANSecondaryInterface mocks base method +func (m *MockInterfaceConfigurator) ConfigureVLANSecondaryInterface(arg0, arg1, arg2, arg3, arg4 string, arg5 int, arg6 uint16, arg7 *types100.Result) (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ConfigureVLANSecondaryInterface", arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7) + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ConfigureVLANSecondaryInterface indicates an expected call of ConfigureVLANSecondaryInterface +func (mr *MockInterfaceConfiguratorMockRecorder) ConfigureVLANSecondaryInterface(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ConfigureVLANSecondaryInterface", reflect.TypeOf((*MockInterfaceConfigurator)(nil).ConfigureVLANSecondaryInterface), arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7) +} + +// DeleteVLANSecondaryInterface mocks base method +func (m *MockInterfaceConfigurator) DeleteVLANSecondaryInterface(arg0, arg1, arg2 string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteVLANSecondaryInterface", arg0, arg1, arg2) + ret0, _ := ret[0].(error) + return ret0 +} + +// DeleteVLANSecondaryInterface indicates an expected call of DeleteVLANSecondaryInterface +func (mr *MockInterfaceConfiguratorMockRecorder) DeleteVLANSecondaryInterface(arg0, arg1, arg2 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteVLANSecondaryInterface", reflect.TypeOf((*MockInterfaceConfigurator)(nil).DeleteVLANSecondaryInterface), arg0, arg1, arg2) +} diff --git a/pkg/agent/secondarynetwork/podwatch/types.go b/pkg/agent/secondarynetwork/podwatch/types.go index f52f12239fa..b216315d9a8 100644 --- a/pkg/agent/secondarynetwork/podwatch/types.go +++ b/pkg/agent/secondarynetwork/podwatch/types.go @@ -14,6 +14,8 @@ package podwatch +import "antrea.io/antrea/pkg/agent/secondarynetwork/cnipodcache" + type RouteInfo struct { Dst string `json:"dst,omitempty"` } @@ -28,7 +30,8 @@ type IPAMConfig struct { } const ( - sriovNetworkType = "sriov" + sriovNetworkType cnipodcache.NetworkType = "sriov" + vlanNetworkType cnipodcache.NetworkType = "vlan" ) type SecondaryNetworkConfig struct { @@ -37,6 +40,9 @@ type SecondaryNetworkConfig struct { // Set type to "antrea" Type string `json:"type,omitempty"` // Set networkType to "sriov" - NetworkType string `json:"networkType,omitempty"` - IPAM IPAMConfig `json:"ipam,omitempty"` + NetworkType cnipodcache.NetworkType `json:"networkType,omitempty"` + + MTU int32 `json:"mtu,omitempty"` + VLAN int32 `json:"vlan,omitempty"` + IPAM IPAMConfig `json:"ipam,omitempty"` } diff --git a/pkg/agent/util/net.go b/pkg/agent/util/net.go index 7e08780c104..4e684b3b305 100644 --- a/pkg/agent/util/net.go +++ b/pkg/agent/util/net.go @@ -90,9 +90,10 @@ func GenerateNodeTunnelInterfaceKey(nodeName string) string { } // GenerateContainerInterfaceName generates a unique interface name using the -// Pod's namespace, name and containerID. The output should be deterministic (so that -// multiple calls to GenerateContainerInterfaceName with the same parameters -// return the same value). The output has the length of interfaceNameLength(15). +// Pod's Namespace, name and container ID. The output should be deterministic +// (so that multiple calls to GenerateContainerInterfaceName with the same +// parameters return the same value). The output has the length of +// interfaceNameLength(15). // The probability of collision should be neglectable. func GenerateContainerInterfaceName(podName, podNamespace, containerID string) string { // Use the podName as the prefix and the containerID as the hashing key. @@ -100,6 +101,20 @@ func GenerateContainerInterfaceName(podName, podNamespace, containerID string) s return generateInterfaceName(containerID, podName, true) } +// GenerateContainerHostVethName generates a unique interface name using the +// Pod's Name, container ID, and the container veth interface name. The output +// should be deterministic. +func GenerateContainerHostVethName(podName, podNamespace, containerID, containerVeth string) string { + var key string + if containerVeth == "eth0" { + key = containerID + } else { + // Secondary interface. + key = containerID + containerVeth + } + return generateInterfaceName(key, podName, true) +} + // GenerateNodeTunnelInterfaceName generates a unique interface name for the // tunnel to the Node, using the Node's name. func GenerateNodeTunnelInterfaceName(nodeName string) string { diff --git a/pkg/agent/util/net_test.go b/pkg/agent/util/net_test.go index 42bc4187e01..ea952c3ac42 100644 --- a/pkg/agent/util/net_test.go +++ b/pkg/agent/util/net_test.go @@ -22,6 +22,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "k8s.io/apimachinery/pkg/util/sets" "antrea.io/antrea/pkg/util/ip" @@ -70,6 +71,97 @@ func TestGenerateContainerInterfaceName(t *testing.T) { } } +func TestGenerateContainerHostVethName(t *testing.T) { + podName0 := "pod0" + podNamespace0 := "ns0" + containerID0 := "container0" + eth0 := "eth0" + ifaceName0 := GenerateContainerHostVethName(podName0, podNamespace0, containerID0, eth0) + require.LessOrEqual(t, len(ifaceName0), interfaceNameLength) + require.True(t, strings.HasPrefix(ifaceName0, podName0+"-")) + + tests := []struct { + name string + podName string + podNS string + containerID string + innerName string + namePrefix string + equalToIface0 bool + }{ + { + name: "should equal iface0", + podName: podName0, + podNS: podNamespace0, + containerID: containerID0, + innerName: eth0, + namePrefix: podName0 + "-", + equalToIface0: true, + }, + { + name: "eth1", + podName: podName0, + podNS: podNamespace0, + containerID: containerID0, + innerName: "eth1", + namePrefix: podName0 + "-", + }, + { + name: "pod1", + podName: "pod1", + podNS: podNamespace0, + containerID: containerID0, + innerName: eth0, + namePrefix: "pod1-", + }, + { + name: "pod0 and different container ID", + podName: podName0, + podNS: podNamespace0, + containerID: "container1", + innerName: eth0, + namePrefix: podName0 + "-", + }, + { + name: "pod0 of ns1", + podName: podName0, + podNS: "ns1", + containerID: containerID0, + innerName: "eth0", + namePrefix: podName0 + "-", + equalToIface0: true, + }, + { + name: "8-char Pod name", + podName: "pod12345", + podNS: podNamespace0, + containerID: containerID0, + innerName: eth0, + namePrefix: "pod12345" + "-", + }, + { + name: "6-char Pod name", + podName: "pod123456", + podNS: podNamespace0, + containerID: containerID0, + innerName: eth0, + namePrefix: "pod12345" + "-", + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + ifaceName := GenerateContainerHostVethName(tc.podName, tc.podNS, tc.containerID, tc.innerName) + assert.True(t, len(ifaceName) <= interfaceNameLength) + assert.True(t, strings.HasPrefix(ifaceName, tc.namePrefix)) + if tc.equalToIface0 { + assert.Equal(t, ifaceName, ifaceName0) + } else { + assert.NotEqual(t, ifaceName, ifaceName0) + } + }) + } +} + func TestGetIPNetDeviceFromIP(t *testing.T) { testNetInterfaces := generateNetInterfaces() tests := []struct {