Skip to content

Commit 5c01db3

Browse files
committed
Fix: removing secondary interface in Asyc delete for SWiftV2 stateless CNI
1 parent 72f22e8 commit 5c01db3

File tree

6 files changed

+106
-5
lines changed

6 files changed

+106
-5
lines changed

cni/network/network.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1078,6 +1078,13 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
10781078
case errors.Is(err, network.ErrConnectionFailure):
10791079
logger.Error("Failed to connect to CNS", zap.Error(err))
10801080
logger.Info("Endpoint will be deleted from state file asynchronously", zap.String("containerID", args.ContainerID))
1081+
// In SwiftV2 Linux stateless CNI mode, if the plugin cannot connect to CNS,
1082+
// we asynchronously remove the secondary (delegated) interface from the pod’s network namespace.
1083+
// This is necessary because leaving the delegated NIC in the pod netns can cause the kernel to block rtnetlink operations.
1084+
// When that happens, kubelet and containerd hang during sandbox creation or teardown.
1085+
// The delegated NIC (SR-IOV VF) used by SwiftV2 for multitenant pods remains tied to the pod namespace,
1086+
// triggering hot-unplug/re-register events and leaving the node in an unhealthy state.
1087+
plugin.nm.RemoveSecondaryEndpointFromPodNetNS(args.IfName, args.Netns)
10811088
case errors.Is(err, network.ErrEndpointStateNotFound):
10821089
logger.Info("Endpoint Not found", zap.String("containerID", args.ContainerID), zap.Error(err))
10831090
return nil

cns/restserver/restserver.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ type IPInfo struct {
132132
HnsNetworkID string `json:",omitempty"`
133133
HostVethName string `json:",omitempty"`
134134
MacAddress string `json:",omitempty"`
135-
NICType cns.NICType `json:",omitempty"`
135+
NICType cns.NICType
136136
}
137137

138138
type GetHTTPServiceDataResponse struct {

network/endpoint_linux.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,3 +547,12 @@ func getDefaultGateway(routes []RouteInfo) net.IP {
547547
func (epInfo *EndpointInfo) GetEndpointInfoByIPImpl(_ []net.IPNet, _ string) (*EndpointInfo, error) {
548548
return epInfo, nil
549549
}
550+
551+
// removeSecondaryEndpointFromPodNetNSImpl deletes an existing secondary endpoint from the pod network namespace.
552+
func (ep *endpoint) removeSecondaryEndpointFromPodNetNSImpl(nl netlink.NetlinkInterface, nsc NamespaceClientInterface) error {
553+
secondaryepClient := NewSecondaryEndpointClient(nl, nil, nil, nsc, nil, ep)
554+
if err := secondaryepClient.RemoveInterfacesfromNetnsPath(ep.IfName, ep.NetworkNameSpace); err != nil {
555+
return err
556+
}
557+
return nil
558+
}

network/endpoint_windows.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -746,3 +746,8 @@ func getPnpDeviceState(instanceID string, plc platform.ExecClient) (string, stri
746746
logger.Info("Retrieved device problem code", zap.String("code", devpkeyDeviceProblemCode))
747747
return devpkeyDeviceIsPresent, devpkeyDeviceProblemCode, nil
748748
}
749+
750+
// removeSecondaryEndpointFromPodNetNSImpl removes an existing secondary endpoint from the pod network namespace.
751+
func (ep *endpoint) removeSecondaryEndpointFromPodNetNSImpl(_ netlink.NetlinkInterface, _ NamespaceClientInterface) error {
752+
return nil
753+
}

network/manager.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ type NetworkManager interface {
122122
DeleteState(epInfos []*EndpointInfo) error
123123
GetEndpointInfosFromContainerID(containerID string) []*EndpointInfo
124124
GetEndpointState(networkID, containerID string) ([]*EndpointInfo, error)
125+
RemoveSecondaryEndpointFromPodNetNS(ifName string, netns string) error
125126
}
126127

127128
// Creates a new network manager.
@@ -860,3 +861,16 @@ func generateCNSIPInfoMap(eps []*endpoint) map[string]*restserver.IPInfo {
860861

861862
return ifNametoIPInfoMap
862863
}
864+
865+
// RemoveSecondaryEndpointFromPodNetNS removes the secondary endpoint from the pod netns
866+
func (nm *networkManager) RemoveSecondaryEndpointFromPodNetNS(ifName string, netns string) error {
867+
ep := &endpoint{
868+
NetworkNameSpace: netns,
869+
IfName: ifName, // TODO: For stateless cni linux populate IfName here to use in deletion in secondary endpoint client
870+
}
871+
872+
logger.Info("Removing Secondary Endpoint from", zap.String("NetworkNameSpace: ", netns))
873+
874+
err := ep.removeSecondaryEndpointFromPodNetNSImpl(nm.netlink, nm.nsClient)
875+
return err
876+
}

network/secondary_endpoint_client_linux.go

Lines changed: 70 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package network
22

33
import (
44
"context"
5+
"fmt"
56
"os"
67
"strings"
78
"time"
@@ -13,6 +14,7 @@ import (
1314
"github.com/Azure/azure-container-networking/network/networkutils"
1415
"github.com/Azure/azure-container-networking/platform"
1516
"github.com/pkg/errors"
17+
vishnetlink "github.com/vishvananda/netlink"
1618
"go.uber.org/zap"
1719
"k8s.io/kubernetes/pkg/kubelet"
1820
)
@@ -190,15 +192,15 @@ func (client *SecondaryEndpointClient) DeleteEndpoints(ep *endpoint) error {
190192
}
191193
}()
192194
// For stateless cni linux, check if delegated vmnic type, and if so, delete using this *endpoint* struct's ifname
193-
if ep.NICType == cns.DelegatedVMNIC {
194-
if err := client.moveInterfaceToHostNetns(ep.IfName, vmns); err != nil {
195+
if ep.NICType == cns.NodeNetworkInterfaceFrontendNIC {
196+
if err := client.MoveInterfaceToHostNetns(ep.IfName, vmns); err != nil {
195197
logger.Error("Failed to move interface", zap.String("IfName", ep.IfName), zap.Error(newErrorSecondaryEndpointClient(err)))
196198
}
197199
}
198200
// For Statefull cni linux, Use SecondaryInterfaces map to move all interfaces to host netns
199201
// TODO: SecondaryInterfaces map should be retired and only IfName field and NICType should be used to determine the delgated NIC
200202
for iface := range ep.SecondaryInterfaces {
201-
if err := client.moveInterfaceToHostNetns(iface, vmns); err != nil {
203+
if err := client.MoveInterfaceToHostNetns(iface, vmns); err != nil {
202204
logger.Error("Failed to move interface", zap.String("IfName", iface), zap.Error(newErrorSecondaryEndpointClient(err)))
203205
continue
204206
}
@@ -209,10 +211,74 @@ func (client *SecondaryEndpointClient) DeleteEndpoints(ep *endpoint) error {
209211
}
210212

211213
// moveInterfaceToHostNetns moves the given interface to the host netns.
212-
func (client *SecondaryEndpointClient) moveInterfaceToHostNetns(ifName string, vmns int) error {
214+
func (client *SecondaryEndpointClient) MoveInterfaceToHostNetns(ifName string, vmns int) error {
213215
logger.Info("Moving interface to host netns", zap.String("IfName", ifName))
214216
if err := client.netlink.SetLinkNetNs(ifName, uintptr(vmns)); err != nil {
215217
return newErrorSecondaryEndpointClient(err)
216218
}
217219
return nil
218220
}
221+
222+
// removeInterfacesfromNetnsPath finds and removes all interfaces from the specified netns path except the infra and non-eth interfaces.
223+
func (client *SecondaryEndpointClient) RemoveInterfacesfromNetnsPath(infraInterfaceName string, netnspath string) error {
224+
// Get VM namespace
225+
vmns, err := netns.New().Get()
226+
if err != nil {
227+
return newErrorSecondaryEndpointClient(err)
228+
}
229+
230+
// Open the network namespace.
231+
logger.Info("Opening netns", zap.Any("NetNsPath", netnspath))
232+
ns, err := client.nsClient.OpenNamespace(netnspath)
233+
if err != nil {
234+
return newErrorSecondaryEndpointClient(err)
235+
}
236+
defer ns.Close()
237+
238+
// Enter the container network namespace.
239+
logger.Info("Entering netns", zap.Any("NetNsPath", netnspath))
240+
if err := ns.Enter(); err != nil {
241+
if errors.Is(err, os.ErrNotExist) {
242+
return nil
243+
}
244+
245+
return newErrorSecondaryEndpointClient(err)
246+
}
247+
248+
// Return to host network namespace.
249+
defer func() {
250+
logger.Info("Exiting netns", zap.Any("NetNsPath", netnspath))
251+
if err := ns.Exit(); err != nil {
252+
logger.Error("Failed to exit netns with", zap.Error(newErrorSecondaryEndpointClient(err)))
253+
}
254+
}()
255+
// For stateless cni linux, check if delegated vmnic type, and if so, delete using this *endpoint* struct's ifname
256+
ifnames, err := listIfNamesInNetns(ns)
257+
for _, iface := range ifnames {
258+
// skip the infra interface as well as non-eth interfaces
259+
if iface == infraInterfaceName || !strings.Contains(iface, "eth") {
260+
continue
261+
}
262+
if err := client.MoveInterfaceToHostNetns(iface, vmns); err != nil {
263+
logger.Error("Failed to move interface", zap.String("IfName", iface), zap.Error(newErrorSecondaryEndpointClient(err)))
264+
}
265+
}
266+
267+
return nil
268+
}
269+
270+
// ListIfNamesInNetns returns all interface names in the given netns path.
271+
func listIfNamesInNetns(ns NamespaceInterface) ([]string, error) {
272+
// Use the existing netlink interface to list links
273+
links, err := vishnetlink.LinkList()
274+
if err != nil {
275+
return nil, fmt.Errorf("failed to list links: %w", err)
276+
}
277+
278+
names := make([]string, 0, len(links))
279+
for _, l := range links {
280+
names = append(names, l.Attrs().Name)
281+
}
282+
logger.Info("Found interfaces in netns that needs to be moved back to host", zap.Any("interfaces", names))
283+
return names, nil
284+
}

0 commit comments

Comments
 (0)