Skip to content

Commit f9af850

Browse files
committed
fix: generating endpoint locally when CNS is not reachabe.
1 parent 9600774 commit f9af850

File tree

8 files changed

+414
-204
lines changed

8 files changed

+414
-204
lines changed

cni/network/network.go

Lines changed: 29 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,13 +1045,15 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
10451045
networkID, err = plugin.getNetworkID(args.Netns, nil, nwCfg)
10461046
if nwInfo, err = plugin.nm.GetNetworkInfo(networkID); err != nil {
10471047
if !nwCfg.MultiTenancy {
1048-
logger.Error("Failed to query network",
1049-
zap.String("network", networkID),
1050-
zap.Error(err))
10511048
// Log the error if the network is not found.
10521049
// if cni hits this, mostly state file would be missing and it can be reboot scenario where
10531050
// container runtime tries to delete and create pods which existed before reboot.
1054-
// this condition will not apply to stateless CNI since the network struct will be crated on each call
1051+
// this error will not apply to stateless CNI since the network struct will be crated on Delete calls
1052+
if !plugin.nm.IsStatelessCNIMode() {
1053+
logger.Info("Failed to query network",
1054+
zap.String("network", networkID),
1055+
zap.Error(err))
1056+
}
10551057
err = nil
10561058
}
10571059
}
@@ -1070,49 +1072,11 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
10701072
}
10711073
logger.Info("Retrieved network info, populating endpoint infos with container id", zap.String("containerID", args.ContainerID))
10721074

1073-
var epInfos []*network.EndpointInfo
1074-
if plugin.nm.IsStatelessCNIMode() {
1075-
// network ID is passed in and used only for migration
1076-
// otherwise, in stateless, we don't need the network id for deletion
1077-
epInfos, err = plugin.nm.GetEndpointState(networkID, args.ContainerID, args.Netns)
1078-
// if stateless CNI fail to get the endpoint from CNS for any reason other than Endpoint Not found or CNS connection failure
1079-
// return a retriable error so the container runtime will retry this DEL later
1080-
// the implementation of this function returns nil if the endpoint doesn't exist, so
1081-
// we don't have to check that here
1082-
if err != nil {
1083-
switch {
1084-
case errors.Is(err, network.ErrConnectionFailure):
1085-
logger.Error("Failed to connect to CNS", zap.Error(err))
1086-
logger.Info("Endpoint will be deleted from state file asynchronously", zap.String("containerID", args.ContainerID))
1087-
// In SwiftV2 Linux stateless CNI mode, if the plugin cannot connect to CNS,
1088-
// we asynchronously remove the secondary (delegated) interface from the pod’s network namespace in the absence of the endpoint state.
1089-
// This is necessary because leaving the delegated NIC in the pod netns can cause the kernel to block rtnetlink operations.
1090-
// When that happens, kubelet and containerd hang during sandbox creation or teardown.
1091-
// The delegated NIC (SR-IOV VF) used by SwiftV2 for multitenant pods remains tied to the pod namespace,
1092-
// triggering hot-unplug/re-register events and leaving the node in an unhealthy state.
1093-
// This workaround mitigates the issue by removing the secondary NIC from the pod netns when CNS is unreachable during DEL to provide the endpoint state.
1094-
if err = plugin.nm.RemoveSecondaryEndpointFromPodNetNS(args.IfName, args.Netns); err != nil {
1095-
logger.Error("Failed to remove secondary endpoint from pod netns", zap.String("netns", args.Netns), zap.Error(err))
1096-
return plugin.RetriableError(fmt.Errorf("failed to remove secondary endpoint from pod netns: %w", err))
1097-
}
1098-
case errors.Is(err, network.ErrEndpointStateNotFound):
1099-
logger.Info("Endpoint Not found", zap.String("containerID", args.ContainerID), zap.Error(err))
1100-
return nil
1101-
default:
1102-
logger.Error("Get Endpoint State API returned error", zap.String("containerID", args.ContainerID), zap.Error(err))
1103-
return plugin.RetriableError(fmt.Errorf("failed to delete endpoint: %w", err))
1104-
}
1105-
} else {
1106-
for _, epInfo := range epInfos {
1107-
logger.Info("Found endpoint to delete", zap.String("IfName", epInfo.IfName), zap.String("EndpointID", epInfo.EndpointID), zap.Any("NICType", epInfo.NICType))
1108-
}
1109-
}
1110-
} else {
1111-
epInfos = plugin.nm.GetEndpointInfosFromContainerID(args.ContainerID)
1075+
epInfos, err := plugin.nm.GetEndpointInfos(networkID, args, nwCfg.DisableAsyncDelete)
1076+
if err != nil {
1077+
return plugin.RetriableError(fmt.Errorf("failed to retrieve endpoint: %w", err))
11121078
}
1113-
1114-
// for Stateful CNI when the endpoint is not created, but the ips are already allocated (only works if single network, single infra)
1115-
// this block is applied to stateless CNI only if there was a connection failure in previous block and asynchronous delete by CNS will remover the endpoint from state file
1079+
// when the endpoint is not created, but the ips are already allocated (only works if single network, single infra)
11161080
if len(epInfos) == 0 {
11171081
endpointID := plugin.nm.GetEndpointID(args.ContainerID, args.IfName)
11181082
if !nwCfg.MultiTenancy {
@@ -1150,15 +1114,26 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
11501114
zap.String("endpointID", epInfo.EndpointID))
11511115
telemetryClient.SendEvent("Deleting endpoint: " + epInfo.EndpointID)
11521116

1117+
// Delegated/secondary nic ips are statically allocated so we don't need to release
1118+
// Call into IPAM plugin to release the endpoint's addresses.
11531119
if !nwCfg.MultiTenancy && (epInfo.NICType == cns.InfraNIC || epInfo.NICType == "") {
1154-
// Delegated/secondary nic ips are statically allocated so we don't need to release
1155-
// Call into IPAM plugin to release the endpoint's addresses.
1156-
for i := range epInfo.IPAddresses {
1157-
logger.Info("Release ip", zap.String("ip", epInfo.IPAddresses[i].IP.String()))
1158-
telemetryClient.SendEvent(fmt.Sprintf("Release ip: %s container id: %s endpoint id: %s", epInfo.IPAddresses[i].IP.String(), args.ContainerID, epInfo.EndpointID))
1159-
err = plugin.ipamInvoker.Delete(&epInfo.IPAddresses[i], nwCfg, args, nwInfo.Options)
1160-
if err != nil {
1161-
return plugin.RetriableError(fmt.Errorf("failed to release address: %w", err))
1120+
// This is an special case for stateless CNI when Asynchronous DEL to CNS will take place for SwiftV2 after the endpointinfo is recreated locally
1121+
// At this point the endpoint is already deleted and since it is created locally the IPAddress is nil. CNS will release the IP asynchronously whenever it is up
1122+
if epInfo.IPAddresses == nil && plugin.nm.IsStatelessCNIMode() && !nwCfg.DisableAsyncDelete {
1123+
logger.Warn("Release ip Asynchronously by CNS",
1124+
zap.String("containerID", args.ContainerID))
1125+
telemetryClient.SendEvent(fmt.Sprintf("Release ip for container id: %s asynchronously", args.ContainerID))
1126+
if err = plugin.ipamInvoker.Delete(nil, nwCfg, args, nwInfo.Options); err != nil {
1127+
return plugin.RetriableError(fmt.Errorf("failed to release address(no endpoint): %w", err))
1128+
}
1129+
} else {
1130+
for i := range epInfo.IPAddresses {
1131+
logger.Info("Release ip", zap.String("ip", epInfo.IPAddresses[i].IP.String()))
1132+
telemetryClient.SendEvent(fmt.Sprintf("Release ip: %s container id: %s endpoint id: %s", epInfo.IPAddresses[i].IP.String(), args.ContainerID, epInfo.EndpointID))
1133+
err = plugin.ipamInvoker.Delete(&epInfo.IPAddresses[i], nwCfg, args, nwInfo.Options)
1134+
if err != nil {
1135+
return plugin.RetriableError(fmt.Errorf("failed to release address: %w", err))
1136+
}
11621137
}
11631138
}
11641139
} else if epInfo.EnableInfraVnet { // remove in future PR

network/endpoint_linux.go

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/Azure/azure-container-networking/network/networkutils"
1717
"github.com/Azure/azure-container-networking/ovsctl"
1818
"github.com/Azure/azure-container-networking/platform"
19+
"github.com/pkg/errors"
1920
"go.uber.org/zap"
2021
)
2122

@@ -548,11 +549,29 @@ func (epInfo *EndpointInfo) GetEndpointInfoByIPImpl(_ []net.IPNet, _ string) (*E
548549
return epInfo, nil
549550
}
550551

551-
// removeSecondaryEndpointFromPodNetNSImpl deletes an existing secondary endpoint from the pod network namespace.
552-
func (ep *endpoint) removeSecondaryEndpointFromPodNetNSImpl(nsc NamespaceClientInterface) error {
553-
secondaryepClient := NewSecondaryEndpointClient(nil, nil, nil, nsc, nil, ep)
554-
if err := secondaryepClient.RemoveInterfacesFromNetnsPath(ep.IfName, ep.NetworkNameSpace); err != nil {
555-
return err
552+
// getEndpointInfoByIfNameImpl returns an array of EndpointInfo for the given endpoint based on the IfName(s) found in the network namespace.
553+
func (nm *networkManager) getEndpointInfoByIfNameImpl(epID, netns, infraNicName string) ([]*EndpointInfo, error) {
554+
epInfo := &EndpointInfo{
555+
EndpointID: epID,
556+
NetNsPath: netns,
557+
NICType: cns.InfraNIC,
558+
IfName: infraNicName,
556559
}
557-
return nil
560+
ret := []*EndpointInfo{}
561+
ret = append(ret, epInfo)
562+
logger.Info("Fetching Secondary Endpoint from", zap.String("NetworkNameSpace", netns))
563+
secondaryepClient := NewSecondaryEndpointClient(nil, nil, nil, nm.nsClient, nil, nil)
564+
ifnames, err := secondaryepClient.FetchInterfacesFromNetnsPath(infraNicName, netns)
565+
if err != nil {
566+
return nil, errors.Wrap(err, "failed to fetch secondary interfaces")
567+
}
568+
// appending all secondary interfaces found in the netns to the return slice
569+
for _, ifName := range ifnames {
570+
ret = append(ret, &EndpointInfo{
571+
NetNsPath: netns,
572+
IfName: ifName,
573+
NICType: cns.NodeNetworkInterfaceFrontendNIC,
574+
})
575+
}
576+
return ret, nil
558577
}

network/endpoint_windows.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -771,7 +771,6 @@ func getPnpDeviceState(instanceID string, plc platform.ExecClient) (string, stri
771771
return devpkeyDeviceIsPresent, devpkeyDeviceProblemCode, nil
772772
}
773773

774-
// removeSecondaryEndpointFromPodNetNSImpl removes an existing secondary endpoint from the pod network namespace.
775-
func (ep *endpoint) removeSecondaryEndpointFromPodNetNSImpl(_ NamespaceClientInterface) error {
776-
return nil
774+
func (nm *networkManager) getEndpointInfoByIfNameImpl(_, _, _ string) ([]*EndpointInfo, error) {
775+
return nil, nil
777776
}

network/errors.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@ package network
33
import "errors"
44

55
var (
6-
errSubnetV6NotFound = errors.New("Couldn't find ipv6 subnet in network info") // nolint
7-
errV6SnatRuleNotSet = errors.New("ipv6 snat rule not set. Might be VM ipv6 address missing") // nolint
8-
ErrEndpointStateNotFound = errors.New("endpoint state could not be found in the statefile")
9-
ErrConnectionFailure = errors.New("couldn't connect to CNS")
10-
ErrGetEndpointStateFailure = errors.New("failure to obtain the endpoint state")
6+
errSubnetV6NotFound = errors.New("couldn't find ipv6 subnet in network info") // nolint
7+
errV6SnatRuleNotSet = errors.New("ipv6 snat rule not set. Might be VM ipv6 address missing") // nolint
8+
ErrEndpointStateNotFound = errors.New("endpoint state could not be found in the statefile")
9+
ErrConnectionFailure = errors.New("couldn't connect to CNS")
10+
ErrEndpointRemovalFailure = errors.New("failed to remove endpoint")
11+
ErrEndpointRetrievalFailure = errors.New("failed to obtain endpoint")
12+
ErrGetEndpointStateFailure = errors.New("failure to obtain the endpoint state")
1113
)

network/manager.go

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"github.com/Azure/azure-container-networking/netlink"
2020
"github.com/Azure/azure-container-networking/platform"
2121
"github.com/Azure/azure-container-networking/store"
22+
cniSkel "github.com/containernetworking/cni/pkg/skel"
2223
"github.com/pkg/errors"
2324
"go.uber.org/zap"
2425
)
@@ -120,9 +121,9 @@ type NetworkManager interface {
120121
IsStatelessCNIMode() bool
121122
SaveState(eps []*endpoint) error
122123
DeleteState(epInfos []*EndpointInfo) error
124+
GetEndpointInfos(networkID string, args *cniSkel.CmdArgs, disableAsyncDelete bool) ([]*EndpointInfo, error)
123125
GetEndpointInfosFromContainerID(containerID string) []*EndpointInfo
124126
GetEndpointState(networkID, containerID, netns string) ([]*EndpointInfo, error)
125-
RemoveSecondaryEndpointFromPodNetNS(ifName string, netns string) error
126127
}
127128

128129
// Creates a new network manager.
@@ -458,6 +459,7 @@ func validateUpdateEndpointState(endpointID string, ifNameToIPInfoMap map[string
458459
// GetEndpointState will make a call to CNS GetEndpointState API in the stateless CNI mode to fetch the endpointInfo
459460
// TODO unit tests need to be added, WorkItem: 26606939
460461
// In stateless cni, container id is the endpoint id, so you can pass in either
462+
// netns is used to populate the NetNsPath field in the returned EndpointInfo structs for SWiftV2 Linux mode to remove the secondary interface
461463
func (nm *networkManager) GetEndpointState(networkID, containerID, netns string) ([]*EndpointInfo, error) {
462464
endpointResponse, err := nm.CnsClient.GetEndpoint(context.TODO(), containerID)
463465
if err != nil {
@@ -841,7 +843,9 @@ func cnsEndpointInfotoCNIEpInfos(endpointInfo restserver.EndpointInfo, endpointI
841843
epInfo.HNSNetworkID = ipInfo.HnsNetworkID
842844
epInfo.MacAddress = net.HardwareAddr(ipInfo.MacAddress)
843845
epInfo.NetworkContainerID = ipInfo.NetworkContainerID
844-
846+
if epInfo.NetNsPath == "" {
847+
epInfo.NetNsPath = netns
848+
}
845849
ret = append(ret, epInfo)
846850
}
847851
return ret
@@ -882,13 +886,44 @@ func generateCNSIPInfoMap(eps []*endpoint) map[string]*restserver.IPInfo {
882886
return ifNametoIPInfoMap
883887
}
884888

885-
// RemoveSecondaryEndpointFromPodNetNS removes the secondary endpoint from the pod netns
886-
func (nm *networkManager) RemoveSecondaryEndpointFromPodNetNS(ifName, netns string) error {
887-
ep := &endpoint{
888-
NetworkNameSpace: netns,
889-
IfName: ifName, // TODO: For stateless cni linux populate IfName here to use in deletion in secondary endpoint client
889+
// GetEndpointInfos gets all endpoint infos associated with a container id and networkID
890+
// In stateless CNI mode, it calls CNS GetEndpointState API to get the endpoint infos or genreate them locally if CNS is unreachable in SwiftV2 mode and AsyncDelete is enabled
891+
// In stateful CNI mode, it fetches the endpoint infos by calling GetEndpointInfosFromContainerID
892+
func (nm *networkManager) GetEndpointInfos(networkID string, args *cniSkel.CmdArgs, disableAsyncDelete bool) ([]*EndpointInfo, error) {
893+
if nm.IsStatelessCNIMode() {
894+
logger.Info("Calling cns getEndpoint API")
895+
epInfos, err := nm.GetEndpointState(networkID, args.ContainerID, args.Netns)
896+
emptyEpInfos := []*EndpointInfo{}
897+
if err != nil {
898+
switch {
899+
// async delete should be disabled for standalone scenarios but will be enabled for AKS scenarios
900+
case errors.Is(err, ErrConnectionFailure) && !disableAsyncDelete:
901+
logger.Info("Failed to connect to CNS, endpoint will be deleted from state file asynchronously", zap.String("containerID", args.ContainerID))
902+
// In SwiftV2 Linux stateless CNI mode, if the plugin cannot connect to CNS,
903+
// we still have to remove the secondary (delegated) interface from the pod’s network namespace in the absence of the endpoint state.
904+
// This is necessary because leaving the delegated NIC in the pod netns can cause the kernel to block rtnetlink operations.
905+
// When that happens, kubelet and containerd hang during sandbox creation or teardown.
906+
// The delegated NIC (SR-IOV VF) used by SwiftV2 for multitenant pods remains tied to the pod namespace,
907+
// triggering hot-unplug/re-register events and leaving the node in an unhealthy state.
908+
// This workaround mitigates the issue by generating a minimal endpointInfo via containerd args and netlink APIs that can be then passed to DeleteEndpoint API.
909+
epInfos, err = nm.getEndpointInfoByIfNameImpl(args.ContainerID, args.Netns, args.IfName)
910+
if err != nil {
911+
logger.Error("Failed to fetch secondary endpoint from pod netns", zap.String("netns", args.Netns), zap.Error(err))
912+
return emptyEpInfos, errors.Wrap(err, "failed to fetch secondary interfaces")
913+
}
914+
case errors.Is(err, ErrEndpointStateNotFound):
915+
logger.Info("Endpoint Not found", zap.String("containerID", args.ContainerID), zap.Error(err))
916+
return emptyEpInfos, nil
917+
default:
918+
logger.Error("Get Endpoint State API returned error", zap.String("containerID", args.ContainerID), zap.Error(err))
919+
return emptyEpInfos, ErrEndpointRetrievalFailure
920+
}
921+
}
922+
for _, epInfo := range epInfos {
923+
logger.Info("Found endpoint to delete", zap.String("IfName", epInfo.IfName), zap.String("EndpointID", epInfo.EndpointID), zap.Any("NICType", epInfo.NICType))
924+
}
925+
return epInfos, nil
890926
}
891-
logger.Info("Removing Secondary Endpoint from", zap.String("NetworkNameSpace: ", netns))
892-
err := ep.removeSecondaryEndpointFromPodNetNSImpl(nm.nsClient)
893-
return err
927+
// Stateful CNI mode
928+
return nm.GetEndpointInfosFromContainerID(args.ContainerID), nil
894929
}

network/manager_mock.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package network
33
import (
44
"github.com/Azure/azure-container-networking/cns"
55
"github.com/Azure/azure-container-networking/common"
6+
cniSkel "github.com/containernetworking/cni/pkg/skel"
67
)
78

89
// MockNetworkManager is a mock structure for Network Manager
@@ -222,6 +223,6 @@ func (nm *MockNetworkManager) GetEndpointState(_, _, _ string) ([]*EndpointInfo,
222223
return []*EndpointInfo{}, nil
223224
}
224225

225-
func (nm *MockNetworkManager) RemoveSecondaryEndpointFromPodNetNS(_, _ string) error {
226-
return nil
226+
func (nm *MockNetworkManager) GetEndpointInfos(_ string, args *cniSkel.CmdArgs, _ bool) ([]*EndpointInfo, error) {
227+
return nm.GetEndpointInfosFromContainerID(args.ContainerID), nil
227228
}

0 commit comments

Comments
 (0)