-
Notifications
You must be signed in to change notification settings - Fork 260
fix: fixing Stateless CNI delete in SwiftV2 scenario #3967
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,7 +18,6 @@ import ( | |
| "github.com/Azure/azure-container-networking/cni/util" | ||
| "github.com/Azure/azure-container-networking/cns" | ||
| cnscli "github.com/Azure/azure-container-networking/cns/client" | ||
| "github.com/Azure/azure-container-networking/cns/fsnotify" | ||
| "github.com/Azure/azure-container-networking/common" | ||
| "github.com/Azure/azure-container-networking/dhcp" | ||
| "github.com/Azure/azure-container-networking/iptables" | ||
|
|
@@ -719,7 +718,7 @@ func (plugin *NetPlugin) createEpInfo(opt *createEpInfoOpt) (*network.EndpointIn | |
| *opt.infraSeen = true | ||
| } else { | ||
| ifName = "eth" + strconv.Itoa(opt.endpointIndex) | ||
| endpointID = plugin.nm.GetEndpointID(opt.args.ContainerID, ifName) | ||
| endpointID = plugin.nm.GetEndpointIDByNicType(opt.args.ContainerID, ifName, opt.ifInfo.NICType) | ||
| } | ||
|
|
||
| endpointInfo := network.EndpointInfo{ | ||
|
|
@@ -1046,13 +1045,15 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error { | |
| networkID, err = plugin.getNetworkID(args.Netns, nil, nwCfg) | ||
| if nwInfo, err = plugin.nm.GetNetworkInfo(networkID); err != nil { | ||
| if !nwCfg.MultiTenancy { | ||
| logger.Error("Failed to query network", | ||
| zap.String("network", networkID), | ||
| zap.Error(err)) | ||
| // Log the error if the network is not found. | ||
| // if cni hits this, mostly state file would be missing and it can be reboot scenario where | ||
| // container runtime tries to delete and create pods which existed before reboot. | ||
| // this condition will not apply to stateless CNI since the network struct will be crated on each call | ||
| // this error will not apply to stateless CNI since the network struct will be crated on Delete calls | ||
| if !plugin.nm.IsStatelessCNIMode() { | ||
| logger.Info("Failed to query network", | ||
| zap.String("network", networkID), | ||
| zap.Error(err)) | ||
| } | ||
| err = nil | ||
| } | ||
| } | ||
|
|
@@ -1071,37 +1072,11 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error { | |
| } | ||
| logger.Info("Retrieved network info, populating endpoint infos with container id", zap.String("containerID", args.ContainerID)) | ||
|
|
||
| var epInfos []*network.EndpointInfo | ||
| if plugin.nm.IsStatelessCNIMode() { | ||
| // network ID is passed in and used only for migration | ||
| // otherwise, in stateless, we don't need the network id for deletion | ||
| epInfos, err = plugin.nm.GetEndpointState(networkID, args.ContainerID) | ||
| // if stateless CNI fail to get the endpoint from CNS for any reason other than Endpoint Not found | ||
| if err != nil { | ||
| // async delete should be disabled for standalone scenario | ||
| if errors.Is(err, network.ErrConnectionFailure) && !nwCfg.DisableAsyncDelete { | ||
| logger.Info("failed to connect to CNS", zap.String("containerID", args.ContainerID), zap.Error(err)) | ||
| addErr := fsnotify.AddFile(args.ContainerID, args.ContainerID, watcherPath) | ||
| logger.Info("add containerid file for Asynch delete", zap.String("containerID", args.ContainerID), zap.Error(addErr)) | ||
| if addErr != nil { | ||
| logger.Error("failed to add file to watcher", zap.String("containerID", args.ContainerID), zap.Error(addErr)) | ||
| return errors.Wrap(addErr, fmt.Sprintf("failed to add file to watcher with containerID %s", args.ContainerID)) | ||
| } | ||
| return nil | ||
| } | ||
| if errors.Is(err, network.ErrEndpointStateNotFound) { | ||
| logger.Info("Endpoint Not found", zap.String("containerID", args.ContainerID), zap.Error(err)) | ||
| return nil | ||
| } | ||
| logger.Error("Get Endpoint State API returned error", zap.String("containerID", args.ContainerID), zap.Error(err)) | ||
| return plugin.RetriableError(fmt.Errorf("failed to delete endpoint: %w", err)) | ||
| } | ||
| } else { | ||
| epInfos = plugin.nm.GetEndpointInfosFromContainerID(args.ContainerID) | ||
| epInfos, err := plugin.nm.GetEndpointInfos(networkID, args, nwCfg.DisableAsyncDelete) | ||
| if err != nil { | ||
| return plugin.RetriableError(fmt.Errorf("failed to retrieve endpoint: %w", err)) | ||
| } | ||
|
|
||
| // for when the endpoint is not created, but the ips are already allocated (only works if single network, single infra) | ||
| // this block is not applied to stateless CNI | ||
| // when the endpoint is not created, but the ips are already allocated (only works if single network, single infra) | ||
| if len(epInfos) == 0 { | ||
| endpointID := plugin.nm.GetEndpointID(args.ContainerID, args.IfName) | ||
| if !nwCfg.MultiTenancy { | ||
|
|
@@ -1127,7 +1102,7 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error { | |
| if err = plugin.nm.DeleteEndpoint(epInfo.NetworkID, epInfo.EndpointID, epInfo); err != nil { | ||
| // An error will not be returned if the endpoint is not found | ||
| // return a retriable error so the container runtime will retry this DEL later | ||
| // the implementation of this function returns nil if the endpoint doens't exist, so | ||
| // the implementation of this function returns nil if the endpoint doesn't exist, so | ||
behzad-mir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // we don't have to check that here | ||
| return plugin.RetriableError(fmt.Errorf("failed to delete endpoint: %w", err)) | ||
| } | ||
|
|
@@ -1139,15 +1114,26 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error { | |
| zap.String("endpointID", epInfo.EndpointID)) | ||
| telemetryClient.SendEvent("Deleting endpoint: " + epInfo.EndpointID) | ||
|
|
||
| // Delegated/secondary nic ips are statically allocated so we don't need to release | ||
| // Call into IPAM plugin to release the endpoint's addresses. | ||
| if !nwCfg.MultiTenancy && (epInfo.NICType == cns.InfraNIC || epInfo.NICType == "") { | ||
| // Delegated/secondary nic ips are statically allocated so we don't need to release | ||
| // Call into IPAM plugin to release the endpoint's addresses. | ||
| for i := range epInfo.IPAddresses { | ||
| logger.Info("Release ip", zap.String("ip", epInfo.IPAddresses[i].IP.String())) | ||
| telemetryClient.SendEvent(fmt.Sprintf("Release ip: %s container id: %s endpoint id: %s", epInfo.IPAddresses[i].IP.String(), args.ContainerID, epInfo.EndpointID)) | ||
| err = plugin.ipamInvoker.Delete(&epInfo.IPAddresses[i], nwCfg, args, nwInfo.Options) | ||
| if err != nil { | ||
| return plugin.RetriableError(fmt.Errorf("failed to release address: %w", err)) | ||
| // This is an special case for stateless CNI when Asynchronous DEL to CNS will take place for SwiftV2 after the endpointinfo is recreated locally | ||
| // At this point the endpoint is already deleted and since it is created locally the IPAddress is nil. CNS will release the IP asynchronously whenever it is up | ||
| if epInfo.IPAddresses == nil && plugin.nm.IsStatelessCNIMode() && !nwCfg.DisableAsyncDelete { | ||
| logger.Warn("Release ip Asynchronously by CNS", | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why releaseip only for stateless cni and not for stateful cni case? current code calling release for both cases
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When we generate the ednpoint locally for SwiftV2 stateless when CNS is not up, then we will get to this block at the end to release the IP. The datapath already has been cleand up and we just do a Asynch delete via IPAMInvoker.Delete. |
||
| zap.String("containerID", args.ContainerID)) | ||
| telemetryClient.SendEvent(fmt.Sprintf("Release ip for container id: %s asynchronously", args.ContainerID)) | ||
| if err = plugin.ipamInvoker.Delete(nil, nwCfg, args, nwInfo.Options); err != nil { | ||
| return plugin.RetriableError(fmt.Errorf("failed to release address(no endpoint): %w", err)) | ||
| } | ||
| } else { | ||
| for i := range epInfo.IPAddresses { | ||
| logger.Info("Release ip", zap.String("ip", epInfo.IPAddresses[i].IP.String())) | ||
| telemetryClient.SendEvent(fmt.Sprintf("Release ip: %s container id: %s endpoint id: %s", epInfo.IPAddresses[i].IP.String(), args.ContainerID, epInfo.EndpointID)) | ||
QxBytes marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| err = plugin.ipamInvoker.Delete(&epInfo.IPAddresses[i], nwCfg, args, nwInfo.Options) | ||
| if err != nil { | ||
| return plugin.RetriableError(fmt.Errorf("failed to release address: %w", err)) | ||
| } | ||
| } | ||
| } | ||
| } else if epInfo.EnableInfraVnet { // remove in future PR | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,7 @@ import ( | |
| "github.com/Azure/azure-container-networking/netlink" | ||
| "github.com/Azure/azure-container-networking/platform" | ||
| "github.com/Azure/azure-container-networking/store" | ||
| cniSkel "github.com/containernetworking/cni/pkg/skel" | ||
| "github.com/pkg/errors" | ||
| "go.uber.org/zap" | ||
| ) | ||
|
|
@@ -116,11 +117,11 @@ type NetworkManager interface { | |
| UpdateEndpoint(networkID string, existingEpInfo *EndpointInfo, targetEpInfo *EndpointInfo) error | ||
| GetNumberOfEndpoints(ifName string, networkID string) int | ||
| GetEndpointID(containerID, ifName string) string | ||
| GetEndpointIDByNicType(containerID, ifName string, nicType cns.NICType) string | ||
QxBytes marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| IsStatelessCNIMode() bool | ||
| SaveState(eps []*endpoint) error | ||
| DeleteState(epInfos []*EndpointInfo) error | ||
| GetEndpointInfosFromContainerID(containerID string) []*EndpointInfo | ||
| GetEndpointState(networkID, containerID string) ([]*EndpointInfo, error) | ||
| GetEndpointInfos(networkID string, args *cniSkel.CmdArgs, disableAsyncDelete bool) ([]*EndpointInfo, error) | ||
| } | ||
|
|
||
| // Creates a new network manager. | ||
|
|
@@ -456,7 +457,8 @@ func validateUpdateEndpointState(endpointID string, ifNameToIPInfoMap map[string | |
| // GetEndpointState will make a call to CNS GetEndpointState API in the stateless CNI mode to fetch the endpointInfo | ||
| // TODO unit tests need to be added, WorkItem: 26606939 | ||
| // In stateless cni, container id is the endpoint id, so you can pass in either | ||
| func (nm *networkManager) GetEndpointState(networkID, containerID string) ([]*EndpointInfo, error) { | ||
| // netns is used to populate the NetNsPath field in the returned EndpointInfo structs for SWiftV2 Linux mode to remove the secondary interface | ||
| func (nm *networkManager) GetEndpointState(networkID, containerID, netns string) ([]*EndpointInfo, error) { | ||
behzad-mir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| endpointResponse, err := nm.CnsClient.GetEndpoint(context.TODO(), containerID) | ||
| if err != nil { | ||
| if endpointResponse.Response.ReturnCode == types.NotFound { | ||
|
|
@@ -467,7 +469,7 @@ func (nm *networkManager) GetEndpointState(networkID, containerID string) ([]*En | |
| } | ||
| return nil, ErrGetEndpointStateFailure | ||
| } | ||
| epInfos := cnsEndpointInfotoCNIEpInfos(endpointResponse.EndpointInfo, containerID) | ||
| epInfos := cnsEndpointInfotoCNIEpInfos(endpointResponse.EndpointInfo, containerID, netns) | ||
|
|
||
| for i := 0; i < len(epInfos); i++ { | ||
| if epInfos[i].NICType == cns.InfraNIC { | ||
|
|
@@ -515,7 +517,7 @@ func (nm *networkManager) DeleteEndpointStateless(networkID string, epInfo *Endp | |
| nw := &network{ | ||
| Id: networkID, // currently unused in stateless cni | ||
| HnsId: epInfo.HNSNetworkID, | ||
behzad-mir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| Mode: opModeTransparentVlan, | ||
| Mode: opModeTransparent, | ||
behzad-mir marked this conversation as resolved.
Show resolved
Hide resolved
behzad-mir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| SnatBridgeIP: "", | ||
| NetNs: dummyGUID, // to trigger hns v2, windows | ||
| extIf: &externalInterface{ | ||
|
|
@@ -530,6 +532,7 @@ func (nm *networkManager) DeleteEndpointStateless(networkID string, epInfo *Endp | |
| HNSNetworkID: epInfo.HNSNetworkID, // unused (we use nw.HnsId for deleting the network) | ||
| HostIfName: epInfo.HostIfName, | ||
| LocalIP: "", | ||
behzad-mir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| IPAddresses: epInfo.IPAddresses, | ||
| VlanID: 0, | ||
| AllowInboundFromHostToNC: false, // stateless currently does not support apipa | ||
| AllowInboundFromNCToHost: false, | ||
|
|
@@ -538,11 +541,12 @@ func (nm *networkManager) DeleteEndpointStateless(networkID string, epInfo *Endp | |
| NetworkContainerID: epInfo.NetworkContainerID, // we don't use this as long as AllowInboundFromHostToNC and AllowInboundFromNCToHost are false | ||
| NetNs: dummyGUID, // to trigger hnsv2, windows | ||
| NICType: epInfo.NICType, | ||
| NetworkNameSpace: epInfo.NetNsPath, | ||
| IfName: epInfo.IfName, // TODO: For stateless cni linux populate IfName here to use in deletion in secondary endpoint client | ||
behzad-mir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| logger.Info("Deleting endpoint with", zap.String("Endpoint Info: ", epInfo.PrettyString()), zap.String("HNISID : ", ep.HnsId)) | ||
|
|
||
| err := nw.deleteEndpointImpl(netlink.NewNetlink(), platform.NewExecClient(logger), nil, nil, nil, nil, nil, ep) | ||
| err := nw.deleteEndpointImpl(nm.netlink, nm.plClient, nil, nm.netio, nm.nsClient, nm.iptablesClient, nm.dhcpClient, ep) | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
@@ -563,7 +567,7 @@ func (nm *networkManager) GetEndpointInfo(networkID, endpointID string) (*Endpoi | |
|
|
||
| if nm.IsStatelessCNIMode() { | ||
| logger.Info("calling cns getEndpoint API") | ||
| epInfos, err := nm.GetEndpointState(networkID, endpointID) | ||
| epInfos, err := nm.GetEndpointState(networkID, endpointID, "") | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
|
|
@@ -746,6 +750,16 @@ func (nm *networkManager) GetEndpointID(containerID, ifName string) string { | |
| return containerID + "-" + ifName | ||
| } | ||
|
|
||
| // GetEndpointIDByNicType returns a unique endpoint ID based on the CNI mode and NIC type. | ||
| func (nm *networkManager) GetEndpointIDByNicType(containerID, ifName string, nicType cns.NICType) string { | ||
| // For stateless CNI, secondary NICs use containerID-ifName as endpointID. | ||
| if nm.IsStatelessCNIMode() && nicType != cns.InfraNIC { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for stateful cni, this is not an issue? what's the impact if we remove statelesscnimode check here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wanted to make sure we only add - ifname for stateless. Stateful already has this. |
||
| return containerID + "-" + ifName | ||
| } | ||
| // For InfraNIC, use GetEndpointID() logic. | ||
| return nm.GetEndpointID(containerID, ifName) | ||
| } | ||
|
|
||
| // saves the map of network ids to endpoints to the state file | ||
| func (nm *networkManager) SaveState(eps []*endpoint) error { | ||
| nm.Lock() | ||
|
|
@@ -797,7 +811,7 @@ func (nm *networkManager) DeleteState(epInfos []*EndpointInfo) error { | |
| } | ||
|
|
||
| // called to convert a cns restserver EndpointInfo into a network EndpointInfo | ||
| func cnsEndpointInfotoCNIEpInfos(endpointInfo restserver.EndpointInfo, endpointID string) []*EndpointInfo { | ||
| func cnsEndpointInfotoCNIEpInfos(endpointInfo restserver.EndpointInfo, endpointID, netns string) []*EndpointInfo { | ||
| ret := []*EndpointInfo{} | ||
|
|
||
| for ifName, ipInfo := range endpointInfo.IfnameToIPMap { | ||
|
|
@@ -827,13 +841,15 @@ func cnsEndpointInfotoCNIEpInfos(endpointInfo restserver.EndpointInfo, endpointI | |
| epInfo.HNSNetworkID = ipInfo.HnsNetworkID | ||
| epInfo.MacAddress = net.HardwareAddr(ipInfo.MacAddress) | ||
| epInfo.NetworkContainerID = ipInfo.NetworkContainerID | ||
|
|
||
| if epInfo.NetNsPath == "" { | ||
| epInfo.NetNsPath = netns | ||
QxBytes marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| ret = append(ret, epInfo) | ||
| } | ||
| return ret | ||
| } | ||
|
|
||
| // gets all endpoint infos associated with a container id and populates the network id field | ||
| // gets all endpoint infos associated with a container id and populates the network id field in Statefull CNI mode | ||
| // nictype may be empty in which case it is likely of type "infra" | ||
| func (nm *networkManager) GetEndpointInfosFromContainerID(containerID string) []*EndpointInfo { | ||
| ret := []*EndpointInfo{} | ||
|
|
@@ -867,3 +883,45 @@ func generateCNSIPInfoMap(eps []*endpoint) map[string]*restserver.IPInfo { | |
|
|
||
| return ifNametoIPInfoMap | ||
| } | ||
|
|
||
| // GetEndpointInfos gets all endpoint infos associated with a container id and networkID | ||
| // In stateless CNI mode, it calls CNS GetEndpointState API to get the endpoint infos or genreate them locally if CNS is unreachable in SwiftV2 mode and AsyncDelete is enabled | ||
| // In stateful CNI mode, it fetches the endpoint infos by calling GetEndpointInfosFromContainerID | ||
| func (nm *networkManager) GetEndpointInfos(networkID string, args *cniSkel.CmdArgs, disableAsyncDelete bool) ([]*EndpointInfo, error) { | ||
QxBytes marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if nm.IsStatelessCNIMode() { | ||
| logger.Info("Calling cns getEndpoint API") | ||
| epInfos, err := nm.GetEndpointState(networkID, args.ContainerID, args.Netns) | ||
| emptyEpInfos := []*EndpointInfo{} | ||
| if err != nil { | ||
| switch { | ||
| // async delete should be disabled for standalone scenarios but will be enabled for AKS scenarios | ||
| case errors.Is(err, ErrConnectionFailure) && !disableAsyncDelete: | ||
| logger.Info("Failed to connect to CNS, endpoint will be deleted from state file asynchronously", zap.String("containerID", args.ContainerID)) | ||
| // In SwiftV2 Linux stateless CNI mode, if the plugin cannot connect to CNS, | ||
| // we still have to remove the secondary (delegated) interface from the pod’s network namespace in the absence of the endpoint state. | ||
| // This is necessary because leaving the delegated NIC in the pod netns can cause the kernel to block rtnetlink operations. | ||
| // When that happens, kubelet and containerd hang during sandbox creation or teardown. | ||
| // The delegated NIC (SR-IOV VF) used by SwiftV2 for multitenant pods remains tied to the pod namespace, | ||
| // triggering hot-unplug/re-register events and leaving the node in an unhealthy state. | ||
| // This workaround mitigates the issue by generating a minimal endpointInfo via containerd args and netlink APIs that can be then passed to DeleteEndpoint API. | ||
| epInfos, err = nm.getEndpointInfoByIfNameImpl(args.ContainerID, args.Netns, args.IfName) | ||
| if err != nil { | ||
| logger.Error("Failed to fetch secondary endpoint from pod netns", zap.String("netns", args.Netns), zap.Error(err)) | ||
QxBytes marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| return emptyEpInfos, errors.Wrap(err, "failed to fetch secondary interfaces") | ||
| } | ||
| case errors.Is(err, ErrEndpointStateNotFound): | ||
| logger.Info("Endpoint Not found", zap.String("containerID", args.ContainerID), zap.Error(err)) | ||
| return emptyEpInfos, nil | ||
| default: | ||
| logger.Error("Get Endpoint State API returned error", zap.String("containerID", args.ContainerID), zap.Error(err)) | ||
| return emptyEpInfos, ErrEndpointRetrievalFailure | ||
| } | ||
| } | ||
| for _, epInfo := range epInfos { | ||
| logger.Info("Found endpoint to delete", zap.String("IfName", epInfo.IfName), zap.String("EndpointID", epInfo.EndpointID), zap.Any("NICType", epInfo.NICType)) | ||
| } | ||
| return epInfos, nil | ||
| } | ||
| // Stateful CNI mode | ||
| return nm.GetEndpointInfosFromContainerID(args.ContainerID), nil | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.