Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 31 additions & 45 deletions cni/network/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
"github.com/Azure/azure-container-networking/cni/util"
"github.com/Azure/azure-container-networking/cns"
cnscli "github.com/Azure/azure-container-networking/cns/client"
"github.com/Azure/azure-container-networking/cns/fsnotify"
"github.com/Azure/azure-container-networking/common"
"github.com/Azure/azure-container-networking/dhcp"
"github.com/Azure/azure-container-networking/iptables"
Expand Down Expand Up @@ -719,7 +718,7 @@ func (plugin *NetPlugin) createEpInfo(opt *createEpInfoOpt) (*network.EndpointIn
*opt.infraSeen = true
} else {
ifName = "eth" + strconv.Itoa(opt.endpointIndex)
endpointID = plugin.nm.GetEndpointID(opt.args.ContainerID, ifName)
endpointID = plugin.nm.GetEndpointIDByNicType(opt.args.ContainerID, ifName, opt.ifInfo.NICType)
}

endpointInfo := network.EndpointInfo{
Expand Down Expand Up @@ -1046,13 +1045,15 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
networkID, err = plugin.getNetworkID(args.Netns, nil, nwCfg)
if nwInfo, err = plugin.nm.GetNetworkInfo(networkID); err != nil {
if !nwCfg.MultiTenancy {
logger.Error("Failed to query network",
zap.String("network", networkID),
zap.Error(err))
// Log the error if the network is not found.
// if cni hits this, mostly state file would be missing and it can be reboot scenario where
// container runtime tries to delete and create pods which existed before reboot.
// this condition will not apply to stateless CNI since the network struct will be crated on each call
// this error will not apply to stateless CNI since the network struct will be crated on Delete calls
if !plugin.nm.IsStatelessCNIMode() {
logger.Info("Failed to query network",
zap.String("network", networkID),
zap.Error(err))
}
err = nil
}
}
Expand All @@ -1071,37 +1072,11 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
}
logger.Info("Retrieved network info, populating endpoint infos with container id", zap.String("containerID", args.ContainerID))

var epInfos []*network.EndpointInfo
if plugin.nm.IsStatelessCNIMode() {
// network ID is passed in and used only for migration
// otherwise, in stateless, we don't need the network id for deletion
epInfos, err = plugin.nm.GetEndpointState(networkID, args.ContainerID)
// if stateless CNI fail to get the endpoint from CNS for any reason other than Endpoint Not found
if err != nil {
// async delete should be disabled for standalone scenario
if errors.Is(err, network.ErrConnectionFailure) && !nwCfg.DisableAsyncDelete {
logger.Info("failed to connect to CNS", zap.String("containerID", args.ContainerID), zap.Error(err))
addErr := fsnotify.AddFile(args.ContainerID, args.ContainerID, watcherPath)
logger.Info("add containerid file for Asynch delete", zap.String("containerID", args.ContainerID), zap.Error(addErr))
if addErr != nil {
logger.Error("failed to add file to watcher", zap.String("containerID", args.ContainerID), zap.Error(addErr))
return errors.Wrap(addErr, fmt.Sprintf("failed to add file to watcher with containerID %s", args.ContainerID))
}
return nil
}
if errors.Is(err, network.ErrEndpointStateNotFound) {
logger.Info("Endpoint Not found", zap.String("containerID", args.ContainerID), zap.Error(err))
return nil
}
logger.Error("Get Endpoint State API returned error", zap.String("containerID", args.ContainerID), zap.Error(err))
return plugin.RetriableError(fmt.Errorf("failed to delete endpoint: %w", err))
}
} else {
epInfos = plugin.nm.GetEndpointInfosFromContainerID(args.ContainerID)
epInfos, err := plugin.nm.GetEndpointInfos(networkID, args, nwCfg.DisableAsyncDelete)
if err != nil {
return plugin.RetriableError(fmt.Errorf("failed to retrieve endpoint: %w", err))
}

// for when the endpoint is not created, but the ips are already allocated (only works if single network, single infra)
// this block is not applied to stateless CNI
// when the endpoint is not created, but the ips are already allocated (only works if single network, single infra)
if len(epInfos) == 0 {
endpointID := plugin.nm.GetEndpointID(args.ContainerID, args.IfName)
if !nwCfg.MultiTenancy {
Expand All @@ -1127,7 +1102,7 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
if err = plugin.nm.DeleteEndpoint(epInfo.NetworkID, epInfo.EndpointID, epInfo); err != nil {
// An error will not be returned if the endpoint is not found
// return a retriable error so the container runtime will retry this DEL later
// the implementation of this function returns nil if the endpoint doens't exist, so
// the implementation of this function returns nil if the endpoint doesn't exist, so
// we don't have to check that here
return plugin.RetriableError(fmt.Errorf("failed to delete endpoint: %w", err))
}
Expand All @@ -1139,15 +1114,26 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
zap.String("endpointID", epInfo.EndpointID))
telemetryClient.SendEvent("Deleting endpoint: " + epInfo.EndpointID)

// Delegated/secondary nic ips are statically allocated so we don't need to release
// Call into IPAM plugin to release the endpoint's addresses.
if !nwCfg.MultiTenancy && (epInfo.NICType == cns.InfraNIC || epInfo.NICType == "") {
// Delegated/secondary nic ips are statically allocated so we don't need to release
// Call into IPAM plugin to release the endpoint's addresses.
for i := range epInfo.IPAddresses {
logger.Info("Release ip", zap.String("ip", epInfo.IPAddresses[i].IP.String()))
telemetryClient.SendEvent(fmt.Sprintf("Release ip: %s container id: %s endpoint id: %s", epInfo.IPAddresses[i].IP.String(), args.ContainerID, epInfo.EndpointID))
err = plugin.ipamInvoker.Delete(&epInfo.IPAddresses[i], nwCfg, args, nwInfo.Options)
if err != nil {
return plugin.RetriableError(fmt.Errorf("failed to release address: %w", err))
// This is an special case for stateless CNI when Asynchronous DEL to CNS will take place for SwiftV2 after the endpointinfo is recreated locally
// At this point the endpoint is already deleted and since it is created locally the IPAddress is nil. CNS will release the IP asynchronously whenever it is up
if epInfo.IPAddresses == nil && plugin.nm.IsStatelessCNIMode() && !nwCfg.DisableAsyncDelete {
logger.Warn("Release ip Asynchronously by CNS",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why releaseip only for stateless cni and not for stateful cni case? current code calling release for both cases

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When we generate the ednpoint locally for SwiftV2 stateless when CNS is not up, then we will get to this block at the end to release the IP. The datapath already has been cleand up and we just do a Asynch delete via IPAMInvoker.Delete.

zap.String("containerID", args.ContainerID))
telemetryClient.SendEvent(fmt.Sprintf("Release ip for container id: %s asynchronously", args.ContainerID))
if err = plugin.ipamInvoker.Delete(nil, nwCfg, args, nwInfo.Options); err != nil {
return plugin.RetriableError(fmt.Errorf("failed to release address(no endpoint): %w", err))
}
} else {
for i := range epInfo.IPAddresses {
logger.Info("Release ip", zap.String("ip", epInfo.IPAddresses[i].IP.String()))
telemetryClient.SendEvent(fmt.Sprintf("Release ip: %s container id: %s endpoint id: %s", epInfo.IPAddresses[i].IP.String(), args.ContainerID, epInfo.EndpointID))
err = plugin.ipamInvoker.Delete(&epInfo.IPAddresses[i], nwCfg, args, nwInfo.Options)
if err != nil {
return plugin.RetriableError(fmt.Errorf("failed to release address: %w", err))
}
}
}
} else if epInfo.EnableInfraVnet { // remove in future PR
Expand Down
28 changes: 28 additions & 0 deletions network/endpoint_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/Azure/azure-container-networking/network/networkutils"
"github.com/Azure/azure-container-networking/ovsctl"
"github.com/Azure/azure-container-networking/platform"
"github.com/pkg/errors"
"go.uber.org/zap"
)

Expand Down Expand Up @@ -547,3 +548,30 @@ func getDefaultGateway(routes []RouteInfo) net.IP {
func (epInfo *EndpointInfo) GetEndpointInfoByIPImpl(_ []net.IPNet, _ string) (*EndpointInfo, error) {
return epInfo, nil
}

// getEndpointInfoByIfNameImpl returns an array of EndpointInfo for the given endpoint based on the IfName(s) found in the network namespace.
func (nm *networkManager) getEndpointInfoByIfNameImpl(epID, netns, infraNicName string) ([]*EndpointInfo, error) {
epInfo := &EndpointInfo{
EndpointID: epID,
NetNsPath: netns,
NICType: cns.InfraNIC,
IfName: infraNicName,
}
ret := []*EndpointInfo{}
ret = append(ret, epInfo)
logger.Info("Fetching Secondary Endpoint from", zap.String("NetworkNameSpace", netns))
secondaryepClient := NewSecondaryEndpointClient(nil, nil, nil, nm.nsClient, nil, nil)
ifnames, err := secondaryepClient.FetchInterfacesFromNetnsPath(infraNicName, netns)
if err != nil {
return nil, errors.Wrap(err, "failed to fetch secondary interfaces")
}
// appending all secondary interfaces found in the netns to the return slice
for _, ifName := range ifnames {
ret = append(ret, &EndpointInfo{
NetNsPath: netns,
IfName: ifName,
NICType: cns.NodeNetworkInterfaceFrontendNIC,
})
}
return ret, nil
}
4 changes: 4 additions & 0 deletions network/endpoint_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -770,3 +770,7 @@ func getPnpDeviceState(instanceID string, plc platform.ExecClient) (string, stri
logger.Info("Retrieved device problem code", zap.String("code", devpkeyDeviceProblemCode))
return devpkeyDeviceIsPresent, devpkeyDeviceProblemCode, nil
}

func (nm *networkManager) getEndpointInfoByIfNameImpl(_, _, _ string) ([]*EndpointInfo, error) {
return nil, nil
}
12 changes: 7 additions & 5 deletions network/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@ package network
import "errors"

var (
errSubnetV6NotFound = errors.New("Couldn't find ipv6 subnet in network info") // nolint
errV6SnatRuleNotSet = errors.New("ipv6 snat rule not set. Might be VM ipv6 address missing") // nolint
ErrEndpointStateNotFound = errors.New("endpoint state could not be found in the statefile")
ErrConnectionFailure = errors.New("couldn't connect to CNS")
ErrGetEndpointStateFailure = errors.New("failure to obtain the endpoint state")
errSubnetV6NotFound = errors.New("couldn't find ipv6 subnet in network info") // nolint
errV6SnatRuleNotSet = errors.New("ipv6 snat rule not set. Might be VM ipv6 address missing") // nolint
ErrEndpointStateNotFound = errors.New("endpoint state could not be found in the statefile")
ErrConnectionFailure = errors.New("couldn't connect to CNS")
ErrEndpointRemovalFailure = errors.New("failed to remove endpoint")
ErrEndpointRetrievalFailure = errors.New("failed to obtain endpoint")
ErrGetEndpointStateFailure = errors.New("failure to obtain the endpoint state")
)
78 changes: 68 additions & 10 deletions network/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/Azure/azure-container-networking/netlink"
"github.com/Azure/azure-container-networking/platform"
"github.com/Azure/azure-container-networking/store"
cniSkel "github.com/containernetworking/cni/pkg/skel"
"github.com/pkg/errors"
"go.uber.org/zap"
)
Expand Down Expand Up @@ -116,11 +117,11 @@ type NetworkManager interface {
UpdateEndpoint(networkID string, existingEpInfo *EndpointInfo, targetEpInfo *EndpointInfo) error
GetNumberOfEndpoints(ifName string, networkID string) int
GetEndpointID(containerID, ifName string) string
GetEndpointIDByNicType(containerID, ifName string, nicType cns.NICType) string
IsStatelessCNIMode() bool
SaveState(eps []*endpoint) error
DeleteState(epInfos []*EndpointInfo) error
GetEndpointInfosFromContainerID(containerID string) []*EndpointInfo
GetEndpointState(networkID, containerID string) ([]*EndpointInfo, error)
GetEndpointInfos(networkID string, args *cniSkel.CmdArgs, disableAsyncDelete bool) ([]*EndpointInfo, error)
}

// Creates a new network manager.
Expand Down Expand Up @@ -456,7 +457,8 @@ func validateUpdateEndpointState(endpointID string, ifNameToIPInfoMap map[string
// GetEndpointState will make a call to CNS GetEndpointState API in the stateless CNI mode to fetch the endpointInfo
// TODO unit tests need to be added, WorkItem: 26606939
// In stateless cni, container id is the endpoint id, so you can pass in either
func (nm *networkManager) GetEndpointState(networkID, containerID string) ([]*EndpointInfo, error) {
// netns is used to populate the NetNsPath field in the returned EndpointInfo structs for SWiftV2 Linux mode to remove the secondary interface
func (nm *networkManager) GetEndpointState(networkID, containerID, netns string) ([]*EndpointInfo, error) {
endpointResponse, err := nm.CnsClient.GetEndpoint(context.TODO(), containerID)
if err != nil {
if endpointResponse.Response.ReturnCode == types.NotFound {
Expand All @@ -467,7 +469,7 @@ func (nm *networkManager) GetEndpointState(networkID, containerID string) ([]*En
}
return nil, ErrGetEndpointStateFailure
}
epInfos := cnsEndpointInfotoCNIEpInfos(endpointResponse.EndpointInfo, containerID)
epInfos := cnsEndpointInfotoCNIEpInfos(endpointResponse.EndpointInfo, containerID, netns)

for i := 0; i < len(epInfos); i++ {
if epInfos[i].NICType == cns.InfraNIC {
Expand Down Expand Up @@ -515,7 +517,7 @@ func (nm *networkManager) DeleteEndpointStateless(networkID string, epInfo *Endp
nw := &network{
Id: networkID, // currently unused in stateless cni
HnsId: epInfo.HNSNetworkID,
Mode: opModeTransparentVlan,
Mode: opModeTransparent,
SnatBridgeIP: "",
NetNs: dummyGUID, // to trigger hns v2, windows
extIf: &externalInterface{
Expand All @@ -530,6 +532,7 @@ func (nm *networkManager) DeleteEndpointStateless(networkID string, epInfo *Endp
HNSNetworkID: epInfo.HNSNetworkID, // unused (we use nw.HnsId for deleting the network)
HostIfName: epInfo.HostIfName,
LocalIP: "",
IPAddresses: epInfo.IPAddresses,
VlanID: 0,
AllowInboundFromHostToNC: false, // stateless currently does not support apipa
AllowInboundFromNCToHost: false,
Expand All @@ -538,11 +541,12 @@ func (nm *networkManager) DeleteEndpointStateless(networkID string, epInfo *Endp
NetworkContainerID: epInfo.NetworkContainerID, // we don't use this as long as AllowInboundFromHostToNC and AllowInboundFromNCToHost are false
NetNs: dummyGUID, // to trigger hnsv2, windows
NICType: epInfo.NICType,
NetworkNameSpace: epInfo.NetNsPath,
IfName: epInfo.IfName, // TODO: For stateless cni linux populate IfName here to use in deletion in secondary endpoint client
}
logger.Info("Deleting endpoint with", zap.String("Endpoint Info: ", epInfo.PrettyString()), zap.String("HNISID : ", ep.HnsId))

err := nw.deleteEndpointImpl(netlink.NewNetlink(), platform.NewExecClient(logger), nil, nil, nil, nil, nil, ep)
err := nw.deleteEndpointImpl(nm.netlink, nm.plClient, nil, nm.netio, nm.nsClient, nm.iptablesClient, nm.dhcpClient, ep)
if err != nil {
return err
}
Expand All @@ -563,7 +567,7 @@ func (nm *networkManager) GetEndpointInfo(networkID, endpointID string) (*Endpoi

if nm.IsStatelessCNIMode() {
logger.Info("calling cns getEndpoint API")
epInfos, err := nm.GetEndpointState(networkID, endpointID)
epInfos, err := nm.GetEndpointState(networkID, endpointID, "")
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -746,6 +750,16 @@ func (nm *networkManager) GetEndpointID(containerID, ifName string) string {
return containerID + "-" + ifName
}

// GetEndpointIDByNicType returns a unique endpoint ID based on the CNI mode and NIC type.
func (nm *networkManager) GetEndpointIDByNicType(containerID, ifName string, nicType cns.NICType) string {
// For stateless CNI, secondary NICs use containerID-ifName as endpointID.
if nm.IsStatelessCNIMode() && nicType != cns.InfraNIC {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for stateful cni, this is not an issue? what's the impact if we remove statelesscnimode check here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to make sure we only add - ifname for stateless. Stateful already has this.

return containerID + "-" + ifName
}
// For InfraNIC, use GetEndpointID() logic.
return nm.GetEndpointID(containerID, ifName)
}

// saves the map of network ids to endpoints to the state file
func (nm *networkManager) SaveState(eps []*endpoint) error {
nm.Lock()
Expand Down Expand Up @@ -797,7 +811,7 @@ func (nm *networkManager) DeleteState(epInfos []*EndpointInfo) error {
}

// called to convert a cns restserver EndpointInfo into a network EndpointInfo
func cnsEndpointInfotoCNIEpInfos(endpointInfo restserver.EndpointInfo, endpointID string) []*EndpointInfo {
func cnsEndpointInfotoCNIEpInfos(endpointInfo restserver.EndpointInfo, endpointID, netns string) []*EndpointInfo {
ret := []*EndpointInfo{}

for ifName, ipInfo := range endpointInfo.IfnameToIPMap {
Expand Down Expand Up @@ -827,13 +841,15 @@ func cnsEndpointInfotoCNIEpInfos(endpointInfo restserver.EndpointInfo, endpointI
epInfo.HNSNetworkID = ipInfo.HnsNetworkID
epInfo.MacAddress = net.HardwareAddr(ipInfo.MacAddress)
epInfo.NetworkContainerID = ipInfo.NetworkContainerID

if epInfo.NetNsPath == "" {
epInfo.NetNsPath = netns
}
ret = append(ret, epInfo)
}
return ret
}

// gets all endpoint infos associated with a container id and populates the network id field
// gets all endpoint infos associated with a container id and populates the network id field in Statefull CNI mode
// nictype may be empty in which case it is likely of type "infra"
func (nm *networkManager) GetEndpointInfosFromContainerID(containerID string) []*EndpointInfo {
ret := []*EndpointInfo{}
Expand Down Expand Up @@ -867,3 +883,45 @@ func generateCNSIPInfoMap(eps []*endpoint) map[string]*restserver.IPInfo {

return ifNametoIPInfoMap
}

// GetEndpointInfos gets all endpoint infos associated with a container id and networkID
// In stateless CNI mode, it calls CNS GetEndpointState API to get the endpoint infos or genreate them locally if CNS is unreachable in SwiftV2 mode and AsyncDelete is enabled
// In stateful CNI mode, it fetches the endpoint infos by calling GetEndpointInfosFromContainerID
func (nm *networkManager) GetEndpointInfos(networkID string, args *cniSkel.CmdArgs, disableAsyncDelete bool) ([]*EndpointInfo, error) {
if nm.IsStatelessCNIMode() {
logger.Info("Calling cns getEndpoint API")
epInfos, err := nm.GetEndpointState(networkID, args.ContainerID, args.Netns)
emptyEpInfos := []*EndpointInfo{}
if err != nil {
switch {
// async delete should be disabled for standalone scenarios but will be enabled for AKS scenarios
case errors.Is(err, ErrConnectionFailure) && !disableAsyncDelete:
logger.Info("Failed to connect to CNS, endpoint will be deleted from state file asynchronously", zap.String("containerID", args.ContainerID))
// In SwiftV2 Linux stateless CNI mode, if the plugin cannot connect to CNS,
// we still have to remove the secondary (delegated) interface from the pod’s network namespace in the absence of the endpoint state.
// This is necessary because leaving the delegated NIC in the pod netns can cause the kernel to block rtnetlink operations.
// When that happens, kubelet and containerd hang during sandbox creation or teardown.
// The delegated NIC (SR-IOV VF) used by SwiftV2 for multitenant pods remains tied to the pod namespace,
// triggering hot-unplug/re-register events and leaving the node in an unhealthy state.
// This workaround mitigates the issue by generating a minimal endpointInfo via containerd args and netlink APIs that can be then passed to DeleteEndpoint API.
epInfos, err = nm.getEndpointInfoByIfNameImpl(args.ContainerID, args.Netns, args.IfName)
if err != nil {
logger.Error("Failed to fetch secondary endpoint from pod netns", zap.String("netns", args.Netns), zap.Error(err))
return emptyEpInfos, errors.Wrap(err, "failed to fetch secondary interfaces")
}
case errors.Is(err, ErrEndpointStateNotFound):
logger.Info("Endpoint Not found", zap.String("containerID", args.ContainerID), zap.Error(err))
return emptyEpInfos, nil
default:
logger.Error("Get Endpoint State API returned error", zap.String("containerID", args.ContainerID), zap.Error(err))
return emptyEpInfos, ErrEndpointRetrievalFailure
}
}
for _, epInfo := range epInfos {
logger.Info("Found endpoint to delete", zap.String("IfName", epInfo.IfName), zap.String("EndpointID", epInfo.EndpointID), zap.Any("NICType", epInfo.NICType))
}
return epInfos, nil
}
// Stateful CNI mode
return nm.GetEndpointInfosFromContainerID(args.ContainerID), nil
}
Loading
Loading