Skip to content

Commit 305303f

Browse files
committed
fix: fixing Stateless CNI delete in SwiftV2 scenario
1 parent bd7e2ae commit 305303f

File tree

8 files changed

+201
-39
lines changed

8 files changed

+201
-39
lines changed

cni/network/invoker_cns.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ func setHostOptions(ncSubnetPrefix *net.IPNet, options map[string]interface{}, i
279279
}
280280

281281
// Delete calls into the releaseipconfiguration API in CNS
282-
func (invoker *CNSIPAMInvoker) Delete(address *net.IPNet, nwCfg *cni.NetworkConfig, args *cniSkel.CmdArgs, _ map[string]interface{}) error { //nolint
282+
func (invoker *CNSIPAMInvoker) Delete(address *net.IPNet, nwCfg *cni.NetworkConfig, args *cniSkel.CmdArgs, options map[string]interface{}) error { //nolint
283283
var connectionErr *cnscli.ConnectionFailureErr
284284
// Parse Pod arguments.
285285
podInfo := cns.KubernetesPodInfo{
@@ -301,6 +301,11 @@ func (invoker *CNSIPAMInvoker) Delete(address *net.IPNet, nwCfg *cni.NetworkConf
301301
PodInterfaceID: GetEndpointID(args),
302302
InfraContainerID: args.ContainerID,
303303
}
304+
if options != nil {
305+
if v, ok := options["asyncDeleteFileID"].(string); ok && v != "" {
306+
ipConfigs.PodInterfaceID = v
307+
}
308+
}
304309

305310
if address != nil {
306311
ipConfigs.DesiredIPAddresses = append(ipConfigs.DesiredIPAddresses, address.IP.String())

cni/network/network.go

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import (
1818
"github.com/Azure/azure-container-networking/cni/util"
1919
"github.com/Azure/azure-container-networking/cns"
2020
cnscli "github.com/Azure/azure-container-networking/cns/client"
21-
"github.com/Azure/azure-container-networking/cns/fsnotify"
2221
"github.com/Azure/azure-container-networking/common"
2322
"github.com/Azure/azure-container-networking/dhcp"
2423
"github.com/Azure/azure-container-networking/iptables"
@@ -716,7 +715,7 @@ func (plugin *NetPlugin) createEpInfo(opt *createEpInfoOpt) (*network.EndpointIn
716715
*opt.infraSeen = true
717716
} else {
718717
ifName = "eth" + strconv.Itoa(opt.endpointIndex)
719-
endpointID = plugin.nm.GetEndpointID(opt.args.ContainerID, ifName)
718+
endpointID = plugin.nm.GetEndpointIDByNicType(opt.args.ContainerID, ifName, opt.ifInfo.NICType)
720719
}
721720

722721
endpointInfo := network.EndpointInfo{
@@ -1070,31 +1069,29 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
10701069
// network ID is passed in and used only for migration
10711070
// otherwise, in stateless, we don't need the network id for deletion
10721071
epInfos, err = plugin.nm.GetEndpointState(networkID, args.ContainerID)
1073-
// if stateless CNI fail to get the endpoint from CNS for any reason other than Endpoint Not found
1072+
// if stateless CNI fail to get the endpoint from CNS for any reason other than Endpoint Not found or CNS connection failure
1073+
// return a retriable error so the container runtime will retry this DEL later
1074+
// the implementation of this function returns nil if the endpoint doens't exist, so
1075+
// we don't have to check that here
10741076
if err != nil {
1075-
if errors.Is(err, network.ErrConnectionFailure) {
1076-
logger.Info("failed to connect to CNS", zap.String("containerID", args.ContainerID), zap.Error(err))
1077-
addErr := fsnotify.AddFile(args.ContainerID, args.ContainerID, watcherPath)
1078-
logger.Info("add containerid file for Asynch delete", zap.String("containerID", args.ContainerID), zap.Error(addErr))
1079-
if addErr != nil {
1080-
logger.Error("failed to add file to watcher", zap.String("containerID", args.ContainerID), zap.Error(addErr))
1081-
return errors.Wrap(addErr, fmt.Sprintf("failed to add file to watcher with containerID %s", args.ContainerID))
1082-
}
1083-
return nil
1084-
}
1085-
if errors.Is(err, network.ErrEndpointStateNotFound) {
1077+
switch {
1078+
case errors.Is(err, network.ErrConnectionFailure):
1079+
logger.Error("Failed to connect to CNS", zap.Error(err))
1080+
logger.Info("Endpoint will be deleted from state file asynchronously", zap.String("containerID", args.ContainerID))
1081+
case errors.Is(err, network.ErrEndpointStateNotFound):
10861082
logger.Info("Endpoint Not found", zap.String("containerID", args.ContainerID), zap.Error(err))
10871083
return nil
1084+
default:
1085+
logger.Error("Get Endpoint State API returned error", zap.String("containerID", args.ContainerID), zap.Error(err))
1086+
return plugin.RetriableError(fmt.Errorf("failed to delete endpoint: %w", err))
10881087
}
1089-
logger.Error("Get Endpoint State API returned error", zap.String("containerID", args.ContainerID), zap.Error(err))
1090-
return plugin.RetriableError(fmt.Errorf("failed to delete endpoint: %w", err))
10911088
}
10921089
} else {
10931090
epInfos = plugin.nm.GetEndpointInfosFromContainerID(args.ContainerID)
10941091
}
10951092

1096-
// for when the endpoint is not created, but the ips are already allocated (only works if single network, single infra)
1097-
// this block is not applied to stateless CNI
1093+
// for Stateful CNI when the endpoint is not created, but the ips are already allocated (only works if single network, single infra)
1094+
// this block is applied to stateless CNI only if there was a connection failure in previous block
10981095
if len(epInfos) == 0 {
10991096
endpointID := plugin.nm.GetEndpointID(args.ContainerID, args.IfName)
11001097
if !nwCfg.MultiTenancy {
@@ -1104,8 +1101,16 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
11041101

11051102
logger.Warn("Release ip by ContainerID (endpoint not found)",
11061103
zap.String("containerID", args.ContainerID))
1107-
if err = plugin.ipamInvoker.Delete(nil, nwCfg, args, nwInfo.Options); err != nil {
1108-
return plugin.RetriableError(fmt.Errorf("failed to release address(no endpoint): %w", err))
1104+
if plugin.nm.IsStatelessCNIMode() {
1105+
options := make(map[string]interface{})
1106+
options["asyncDeleteFileID"] = args.ContainerID
1107+
if err = plugin.ipamInvoker.Delete(nil, nwCfg, args, options); err != nil {
1108+
return plugin.RetriableError(fmt.Errorf("failed to release address(no endpoint): %w", err))
1109+
}
1110+
} else {
1111+
if err = plugin.ipamInvoker.Delete(nil, nwCfg, args, nwInfo.Options); err != nil {
1112+
return plugin.RetriableError(fmt.Errorf("failed to release address(no endpoint): %w", err))
1113+
}
11091114
}
11101115
}
11111116
// Log the error but return success if the endpoint being deleted is not found.

cns/restserver/ipam.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1313,12 +1313,16 @@ func updateIPInfoMap(iPInfo map[string]*IPInfo, interfaceInfo *IPInfo, ifName, e
13131313
iPInfo[ifName].MacAddress = interfaceInfo.MacAddress
13141314
logger.Printf("[updateEndpoint] update the endpoint %s with MacAddress %s", endpointID, interfaceInfo.MacAddress)
13151315
}
1316+
if interfaceInfo.NetworkNameSpace != "" {
1317+
iPInfo[ifName].NetworkNameSpace = interfaceInfo.NetworkNameSpace
1318+
logger.Printf("[updateEndpoint] update the endpoint %s with NetworkNameSpace %s", endpointID, interfaceInfo.NetworkNameSpace)
1319+
}
13161320
}
13171321

13181322
// verifyUpdateEndpointStateRequest verify the CNI request body for the UpdateENdpointState API
13191323
func verifyUpdateEndpointStateRequest(req map[string]*IPInfo) error {
13201324
for ifName, InterfaceInfo := range req {
1321-
if InterfaceInfo.HostVethName == "" && InterfaceInfo.HnsEndpointID == "" && InterfaceInfo.NICType == "" && InterfaceInfo.MacAddress == "" {
1325+
if InterfaceInfo.HostVethName == "" && InterfaceInfo.HnsEndpointID == "" && InterfaceInfo.NICType == "" && InterfaceInfo.MacAddress == "" && InterfaceInfo.NetworkNameSpace == "" {
13221326
return errors.New("[updateEndpoint] No NicType, MacAddress, HnsEndpointID or HostVethName has been provided")
13231327
}
13241328
if ifName == "" {

cns/restserver/restserver.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -126,13 +126,14 @@ type EndpointInfo struct {
126126
}
127127

128128
type IPInfo struct {
129-
IPv4 []net.IPNet
130-
IPv6 []net.IPNet `json:",omitempty"`
131-
HnsEndpointID string `json:",omitempty"`
132-
HnsNetworkID string `json:",omitempty"`
133-
HostVethName string `json:",omitempty"`
134-
MacAddress string `json:",omitempty"`
135-
NICType cns.NICType
129+
IPv4 []net.IPNet
130+
IPv6 []net.IPNet `json:",omitempty"`
131+
HnsEndpointID string `json:",omitempty"`
132+
HnsNetworkID string `json:",omitempty"`
133+
HostVethName string `json:",omitempty"`
134+
MacAddress string `json:",omitempty"`
135+
NICType cns.NICType `json:",omitempty"`
136+
NetworkNameSpace string `json:",omitempty"`
136137
}
137138

138139
type GetHTTPServiceDataResponse struct {

network/manager.go

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ type NetworkManager interface {
116116
UpdateEndpoint(networkID string, existingEpInfo *EndpointInfo, targetEpInfo *EndpointInfo) error
117117
GetNumberOfEndpoints(ifName string, networkID string) int
118118
GetEndpointID(containerID, ifName string) string
119+
GetEndpointIDByNicType(containerID, ifName string, nicType cns.NICType) string
119120
IsStatelessCNIMode() bool
120121
SaveState(eps []*endpoint) error
121122
DeleteState(epInfos []*EndpointInfo) error
@@ -514,7 +515,7 @@ func (nm *networkManager) DeleteEndpointState(networkID string, epInfo *Endpoint
514515
nw := &network{
515516
Id: networkID, // currently unused in stateless cni
516517
HnsId: epInfo.HNSNetworkID,
517-
Mode: opModeTransparentVlan,
518+
Mode: opModeTransparent,
518519
SnatBridgeIP: "",
519520
NetNs: dummyGUID, // to trigger hns v2, windows
520521
extIf: &externalInterface{
@@ -529,6 +530,7 @@ func (nm *networkManager) DeleteEndpointState(networkID string, epInfo *Endpoint
529530
HNSNetworkID: epInfo.HNSNetworkID, // unused (we use nw.HnsId for deleting the network)
530531
HostIfName: epInfo.HostIfName,
531532
LocalIP: "",
533+
IPAddresses: epInfo.IPAddresses,
532534
VlanID: 0,
533535
AllowInboundFromHostToNC: false, // stateless currently does not support apipa
534536
AllowInboundFromNCToHost: false,
@@ -537,11 +539,12 @@ func (nm *networkManager) DeleteEndpointState(networkID string, epInfo *Endpoint
537539
NetworkContainerID: epInfo.NetworkContainerID, // we don't use this as long as AllowInboundFromHostToNC and AllowInboundFromNCToHost are false
538540
NetNs: dummyGUID, // to trigger hnsv2, windows
539541
NICType: epInfo.NICType,
542+
NetworkNameSpace: epInfo.NetNsPath,
540543
IfName: epInfo.IfName, // TODO: For stateless cni linux populate IfName here to use in deletion in secondary endpoint client
541544
}
542545
logger.Info("Deleting endpoint with", zap.String("Endpoint Info: ", epInfo.PrettyString()), zap.String("HNISID : ", ep.HnsId))
543546

544-
err := nw.deleteEndpointImpl(netlink.NewNetlink(), platform.NewExecClient(logger), nil, nil, nil, nil, nil, ep)
547+
err := nw.deleteEndpointImpl(nm.netlink, nm.plClient, nil, nm.netio, nm.nsClient, nm.iptablesClient, nm.dhcpClient, ep)
545548
if err != nil {
546549
return err
547550
}
@@ -745,6 +748,16 @@ func (nm *networkManager) GetEndpointID(containerID, ifName string) string {
745748
return containerID + "-" + ifName
746749
}
747750

751+
// GetEndpointIDByNicType returns a unique endpoint ID based on the CNI mode and NIC type.
752+
func (nm *networkManager) GetEndpointIDByNicType(containerID, ifName string, nicType cns.NICType) string {
753+
// For stateless CNI, secondary NICs use containerID-ifName as endpointID.
754+
if nm.IsStatelessCNIMode() && nicType != cns.InfraNIC {
755+
return containerID + "-" + ifName
756+
}
757+
// For InfraNIC, use GetEndpointID() logic.
758+
return nm.GetEndpointID(containerID, ifName)
759+
}
760+
748761
// saves the map of network ids to endpoints to the state file
749762
func (nm *networkManager) SaveState(eps []*endpoint) error {
750763
nm.Lock()
@@ -809,6 +822,7 @@ func cnsEndpointInfotoCNIEpInfos(endpointInfo restserver.EndpointInfo, endpointI
809822
epInfo.NICType = ipInfo.NICType
810823
epInfo.HNSNetworkID = ipInfo.HnsNetworkID
811824
epInfo.MacAddress = net.HardwareAddr(ipInfo.MacAddress)
825+
epInfo.NetNsPath = ipInfo.NetworkNameSpace
812826
ret = append(ret, epInfo)
813827
}
814828
return ret
@@ -837,11 +851,12 @@ func generateCNSIPInfoMap(eps []*endpoint) map[string]*restserver.IPInfo {
837851

838852
for _, ep := range eps {
839853
ifNametoIPInfoMap[ep.IfName] = &restserver.IPInfo{ // in windows, the nicname is args ifname, in linux, it's ethX
840-
NICType: ep.NICType,
841-
HnsEndpointID: ep.HnsId,
842-
HnsNetworkID: ep.HNSNetworkID,
843-
HostVethName: ep.HostIfName,
844-
MacAddress: ep.MacAddress.String(),
854+
NICType: ep.NICType,
855+
HnsEndpointID: ep.HnsId,
856+
HnsNetworkID: ep.HNSNetworkID,
857+
HostVethName: ep.HostIfName,
858+
MacAddress: ep.MacAddress.String(),
859+
NetworkNameSpace: ep.NetworkNameSpace,
845860
}
846861
}
847862

network/manager_mock.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package network
22

33
import (
4+
"github.com/Azure/azure-container-networking/cns"
45
"github.com/Azure/azure-container-networking/common"
56
)
67

@@ -94,6 +95,16 @@ func (nm *MockNetworkManager) GetEndpointID(containerID, ifName string) string {
9495
return containerID + "-" + ifName
9596
}
9697

98+
// GetEndpointIDByNicType returns a unique endpoint ID based on the CNI mode and NIC type.
99+
func (nm *MockNetworkManager) GetEndpointIDByNicType(containerID, ifName string, nicType cns.NICType) string {
100+
// For stateless CNI, secondary NICs use containerID-ifName as endpointID.
101+
if nm.IsStatelessCNIMode() && nicType != cns.InfraNIC {
102+
return containerID + "-" + ifName
103+
}
104+
// For InfraNIC, use GetEndpointID() logic.
105+
return nm.GetEndpointID(containerID, ifName)
106+
}
107+
97108
func (nm *MockNetworkManager) GetAllEndpoints(networkID string) (map[string]*EndpointInfo, error) {
98109
return nm.TestEndpointInfoMap, nil
99110
}

network/manager_test.go

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,3 +489,108 @@ var _ = Describe("Test Manager", func() {
489489
})
490490
})
491491
})
492+
493+
func TestGetEndpointIDByNicType_Cases(t *testing.T) {
494+
nm := &networkManager{}
495+
496+
cases := []struct {
497+
name string
498+
stateless bool
499+
containerID string
500+
ifName string
501+
nicType cns.NICType
502+
expectedResult string
503+
}{
504+
{
505+
name: "Stateless InfraNIC",
506+
stateless: true,
507+
containerID: "container123",
508+
ifName: "eth0",
509+
nicType: cns.InfraNIC,
510+
expectedResult: "container123",
511+
},
512+
{
513+
name: "Stateless SecondaryNIC",
514+
stateless: true,
515+
containerID: "container123",
516+
ifName: "eth1",
517+
nicType: cns.DelegatedVMNIC,
518+
expectedResult: "container123-eth1",
519+
},
520+
{
521+
name: "Stateful InfraNIC",
522+
stateless: false,
523+
containerID: "container123456789",
524+
ifName: "eth0",
525+
nicType: cns.InfraNIC,
526+
expectedResult: "containe-eth0", // truncated to 8 chars
527+
},
528+
}
529+
530+
for _, tc := range cases {
531+
t.Run(tc.name, func(t *testing.T) {
532+
nm.statelessCniMode = tc.stateless
533+
id := nm.GetEndpointIDByNicType(tc.containerID, tc.ifName, tc.nicType)
534+
if id != tc.expectedResult {
535+
t.Errorf("expected %s, got %s", tc.expectedResult, id)
536+
}
537+
})
538+
}
539+
}
540+
541+
func TestCnsEndpointInfotoCNIEpInfos_Cases(t *testing.T) {
542+
cases := []struct {
543+
name string
544+
ifName string
545+
ipInfo restserver.IPInfo
546+
expectedNetNs string
547+
expectedIfName string
548+
expectedNICType cns.NICType
549+
}{
550+
{
551+
name: "With NetworkNameSpace and DelegatedVMNIC",
552+
ifName: "eth1",
553+
ipInfo: restserver.IPInfo{
554+
NetworkNameSpace: "/var/run/netns/testns",
555+
NICType: cns.DelegatedVMNIC,
556+
},
557+
expectedNetNs: "/var/run/netns/testns",
558+
expectedIfName: "eth1",
559+
expectedNICType: cns.DelegatedVMNIC,
560+
},
561+
{
562+
name: "Empty NetworkNameSpace and InfraNIC",
563+
ifName: "eth0",
564+
ipInfo: restserver.IPInfo{
565+
NetworkNameSpace: "",
566+
NICType: cns.InfraNIC,
567+
},
568+
expectedNetNs: "",
569+
expectedIfName: "eth0",
570+
expectedNICType: cns.InfraNIC,
571+
},
572+
}
573+
574+
for _, tc := range cases {
575+
t.Run(tc.name, func(t *testing.T) {
576+
endpointInfo := restserver.EndpointInfo{
577+
IfnameToIPMap: map[string]*restserver.IPInfo{
578+
tc.ifName: &tc.ipInfo,
579+
},
580+
}
581+
epInfos := cnsEndpointInfotoCNIEpInfos(endpointInfo, "container123")
582+
if len(epInfos) == 0 {
583+
t.Fatalf("expected at least one epInfo")
584+
}
585+
if epInfos[0].NetNsPath != tc.expectedNetNs {
586+
t.Errorf("expected NetNsPath %q, got %q", tc.expectedNetNs, epInfos[0].NetNsPath)
587+
}
588+
if epInfos[0].IfName != tc.expectedIfName {
589+
t.Errorf("expected IfName %q, got %q", tc.expectedIfName, epInfos[0].IfName)
590+
}
591+
if epInfos[0].NICType != tc.expectedNICType {
592+
t.Errorf("expected NICType %v, got %v", tc.expectedNICType, epInfos[0].NICType)
593+
}
594+
})
595+
}
596+
}

0 commit comments

Comments
 (0)