From d6c5525f029d4c9b1b04dc7fb2a9d65a3fcf3357 Mon Sep 17 00:00:00 2001 From: Hongliang Liu <75655411+hongliangl@users.noreply.github.com> Date: Wed, 24 May 2023 12:57:59 +0800 Subject: [PATCH] Improve AntreaProxy route syncing on Windows (#4941) This PR fixes the following issues: 1. AntreaAgent logs "Failed to sync route" when attempting to sync route entries every time. 2. AntreaAgent logs "Failed to install route for Service CIDR" err="failed to delete stale Service CIDR route" during startup. For the first issue, previously, to recover the connected route of antrea-gw0 (assuming the IP address is 10.10.0.1/24) that may have been deleted by mistake, a route with a destination 10.10.0.1/24 and gateway 10.10.0.1 was periodically synced. However, this caused an error because an existing active route with the same destination but a different gateway 0.0.0.0 should have already been automatically installed when antrea-gw0 was created. To address this issue, this PR changes the gateway of the recover route from 10.10.0.1 to 0.0.0.0, which matches the existing installed route. This ensures that the periodic sync will not cause any errors. For the second issue, previously, when syncing the second ClusterIP, the stale route entry installed for the first ClusterIP is added to the stale routes twice. This results in the error log. Signed-off-by: Hongliang Liu --- pkg/agent/route/route_windows.go | 35 ++++++++++++------- .../util/powershell/powershell_windows.go | 4 +-- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/pkg/agent/route/route_windows.go b/pkg/agent/route/route_windows.go index e16ff833dae..b3d4f037318 100644 --- a/pkg/agent/route/route_windows.go +++ b/pkg/agent/route/route_windows.go @@ -285,23 +285,34 @@ func (c *Client) addServiceCIDRRoute(serviceCIDR *net.IPNet) error { // a new route with a newly calculated destination CIDR has been installed. if serviceCIDRRouteExists { staleRoutes = append(staleRoutes, oldServiceCIDRRoute.(*util.Route)) - } - routes, err := c.listIPRoutesOnGW() - if err != nil { - return fmt.Errorf("error listing ip routes: %w", err) - } - // Collect stale per-IP routes for ClusterIPs before this patch. - for _, rt := range routes { - ones, _ := rt.DestinationSubnet.Mask.Size() - if ones == net.IPv4len*8 && serviceCIDR.Contains(rt.DestinationSubnet.IP) { - staleRoutes = append(staleRoutes, rt) + } else { + routes, err := c.listIPRoutesOnGW() + if err != nil { + return fmt.Errorf("error listing ip routes: %w", err) + } + for _, rt := range routes { + if !rt.GatewayAddress.Equal(gw) { + continue + } + // It's the latest route we just installed. + if iputil.IPNetEqual(rt.DestinationSubnet, serviceCIDR) { + continue + } + // The route covers the desired route. It was installed when the calculated ServiceCIDR is larger than the current one, which could happen after some Services are deleted. + if iputil.IPNetContains(rt.DestinationSubnet, serviceCIDR) { + staleRoutes = append(staleRoutes, rt) + } + // The desired route covers the route. It was either installed when the calculated ServiceCIDR is smaller than the current one, or a per-IP route generated before v1.12.0. + if iputil.IPNetContains(serviceCIDR, rt.DestinationSubnet) { + staleRoutes = append(staleRoutes, rt) + } } } // Remove stale routes. for _, rt := range staleRoutes { if err := util.RemoveNetRoute(rt); err != nil { - if err.Error() == "No matching MSFT_NetRoute objects" { + if strings.Contains(err.Error(), "No matching MSFT_NetRoute objects") { klog.InfoS("Failed to delete stale Service CIDR route since the route has been deleted", "route", rt) } else { return fmt.Errorf("failed to delete stale Service CIDR route %s: %w", rt.String(), err) @@ -383,7 +394,7 @@ func (c *Client) syncRoute() error { gwAutoconfRoute := &util.Route{ LinkIndex: c.nodeConfig.GatewayConfig.LinkIndex, DestinationSubnet: c.nodeConfig.PodIPv4CIDR, - GatewayAddress: c.nodeConfig.GatewayConfig.IPv4, + GatewayAddress: net.IPv4zero, RouteMetric: util.MetricDefault, } restoreRoute(gwAutoconfRoute) diff --git a/pkg/agent/util/powershell/powershell_windows.go b/pkg/agent/util/powershell/powershell_windows.go index 29c3acb6372..df8629de4cf 100644 --- a/pkg/agent/util/powershell/powershell_windows.go +++ b/pkg/agent/util/powershell/powershell_windows.go @@ -26,8 +26,8 @@ func RunCommand(cmd string) (string, error) { // The try/catch command idea is from the following page: // https://stackoverflow.com/questions/19282870/how-can-i-use-try-catch-and-get-my-script-to-stop-if-theres-an-error/19285405 psCmd := exec.Command("powershell.exe", "-NoLogo", "-NoProfile", "-NonInteractive", "-Command", - fmt.Sprintf(`$ErrorActionPreference="Stop";try {%s} catch {Write-Host $_;os.Exit(1)}`, cmd)) // #nosec G204 - stdout, err := psCmd.Output() + fmt.Sprintf(`$ErrorActionPreference="Stop";try {%s} catch {Write-Host $_;Exit(1)}`, cmd)) // #nosec G204 + stdout, err := psCmd.CombinedOutput() stdoutStr := string(stdout) if err != nil { return "", fmt.Errorf("failed to run command '%s': output '%s', %v", cmd, stdoutStr, err)