From 7c240d420c0645b6d1a03c3de18c421e8ff0745b Mon Sep 17 00:00:00 2001 From: Nick Baker Date: Mon, 16 Sep 2024 22:27:30 +0000 Subject: [PATCH] Update IMDS consumers to use helper with fixed retry set --- nodeadm/cmd/nodeadm/init/init.go | 16 ++++++---- nodeadm/internal/api/status.go | 15 +++------ nodeadm/internal/aws/ecr/ecr.go | 2 +- nodeadm/internal/aws/imds/imds.go | 32 +++++++++++++------ nodeadm/internal/configprovider/userdata.go | 3 +- nodeadm/internal/kubelet/config.go | 35 +++++++-------------- nodeadm/internal/system/resources.go | 22 ++++++------- 7 files changed, 63 insertions(+), 62 deletions(-) diff --git a/nodeadm/cmd/nodeadm/init/init.go b/nodeadm/cmd/nodeadm/init/init.go index febb1159c..1d4e818e1 100644 --- a/nodeadm/cmd/nodeadm/init/init.go +++ b/nodeadm/cmd/nodeadm/init/init.go @@ -5,7 +5,6 @@ import ( "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/config" - "github.com/aws/aws-sdk-go-v2/feature/ec2/imds" "github.com/aws/aws-sdk-go-v2/service/ec2" "github.com/integrii/flaggy" "go.uber.org/zap" @@ -13,6 +12,7 @@ import ( "github.com/awslabs/amazon-eks-ami/nodeadm/internal/api" "github.com/awslabs/amazon-eks-ami/nodeadm/internal/aws/ecr" + "github.com/awslabs/amazon-eks-ami/nodeadm/internal/aws/imds" "github.com/awslabs/amazon-eks-ami/nodeadm/internal/cli" "github.com/awslabs/amazon-eks-ami/nodeadm/internal/configprovider" "github.com/awslabs/amazon-eks-ami/nodeadm/internal/containerd" @@ -146,14 +146,18 @@ func (c *initCmd) Run(log *zap.Logger, opts *cli.GlobalOptions) error { // perform in-place updates when allowed by the user func enrichConfig(log *zap.Logger, cfg *api.NodeConfig) error { log.Info("Fetching instance details..") - imdsClient := imds.New(imds.Options{}) - awsConfig, err := config.LoadDefaultConfig(context.TODO(), config.WithClientLogMode(aws.LogRetries), config.WithEC2IMDSRegion(func(o *config.UseEC2IMDSRegion) { - o.Client = imdsClient - })) + awsConfig, err := config.LoadDefaultConfig(context.TODO(), + config.WithClientLogMode(aws.LogRetries), + config.WithEC2IMDSRegion(func(o *config.UseEC2IMDSRegion) { + // Use our pre-configured IMDS client to avoid hitting common retry + // issues with the default config. + o.Client = imds.Client + }), + ) if err != nil { return err } - instanceDetails, err := api.GetInstanceDetails(context.TODO(), cfg.Spec.FeatureGates, imdsClient, ec2.NewFromConfig(awsConfig)) + instanceDetails, err := api.GetInstanceDetails(context.TODO(), cfg.Spec.FeatureGates, ec2.NewFromConfig(awsConfig)) if err != nil { return err } diff --git a/nodeadm/internal/api/status.go b/nodeadm/internal/api/status.go index b49a72a1a..4490f2e32 100644 --- a/nodeadm/internal/api/status.go +++ b/nodeadm/internal/api/status.go @@ -3,34 +3,29 @@ package api import ( "context" "fmt" - "io" "time" "github.com/aws/aws-sdk-go-v2/aws" - "github.com/aws/aws-sdk-go-v2/feature/ec2/imds" "github.com/aws/aws-sdk-go-v2/service/ec2" ec2extra "github.com/awslabs/amazon-eks-ami/nodeadm/internal/aws/ec2" + "github.com/awslabs/amazon-eks-ami/nodeadm/internal/aws/imds" ) // Fetch information about the ec2 instance using IMDS data. // This information is stored into the internal config to avoid redundant calls // to IMDS when looking for instance metadata -func GetInstanceDetails(ctx context.Context, featureGates map[Feature]bool, imdsClient *imds.Client, ec2Client *ec2.Client) (*InstanceDetails, error) { - instanceIdenitityDocument, err := imdsClient.GetInstanceIdentityDocument(ctx, &imds.GetInstanceIdentityDocumentInput{}) +func GetInstanceDetails(ctx context.Context, featureGates map[Feature]bool, ec2Client *ec2.Client) (*InstanceDetails, error) { + instanceIdenitityDocument, err := imds.GetInstanceIdentityDocument(ctx) if err != nil { return nil, err } - macResponse, err := imdsClient.GetMetadata(ctx, &imds.GetMetadataInput{Path: "mac"}) - if err != nil { - return nil, err - } - mac, err := io.ReadAll(macResponse.Content) + mac, err := imds.GetProperty(ctx, "mac") if err != nil { return nil, err } - privateDNSName := "" + var privateDNSName string if !IsFeatureEnabled(InstanceIdNodeName, featureGates) { privateDNSName, err = getPrivateDNSName(ec2Client, instanceIdenitityDocument.InstanceID) if err != nil { diff --git a/nodeadm/internal/aws/ecr/ecr.go b/nodeadm/internal/aws/ecr/ecr.go index 3e317c314..4616d64bf 100644 --- a/nodeadm/internal/aws/ecr/ecr.go +++ b/nodeadm/internal/aws/ecr/ecr.go @@ -39,7 +39,7 @@ func (r *ECRRegistry) GetSandboxImage() string { func GetEKSRegistry(region string) (ECRRegistry, error) { account, region := getEKSRegistryCoordinates(region) - servicesDomain, err := imds.GetProperty(imds.ServicesDomain) + servicesDomain, err := imds.GetProperty(context.TODO(), imds.ServicesDomain) if err != nil { return "", err } diff --git a/nodeadm/internal/aws/imds/imds.go b/nodeadm/internal/aws/imds/imds.go index fd0684561..9fa6870df 100644 --- a/nodeadm/internal/aws/imds/imds.go +++ b/nodeadm/internal/aws/imds/imds.go @@ -9,14 +9,24 @@ import ( "github.com/aws/aws-sdk-go-v2/feature/ec2/imds" ) -var client *imds.Client +var Client *imds.Client func init() { - client = imds.New(imds.Options{ + Client = imds.New(imds.Options{ DisableDefaultTimeout: true, Retryer: retry.NewStandard(func(so *retry.StandardOptions) { so.MaxAttempts = 15 so.MaxBackoff = 1 * time.Second + so.Retryables = append(so.Retryables, + retry.RetryableHTTPStatusCode{ + Codes: map[int]struct{}{ + // 404 errors normally occur due when there is a delay + // with resource propagation, so making this retryable + // should alleviate most issues. + 404: {}, + }, + }, + ) }), }) } @@ -27,24 +37,28 @@ const ( ServicesDomain IMDSProperty = "services/domain" ) -func GetUserData() ([]byte, error) { - resp, err := client.GetUserData(context.TODO(), &imds.GetUserDataInput{}) +func GetInstanceIdentityDocument(ctx context.Context) (*imds.GetInstanceIdentityDocumentOutput, error) { + return Client.GetInstanceIdentityDocument(ctx, &imds.GetInstanceIdentityDocumentInput{}) +} + +func GetUserData(ctx context.Context) ([]byte, error) { + res, err := Client.GetUserData(ctx, &imds.GetUserDataInput{}) if err != nil { return nil, err } - return io.ReadAll(resp.Content) + return io.ReadAll(res.Content) } -func GetProperty(prop IMDSProperty) (string, error) { - bytes, err := GetPropertyBytes(prop) +func GetProperty(ctx context.Context, prop IMDSProperty) (string, error) { + bytes, err := GetPropertyBytes(ctx, prop) if err != nil { return "", err } return string(bytes), nil } -func GetPropertyBytes(prop IMDSProperty) ([]byte, error) { - res, err := client.GetMetadata(context.TODO(), &imds.GetMetadataInput{Path: string(prop)}) +func GetPropertyBytes(ctx context.Context, prop IMDSProperty) ([]byte, error) { + res, err := Client.GetMetadata(ctx, &imds.GetMetadataInput{Path: string(prop)}) if err != nil { return nil, err } diff --git a/nodeadm/internal/configprovider/userdata.go b/nodeadm/internal/configprovider/userdata.go index 671f8881f..0c1e9acc6 100644 --- a/nodeadm/internal/configprovider/userdata.go +++ b/nodeadm/internal/configprovider/userdata.go @@ -3,6 +3,7 @@ package configprovider import ( "bytes" "compress/gzip" + "context" "encoding/base64" "fmt" "io" @@ -31,7 +32,7 @@ type userDataProvider interface { type imdsUserDataProvider struct{} func (p *imdsUserDataProvider) GetUserData() ([]byte, error) { - return imds.GetUserData() + return imds.GetUserData(context.TODO()) } type userDataConfigProvider struct { diff --git a/nodeadm/internal/kubelet/config.go b/nodeadm/internal/kubelet/config.go index 978a2b24b..ae3d976a9 100644 --- a/nodeadm/internal/kubelet/config.go +++ b/nodeadm/internal/kubelet/config.go @@ -5,7 +5,6 @@ import ( _ "embed" "encoding/json" "fmt" - "io" "net" "net/url" "os" @@ -20,10 +19,10 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" k8skubelet "k8s.io/kubelet/config/v1beta1" - "github.com/aws/aws-sdk-go-v2/feature/ec2/imds" "github.com/aws/smithy-go/ptr" "github.com/awslabs/amazon-eks-ami/nodeadm/internal/api" + "github.com/awslabs/amazon-eks-ami/nodeadm/internal/aws/imds" "github.com/awslabs/amazon-eks-ami/nodeadm/internal/containerd" "github.com/awslabs/amazon-eks-ami/nodeadm/internal/system" "github.com/awslabs/amazon-eks-ami/nodeadm/internal/util" @@ -203,7 +202,7 @@ func (ksc *kubeletConfig) withOutpostSetup(cfg *api.NodeConfig) error { } func (ksc *kubeletConfig) withNodeIp(cfg *api.NodeConfig, flags map[string]string) error { - nodeIp, err := getNodeIp(context.TODO(), imds.New(imds.Options{}), cfg) + nodeIp, err := getNodeIp(context.TODO(), cfg) if err != nil { return err } @@ -262,11 +261,11 @@ func (ksc *kubeletConfig) withCloudProvider(kubeletVersion string, cfg *api.Node func (ksc *kubeletConfig) withDefaultReservedResources(cfg *api.NodeConfig) { ksc.SystemReservedCgroup = ptr.String("/system") ksc.KubeReservedCgroup = ptr.String("/runtime") - maxPods, ok := MaxPodsPerInstanceType[cfg.Status.Instance.Type] - if !ok { - ksc.MaxPods = CalcMaxPods(cfg.Status.Instance.Region, cfg.Status.Instance.Type) - } else { + if maxPods, ok := MaxPodsPerInstanceType[cfg.Status.Instance.Type]; ok { + // #nosec G115 // known source from ec2 apis within int32 range ksc.MaxPods = int32(maxPods) + } else { + ksc.MaxPods = CalcMaxPods(cfg.Status.Instance.Region, cfg.Status.Instance.Type) } ksc.KubeReserved = map[string]string{ "cpu": fmt.Sprintf("%dm", getCPUMillicoresToReserve()), @@ -407,36 +406,24 @@ func getProviderId(availabilityZone, instanceId string) string { } // Get the IP of the node depending on the ipFamily configured for the cluster -func getNodeIp(ctx context.Context, imdsClient *imds.Client, cfg *api.NodeConfig) (string, error) { +func getNodeIp(ctx context.Context, cfg *api.NodeConfig) (string, error) { ipFamily, err := api.GetCIDRIpFamily(cfg.Spec.Cluster.CIDR) if err != nil { return "", err } switch ipFamily { case api.IPFamilyIPv4: - ipv4Response, err := imdsClient.GetMetadata(ctx, &imds.GetMetadataInput{ - Path: "local-ipv4", - }) - if err != nil { - return "", err - } - ip, err := io.ReadAll(ipv4Response.Content) + ipv4, err := imds.GetProperty(ctx, "local-ipv4") if err != nil { return "", err } - return string(ip), nil + return ipv4, nil case api.IPFamilyIPv6: - ipv6Response, err := imdsClient.GetMetadata(ctx, &imds.GetMetadataInput{ - Path: fmt.Sprintf("network/interfaces/macs/%s/ipv6s", cfg.Status.Instance.MAC), - }) - if err != nil { - return "", err - } - ip, err := io.ReadAll(ipv6Response.Content) + ipv6, err := imds.GetProperty(ctx, imds.IMDSProperty(fmt.Sprintf("network/interfaces/macs/%s/ipv6s", cfg.Status.Instance.MAC))) if err != nil { return "", err } - return string(ip), nil + return ipv6, nil default: return "", fmt.Errorf("invalid ip-family. %s is not one of %v", ipFamily, []api.IPFamily{api.IPFamilyIPv4, api.IPFamilyIPv6}) } diff --git a/nodeadm/internal/system/resources.go b/nodeadm/internal/system/resources.go index 6e6c34a69..3bcbe40b3 100644 --- a/nodeadm/internal/system/resources.go +++ b/nodeadm/internal/system/resources.go @@ -27,9 +27,9 @@ const ( ) type core struct { - Id int `json:"core_id"` - Threads []int `json:"thread_ids"` - SocketID int `json:"socket_id"` + Id int `json:"core_id"` + Threads []uint64 `json:"thread_ids"` + SocketID int `json:"socket_id"` } func init() { @@ -168,7 +168,7 @@ func getCoresInfo(cpuDirs []string) ([]core, error) { desiredCore.SocketID = physicalPackageID if len(desiredCore.Threads) == 0 { - desiredCore.Threads = []int{cpuID} + desiredCore.Threads = []uint64{cpuID} } else { desiredCore.Threads = append(desiredCore.Threads, cpuID) } @@ -177,12 +177,12 @@ func getCoresInfo(cpuDirs []string) ([]core, error) { return cores, nil } -func getCPUID(str string) (int, error) { +func getCPUID(str string) (uint64, error) { matches := cpuDirRegExp.FindStringSubmatch(str) if len(matches) != 2 { return 0, fmt.Errorf("failed to match regexp, str: %s", str) } - valInt, err := strconv.Atoi(matches[1]) + valInt, err := strconv.ParseUint(matches[1], 10, 16) if err != nil { return 0, err } @@ -199,7 +199,7 @@ func getCoreID(cpuPath string) (string, error) { return strings.TrimSpace(string(coreID)), err } -func IsCPUOnline(cpuID int) bool { +func IsCPUOnline(cpuID uint64) bool { cpuOnlinePath, err := filepath.Abs(cpusPath + "/online") if err != nil { zap.L().Info("Unable to get absolute path", zap.String("absolutPath", cpusPath+"/online")) @@ -217,7 +217,7 @@ func IsCPUOnline(cpuID int) bool { zap.Error(err)) } - isOnline, err := isCpuOnline(cpuOnlinePath, uint16(cpuID)) + isOnline, err := isCpuOnline(cpuOnlinePath, cpuID) if err != nil { zap.L().Error("Unable to get online CPUs list", zap.Error(err)) return false @@ -225,7 +225,7 @@ func IsCPUOnline(cpuID int) bool { return isOnline } -func isCpuOnline(path string, cpuID uint16) (bool, error) { +func isCpuOnline(path string, cpuID uint64) (bool, error) { // #nosec G304 // This path is cpuOnlinePath from isCPUOnline fileContent, err := os.ReadFile(path) if err != nil { @@ -254,7 +254,7 @@ func isCpuOnline(path string, cpuID uint16) (bool, error) { return false, fmt.Errorf("invalid values in %s", path) } // Return true, if the CPU under consideration is in the range of online CPUs. - if cpuID >= uint16(min) && cpuID <= uint16(max) { + if cpuID >= min && cpuID <= max { return true, nil } case 1: @@ -262,7 +262,7 @@ func isCpuOnline(path string, cpuID uint16) (bool, error) { if err != nil { return false, err } - if uint16(value) == cpuID { + if value == cpuID { return true, nil } }