Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cmd to configure nodegroups on a running cluster #2246

Merged
merged 41 commits into master from feature/add-or-remove-ngs
Jun 17, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
41 commits
Select commit. Hold shift + click to select a range.
3b28bbd
WIP on nodegroup adder
RobertLucian Jun 7, 2021
6fa2e8f
WIP nodegroup adder cmd
RobertLucian Jun 8, 2021
d4d2bda
Merge branch 'master' into feature/add-or-remove-ngs
RobertLucian Jun 8, 2021
ec88c2d
Use the simplified aws resource table when showing the costs
RobertLucian Jun 8, 2021
3083606
WIP cluster configure
RobertLucian Jun 9, 2021
ddd5b3e
WIP cluster configure
RobertLucian Jun 10, 2021
9de468a
Bug fixes
RobertLucian Jun 10, 2021
bc3f33a
Further fixes on the cloudformation stacks
RobertLucian Jun 10, 2021
914233b
Address layout on install.sh
RobertLucian Jun 10, 2021
3fcc248
Add priority field to the node group config
RobertLucian Jun 10, 2021
e8b7bf6
Document the priority field in the docs
RobertLucian Jun 10, 2021
19e4be5
Make lint
RobertLucian Jun 10, 2021
cadadfa
Layout change for cluster configure cmd
RobertLucian Jun 11, 2021
20412af
Better reconciliation w/ cloudformation stacks
RobertLucian Jun 11, 2021
a38ec20
Fix number of SGs when cluster already exists
RobertLucian Jun 11, 2021
299fb39
Quota fixes
RobertLucian Jun 11, 2021
e2949c2
Further fixes
RobertLucian Jun 11, 2021
2b445b2
Improve cluster info cmd
RobertLucian Jun 11, 2021
1b599a2
Remove debugging comments
RobertLucian Jun 11, 2021
33c6d36
Nits
RobertLucian Jun 11, 2021
d7d394b
Remove the nodegroups first and then add the others
RobertLucian Jun 11, 2021
df02113
Merge branch 'master' into feature/add-or-remove-ngs
RobertLucian Jun 11, 2021
2c50eb4
Nits
RobertLucian Jun 11, 2021
d6fd7b8
Separate validate functions
RobertLucian Jun 14, 2021
5f385cc
Simplify get cluster state package
RobertLucian Jun 14, 2021
97e4f30
Address PR comments
RobertLucian Jun 14, 2021
4153321
Merge branch 'master' into feature/add-or-remove-ngs
RobertLucian Jun 14, 2021
1c42ae0
Add missing error print when stacks couldn't be retrieved
RobertLucian Jun 14, 2021
157fec7
Bolts and fixes
RobertLucian Jun 14, 2021
c562386
Address PR comments
RobertLucian Jun 15, 2021
fb53dda
Print cluster stacks when running cluster info cmd
RobertLucian Jun 15, 2021
eae2bd1
Refactor
RobertLucian Jun 15, 2021
262b4f5
Some refactoring
RobertLucian Jun 15, 2021
b0fc893
Merge branch 'master' into feature/add-or-remove-ngs
RobertLucian Jun 15, 2021
0068a7d
Fix to the number of required SGs on configure
RobertLucian Jun 15, 2021
3bc9d13
Merge branch 'master' into feature/add-or-remove-ngs
RobertLucian Jun 16, 2021
91bf390
Address PR comments
RobertLucian Jun 16, 2021
d4b5bb9
Address merge conflicts from master
RobertLucian Jun 16, 2021
b58b7a6
Merge branch 'master' into feature/add-or-remove-ngs
RobertLucian Jun 17, 2021
4d5e378
Addressing PR comments and a fix
RobertLucian Jun 17, 2021
4d40100
Merge branch 'master' into feature/add-or-remove-ngs
RobertLucian Jun 17, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Quota fixes
  • Loading branch information
RobertLucian committed Jun 11, 2021
commit 299fb39edc6ac8e778877287f79c01a9de6ace68
6 changes: 2 additions & 4 deletions cli/cmd/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,9 +335,8 @@ var _clusterConfigureCmd = &cobra.Command{
exit.Error(err)
}

clusterConfig := refreshCachedClusterConfig(awsClient, accessConfig, true)

clusterState, err := clusterstate.GetClusterState(awsClient, &clusterConfig)
oldClusterConfig := refreshCachedClusterConfig(awsClient, accessConfig, true)
clusterState, err := clusterstate.GetClusterState(awsClient, &oldClusterConfig)
if err != nil {
exit.Error(err)
}
Expand All @@ -348,7 +347,6 @@ var _clusterConfigureCmd = &cobra.Command{
}

staleNodeGroups := clusterState.GetStaleNodeGroupNames()
oldClusterConfig := refreshCachedClusterConfig(awsClient, accessConfig, true)
newClusterConfig, newNgs, removedNgs, scaledNgs, err := getConfigureClusterConfig(awsClient, oldClusterConfig, clusterConfigFile, staleNodeGroups, _flagClusterDisallowPrompt)
if err != nil {
exit.Error(err)
Expand Down
2 changes: 1 addition & 1 deletion cli/cmd/lib_cluster_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ func confirmInstallClusterConfig(clusterConfig *clusterconfig.Config, awsClient
}

func confirmConfigureClusterConfig(newNgs, removedNgs, scaledNgs []string, oldCc, newCc clusterconfig.Config, disallowPrompt bool) {
fmt.Printf("your %s cluster in region %s will receive the following changes\n", newCc.ClusterName, newCc.Region)
fmt.Printf("your %s cluster in region %s will receive the following changes\n\n", newCc.ClusterName, newCc.Region)
RobertLucian marked this conversation as resolved.
Show resolved Hide resolved
if len(newNgs) > 0 {
fmt.Printf("○ %d %s (%s) will be added\n", len(newNgs), s.PluralS("nodegroup", len(newNgs)), s.StrsAnd(newNgs))
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/lib/aws/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ func ErrorSecurityGroupRulesExceeded(currentLimit, additionalQuotaRequired int,
url := "https://console.aws.amazon.com/servicequotas/home?#!/services/vpc/quotas"
return errors.WithStack(&errors.Error{
Kind: ErrSecurityGroupRulesExceeded,
Message: fmt.Sprintf("security group rules limit of %d exceeded in region %s; use fewer availability zones, remove some node groups, reduce the number of CIDR white lists (if you have any), or increase your quota for inbound/outbound rules per security group by at least %d here: %s (if your request was recently approved, please allow ~30 minutes for AWS to reflect this change)", currentLimit, region, additionalQuotaRequired, url),
Message: fmt.Sprintf("security group rules limit of %d exceeded in region %s; remove some node groups, use fewer availability zones (on a new cluster), reduce the number of CIDR white lists (if you have any on a new cluster), or increase your quota for inbound/outbound rules per security group by at least %d here: %s (if your request was recently approved, please allow ~30 minutes for AWS to reflect this change)", currentLimit, region, additionalQuotaRequired, url),
RobertLucian marked this conversation as resolved.
Show resolved Hide resolved
})
}

Expand Down
19 changes: 7 additions & 12 deletions pkg/lib/aws/servicequotas.go
Original file line number Diff line number Diff line change
Expand Up @@ -314,20 +314,23 @@ func (c *Client) VerifyNetworkQuotas(

// check rules quota for nodegroup SGs
requiredRulesForSG := requiredRulesForNodeGroupSecurityGroup(len(availabilityZones), longestCIDRWhiteList)
// fmt.Println("requiredRulesForSG", requiredRulesForSG, "quota", quotaCodeToValueMap[_securityGroupRulesQuotaCode])
if requiredRulesForSG > quotaCodeToValueMap[_securityGroupRulesQuotaCode] {
additionalQuotaRequired := requiredRulesForSG - quotaCodeToValueMap[_securityGroupRulesQuotaCode]
return ErrorSecurityGroupRulesExceeded(quotaCodeToValueMap[_securityGroupRulesQuotaCode], additionalQuotaRequired, c.Region)
}

// check rules quota for control plane SG
requiredRulesForCPSG := requiredRulesForControlPlaneSecurityGroup(numNodeGroups, clusterAlreadyExists)
requiredRulesForCPSG := requiredRulesForControlPlaneSecurityGroup(numNodeGroups)
// fmt.Println("requiredRulesForCPSG", requiredRulesForCPSG, "quota", quotaCodeToValueMap[_securityGroupRulesQuotaCode])
if requiredRulesForCPSG > quotaCodeToValueMap[_securityGroupRulesQuotaCode] {
additionalQuotaRequired := requiredRulesForCPSG - quotaCodeToValueMap[_securityGroupRulesQuotaCode]
return ErrorSecurityGroupRulesExceeded(quotaCodeToValueMap[_securityGroupRulesQuotaCode], additionalQuotaRequired, c.Region)
}

// check security groups quota
requiredSecurityGroups := requiredSecurityGroups(numNodeGroups, clusterAlreadyExists)
requiredSecurityGroups := requiredSecurityGroups(numNodeGroups)
// fmt.Println("requiredSecurityGroups", requiredSecurityGroups)
sgs, err := c.DescribeSecurityGroups()
if err != nil {
return err
Expand Down Expand Up @@ -359,21 +362,13 @@ func requiredRulesForNodeGroupSecurityGroup(numAZs, whitelistLength int) int {
return _baseInboundRulesForNodeGroup + numAZs*_inboundRulesPerAZ + whitelistRuleCount
}

func requiredRulesForControlPlaneSecurityGroup(numNodeGroups int, clusterAlreadyExists bool) int {
if clusterAlreadyExists {
return 2 * numNodeGroups
}

func requiredRulesForControlPlaneSecurityGroup(numNodeGroups int) int {
// +1 for the operator node group
// this is the number of outbound rules (there are half as many inbound rules, so that is not the limiting factor)
return 2 * (numNodeGroups + 1)
}

func requiredSecurityGroups(numNodeGroups int, clusterAlreadyExists bool) int {
if clusterAlreadyExists {
return numNodeGroups
}

func requiredSecurityGroups(numNodeGroups int) int {
// each node group requires a security group
return _baseNumberOfSecurityGroups + numNodeGroups
}
15 changes: 6 additions & 9 deletions pkg/types/clusterconfig/cluster_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -1152,15 +1152,12 @@ func (cc *Config) ValidateOnConfigure(awsClient *aws.Client, oldConfig Config) (
newNgs := cc.getNewNodeGroups(oldConfig)
removedNgs := cc.getRemovedNodeGroups(oldConfig)

netAdditionOfNgs := len(newNgs) - len(removedNgs)
if netAdditionOfNgs > 0 {
longestCIDRWhiteList := libmath.MaxInt(len(cc.APILoadBalancerCIDRWhiteList), len(cc.OperatorLoadBalancerCIDRWhiteList))
if err := awsClient.VerifyNetworkQuotasOnNodeGroupsAddition(strset.FromSlice(cc.AvailabilityZones), netAdditionOfNgs, longestCIDRWhiteList); err != nil {
// Skip AWS errors, since some regions (e.g. eu-north-1) do not support this API
if !aws.IsAWSError(err) {
errReturned = errors.Wrap(err, NodeGroupsKey)
return
}
longestCIDRWhiteList := libmath.MaxInt(len(cc.APILoadBalancerCIDRWhiteList), len(cc.OperatorLoadBalancerCIDRWhiteList))
if err := awsClient.VerifyNetworkQuotasOnNodeGroupsAddition(strset.FromSlice(cc.AvailabilityZones), len(cc.NodeGroups), longestCIDRWhiteList); err != nil {
// Skip AWS errors, since some regions (e.g. eu-north-1) do not support this API
if !aws.IsAWSError(err) {
errReturned = errors.Wrap(err, NodeGroupsKey)
return
}
}

Expand Down