Skip to content

Commit 3341c86

Browse files
authored
Merge branch 'master' into chart_rbac
2 parents f5cf35b + 7858da6 commit 3341c86

20 files changed

+394
-74
lines changed

charts/cluster-autoscaler/Chart.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
apiVersion: v2
2-
appVersion: 1.20.0
2+
appVersion: 1.21.0
33
description: Scales Kubernetes worker nodes within autoscaling groups.
44
engine: gotpl
55
home: https://github.com/kubernetes/autoscaler
@@ -17,4 +17,4 @@ name: cluster-autoscaler
1717
sources:
1818
- https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler
1919
type: application
20-
version: 9.9.3
20+
version: 9.10.0

charts/cluster-autoscaler/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ Though enough for the majority of installations, the default PodSecurityPolicy _
367367
| image.pullPolicy | string | `"IfNotPresent"` | Image pull policy |
368368
| image.pullSecrets | list | `[]` | Image pull secrets |
369369
| image.repository | string | `"k8s.gcr.io/autoscaling/cluster-autoscaler"` | Image repository |
370-
| image.tag | string | `"v1.20.0"` | Image tag |
370+
| image.tag | string | `"v1.21.0"` | Image tag |
371371
| kubeTargetVersionOverride | string | `""` | Allow overriding the `.Capabilities.KubeVersion.GitVersion` check. Useful for `helm template` commands. |
372372
| magnumCABundlePath | string | `"/etc/kubernetes/ca-bundle.crt"` | Path to the host's CA bundle, from `ca-file` in the cloud-config file. |
373373
| magnumClusterName | string | `""` | Cluster name or ID in Magnum. Required if `cloudProvider=magnum` and not setting `autoDiscovery.clusterName`. |

charts/cluster-autoscaler/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ image:
195195
# image.repository -- Image repository
196196
repository: k8s.gcr.io/autoscaling/cluster-autoscaler
197197
# image.tag -- Image tag
198-
tag: v1.20.0
198+
tag: v1.21.0
199199
# image.pullPolicy -- Image pull policy
200200
pullPolicy: IfNotPresent
201201
## Optionally specify an array of imagePullSecrets.

cluster-autoscaler/FAQ.md

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ this document:
3232
* [How can I scale a node group to 0?](#how-can-i-scale-a-node-group-to-0)
3333
* [How can I prevent Cluster Autoscaler from scaling down a particular node?](#how-can-i-prevent-cluster-autoscaler-from-scaling-down-a-particular-node)
3434
* [How can I configure overprovisioning with Cluster Autoscaler?](#how-can-i-configure-overprovisioning-with-cluster-autoscaler)
35+
* [How can I enable/disable eviction for a specific DaemonSet?](#how-can-i-enabledisable-eviction-for-a-specific-daemonset)
3536
* [Internals](#internals)
3637
* [Are all of the mentioned heuristics and timings final?](#are-all-of-the-mentioned-heuristics-and-timings-final)
3738
* [How does scale-up work?](#how-does-scale-up-work)
@@ -434,6 +435,30 @@ spec:
434435
serviceAccountName: cluster-proportional-autoscaler-service-account
435436
```
436437

438+
### How can I enable/disable eviction for a specific DaemonSet?
439+
440+
Cluster Autoscaler will evict DaemonSets based on its configuration, which is
441+
common for the entire cluster. It is possible, however, to specify the desired
442+
behavior on a per pod basis. All DaemonSet pods will be evicted when they have
443+
the following annotation.
444+
445+
```
446+
"cluster-autoscaler.kubernetes.io/enable-ds-eviction": "true"
447+
```
448+
449+
It is also possible to disable DaemonSet pods eviction explicitly:
450+
451+
452+
```
453+
"cluster-autoscaler.kubernetes.io/enable-ds-eviction": "false"
454+
```
455+
456+
Note that this annotation needs to be specified on DaemonSet pods, not the
457+
DaemonSet object itself. In order to do that for all DaemonSet pods, it is
458+
sufficient to modify the pod spec in the DaemonSet object.
459+
460+
This annotation has no effect on pods that are not a part of any DaemonSet.
461+
437462
****************
438463
439464
# Internals
@@ -512,6 +537,17 @@ What happens when a non-empty node is terminated? As mentioned above, all pods s
512537
elsewhere. Cluster Autoscaler does this by evicting them and tainting the node, so they aren't
513538
scheduled there again.
514539
540+
DaemonSet pods may also be evicted. This can be configured separately for empty
541+
(i.e. containing only DaemonSet pods) and non-empty nodes with
542+
`--daemonset-eviction-for-empty-nodes` and
543+
`--daemonset-eviction-for-occupied-nodes` flags, respectively. Note that the
544+
default behavior is different for each flag: by default DaemonSet pods eviction
545+
will happen only on occupied nodes. Individual DaemonSet pods can also
546+
explicitly choose to be evicted (or not). See [How can I enable/disable eviction
547+
for a specific
548+
DaemonSet](#how-can-i-enabledisable-eviction-for-a-specific-daemonset) for more
549+
details.
550+
515551
Example scenario:
516552
517553
Nodes A, B, C, X, Y.
@@ -690,6 +726,8 @@ The following startup parameters are supported for cluster autoscaler:
690726
| `skip-nodes-with-system-pods` | If true cluster autoscaler will never delete nodes with pods from kube-system (except for DaemonSet or mirror pods) | true
691727
| `skip-nodes-with-local-storage`| If true cluster autoscaler will never delete nodes with pods with local storage, e.g. EmptyDir or HostPath | true
692728
| `min-replica-count` | Minimum number of replicas that a replica set or replication controller should have to allow their pods deletion in scale down | 0
729+
| `daemonset-eviction-for-empty-nodes` | Whether DaemonSet pods will be gracefully terminated from empty nodes | false
730+
| `daemonset-eviction-for-occupied-nodes` | Whether DaemonSet pods will be gracefully terminated from non-empty nodes | true
693731
694732
# Troubleshooting:
695733

cluster-autoscaler/cloudprovider/azure/azure_instance_types.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1972,6 +1972,12 @@ var InstanceTypes = map[string]*InstanceType{
19721972
MemoryMb: 479232,
19731973
GPU: 0,
19741974
},
1975+
"Standard_HB120rs_v3": {
1976+
InstanceType: "Standard_HB120rs_v3",
1977+
VCPU: 120,
1978+
MemoryMb: 479232,
1979+
GPU: 0,
1980+
},
19751981
"Standard_HB60rs": {
19761982
InstanceType: "Standard_HB60rs",
19771983
VCPU: 60,

cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
"time"
2828

2929
"google.golang.org/api/googleapi"
30+
"k8s.io/apimachinery/pkg/util/rand"
3031
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
3132
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
3233
"k8s.io/autoscaler/cluster-autoscaler/utils/klogx"
@@ -39,6 +40,7 @@ const (
3940
defaultOperationWaitTimeout = 20 * time.Second
4041
defaultOperationPollInterval = 100 * time.Millisecond
4142
defaultOperationDeletionPollInterval = 1 * time.Second
43+
instanceGroupNameSuffix = "-grp"
4244
// ErrorCodeQuotaExceeded is an error code used in InstanceErrorInfo if quota exceeded error occurs.
4345
ErrorCodeQuotaExceeded = "QUOTA_EXCEEDED"
4446

@@ -75,6 +77,7 @@ type AutoscalingGceClient interface {
7577
// modifying resources
7678
ResizeMig(GceRef, int64) error
7779
DeleteInstances(migRef GceRef, instances []GceRef) error
80+
CreateInstances(GceRef, int64, []string) error
7881
}
7982

8083
type autoscalingGceClientV1 struct {
@@ -195,6 +198,26 @@ func (client *autoscalingGceClientV1) ResizeMig(migRef GceRef, size int64) error
195198
return client.waitForOp(op, migRef.Project, migRef.Zone, false)
196199
}
197200

201+
func (client *autoscalingGceClientV1) CreateInstances(migRef GceRef, delta int64, existingInstances []string) error {
202+
registerRequest("instance_group_managers", "create_instances")
203+
req := gce.InstanceGroupManagersCreateInstancesRequest{}
204+
instanceNames := map[string]bool{}
205+
for _, inst := range existingInstances {
206+
instanceNames[inst] = true
207+
}
208+
req.Instances = make([]*gce.PerInstanceConfig, 0, delta)
209+
for i := int64(0); i < delta; i++ {
210+
newInstanceName := generateInstanceName(migRef, instanceNames)
211+
instanceNames[newInstanceName] = true
212+
req.Instances = append(req.Instances, &gce.PerInstanceConfig{Name: newInstanceName})
213+
}
214+
op, err := client.gceService.InstanceGroupManagers.CreateInstances(migRef.Project, migRef.Zone, migRef.Name, &req).Do()
215+
if err != nil {
216+
return err
217+
}
218+
return client.waitForOp(op, migRef.Project, migRef.Zone, false)
219+
}
220+
198221
func (client *autoscalingGceClientV1) waitForOp(operation *gce.Operation, project, zone string, isDeletion bool) error {
199222
pollInterval := client.operationPollInterval
200223
if isDeletion {
@@ -346,6 +369,18 @@ func isInstanceNotRunningYet(gceInstance *gce.ManagedInstance) bool {
346369
return gceInstance.InstanceStatus == "" || gceInstance.InstanceStatus == "PROVISIONING" || gceInstance.InstanceStatus == "STAGING"
347370
}
348371

372+
func generateInstanceName(migRef GceRef, existingNames map[string]bool) string {
373+
for i := 0; i < 100; i++ {
374+
name := fmt.Sprintf("%v-%v", strings.TrimSuffix(migRef.Name, instanceGroupNameSuffix), rand.String(4))
375+
if ok, _ := existingNames[name]; !ok {
376+
return name
377+
}
378+
}
379+
klog.Warning("Unable to create unique name for a new instance, duplicate name might occur")
380+
name := fmt.Sprintf("%v-%v", strings.TrimSuffix(migRef.Name, instanceGroupNameSuffix), rand.String(4))
381+
return name
382+
}
383+
349384
func (client *autoscalingGceClientV1) FetchZones(region string) ([]string, error) {
350385
registerRequest("regions", "get")
351386
r, err := client.gceService.Regions.Get(client.projectId, region).Do()

cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ func (mig *gceMig) IncreaseSize(delta int) error {
220220
if int(size)+delta > mig.MaxSize() {
221221
return fmt.Errorf("size increase too large - desired:%d max:%d", int(size)+delta, mig.MaxSize())
222222
}
223-
return mig.gceManager.SetMigSize(mig, size+int64(delta))
223+
return mig.gceManager.CreateInstances(mig, int64(delta))
224224
}
225225

226226
// DecreaseTargetSize decreases the target size of the node group. This function

cluster-autoscaler/cloudprovider/gce/gce_cloud_provider_test.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,11 @@ func (m *gceManagerMock) GetMigTemplateNode(mig Mig) (*apiv1.Node, error) {
9292
return args.Get(0).(*apiv1.Node), args.Error(1)
9393
}
9494

95+
func (m *gceManagerMock) CreateInstances(mig Mig, delta int64) error {
96+
args := m.Called(mig, delta)
97+
return args.Error(0)
98+
}
99+
95100
func (m *gceManagerMock) getCpuAndMemoryForMachineType(machineType string, zone string) (cpu int64, mem int64, err error) {
96101
args := m.Called(machineType, zone)
97102
return args.Get(0).(int64), args.Get(1).(int64), args.Error(2)
@@ -266,7 +271,7 @@ func TestMig(t *testing.T) {
266271

267272
// Test IncreaseSize.
268273
gceManagerMock.On("GetMigSize", mock.AnythingOfType("*gce.gceMig")).Return(int64(2), nil).Once()
269-
gceManagerMock.On("SetMigSize", mock.AnythingOfType("*gce.gceMig"), int64(3)).Return(nil).Once()
274+
gceManagerMock.On("CreateInstances", mock.AnythingOfType("*gce.gceMig"), int64(1)).Return(nil).Once()
270275
err = mig1.IncreaseSize(1)
271276
assert.NoError(t, err)
272277
mock.AssertExpectationsForObjects(t, gceManagerMock)

cluster-autoscaler/cloudprovider/gce/gce_manager.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ type GceManager interface {
9595
SetMigSize(mig Mig, size int64) error
9696
// DeleteInstances deletes the given instances. All instances must be controlled by the same MIG.
9797
DeleteInstances(instances []GceRef) error
98+
// CreateInstances creates delta new instances in a given mig.
99+
CreateInstances(mig Mig, delta int64) error
98100
}
99101

100102
type gceManagerImpl struct {
@@ -289,6 +291,22 @@ func (m *gceManagerImpl) Refresh() error {
289291
return m.forceRefresh()
290292
}
291293

294+
func (m *gceManagerImpl) CreateInstances(mig Mig, delta int64) error {
295+
if delta == 0 {
296+
return nil
297+
}
298+
instances, err := m.GetMigNodes(mig)
299+
if err != nil {
300+
return err
301+
}
302+
instancesNames := make([]string, 0, len(instances))
303+
for _, ins := range instances {
304+
instancesNames = append(instancesNames, ins.Id)
305+
}
306+
m.cache.InvalidateMigTargetSize(mig.GceRef())
307+
return m.GceService.CreateInstances(mig.GceRef(), delta, instancesNames)
308+
}
309+
292310
func (m *gceManagerImpl) forceRefresh() error {
293311
m.clearMachinesCache()
294312
if err := m.fetchAutoMigs(); err != nil {

cluster-autoscaler/cloudprovider/gce/gce_manager_test.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1530,3 +1530,49 @@ func TestParseMIGAutoDiscoverySpecs(t *testing.T) {
15301530
})
15311531
}
15321532
}
1533+
1534+
const createInstancesResponse = `{
1535+
"kind": "compute#operation",
1536+
"id": "2890052495600280364",
1537+
"name": "operation-1624366531120-5c55a4e128c15-fc5daa90-e1ef6c32",
1538+
"zone": "https://www.googleapis.com/compute/v1/projects/project1/zones/us-central1-b",
1539+
"operationType": "compute.instanceGroupManagers.createInstances",
1540+
"targetLink": "https://www.googleapis.com/compute/v1/projects/project1/zones/us-central1-b/instanceGroupManagers/gke-cluster-1-default-pool-e25725dc-grp",
1541+
"targetId": "7836594831806456968",
1542+
"status": "DONE",
1543+
"user": "user@example.com",
1544+
"progress": 100,
1545+
"insertTime": "2021-06-22T05:55:31.903-07:00",
1546+
"startTime": "2021-06-22T05:55:31.907-07:00",
1547+
"selfLink": "https://www.googleapis.com/compute/v1/projects/project1/zones/us-central1-b/operations/operation-1624366531120-5c55a4e128c15-fc5daa90-e1ef6c32"
1548+
}`
1549+
1550+
const createInstancesOperationResponse = `{
1551+
"kind": "compute#operation",
1552+
"id": "2890052495600280364",
1553+
"name": "operation-1624366531120-5c55a4e128c15-fc5daa90-e1ef6c32",
1554+
"zone": "https://www.googleapis.com/compute/v1/projects/project1/zones/us-central1-b",
1555+
"operationType": "compute.instanceGroupManagers.createInstances",
1556+
"targetLink": "https://www.googleapis.com/compute/v1/projects/project1/zones/us-central1-b/instanceGroupManagers/gke-cluster-1-default-pool-e25725dc-grp",
1557+
"targetId": "7836594831806456968",
1558+
"status": "DONE",
1559+
"user": "user@example.com",
1560+
"progress": 100,
1561+
"insertTime": "2021-06-22T05:55:31.903-07:00",
1562+
"startTime": "2021-06-22T05:55:31.907-07:00",
1563+
"selfLink": "https://www.googleapis.com/compute/v1/projects/project1/zones/us-central1-b/operations/operation-1624366531120-5c55a4e128c15-fc5daa90-e1ef6c32"
1564+
}`
1565+
1566+
func TestAppendInstances(t *testing.T) {
1567+
server := NewHttpServerMock()
1568+
defer server.Close()
1569+
g := newTestGceManager(t, server.URL, false)
1570+
1571+
defaultPoolMig := setupTestDefaultPool(g, true)
1572+
server.On("handle", "/project1/zones/us-central1-b/instanceGroupManagers/gke-cluster-1-default-pool/listManagedInstances").Return(buildFourRunningInstancesOnDefaultMigManagedInstancesResponse(zoneB)).Once()
1573+
server.On("handle", fmt.Sprintf("/project1/zones/us-central1-b/instanceGroupManagers/%v/createInstances", defaultPoolMig.gceRef.Name)).Return(createInstancesResponse).Once()
1574+
server.On("handle", "/project1/zones/us-central1-b/operations/operation-1624366531120-5c55a4e128c15-fc5daa90-e1ef6c32").Return(createInstancesOperationResponse).Once()
1575+
err := g.CreateInstances(defaultPoolMig, 2)
1576+
assert.NoError(t, err)
1577+
mock.AssertExpectationsForObjects(t, server)
1578+
}

cluster-autoscaler/cloudprovider/hetzner/README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,17 @@ The cluster autoscaler for Hetzner Cloud scales worker nodes.
55
# Configuration
66

77
`HCLOUD_TOKEN` Required Hetzner Cloud token.
8+
89
`HCLOUD_CLOUD_INIT` Base64 encoded Cloud Init yaml with commands to join the cluster, Sample [examples/cloud-init.txt for (Kubernetes 1.20.1)](examples/cloud-init.txt)
9-
`HCLOUD_IMAGE` Defaults to `ubuntu-20.04`, @see https://docs.hetzner.cloud/#images
10+
11+
`HCLOUD_IMAGE` Defaults to `ubuntu-20.04`, @see https://docs.hetzner.cloud/#images. You can also use an image ID here (e.g. `15512617`), or a label selector associated with a custom snapshot (e.g. `customized_ubuntu=true`). The most recent snapshot will be used in the latter case.
12+
1013
`HCLOUD_NETWORK` Defaults to empty. The name of the network that is used in the cluster, @see https://docs.hetzner.cloud/#networks
14+
1115
`HCLOUD_SSH_KEY` Defaults to empty. This SSH key will have access to the freshly created server, @see https://docs.hetzner.cloud/#ssh-keys
16+
1217
Node groups must be defined with the `--nodes=<min-servers>:<max-servers>:<instance-type>:<region>:<name>` flag.
18+
1319
Multiple flags will create multiple node pools. For example:
1420
```
1521
--nodes=1:10:CPX51:FSN1:pool1

cluster-autoscaler/cloudprovider/hetzner/hetzner_manager.go

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ type hetznerManager struct {
3939
nodeGroups map[string]*hetznerNodeGroup
4040
apiCallContext context.Context
4141
cloudInit string
42-
image string
42+
image *hcloud.Image
4343
sshKey *hcloud.SSHKey
4444
network *hcloud.Network
4545
}
@@ -55,18 +55,43 @@ func newManager() (*hetznerManager, error) {
5555
return nil, errors.New("`HCLOUD_CLOUD_INIT` is not specified")
5656
}
5757

58-
image := os.Getenv("HCLOUD_IMAGE")
59-
if image == "" {
60-
image = "ubuntu-20.04"
61-
}
62-
6358
client := hcloud.NewClient(hcloud.WithToken(token))
6459
ctx := context.Background()
6560
cloudInit, err := base64.StdEncoding.DecodeString(cloudInitBase64)
6661
if err != nil {
6762
return nil, fmt.Errorf("failed to parse cloud init error: %s", err)
6863
}
6964

65+
imageName := os.Getenv("HCLOUD_IMAGE")
66+
if imageName == "" {
67+
imageName = "ubuntu-20.04"
68+
}
69+
70+
// Search for an image ID corresponding to the supplied HCLOUD_IMAGE env
71+
// variable. This value can either be an image ID itself (an int), a name
72+
// (e.g. "ubuntu-20.04"), or a label selector associated with an image
73+
// snapshot. In the latter case it will use the most recent snapshot.
74+
image, _, err := client.Image.Get(ctx, imageName)
75+
if err != nil {
76+
return nil, fmt.Errorf("unable to find image %s: %v", imageName, err)
77+
}
78+
if image == nil {
79+
images, err := client.Image.AllWithOpts(ctx, hcloud.ImageListOpts{
80+
Type: []hcloud.ImageType{hcloud.ImageTypeSnapshot},
81+
Status: []hcloud.ImageStatus{hcloud.ImageStatusAvailable},
82+
Sort: []string{"created:desc"},
83+
ListOpts: hcloud.ListOpts{
84+
LabelSelector: imageName,
85+
},
86+
})
87+
88+
if err != nil || len(images) == 0 {
89+
return nil, fmt.Errorf("unable to find image %s: %v", imageName, err)
90+
}
91+
92+
image = images[0]
93+
}
94+
7095
var network *hcloud.Network
7196
networkName := os.Getenv("HCLOUD_NETWORK")
7297

0 commit comments

Comments
 (0)