From 65ebd8db83a03aa75497f0b6f62497b87934e0d9 Mon Sep 17 00:00:00 2001 From: "jesse.millan" Date: Mon, 2 Aug 2021 16:46:52 -0700 Subject: [PATCH] OCI provider based on instance-pools and instance-configurations. --- cluster-autoscaler/README.md | 2 + .../cloudprovider/builder/builder_all.go | 8 +- .../cloudprovider/builder/builder_oci.go | 43 ++ .../cloudprovider/cloud_provider.go | 2 + cluster-autoscaler/cloudprovider/oci/OWNERS | 5 + .../cloudprovider/oci/README.md | 220 ++++++++ .../oci/examples/instance-details.json | 19 + .../oci-ip-cluster-autoscaler-w-config.yaml | 174 ++++++ ...ci-ip-cluster-autoscaler-w-principals.yaml | 163 ++++++ .../oci/examples/placement-config.json | 6 + .../cloudprovider/oci/oci_cloud_provider.go | 175 ++++++ .../cloudprovider/oci/oci_instance_pool.go | 234 ++++++++ .../oci/oci_instance_pool_cache.go | 363 +++++++++++++ .../oci/oci_instance_pool_manager.go | 513 ++++++++++++++++++ .../oci/oci_instance_pool_manager_test.go | 511 +++++++++++++++++ .../cloudprovider/oci/oci_ref.go | 66 +++ .../cloudprovider/oci/oci_shape.go | 136 +++++ .../cloudprovider/oci/oci_shape_test.go | 237 ++++++++ .../cloudprovider/oci/oci_util.go | 211 +++++++ .../cloudprovider/oci/oci_util_test.go | 12 + hack/boilerplate/boilerplate.py | 3 +- hack/verify-spelling.sh | 2 +- 22 files changed, 3101 insertions(+), 4 deletions(-) create mode 100644 cluster-autoscaler/cloudprovider/builder/builder_oci.go create mode 100644 cluster-autoscaler/cloudprovider/oci/OWNERS create mode 100644 cluster-autoscaler/cloudprovider/oci/README.md create mode 100644 cluster-autoscaler/cloudprovider/oci/examples/instance-details.json create mode 100644 cluster-autoscaler/cloudprovider/oci/examples/oci-ip-cluster-autoscaler-w-config.yaml create mode 100644 cluster-autoscaler/cloudprovider/oci/examples/oci-ip-cluster-autoscaler-w-principals.yaml create mode 100644 cluster-autoscaler/cloudprovider/oci/examples/placement-config.json create mode 100644 cluster-autoscaler/cloudprovider/oci/oci_cloud_provider.go create mode 100644 cluster-autoscaler/cloudprovider/oci/oci_instance_pool.go create mode 100644 cluster-autoscaler/cloudprovider/oci/oci_instance_pool_cache.go create mode 100644 cluster-autoscaler/cloudprovider/oci/oci_instance_pool_manager.go create mode 100644 cluster-autoscaler/cloudprovider/oci/oci_instance_pool_manager_test.go create mode 100644 cluster-autoscaler/cloudprovider/oci/oci_ref.go create mode 100644 cluster-autoscaler/cloudprovider/oci/oci_shape.go create mode 100644 cluster-autoscaler/cloudprovider/oci/oci_shape_test.go create mode 100644 cluster-autoscaler/cloudprovider/oci/oci_util.go create mode 100644 cluster-autoscaler/cloudprovider/oci/oci_util_test.go diff --git a/cluster-autoscaler/README.md b/cluster-autoscaler/README.md index 0f20bc6a4560..5d9148b34c25 100644 --- a/cluster-autoscaler/README.md +++ b/cluster-autoscaler/README.md @@ -25,6 +25,7 @@ You should also take a look at the notes and "gotchas" for your specific cloud p * [IonosCloud](./cloudprovider/ionoscloud/README.md) * [OVHcloud](./cloudprovider/ovhcloud/README.md) * [Linode](./cloudprovider/linode/README.md) +* [OracleCloud](./cloudprovider/oci/README.md) * [ClusterAPI](./cloudprovider/clusterapi/README.md) * [BizflyCloud](./cloudprovider/bizflycloud/README.md) @@ -165,5 +166,6 @@ Supported cloud providers: * Equinix Metal https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/packet/README.md * OVHcloud 
https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/ovhcloud/README.md * Linode https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/linode/README.md +* OCI https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/oci/README.md * Hetzner https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/hetzner/README.md * Cluster API https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/clusterapi/README.md diff --git a/cluster-autoscaler/cloudprovider/builder/builder_all.go b/cluster-autoscaler/cloudprovider/builder/builder_all.go index 190f58ec344d..107d1fe06379 100644 --- a/cluster-autoscaler/cloudprovider/builder/builder_all.go +++ b/cluster-autoscaler/cloudprovider/builder/builder_all.go @@ -1,5 +1,5 @@ -//go:build !gce && !aws && !azure && !kubemark && !alicloud && !magnum && !digitalocean && !clusterapi && !huaweicloud && !ionoscloud && !linode && !hetzner && !bizflycloud && !brightbox && !packet -// +build !gce,!aws,!azure,!kubemark,!alicloud,!magnum,!digitalocean,!clusterapi,!huaweicloud,!ionoscloud,!linode,!hetzner,!bizflycloud,!brightbox,!packet +//go:build !gce && !aws && !azure && !kubemark && !alicloud && !magnum && !digitalocean && !clusterapi && !huaweicloud && !ionoscloud && !linode && !hetzner && !bizflycloud && !brightbox && !packet && !oci +// +build !gce,!aws,!azure,!kubemark,!alicloud,!magnum,!digitalocean,!clusterapi,!huaweicloud,!ionoscloud,!linode,!hetzner,!bizflycloud,!brightbox,!packet,!oci /* Copyright 2018 The Kubernetes Authors. @@ -37,6 +37,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ionoscloud" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/linode" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/magnum" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ovhcloud" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/packet" "k8s.io/autoscaler/cluster-autoscaler/config" @@ -55,6 +56,7 @@ var AvailableCloudProviders = []string{ cloudprovider.ExoscaleProviderName, cloudprovider.HuaweicloudProviderName, cloudprovider.HetznerProviderName, + cloudprovider.OracleCloudProviderName, cloudprovider.OVHcloudProviderName, cloudprovider.ClusterAPIProviderName, cloudprovider.IonoscloudProviderName, @@ -105,6 +107,8 @@ func buildCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGro return ionoscloud.BuildIonosCloud(opts, do, rl) case cloudprovider.LinodeProviderName: return linode.BuildLinode(opts, do, rl) + case cloudprovider.OracleCloudProviderName: + return oci.BuildOCI(opts, do, rl) } return nil } diff --git a/cluster-autoscaler/cloudprovider/builder/builder_oci.go b/cluster-autoscaler/cloudprovider/builder/builder_oci.go new file mode 100644 index 000000000000..55756f147d12 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/builder/builder_oci.go @@ -0,0 +1,43 @@ +//go:build oci +// +build oci + +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package builder + +import ( + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci" + "k8s.io/autoscaler/cluster-autoscaler/config" +) + +// AvailableCloudProviders supported by the cloud provider builder. +var AvailableCloudProviders = []string{ + cloudprovider.OracleCloudProviderName, +} + +// DefaultCloudProvider for oci-only build is oci. +const DefaultCloudProvider = cloudprovider.OracleCloudProviderName + +func buildCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider { + switch opts.CloudProviderName { + case cloudprovider.OracleCloudProviderName: + return oci.BuildOCI(opts, do, rl) + } + + return nil +} diff --git a/cluster-autoscaler/cloudprovider/cloud_provider.go b/cluster-autoscaler/cloudprovider/cloud_provider.go index b2d1284062ce..39ef7360d66a 100644 --- a/cluster-autoscaler/cloudprovider/cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/cloud_provider.go @@ -60,6 +60,8 @@ const ( HuaweicloudProviderName = "huaweicloud" // IonoscloudProviderName gets the provider name of ionoscloud IonoscloudProviderName = "ionoscloud" + // OracleCloudProviderName gets the provider name of oci + OracleCloudProviderName = "oci" // OVHcloudProviderName gets the provider name of ovhcloud OVHcloudProviderName = "ovhcloud" // LinodeProviderName gets the provider name of linode diff --git a/cluster-autoscaler/cloudprovider/oci/OWNERS b/cluster-autoscaler/cloudprovider/oci/OWNERS new file mode 100644 index 000000000000..62122c4ebdbf --- /dev/null +++ b/cluster-autoscaler/cloudprovider/oci/OWNERS @@ -0,0 +1,5 @@ +approvers: +#- jlamillan +reviewers: +#- jlamillan +#- ericrrath diff --git a/cluster-autoscaler/cloudprovider/oci/README.md b/cluster-autoscaler/cloudprovider/oci/README.md new file mode 100644 index 000000000000..8ef1e04d2b9d --- /dev/null +++ b/cluster-autoscaler/cloudprovider/oci/README.md @@ -0,0 +1,220 @@ +# Cluster Autoscaler for Oracle Cloud Infrastructure (OCI) + +On OCI, the cluster-autoscaler utilizes [Instance Pools](https://docs.oracle.com/en-us/iaas/Content/Compute/Tasks/creatinginstancepool.htm) +combined with [Instance Configurations](https://docs.oracle.com/en-us/iaas/Content/Compute/Tasks/creatinginstanceconfig.htm) to +automatically resize a cluster's nodes based on application workload demands by: + +- adding nodes to static instance-pool(s) when a pod cannot be scheduled in the cluster because of insufficient resource constraints. +- removing nodes from an instance-pool(s) when the nodes have been underutilized for an extended time, and when pods can be placed on other existing nodes. + +The cluster-autoscaler works on a per-instance pool basis. You configure the cluster-autoscaler to tell it which instance pools to target +for expansion and contraction, the minimum and maximum sizes for each pool, and how you want the autoscaling to take place. +Instance pools not referenced in the configuration file are not managed by the cluster-autoscaler. + +## Create Required OCI Resources + +### IAM Policy (if using Instance Principals) + +We recommend setting up and configuring the cluster-autoscaler to use +[Instance Principals](https://docs.oracle.com/en-us/iaas/Content/Identity/Tasks/callingservicesfrominstances.htm) +to authenticate to the OCI APIs. 
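For contributors reading the provider source, the sketch below illustrates how instance-principal authentication is typically wired up with the vendored v43 OCI Go SDK used elsewhere in this PR. It is a minimal, hypothetical helper for orientation only (the names here are not part of the provider); the actual client construction lives in `oci_instance_pool_manager.go` and may differ.

```go
package oci

import (
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common/auth"
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core"
)

// newComputeMgmtClientWithInstancePrincipals is a hypothetical helper showing the usual
// instance-principal flow: credentials are obtained from the instance metadata service,
// so no OCI config file needs to be mounted into the container.
func newComputeMgmtClientWithInstancePrincipals(region string) (core.ComputeManagementClient, error) {
	configProvider, err := auth.InstancePrincipalConfigurationProvider()
	if err != nil {
		return core.ComputeManagementClient{}, err
	}
	client, err := core.NewComputeManagementClientWithConfigurationProvider(configProvider)
	if err != nil {
		return core.ComputeManagementClient{}, err
	}
	// The region (see OCI_REGION below) must be the one the instance pools reside in.
	client.SetRegion(region)
	return client, nil
}
```

The dynamic group and policy below are what authorize that instance principal to manage instance pools.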
+
+The following policy provides the minimum privileges necessary for Cluster Autoscaler to run:
+
+1: Create a compartment-level dynamic group containing the nodes (compute instances) in the cluster:
+
+```
+All {instance.compartment.id = 'ocid1.compartment.oc1..aaaaaaaa7ey4sg3a6b5wnv5hlkjlkjadslkfjalskfjalsadfadsf'}
+```
+
+2: Create a *tenancy-level* policy to allow nodes to manage instance-pools:
+
+```
+Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to manage instance-pools in compartment <compartment-name>
+Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to manage instance-configurations in compartment <compartment-name>
+Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to manage instance-family in compartment <compartment-name>
+Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to use subnets in compartment <compartment-name>
+Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to read virtual-network-family in compartment <compartment-name>
+Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to use vnics in compartment <compartment-name>
+Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to inspect compartments in compartment <compartment-name>
+```
+
+### Instance Pool and Instance Configurations
+
+Before you deploy the cluster-autoscaler on OCI, you need to create one or more static Instance Pools and an Instance
+Configuration with `cloud-init` specified in the launch details so new nodes automatically join the existing cluster on
+start up.
+
+Advanced Instance Pool and Instance Configuration setup is out of scope for this document. However, a
+working [instance-details.json](./examples/instance-details.json) and [placement-config.json](./examples/placement-config.json)
+([example](./examples/instance-details.json) based on Rancher [RKE](https://rancher.com/products/rke/)) using [cloud-init](https://cloudinit.readthedocs.io/en/latest/) are
+included in the examples, which can be applied using the [OCI CLI](https://docs.oracle.com/en-us/iaas/Content/API/SDKDocs/cliinstall.htm).
+
+Modify the `user_data` in the example [instance-details.json](./examples/instance-details.json) to suit your needs, re-base64 encode, and apply:
+
+```bash
+# e.g. cloud-init. Modify, re-encode, and update user_data in instance-details.json to suit your needs:
+
+$ echo IyEvYmluL2Jhc2gKdG91hci9saWIvYXB0L....1yZXRyeSAzIGhG91Y2ggL3RtcC9jbG91ZC1pbml0LWZpbmlzaGVkCg== | base64 -D
+
+#!/bin/bash
+groupadd docker
+usermod -aG docker ubuntu
+curl --retry 3 https://releases.rancher.com/install-docker/20.10.sh | sh
+docker run -d --privileged --restart=unless-stopped --net=host -v /etc/kubernetes:/etc/kubernetes -v /var/run:/var/run rancher/rancher-agent:v2.5.5 --server https://my-rancher.com --token xxxxxx --worker
+```
+
+```bash
+$ oci compute-management instance-configuration create --instance-details file://./cluster-autoscaler/cloudprovider/oci/examples/instance-details.json --compartment-id ocid1.compartment.oc1..aaaaaaaa7ey4sg3a6b5wnv5hlkjlkjadslkfjalskfjalsadfadsf --query 'data.id' --raw-output
+
+ocid1.instanceconfiguration.oc1.phx.aaaaaaaa3neul67zb3goz43lybosc2o3fv67gj3zazexbb3vfcbypmpznhtq
+
+$ oci compute-management instance-pool create --compartment-id ocid1.compartment.oc1..aaaaaaaa7ey4sg3a6b5wnv5hlkjlkjadslkfjalskfjalsadfadsf --instance-configuration-id ocid1.instanceconfiguration.oc1.phx.aaaaaaaa3neul67zb3goz43lybosc2o3fv67gj3zazexbb3vfcbypmpznhtq --placement-configurations file://./cluster-autoscaler/cloudprovider/oci/examples/placement-config.json --size 0 --wait-for-state RUNNING --query 'data.id' --raw-output
+
+Action completed.
Waiting until the resource has entered state: ('RUNNING',)
+ocid1.instancepool.oc1.phx.aaaaaaaayd5bxwrzomzr2b2enchm4mof7uhw7do5hc2afkhks576syikk2ca
+```
+
+## Configure Autoscaler
+
+Use the `--nodes=<min>:<max>:<instancepool-ocid>` parameter to specify which pre-existing instance
+pools to target for automatic expansion and contraction, the minimum and maximum sizes for each instance-pool, and how you
+want the autoscaling to take place. Instance pools not referenced in the configuration are not managed by the
+autoscaler. In this parameter:
+
+- `<min>` is the minimum number of nodes allowed in the instance-pool.
+- `<max>` is the maximum number of nodes allowed in the instance-pool. Make sure the maximum number of nodes you specify does not exceed the tenancy limits for the node shape defined for the instance-pool.
+- `<instancepool-ocid>` is the OCID of a pre-existing instance-pool.
+
+If you are authenticating via instance principals, be sure the `OCI_REGION` environment variable is set to the correct
+value in the deployment, e.g.:
+
+```yaml
+env:
+  - name: OCI_REGION
+    value: "us-phoenix-1"
+```
+
+### Optional cloud-config file
+
+An _optional_ cloud-config file can be mounted at the path specified by `--cloud-config`.
+
+An example of passing optional configuration via a `cloud-config` file that configures the cluster-autoscaler to
+authenticate via instance principals and to only see configured instance-pools in a single compartment:
+
+```ini
+[Global]
+compartment-id = ocid1.compartment.oc1..aaaaaaaa7ey4sg3a6b5wnv5hlkjlkjadslkfjalskfjalsadfadsf
+region = uk-london-1
+use-instance-principals = true
+```
+
+### Environment variables
+
+Configuration via environment variables:
+
+- `OCI_USE_INSTANCE_PRINCIPAL` - Whether to use Instance Principals for authentication rather than expecting an OCI config file to be mounted in the container. Defaults to false.
+- `OCI_REGION` - **Required** when using Instance Principals, e.g. `OCI_REGION=us-phoenix-1`. See [region list](https://docs.oracle.com/en-us/iaas/Content/General/Concepts/regions.htm) for identifiers.
+- `OCI_COMPARTMENT_ID` - Restrict the cluster-autoscaler to instance-pools in a single compartment. When unset, the cluster-autoscaler will manage each specified instance-pool no matter which compartment it is in.
+- `OCI_REFRESH_INTERVAL` - Optional refresh interval used to sync the internal cache with the OCI API. Defaults to `2m`.
+
+## Deployment
+
+### Create OCI config secret (only if _not_ using Instance Principals)
+
+If you are opting for a file based OCI configuration (as opposed to instance principals), the OCI config file and private key need to be mounted into the container filesystem using a secret volume.
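As a counterpart to the instance-principal sketch earlier, the following minimal, hypothetical helper illustrates how a config file mounted this way (e.g. at `/root/.oci/config`) is typically consumed with the vendored v43 OCI Go SDK; the provider's actual code path may instead rely on the SDK's default configuration provider.

```go
package oci

import (
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core"
)

// newComputeMgmtClientFromFile is a hypothetical helper: it builds a management client
// from an OCI config file mounted into the container by the secret volume described above.
// The key_file entry inside that config must point at the private key as it is seen from
// inside the container (e.g. /root/.oci/api_key.pem).
func newComputeMgmtClientFromFile(configPath, privateKeyPassphrase string) (core.ComputeManagementClient, error) {
	configProvider, err := common.ConfigurationProviderFromFile(configPath, privateKeyPassphrase)
	if err != nil {
		return core.ComputeManagementClient{}, err
	}
	return core.NewComputeManagementClientWithConfigurationProvider(configProvider)
}
```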
+
+The following policy is required to run the cluster-autoscaler when the specified user is not an administrator:
+
+```
+Allow group acme-oci-cluster-autoscaler-user-grp to manage instance-pools in compartment <compartment-name>
+Allow group acme-oci-cluster-autoscaler-user-grp to manage instance-configurations in compartment <compartment-name>
+Allow group acme-oci-cluster-autoscaler-user-grp to manage instance-family in compartment <compartment-name>
+Allow group acme-oci-cluster-autoscaler-user-grp to use subnets in compartment <compartment-name>
+Allow group acme-oci-cluster-autoscaler-user-grp to read virtual-network-family in compartment <compartment-name>
+Allow group acme-oci-cluster-autoscaler-user-grp to use vnics in compartment <compartment-name>
+Allow group acme-oci-cluster-autoscaler-user-grp to inspect compartments in compartment <compartment-name>
+```
+
+Example OCI config file (note `key_file` is the expected path and filename of the OCI API private-key from the perspective of the container):
+
+```bash
+$ cat ~/.oci/config
+
+[DEFAULT]
+user=ocid1.user.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+fingerprint=xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx
+key_file=/root/.oci/api_key.pem
+tenancy=ocid1.tenancy.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+pass_phrase=
+region=us-phoenix-1
+```
+
+Create the secret (the `api_key.pem` key name is required):
+
+```bash
+kubectl create secret generic oci-config -n kube-system --from-file=/Users/me/.oci/config --from-file=api_key.pem=/Users/me/.oci/my_api_key.pem
+```
+
+### Example Deployment
+
+Two example deployments of the cluster-autoscaler that manage instance-pools are located in the [examples](./examples/) directory.
+[oci-ip-cluster-autoscaler-w-principals.yaml](./examples/oci-ip-cluster-autoscaler-w-principals.yaml) uses
+instance principals, and [oci-ip-cluster-autoscaler-w-config.yaml](./examples/oci-ip-cluster-autoscaler-w-config.yaml) uses file-based
+authentication.
+
+Note that the three specified instance-pools are intended to correspond to different availability domains in the Phoenix (us-phoenix-1) region:
+
+```yaml
+...
+    containers:
+      - image: docker.io/jlamillan/autoscaler:oci-pr-rc6
+        name: cluster-autoscaler
+        command:
+          - ./cluster-autoscaler
+          - --cloud-provider=oci
+          - --nodes=1:10:ocid1.instancepool.oc1.phx.aaaaaaaaqdxy35acq32zjfvkybjmvlbdgj6q3m55qkwwctxhsprmz633k62q
+          - --nodes=0:10:ocid1.instancepool.oc1.phx.aaaaaaaazldzcu4mi5spz56upbtwnsynz2nk6jvmx7zi4hsta4uggxbulbua
+          - --nodes=0:20:ocid1.instancepool.oc1.phx.aaaaaaaal3jhoc32ljsfaeif4x2ssfa2a63oehjgqryiueivieee6yaqbkia
+```
+
+Instance principal based authentication deployment:
+
+Substitute the OCIDs of _your_ instance pool(s) and set the `OCI_REGION` environment variable to the region where your
+instance pool(s) reside before applying the deployment:
+
+```
+kubectl apply -f ./cloudprovider/oci/examples/oci-ip-cluster-autoscaler-w-principals.yaml
+```
+
+OCI config file based authentication deployment:
+
+```
+kubectl apply -f ./cloudprovider/oci/examples/oci-ip-cluster-autoscaler-w-config.yaml
+```
+
+## Common Notes and Gotchas:
+- You must configure the instance configuration of new compute instances to join the existing cluster when they start. This can
+  be accomplished with `cloud-init` / `user-data` in the instance launch configuration [example](./examples/instance-details.json).
+- If opting for a file based OCI configuration (as opposed to instance principals), ensure the OCI config and private-key
+  PEM files are mounted into the container filesystem at the [expected path](https://docs.oracle.com/en-us/iaas/Content/API/Concepts/sdkconfig.htm). Note the `key_file` option in the example `~/.oci/config` above references a private-key file mounted into the container by the example [volumeMount](./examples/oci-ip-cluster-autoscaler-w-config.yaml#L165).
+- Make sure the maximum number of nodes you specify does not exceed the limit for the instance-pool or the tenancy.
+- We recommend creating multiple instance-pools, each with a single availability domain specified, so new nodes can be created to meet
+  affinity requirements across availability domains.
+- If you are authenticating via instance principals, be sure the `OCI_REGION` environment variable is set to the correct
+  value in the deployment.
+- The cluster-autoscaler will not automatically remove scaled-down (terminated) `Node` objects from the Kubernetes API
+  without assistance from the [OCI Cloud Controller Manager](https://github.com/oracle/oci-cloud-controller-manager) (CCM).
+  If scaled-down nodes are lingering in your cluster in the `NotReady` status, ensure the OCI CCM is installed and running
+  correctly (`oci-cloud-controller-manager`).
+- Avoid manually changing node pools that are managed by the cluster-autoscaler. For example, do not add or remove nodes
+  using kubectl, the Console, the Oracle Cloud Infrastructure CLI, or the API.
+- `--node-group-auto-discovery` and `--node-autoprovisioning-enabled=true` are not supported.
+- We set a `nvidia.com/gpu:NoSchedule` taint on nodes in a GPU-enabled instance-pool.
+
+## Helpful links
+- [Oracle Cloud Infrastructure home](https://cloud.oracle.com)
+- [OCI instance configuration documentation](https://docs.oracle.com/en-us/iaas/Content/Compute/Tasks/creatinginstanceconfig.htm)
+- [instance principals](https://docs.oracle.com/en-us/iaas/Content/Identity/Tasks/callingservicesfrominstances.htm)
+- [OCI Cloud Controller Manager](https://github.com/oracle/oci-cloud-controller-manager)
+- [OCI Container Storage Interface driver](https://github.com/oracle/oci-cloud-controller-manager/blob/master/container-storage-interface.md)
diff --git a/cluster-autoscaler/cloudprovider/oci/examples/instance-details.json b/cluster-autoscaler/cloudprovider/oci/examples/instance-details.json
new file mode 100644
index 000000000000..fc4f25c93dc5
--- /dev/null
+++ b/cluster-autoscaler/cloudprovider/oci/examples/instance-details.json
@@ -0,0 +1,19 @@
+{
+  "instanceType": "compute",
+  "launchDetails": {
+    "compartmentId": "ocid1.compartment.oc1..aaaaaaaa7ey4sg3a6b5wnv5hlkjlkjadslkfjalskfjalsadfadsf",
+    "shape": "VM.Standard2.8",
+    "sourceDetails":
+    {
+      "imageId": "ocid1.image.oc1.phx.aaaaaaaa55tzajot4gbiw2p7gquwjnvfzrasosbrq4h6wywkff4zjosp2fia",
+      "sourceType": "image",
+      "bootVolumeSizeInGBs": 100
+    },
+    "metadata": {
+      "user_data":
"IyEvYmluL2Jhc2gKdG91Y2ggL3RtcC9jbG91ZC1pbml0LXN0YXJ0ZWQKaXB0YWJsZXMgLUYKZ3JvdXBhZGQgZG9ja2VyCnVzZXJtb2QgLWFHIGRvY2tlciB1YnVudHUKcm0gL3Zhci9saWIvYXB0L2xpc3RzL2xvY2sKcGtpbGwgLTkgLWYgYXB0CmN1cmwgLS1yZXRyeSAzIGh0dHBzOi8vcmVsZWFzZXMucmFuY2hlci5jb20vaW5zdGFsbC1kb2NrZXIvMjAuMTAuc2ggfCBzaApkb2NrZXIgcnVuIC1kIC0tcHJpdmlsZWdlZCAtLXJlc3RhcnQ9dW5sZXNzLXN0b3BwZWQgLS1uZXQ9aG9zdCAtdiAvZXRjL2t1YmVybmV0ZXM6L2V0Yy9rdWJlcm5ldGVzIC12IC92YXIvcnVuOi92YXIvcnVuIHJhbmNoZXIvcmFuY2hlci1hZ2VudDp2Mi41LjUgLS1zZXJ2ZXIgaHR0cHM6Ly9teS1yYW5jaGVyLmNvbSAtLXRva2VuIHh4eHh4eCAgLS13b3JrZXIKdG91Y2ggL3RtcC9jbG91ZC1pbml0LWZpbmlzaGVkCg==" + }, + "createVnicDetails": { + "assignPublicIp": true + } + } +} \ No newline at end of file diff --git a/cluster-autoscaler/cloudprovider/oci/examples/oci-ip-cluster-autoscaler-w-config.yaml b/cluster-autoscaler/cloudprovider/oci/examples/oci-ip-cluster-autoscaler-w-config.yaml new file mode 100644 index 000000000000..c7430a6ae82b --- /dev/null +++ b/cluster-autoscaler/cloudprovider/oci/examples/oci-ip-cluster-autoscaler-w-config.yaml @@ -0,0 +1,174 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler + name: cluster-autoscaler + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cluster-autoscaler + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +rules: + - apiGroups: [""] + resources: ["events", "endpoints"] + verbs: ["create", "patch"] + - apiGroups: [""] + resources: ["pods/eviction"] + verbs: ["create"] + - apiGroups: [""] + resources: ["pods/status"] + verbs: ["update"] + - apiGroups: [""] + resources: ["endpoints"] + resourceNames: ["cluster-autoscaler"] + verbs: ["get", "update"] + - apiGroups: [""] + resources: ["nodes"] + verbs: ["watch", "list", "get", "update"] + - apiGroups: [""] + resources: ["namepaces"] + verbs: ["list"] + - apiGroups: [""] + resources: + - "pods" + - "services" + - "replicationcontrollers" + - "persistentvolumeclaims" + - "persistentvolumes" + verbs: ["watch", "list", "get"] + - apiGroups: ["extensions"] + resources: ["replicasets", "daemonsets"] + verbs: ["watch", "list", "get"] + - apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["watch", "list"] + - apiGroups: ["apps"] + resources: ["statefulsets", "replicasets", "daemonsets"] + verbs: ["watch", "list", "get"] + - apiGroups: ["storage.k8s.io"] + resources: ["storageclasses", "csinodes"] + verbs: ["get", "list", "watch"] + - apiGroups: ["batch"] + resources: ["jobs", "cronjobs"] + verbs: ["watch", "list", "get"] + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["create"] + - apiGroups: ["coordination.k8s.io"] + resourceNames: ["cluster-autoscaler"] + resources: ["leases"] + verbs: ["get", "update"] + - apiGroups: ["storage.k8s.io"] + resources: ["csidrivers", "csistoragecapacities"] + verbs: ["get", "list"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +rules: + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["create","list","watch"] + - apiGroups: [""] + resources: ["configmaps"] + resourceNames: + - "cluster-autoscaler-status" + - "cluster-autoscaler-priority-expander" + verbs: ["delete", "get", "update", "watch"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: 
cluster-autoscaler + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-autoscaler +subjects: + - kind: ServiceAccount + name: cluster-autoscaler + namespace: kube-system + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: cluster-autoscaler +subjects: + - kind: ServiceAccount + name: cluster-autoscaler + namespace: kube-system +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + app: cluster-autoscaler +spec: + replicas: 1 + selector: + matchLabels: + app: cluster-autoscaler + template: + metadata: + labels: + app: cluster-autoscaler + spec: + serviceAccountName: cluster-autoscaler + containers: + - image: docker.io/jlamillan/autoscaler:oci-pr-rc6 + name: cluster-autoscaler + command: + - ./cluster-autoscaler + - --v=5 + - --logtostderr=true + - --cloud-provider=oci + - --nodes=0:10:ocid1.instancepool.oc1.phx.aaaaaaaaqdxy35acq32zjfvkybjmvlbdgj6q3m55qkwwctxhsprmz633k62q + - --nodes=0:10:ocid1.instancepool.oc1.phx.aaaaaaaazldzcu4mi5spz56upbtwnsynz2nk6jvmx7zi4hsta4uggxbulbua + - --nodes=0:10:ocid1.instancepool.oc1.phx.aaaaaaaal3jhoc32ljsfaeif4x2ssfa2a63oehjgqryiueivieee6yaqbkia + - --scale-down-delay-after-add=1m + - --scale-down-unneeded-time=1m + - --namespace=kube-system + imagePullPolicy: "Always" + env: + - name: OCI_USE_INSTANCE_PRINCIPAL + value: "false" + volumeMounts: + - name: oci-config-vol + mountPath: "/root/.oci" + readOnly: true + volumes: + - name: oci-config-vol + secret: + secretName: oci-config + items: + - key: config + path: config + - key: api_key.pem + path: api_key.pem diff --git a/cluster-autoscaler/cloudprovider/oci/examples/oci-ip-cluster-autoscaler-w-principals.yaml b/cluster-autoscaler/cloudprovider/oci/examples/oci-ip-cluster-autoscaler-w-principals.yaml new file mode 100644 index 000000000000..621e4dea3f59 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/oci/examples/oci-ip-cluster-autoscaler-w-principals.yaml @@ -0,0 +1,163 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler + name: cluster-autoscaler + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cluster-autoscaler + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +rules: + - apiGroups: [""] + resources: ["events", "endpoints"] + verbs: ["create", "patch"] + - apiGroups: [""] + resources: ["pods/eviction"] + verbs: ["create"] + - apiGroups: [""] + resources: ["pods/status"] + verbs: ["update"] + - apiGroups: [""] + resources: ["endpoints"] + resourceNames: ["cluster-autoscaler"] + verbs: ["get", "update"] + - apiGroups: [""] + resources: ["nodes"] + verbs: ["watch", "list", "get", "update"] + - apiGroups: [""] + resources: ["namespaces"] + verbs: ["list"] + - apiGroups: [""] + resources: + - "pods" + - "services" + - "replicationcontrollers" + - "persistentvolumeclaims" + - "persistentvolumes" + verbs: ["watch", "list", "get"] + - apiGroups: ["extensions"] + resources: ["replicasets", "daemonsets"] + verbs: ["watch", "list", "get"] + - apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["watch", 
"list"] + - apiGroups: ["apps"] + resources: ["statefulsets", "replicasets", "daemonsets"] + verbs: ["watch", "list", "get"] + - apiGroups: ["storage.k8s.io"] + resources: ["storageclasses", "csinodes"] + verbs: ["get", "list", "watch"] + - apiGroups: ["batch"] + resources: ["jobs", "cronjobs"] + verbs: ["watch", "list", "get"] + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["create"] + - apiGroups: ["coordination.k8s.io"] + resourceNames: ["cluster-autoscaler"] + resources: ["leases"] + verbs: ["get", "update"] + - apiGroups: ["storage.k8s.io"] + resources: ["csidrivers", "csistoragecapacities"] + verbs: ["get", "list"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +rules: + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["create","list","watch"] + - apiGroups: [""] + resources: ["configmaps"] + resourceNames: + - "cluster-autoscaler-status" + - "cluster-autoscaler-priority-expander" + verbs: ["delete", "get", "update", "watch"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cluster-autoscaler + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-autoscaler +subjects: + - kind: ServiceAccount + name: cluster-autoscaler + namespace: kube-system + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: cluster-autoscaler +subjects: + - kind: ServiceAccount + name: cluster-autoscaler + namespace: kube-system +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + app: cluster-autoscaler +spec: + replicas: 1 + selector: + matchLabels: + app: cluster-autoscaler + template: + metadata: + labels: + app: cluster-autoscaler + spec: + serviceAccountName: cluster-autoscaler + containers: + - image: docker.io/jlamillan/autoscaler:oci-pr-rc6 + name: cluster-autoscaler + command: + - ./cluster-autoscaler + - --v=5 + - --logtostderr=true + - --cloud-provider=oci + - --nodes=0:10:ocid1.instancepool.oc1.phx.aaaaaaaaqdxy35acq32zjfvkybjmvlbdgj6q3m55qkwwctxhsprmz633k62q + - --nodes=0:10:ocid1.instancepool.oc1.phx.aaaaaaaazldzcu4mi5spz56upbtwnsynz2nk6jvmx7zi4hsta4uggxbulbua + - --nodes=1:10:ocid1.instancepool.oc1.phx.aaaaaaaal3jhoc32ljsfaeif4x2ssfa2a63oehjgqryiueivieee6yaqbkia + - --scale-down-delay-after-add=10m + - --scale-down-unneeded-time=10m + - --namespace=kube-system + imagePullPolicy: "Always" + env: + - name: OCI_USE_INSTANCE_PRINCIPAL + value: "true" + - name: OCI_REGION + value: "us-phoenix-1" diff --git a/cluster-autoscaler/cloudprovider/oci/examples/placement-config.json b/cluster-autoscaler/cloudprovider/oci/examples/placement-config.json new file mode 100644 index 000000000000..b253ce4e27a8 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/oci/examples/placement-config.json @@ -0,0 +1,6 @@ +[ + { + "availabilityDomain": "hXgQ:PHX-AD-2", + "primarySubnetId": "ocid1.subnet.oc1.phx.aaaaaaaaouihv645dp2xaee6w4uvx6emjwuscsrxcn3miwa6vmijtpdnqdeq" + } +] \ No newline at end of file diff --git a/cluster-autoscaler/cloudprovider/oci/oci_cloud_provider.go 
b/cluster-autoscaler/cloudprovider/oci/oci_cloud_provider.go new file mode 100644 index 000000000000..4658b7a5fc53 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/oci/oci_cloud_provider.go @@ -0,0 +1,175 @@ +/* +Copyright 2021 Oracle and/or its affiliates. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package oci + +import ( + "github.com/pkg/errors" + apiv1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/config" + caerrors "k8s.io/autoscaler/cluster-autoscaler/utils/errors" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" + "k8s.io/klog/v2" + "time" +) + +const ( + ociUseInstancePrincipalEnvVar = "OCI_USE_INSTANCE_PRINCIPAL" + ociCompartmentEnvVar = "OCI_COMPARTMENT_ID" + ociRegionEnvVar = "OCI_REGION" + ociRefreshInterval = "OCI_REFRESH_INTERVAL" + ociAnnotationCompartmentID = "oci.oraclecloud.com/compartment-id" + // ResourceGPU is the GPU resource type + ResourceGPU apiv1.ResourceName = "nvidia.com/gpu" + defaultRefreshInterval = 5 * time.Minute +) + +// OciCloudProvider implements the CloudProvider interface for OCI. It contains an +// instance pool manager to interact with OCI instance pools. +type OciCloudProvider struct { + rl *cloudprovider.ResourceLimiter + poolManager InstancePoolManager +} + +// CloudConfig holds the cloud config for OCI provider. +type CloudConfig struct { + Global struct { + RefreshInterval time.Duration `gcfg:"refresh-interval"` + CompartmentID string `gcfg:"compartment-id"` + Region string `gcfg:"region"` + UseInstancePrinciples bool `gcfg:"use-instance-principals"` + } +} + +// Name returns name of the cloud provider. +func (ocp *OciCloudProvider) Name() string { + return cloudprovider.OracleCloudProviderName +} + +// NodeGroups returns all node groups configured for this cloud provider. +func (ocp *OciCloudProvider) NodeGroups() []cloudprovider.NodeGroup { + nodePools := ocp.poolManager.GetInstancePools() + result := make([]cloudprovider.NodeGroup, 0, len(nodePools)) + for _, nodePool := range nodePools { + result = append(result, nodePool) + } + return result +} + +// NodeGroupForNode returns the node group for the given node, nil if the node +// should not be processed by cluster autoscaler, or non-nil error if such +// occurred. Must be implemented. +func (ocp *OciCloudProvider) NodeGroupForNode(n *apiv1.Node) (cloudprovider.NodeGroup, error) { + + ociRef, err := nodeToOciRef(n) + if err != nil { + return nil, err + } + + ng, err := ocp.poolManager.GetInstancePoolForInstance(ociRef) + + // this instance may not be a part of an instance pool, or it may be part of a instance pool that the autoscaler does not manage + if errors.Cause(err) == errInstanceInstancePoolNotFound { + // should not be processed by cluster autoscaler + return nil, nil + } + + return ng, err +} + +// Pricing returns pricing model for this cloud provider or error if not available. +// Implementation optional. 
+func (ocp *OciCloudProvider) Pricing() (cloudprovider.PricingModel, caerrors.AutoscalerError) { + klog.Info("Pricing called") + return nil, cloudprovider.ErrNotImplemented +} + +// GetAvailableMachineTypes getInstancePool all machine types that can be requested from the cloud provider. +// Implementation optional. +func (ocp *OciCloudProvider) GetAvailableMachineTypes() ([]string, error) { + klog.Info("GetAvailableMachineTypes called") + return nil, cloudprovider.ErrNotImplemented +} + +// NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically +// created on the cloud provider side. The node group is not returned by NodeGroups() until it is created. +// Implementation optional. +func (ocp *OciCloudProvider) NewNodeGroup(machineType string, + labels map[string]string, + systemLabels map[string]string, + taints []apiv1.Taint, + extraResources map[string]resource.Quantity, +) (cloudprovider.NodeGroup, error) { + return nil, cloudprovider.ErrNotImplemented +} + +// GetResourceLimiter returns struct containing limits (max, min) for resources (cores, memory etc.). +func (ocp *OciCloudProvider) GetResourceLimiter() (*cloudprovider.ResourceLimiter, error) { + return ocp.rl, nil +} + +// GPULabel returns the label added to nodes with GPU resource. +func (ocp *OciCloudProvider) GPULabel() string { + // No labels, only taint: nvidia.com/gpu:NoSchedule + return "" +} + +// GetAvailableGPUTypes return all available GPU types cloud provider supports. +func (ocp *OciCloudProvider) GetAvailableGPUTypes() map[string]struct{} { + return map[string]struct{}{} +} + +// Cleanup cleans up open resources before the cloud provider is destroyed, i.e. go routines etc. +func (ocp *OciCloudProvider) Cleanup() error { + return ocp.poolManager.Cleanup() +} + +// Refresh is called before every main loop and can be used to dynamically update cloud provider state. +// In particular the list of node groups returned by NodeGroups can change as a result of CloudProvider.Refresh(). +func (ocp *OciCloudProvider) Refresh() error { + return ocp.poolManager.Refresh() +} + +// BuildOCI constructs the OciCloudProvider object that implements the could provider interface (InstancePoolManager). +func BuildOCI(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) *OciCloudProvider { + + ipManager, err := CreateInstancePoolManager(opts.CloudConfig, do, createKubeClient(opts)) + if err != nil { + klog.Fatalf("Could not create OCI cloud provider: %v", err) + } + return &OciCloudProvider{ + poolManager: ipManager, + rl: rl, + } +} + +func getKubeConfig(opts config.AutoscalingOptions) *rest.Config { + klog.V(1).Infof("Using kubeconfig file: %s", opts.KubeConfigPath) + kubeConfig, err := clientcmd.BuildConfigFromFlags("", opts.KubeConfigPath) + if err != nil { + klog.Fatalf("Failed to build kubeConfig: %v", err) + } + + return kubeConfig +} + +func createKubeClient(opts config.AutoscalingOptions) kubernetes.Interface { + return kubernetes.NewForConfigOrDie(getKubeConfig(opts)) +} diff --git a/cluster-autoscaler/cloudprovider/oci/oci_instance_pool.go b/cluster-autoscaler/cloudprovider/oci/oci_instance_pool.go new file mode 100644 index 000000000000..18423804a107 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/oci/oci_instance_pool.go @@ -0,0 +1,234 @@ +/* +Copyright 2021 Oracle and/or its affiliates. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package oci + +import ( + "fmt" + "github.com/pkg/errors" + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/config" + "k8s.io/client-go/kubernetes" + "k8s.io/klog/v2" + schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" +) + +const ( + ociInstanceIDAnnotation = "oci.oraclecloud.com/instance-id" + ociInstancePoolIDAnnotation = "oci.oraclecloud.com/instancepool-id" + ociInstancePoolResourceIdent = "instancepool" +) + +// InstancePoolNodeGroup implements the NodeGroup interface using OCI instance pools. +type InstancePoolNodeGroup struct { + manager InstancePoolManager + kubeClient kubernetes.Interface + id string + minSize int + maxSize int +} + +// MaxSize returns maximum size of the instance-pool based node group. +func (ip *InstancePoolNodeGroup) MaxSize() int { + return ip.maxSize +} + +// MinSize returns minimum size of the instance-pool based node group. +func (ip *InstancePoolNodeGroup) MinSize() int { + return ip.minSize +} + +// TargetSize returns the current target size of the instance-pool based node group. It is possible that the +// number of nodes in Kubernetes is different at the moment but should be equal +// to Size() once everything stabilizes (new nodes finish startup and registration or +// removed nodes are deleted completely). Implementation required. +func (ip *InstancePoolNodeGroup) TargetSize() (int, error) { + return ip.manager.GetInstancePoolSize(*ip) +} + +// IncreaseSize increases the size of the instance-pool based node group. To delete a node you need +// to explicitly name it and use DeleteNode. This function should wait until +// instance-pool size is updated. Implementation required. +func (ip *InstancePoolNodeGroup) IncreaseSize(delta int) error { + if delta <= 0 { + return fmt.Errorf("size increase must be positive") + } + + size, err := ip.manager.GetInstancePoolSize(*ip) + if err != nil { + return err + } + + if size+delta > ip.MaxSize() { + return fmt.Errorf("size increase too large - desired:%d max:%d", size+delta, ip.MaxSize()) + } + + return ip.manager.SetInstancePoolSize(*ip, size+delta) +} + +// DeleteNodes deletes nodes from this instance-pool. Error is returned either on +// failure or if the given node doesn't belong to this instance-pool. This function +// should wait until instance-pool size is updated. Implementation required. +func (ip *InstancePoolNodeGroup) DeleteNodes(nodes []*apiv1.Node) error { + + // FYI, unregistered nodes come in as the provider id as node name. 
+ + klog.Infof("DeleteNodes called with %d node(s)", len(nodes)) + + size, err := ip.manager.GetInstancePoolSize(*ip) + if err != nil { + return err + } + + if size <= ip.MinSize() { + return fmt.Errorf("min size reached, nodes will not be deleted") + } + + refs := make([]OciRef, 0, len(nodes)) + for _, node := range nodes { + belongs, err := ip.Belongs(node) + if err != nil { + return err + } + if !belongs { + return fmt.Errorf("%s belong to a different instance-pool than %s", node.Name, ip.Id()) + } + ociRef, err := nodeToOciRef(node) + if err != nil { + return err + } + + refs = append(refs, ociRef) + } + + return ip.manager.DeleteInstances(*ip, refs) +} + +// DecreaseTargetSize decreases the target size of the instance-pool based node group. This function +// doesn't permit to delete any existing node and can be used only to reduce the +// request for new nodes that have not been yet fulfilled. Delta should be negative. +// It is assumed that cloud provider will not delete the existing nodes when there +// is an option to just decrease the target. Implementation required. +func (ip *InstancePoolNodeGroup) DecreaseTargetSize(delta int) error { + if delta >= 0 { + return fmt.Errorf("size decrease must be negative") + } + + size, err := ip.manager.GetInstancePoolSize(*ip) + if err != nil { + return err + } + + nodes, err := ip.manager.GetInstancePoolNodes(*ip) + if err != nil { + return err + } + + if size+delta < len(nodes) { + return fmt.Errorf("attempt to delete existing nodes targetSize:%d delta:%d existingNodes: %d", + size, delta, len(nodes)) + } + + return ip.manager.SetInstancePoolSize(*ip, size+delta) +} + +// Belongs returns true if the given node belongs to the InstancePoolNodeGroup. +func (ip *InstancePoolNodeGroup) Belongs(node *apiv1.Node) (bool, error) { + ref, err := nodeToOciRef(node) + if err != nil { + return false, err + } + + targetInstancePool, err := ip.manager.GetInstancePoolForInstance(ref) + if err != nil { + return false, err + } + + if targetInstancePool == nil { + return false, fmt.Errorf("%s doesn't belong to a known instance-pool", node.Name) + } + + return targetInstancePool.Id() == ip.Id(), nil +} + +// Id returns an unique identifier of the instance-pool based node group. +func (ip *InstancePoolNodeGroup) Id() string { + return ip.id +} + +// Debug returns a string containing all information regarding this instance-pool. +func (ip *InstancePoolNodeGroup) Debug() string { + return fmt.Sprintf("%s (%d:%d)", ip.Id(), ip.MinSize(), ip.MaxSize()) +} + +// Nodes returns a list of all nodes that belong to this instance-pool. +// It is required that Instance objects returned by this method have Id field set. +// Other fields are optional. +// This list should include also instances that might have not become a kubernetes node yet. +func (ip *InstancePoolNodeGroup) Nodes() ([]cloudprovider.Instance, error) { + return ip.manager.GetInstancePoolNodes(*ip) +} + +// TemplateNodeInfo returns a schedulerframework.NodeInfo structure of an empty +// (as if just started) node. This will be used in scale-up simulations to +// predict what would a new node look like if a instance-pool was expanded. The returned +// NodeInfo is expected to have a fully populated Node object, with all of the labels, +// capacity and allocatable information as well as all pods that are started on +// the node by default, using manifest (most likely only kube-proxy). Implementation optional. 
+func (ip *InstancePoolNodeGroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) { + node, err := ip.manager.GetInstancePoolTemplateNode(*ip) + if err != nil { + return nil, errors.Wrap(err, "unable to build node info template") + } + + nodeInfo := schedulerframework.NewNodeInfo( + cloudprovider.BuildKubeProxy(ip.id), + buildCSINodePod(), + ) + nodeInfo.SetNode(node) + return nodeInfo, nil +} + +// Exist checks if the instance-pool based node group really exists on the cloud provider side. Allows to tell the +// theoretical instance-pool from the real one. Implementation required. +func (ip *InstancePoolNodeGroup) Exist() bool { + return true +} + +// Create creates the instance-pool based node group on the cloud provider side. Implementation optional. +func (ip *InstancePoolNodeGroup) Create() (cloudprovider.NodeGroup, error) { + return nil, cloudprovider.ErrNotImplemented +} + +// Delete deletes the instance-pool based node group on the cloud provider side. +// This will be executed only for autoprovisioned instance-pools, once their size drops to 0. +// Implementation optional. +func (ip *InstancePoolNodeGroup) Delete() error { + return cloudprovider.ErrNotImplemented +} + +// GetOptions returns NodeGroupAutoscalingOptions that should be used for this particular +// InstancePoolNodeGroup. Returning a nil will result in using default options. +func (ip *InstancePoolNodeGroup) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*config.NodeGroupAutoscalingOptions, error) { + return nil, cloudprovider.ErrNotImplemented +} + +// Autoprovisioned returns true if the instance-pool based node group is autoprovisioned. An autoprovisioned group +// was created by CA and can be deleted when scaled to 0. +func (ip *InstancePoolNodeGroup) Autoprovisioned() bool { + return false +} diff --git a/cluster-autoscaler/cloudprovider/oci/oci_instance_pool_cache.go b/cluster-autoscaler/cloudprovider/oci/oci_instance_pool_cache.go new file mode 100644 index 000000000000..75b0c2c45e8e --- /dev/null +++ b/cluster-autoscaler/cloudprovider/oci/oci_instance_pool_cache.go @@ -0,0 +1,363 @@ +/* +Copyright 2021 Oracle and/or its affiliates. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package oci + +import ( + "context" + "github.com/pkg/errors" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core" + "k8s.io/klog/v2" + "strings" + "sync" + "time" +) + +// ComputeMgmtClient wraps core.ComputeManagementClient exposing the functions we actually require. 
+type ComputeMgmtClient interface { + GetInstancePool(context.Context, core.GetInstancePoolRequest) (core.GetInstancePoolResponse, error) + UpdateInstancePool(context.Context, core.UpdateInstancePoolRequest) (core.UpdateInstancePoolResponse, error) + GetInstancePoolInstance(context.Context, core.GetInstancePoolInstanceRequest) (core.GetInstancePoolInstanceResponse, error) + ListInstancePoolInstances(context.Context, core.ListInstancePoolInstancesRequest) (core.ListInstancePoolInstancesResponse, error) + DetachInstancePoolInstance(context.Context, core.DetachInstancePoolInstanceRequest) (core.DetachInstancePoolInstanceResponse, error) +} + +// ComputeClient wraps core.ComputeClient exposing the functions we actually require. +type ComputeClient interface { + ListVnicAttachments(ctx context.Context, request core.ListVnicAttachmentsRequest) (core.ListVnicAttachmentsResponse, error) +} + +// VirtualNetworkClient wraps core.VirtualNetworkClient exposing the functions we actually require. +type VirtualNetworkClient interface { + GetVnic(context.Context, core.GetVnicRequest) (core.GetVnicResponse, error) +} + +type instancePoolCache struct { + mu sync.Mutex + poolCache map[string]*core.InstancePool + instanceSummaryCache map[string]*[]core.InstanceSummary + targetSize map[string]int + unownedInstances map[OciRef]bool + + computeManagementClient ComputeMgmtClient + computeClient ComputeClient + virtualNetworkClient VirtualNetworkClient +} + +func newInstancePoolCache(computeManagementClient ComputeMgmtClient, computeClient ComputeClient, virtualNetworkClient VirtualNetworkClient) *instancePoolCache { + return &instancePoolCache{ + poolCache: map[string]*core.InstancePool{}, + instanceSummaryCache: map[string]*[]core.InstanceSummary{}, + targetSize: map[string]int{}, + unownedInstances: map[OciRef]bool{}, + computeManagementClient: computeManagementClient, + computeClient: computeClient, + virtualNetworkClient: virtualNetworkClient, + } +} + +func (c *instancePoolCache) InstancePools() map[string]*core.InstancePool { + result := map[string]*core.InstancePool{} + for k, v := range c.poolCache { + result[k] = v + } + return result +} + +func (c *instancePoolCache) rebuild(staticInstancePools map[string]*InstancePoolNodeGroup, cfg CloudConfig) error { + // Since we only support static instance-pools we don't need to worry about pruning. + + for id := range staticInstancePools { + resp, err := c.computeManagementClient.GetInstancePool(context.Background(), core.GetInstancePoolRequest{ + InstancePoolId: common.String(id), + }) + if err != nil { + klog.Errorf("get instance pool %s failed: %v", id, err) + return err + } + klog.V(5).Infof("GetInstancePool() response %v", resp.InstancePool) + + c.setInstancePool(&resp.InstancePool) + + // OCI instance-pools do not contain individual instance objects so they must be fetched separately. + listInstancesResponse, err := c.computeManagementClient.ListInstancePoolInstances(context.Background(), core.ListInstancePoolInstancesRequest{ + InstancePoolId: common.String(id), + CompartmentId: common.String(cfg.Global.CompartmentID), + }) + if err != nil { + return err + } + klog.V(5).Infof("ListInstancePoolInstances() response %v", listInstancesResponse.Items) + c.setInstanceSummaries(*resp.InstancePool.Id, &listInstancesResponse.Items) + } + // Reset unowned instances cache. + c.unownedInstances = make(map[OciRef]bool) + + return nil +} + +// removeInstance tries to remove the instance from the specified instance pool. 
If the instance isn't in the array, +// then it won't do anything removeInstance returns true if it actually removed the instance and reduced the size of +// the instance pool. +func (c *instancePoolCache) removeInstance(instancePool InstancePoolNodeGroup, instanceID string) bool { + + c.mu.Lock() + defer c.mu.Unlock() + + if instanceID == "" { + klog.Warning("instanceID is not set - skipping removal.") + return false + } + + // This instance pool must be in state RUNNING in order to detach a particular instance. + err := c.waitForInstancePoolState(context.Background(), instancePool.Id(), core.InstancePoolLifecycleStateRunning) + if err != nil { + return false + } + + _, err = c.computeManagementClient.DetachInstancePoolInstance(context.Background(), core.DetachInstancePoolInstanceRequest{ + InstancePoolId: common.String(instancePool.Id()), + DetachInstancePoolInstanceDetails: core.DetachInstancePoolInstanceDetails{ + InstanceId: common.String(instanceID), + IsDecrementSize: common.Bool(true), + IsAutoTerminate: common.Bool(true), + }, + }) + + if err == nil { + // Decrease pool size in cache since IsDecrementSize was true + c.targetSize[instancePool.Id()] -= 1 + return true + } + + return false +} + +// findInstanceByDetails attempts to find the given instance by details by searching +// through the configured instance-pools (ListInstancePoolInstances) for a match. +func (c *instancePoolCache) findInstanceByDetails(ociInstance OciRef) (*OciRef, error) { + c.mu.Lock() + defer c.mu.Unlock() + + // Minimum amount of information we need to make a positive match + if ociInstance.InstanceID == "" && ociInstance.PrivateIPAddress == "" && ociInstance.PublicIPAddress == "" { + return nil, errors.New("instance id or an IP address is required to resolve details") + } + + if c.unownedInstances[ociInstance] { + // We already know this instance is not part of a configured pool. Return early and avoid additional API calls. + klog.V(4).Infof("Node " + ociInstance.Name + " is known to not be a member of any of the specified instance pool(s)") + return nil, errInstanceInstancePoolNotFound + } + + // Look for the instance in each of the specified pool(s) + for _, nextInstancePool := range c.poolCache { + // Skip searching instance pool if it's instance count is 0. 
+ if *nextInstancePool.Size == 0 { + klog.V(4).Infof("skipping over instance pool %s since it is empty", *nextInstancePool.Id) + continue + } + // List instances in the next pool + listInstancePoolInstances, err := c.computeManagementClient.ListInstancePoolInstances(context.Background(), core.ListInstancePoolInstancesRequest{ + CompartmentId: common.String(ociInstance.CompartmentID), + InstancePoolId: nextInstancePool.Id, + }) + if err != nil { + return nil, err + } + + for _, poolMember := range listInstancePoolInstances.Items { + // Skip comparing this instance if it is not in the Running state + if strings.ToLower(*poolMember.State) != strings.ToLower(string(core.InstanceLifecycleStateRunning)) { + klog.V(4).Infof("skipping over instance %s: since it is not in the running state: %s", *poolMember.Id, *poolMember.State) + continue + } + listVnicAttachments, err := c.computeClient.ListVnicAttachments(context.Background(), core.ListVnicAttachmentsRequest{ + CompartmentId: common.String(*poolMember.CompartmentId), + InstanceId: poolMember.Id, + }) + if err != nil { + klog.Errorf("list vNIC attachments for %s failed: %v", *poolMember.Id, err) + return nil, err + } + klog.V(5).Infof("ListVnicAttachments() response for %s: %v", *poolMember.Id, listVnicAttachments.Items) + for _, vnicAttachment := range listVnicAttachments.Items { + // Skip this attachment if the vNIC is not live + if core.VnicAttachmentLifecycleStateAttached != vnicAttachment.LifecycleState { + klog.V(4).Infof("skipping vNIC on instance %s: since it is not active", *poolMember.Id) + continue + } + getVnicResp, err := c.virtualNetworkClient.GetVnic(context.Background(), core.GetVnicRequest{ + VnicId: vnicAttachment.VnicId, + }) + if err != nil { + klog.Errorf("get vNIC for %s failed: %v", *poolMember.Id, err) + return nil, err + } + klog.V(5).Infof("GetVnic() response for vNIC %s: %v", *vnicAttachment.Id, getVnicResp.Vnic) + // Preferably we match by instanceID, but we can match by private or public IP + if *poolMember.Id == ociInstance.InstanceID || + (getVnicResp.Vnic.PrivateIp != nil && *getVnicResp.Vnic.PrivateIp == ociInstance.PrivateIPAddress) || + (getVnicResp.Vnic.PublicIp != nil && *getVnicResp.Vnic.PublicIp == ociInstance.PublicIPAddress) { + klog.V(4).Info(*poolMember.DisplayName, " is a member of "+*nextInstancePool.Id) + // Return a complete instance details. 
+ if ociInstance.Name == "" { + ociInstance.Name = *poolMember.DisplayName + } + ociInstance.InstanceID = *poolMember.Id + ociInstance.PoolID = *nextInstancePool.Id + ociInstance.CompartmentID = *poolMember.CompartmentId + ociInstance.AvailabilityDomain = strings.Split(*poolMember.AvailabilityDomain, ":")[1] + ociInstance.Shape = *poolMember.Shape + ociInstance.PrivateIPAddress = *getVnicResp.Vnic.PrivateIp + // Public IP is optional + if getVnicResp.Vnic.PublicIp != nil { + ociInstance.PublicIPAddress = *getVnicResp.Vnic.PublicIp + } + return &ociInstance, nil + } + } + } + } + + c.unownedInstances[ociInstance] = true + klog.V(4).Infof(ociInstance.Name + " is not a member of any of the specified instance pool(s)") + return nil, errInstanceInstancePoolNotFound +} + +func (c *instancePoolCache) getInstancePool(id string) (*core.InstancePool, error) { + c.mu.Lock() + defer c.mu.Unlock() + + return c.getInstancePoolWithoutLock(id) +} + +func (c *instancePoolCache) getInstancePoolWithoutLock(id string) (*core.InstancePool, error) { + instancePool := c.poolCache[id] + if instancePool == nil { + return nil, errors.New("instance pool was not found in the cache") + } + + return instancePool, nil +} + +func (c *instancePoolCache) setInstancePool(np *core.InstancePool) { + c.mu.Lock() + defer c.mu.Unlock() + + c.poolCache[*np.Id] = np + c.targetSize[*np.Id] = *np.Size +} + +func (c *instancePoolCache) getInstanceSummaries(poolID string) (*[]core.InstanceSummary, error) { + c.mu.Lock() + defer c.mu.Unlock() + + return c.getInstanceSummariesWithoutLock(poolID) +} + +func (c *instancePoolCache) getInstanceSummariesWithoutLock(poolID string) (*[]core.InstanceSummary, error) { + instanceSummaries := c.instanceSummaryCache[poolID] + if instanceSummaries == nil { + return nil, errors.New("instance summaries for instance pool id " + poolID + " were not found in cache") + } + + return instanceSummaries, nil +} + +func (c *instancePoolCache) setInstanceSummaries(instancePoolID string, is *[]core.InstanceSummary) { + c.mu.Lock() + defer c.mu.Unlock() + + c.instanceSummaryCache[instancePoolID] = is +} + +func (c *instancePoolCache) setSize(instancePoolID string, size int) error { + + if instancePoolID == "" { + return errors.New("instance-pool is required") + } + + getInstancePoolResp, err := c.computeManagementClient.GetInstancePool(context.Background(), core.GetInstancePoolRequest{ + InstancePoolId: common.String(instancePoolID), + }) + if err != nil { + return err + } + + updateDetails := core.UpdateInstancePoolDetails{ + Size: common.Int(size), + InstanceConfigurationId: getInstancePoolResp.InstanceConfigurationId, + } + + _, err = c.computeManagementClient.UpdateInstancePool(context.Background(), core.UpdateInstancePoolRequest{ + InstancePoolId: common.String(instancePoolID), + UpdateInstancePoolDetails: updateDetails, + }) + if err != nil { + return err + } + + c.mu.Lock() + defer c.mu.Unlock() + + c.targetSize[instancePoolID] = size + + return c.waitForInstancePoolState(context.Background(), instancePoolID, core.InstancePoolLifecycleStateRunning) +} + +func (c *instancePoolCache) waitForInstancePoolState(ctx context.Context, instancePoolID string, desiredState core.InstancePoolLifecycleStateEnum) error { + timeoutCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) + defer cancel() + err := wait.PollImmediateUntil( + // TODO we need a better implementation of this function + internalPollInterval, + func() (bool, error) { + getInstancePoolResp, err := 
c.computeManagementClient.GetInstancePool(context.Background(), core.GetInstancePoolRequest{ + InstancePoolId: common.String(instancePoolID), + }) + if err != nil { + klog.Errorf("getInstancePool failed. Retrying: %+v", err) + return false, err + } else if getInstancePoolResp.LifecycleState != desiredState { + klog.V(4).Infof("waiting for instance-pool %s to enter state: %s (current state: %s)", instancePoolID, + desiredState, getInstancePoolResp.LifecycleState) + return false, nil + } + klog.V(3).Infof("instance pool %s is in desired state: %s", instancePoolID, desiredState) + + return true, nil + }, timeoutCtx.Done()) + if err != nil { + // may be wait.ErrWaitTimeout + return err + } + return nil +} + +func (c *instancePoolCache) getSize(id string) (int, error) { + c.mu.Lock() + defer c.mu.Unlock() + + size, ok := c.targetSize[id] + if !ok { + return -1, errors.New("target size not found") + } + + return size, nil +} diff --git a/cluster-autoscaler/cloudprovider/oci/oci_instance_pool_manager.go b/cluster-autoscaler/cloudprovider/oci/oci_instance_pool_manager.go new file mode 100644 index 000000000000..e2d055e8d6b5 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/oci/oci_instance_pool_manager.go @@ -0,0 +1,513 @@ +/* +Copyright 2021 Oracle and/or its affiliates. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package oci + +import ( + "fmt" + "gopkg.in/gcfg.v1" + "os" + "strconv" + "strings" + "time" + + apiv1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" + "k8s.io/klog/v2" + kubeletapis "k8s.io/kubelet/pkg/apis" + + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + + "github.com/pkg/errors" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common/auth" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core" +) + +var ( + internalPollInterval = 1 * time.Minute + errInstanceInstancePoolNotFound = errors.New("instance-pool not found for instance") +) + +// InstancePoolManager defines the operations required for an *instance-pool based* autoscaler. +type InstancePoolManager interface { + // Refresh triggers refresh of cached resources. + Refresh() error + // Cleanup cleans up open resources before the cloud provider is destroyed, i.e. go routines etc. + Cleanup() error + + // GetInstancePools returns list of registered InstancePools. + GetInstancePools() []*InstancePoolNodeGroup + // GetInstancePoolNodes returns InstancePool nodes. + GetInstancePoolNodes(ip InstancePoolNodeGroup) ([]cloudprovider.Instance, error) + // GetInstancePoolForInstance returns InstancePool to which the given instance belongs. + GetInstancePoolForInstance(instance OciRef) (*InstancePoolNodeGroup, error) + // GetInstancePoolTemplateNode returns a template node for InstancePool. 
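+	// The template is built from the pool's instance configuration; it is primarily
+	// useful when the pool has no running instances to copy from (e.g. scaling up from zero).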
+	GetInstancePoolTemplateNode(ip InstancePoolNodeGroup) (*apiv1.Node, error)
+	// GetInstancePoolSize gets the InstancePool size.
+	GetInstancePoolSize(ip InstancePoolNodeGroup) (int, error)
+	// SetInstancePoolSize sets the InstancePool size.
+	SetInstancePoolSize(ip InstancePoolNodeGroup, size int) error
+	// DeleteInstances deletes the given instances. All instances must be controlled by the same InstancePool.
+	DeleteInstances(ip InstancePoolNodeGroup, instances []OciRef) error
+}
+
+// InstancePoolManagerImpl is the implementation of an instance-pool based autoscaler on OCI.
+type InstancePoolManagerImpl struct {
+	cfg                 *CloudConfig
+	shapeGetter         ShapeGetter
+	staticInstancePools map[string]*InstancePoolNodeGroup
+	lastRefresh         time.Time
+	// caches the instance pool and instance summary objects received from OCI.
+	// All interactions with OCI's API should go through the poolCache.
+	instancePoolCache *instancePoolCache
+}
+
+// CreateInstancePoolManager constructs the InstancePoolManager object.
+func CreateInstancePoolManager(cloudConfigPath string, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions, kubeClient kubernetes.Interface) (InstancePoolManager, error) {
+
+	var err error
+	var configProvider common.ConfigurationProvider
+	var cloudConfig = &CloudConfig{}
+
+	// cloudConfigPath is the optional file of variables passed in with the --cloud-config flag, which takes precedence over environment variables
+	if cloudConfigPath != "" {
+		config, fileErr := os.Open(cloudConfigPath)
+		if fileErr != nil {
+			klog.Fatalf("could not open cloud provider configuration %s: %#v", cloudConfigPath, fileErr)
+		}
+		defer config.Close()
+		if config != nil {
+			if err := gcfg.ReadInto(cloudConfig, config); err != nil {
+				klog.Errorf("could not read config: %v", err)
+				return nil, err
+			}
+		}
+	}
+	// Fall back to environment variables
+	if cloudConfig.Global.CompartmentID == "" {
+		cloudConfig.Global.CompartmentID = os.Getenv(ociCompartmentEnvVar)
+	} else if !cloudConfig.Global.UseInstancePrinciples {
+		if os.Getenv(ociUseInstancePrincipalEnvVar) == "true" {
+			cloudConfig.Global.UseInstancePrinciples = true
+		}
+		if os.Getenv(ociRegionEnvVar) != "" {
+			cloudConfig.Global.Region = os.Getenv(ociRegionEnvVar)
+		}
+	}
+	if cloudConfig.Global.RefreshInterval == 0 {
+		if os.Getenv(ociRefreshInterval) != "" {
+			klog.V(4).Infof("using a custom cache refresh interval %v...", os.Getenv(ociRefreshInterval))
+			cloudConfig.Global.RefreshInterval, _ = time.ParseDuration(os.Getenv(ociRefreshInterval))
+		} else {
+			cloudConfig.Global.RefreshInterval = defaultRefreshInterval
+		}
+	}
+
+	clientConfig := common.CustomClientConfiguration{
+		RetryPolicy: newRetryPolicy(),
+	}
+
+	if os.Getenv(ociUseInstancePrincipalEnvVar) == "true" {
+		klog.V(4).Info("using instance principals...")
+		region := os.Getenv(ociRegionEnvVar)
+		if region == "" {
+			klog.Fatalf("OCI_REGION is required when OCI_USE_INSTANCE_PRINCIPAL is set to true")
+		}
+		configProvider, err = auth.InstancePrincipalConfigurationProviderForRegion(common.StringToRegion(region))
+		if err != nil {
+			return nil, err
+		}
+	} else {
+		klog.Info("using default configuration provider")
+		configProvider = common.DefaultConfigProvider()
+	}
+	providerRegion, _ := configProvider.Region()
+	klog.Infof("OCI provider region: %s ", providerRegion)
+
+	computeMgmtClient, err := core.NewComputeManagementClientWithConfigurationProvider(configProvider)
+	if err != nil {
+		return nil, errors.Wrap(err, "unable to create compute management client")
+	}
+
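+	// The same custom client configuration (retry policy with exponential backoff,
+	// see newRetryPolicy) is applied to every OCI client created below.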
computeMgmtClient.SetCustomClientConfiguration(clientConfig) + + computeClient, err := core.NewComputeClientWithConfigurationProvider(configProvider) + if err != nil { + return nil, errors.Wrap(err, "unable to create compute client") + } + computeClient.SetCustomClientConfiguration(clientConfig) + + networkClient, err := core.NewVirtualNetworkClientWithConfigurationProvider(configProvider) + if err != nil { + return nil, errors.Wrap(err, "unable to create virtual network client") + } + networkClient.SetCustomClientConfiguration(clientConfig) + + cloudConfig.Global.CompartmentID = os.Getenv(ociCompartmentEnvVar) + + // Not passed by --cloud-config or environment variable, attempt to use the tenancy ID as the compartment ID + if cloudConfig.Global.CompartmentID == "" { + tenancyID, err := configProvider.TenancyOCID() + if err != nil { + return nil, errors.Wrap(err, "unable to retrieve tenancy ID") + } + cloudConfig.Global.CompartmentID = tenancyID + } + + ipManager := &InstancePoolManagerImpl{ + cfg: cloudConfig, + staticInstancePools: map[string]*InstancePoolNodeGroup{}, + shapeGetter: createShapeGetter(ShapeClientImpl{computeMgmtClient: computeMgmtClient, computeClient: computeClient}), + instancePoolCache: newInstancePoolCache(&computeMgmtClient, &computeClient, &networkClient), + } + + // Contains all the specs from the args that give us the pools. + for _, arg := range discoveryOpts.NodeGroupSpecs { + ip, err := instancePoolFromArg(arg) + if err != nil { + return nil, fmt.Errorf("unable to construct instance pool from argument: %v", err) + } + + ip.manager = ipManager + ip.kubeClient = kubeClient + + ipManager.staticInstancePools[ip.Id()] = ip + } + + // wait until we have an initial full poolCache. + err = wait.PollImmediateInfinite( + 10*time.Second, + func() (bool, error) { + err := ipManager.Refresh() + if err != nil { + klog.Errorf("unable to fill cache on startup. Retrying: %+v", err) + return false, nil + } + + return true, nil + }) + if err != nil { + return nil, err + } + + return ipManager, nil +} + +// instancePoolFromArg parses a instancepool spec represented in the form of `::` and produces an instance pool wrapper spec object +func instancePoolFromArg(value string) (*InstancePoolNodeGroup, error) { + + if !strings.Contains(value, ociInstancePoolResourceIdent) { + return nil, fmt.Errorf("instance pool manager does not work with resources of type: %s", value) + } + + tokens := strings.SplitN(value, ":", 3) + if len(tokens) != 3 || !strings.HasPrefix(tokens[2], "ocid") { + return nil, fmt.Errorf("incorrect instance configuration: %s", value) + } + + spec := &InstancePoolNodeGroup{} + if size, err := strconv.Atoi(tokens[0]); err == nil { + spec.minSize = size + } else { + return nil, fmt.Errorf("failed to set pool min size: %s %v", tokens[0], err) + } + + if size, err := strconv.Atoi(tokens[1]); err == nil { + spec.maxSize = size + } else { + return nil, fmt.Errorf("failed to set pool max size: %s %v", tokens[1], err) + } + + spec.id = tokens[2] + + klog.Infof("static instance pool wrapper spec constructed: %+v", spec) + + return spec, nil +} + +// Refresh triggers refresh of cached resources. 
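+// Refresh is rate limited: it is a no-op until RefreshInterval has elapsed since the
+// last successful refresh; forceRefresh bypasses the interval check.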
+func (m *InstancePoolManagerImpl) Refresh() error {
+	if m.lastRefresh.Add(m.cfg.Global.RefreshInterval).After(time.Now()) {
+		return nil
+	}
+
+	return m.forceRefresh()
+}
+
+func (m *InstancePoolManagerImpl) forceRefresh() error {
+	if m.cfg == nil {
+		return errors.New("instance pool manager does not have a required config")
+	}
+	err := m.instancePoolCache.rebuild(m.staticInstancePools, *m.cfg)
+	if err != nil {
+		return err
+	}
+
+	m.lastRefresh = time.Now()
+	klog.Infof("Refreshed instance-pool list, next refresh after %v", m.lastRefresh.Add(m.cfg.Global.RefreshInterval))
+	return nil
+}
+
+// Cleanup cleans up open resources before the cloud provider is destroyed, i.e. go routines etc.
+func (m *InstancePoolManagerImpl) Cleanup() error {
+	return nil
+}
+
+// GetInstancePools returns list of registered InstancePools.
+func (m *InstancePoolManagerImpl) GetInstancePools() []*InstancePoolNodeGroup {
+	var instancePools []*InstancePoolNodeGroup
+	for _, np := range m.staticInstancePools {
+		instancePools = append(instancePools, np)
+	}
+	return instancePools
+}
+
+// GetInstancePoolNodes returns InstancePool nodes that are not in a terminal state.
+func (m *InstancePoolManagerImpl) GetInstancePoolNodes(ip InstancePoolNodeGroup) ([]cloudprovider.Instance, error) {
+
+	klog.V(4).Infof("getting instances for node pool: %q", ip.Id())
+
+	instanceSummaries, err := m.instancePoolCache.getInstanceSummaries(ip.Id())
+	if err != nil {
+		return nil, err
+	}
+
+	var providerInstances []cloudprovider.Instance
+	for _, instance := range *instanceSummaries {
+		status := &cloudprovider.InstanceStatus{}
+		switch *instance.State {
+		case string(core.InstanceLifecycleStateRunning):
+			status.State = cloudprovider.InstanceRunning
+		case string(core.InstanceLifecycleStateCreatingImage):
+			status.State = cloudprovider.InstanceCreating
+		case string(core.InstanceLifecycleStateStarting):
+			status.State = cloudprovider.InstanceCreating
+		case string(core.InstanceLifecycleStateMoving):
+			status.State = cloudprovider.InstanceCreating
+		case string(core.InstanceLifecycleStateProvisioning):
+			status.State = cloudprovider.InstanceCreating
+		case string(core.InstanceLifecycleStateTerminating):
+			status.State = cloudprovider.InstanceDeleting
+		case string(core.InstanceLifecycleStateStopping):
+			status.State = cloudprovider.InstanceDeleting
+		}
+
+		// Instance not in a terminal or unknown state, ok to add.
+		if status.State != 0 {
+			providerInstances = append(providerInstances, cloudprovider.Instance{
+				Id:     *instance.Id,
+				Status: status,
+			})
+		}
+	}
+
+	return providerInstances, nil
+}
+
+// GetInstancePoolForInstance returns InstancePool to which the given instance belongs. If
+// PoolID is not set on the specified OciRef, we will look for a match.
+func (m *InstancePoolManagerImpl) GetInstancePoolForInstance(instanceDetails OciRef) (*InstancePoolNodeGroup, error) {
+
+	if instanceDetails.PoolID != "" {
+		// It's possible that this instance belongs to an instance pool that was not specified via --nodes argument.
+		return m.staticInstancePools[instanceDetails.PoolID], nil
+	}
+
+	if instanceDetails.CompartmentID == "" {
+		// cfg.Global.CompartmentID would be set to tenancy OCID at runtime if compartment was not set.
+		instanceDetails.CompartmentID = m.cfg.Global.CompartmentID
+	}
+
+	// Details are missing from this instance - including the pool ID.
+	// Try to resolve them, though it may not be a member of an instance-pool we manage.
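+	// A successful lookup is annotated/labeled onto the node below; a miss is remembered
+	// in the unownedInstances cache so the search is not repeated on every scan.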
+ resolvedInstanceDetails, err := m.instancePoolCache.findInstanceByDetails(instanceDetails) + if err != nil { + return nil, err + } else if resolvedInstanceDetails == nil { + return nil, nil + } + + kubeClient := m.staticInstancePools[resolvedInstanceDetails.PoolID].kubeClient + + // Optionally annotate & label the node so that it does not need to be searched for in subsequent iterations. + _ = annotateNode(kubeClient, resolvedInstanceDetails.Name, ociInstanceIDAnnotation, resolvedInstanceDetails.InstanceID) + _ = annotateNode(kubeClient, resolvedInstanceDetails.Name, ociInstancePoolIDAnnotation, resolvedInstanceDetails.PoolID) + _ = annotateNode(kubeClient, resolvedInstanceDetails.Name, ociAnnotationCompartmentID, resolvedInstanceDetails.CompartmentID) + _ = labelNode(kubeClient, resolvedInstanceDetails.Name, apiv1.LabelTopologyZone, resolvedInstanceDetails.AvailabilityDomain) + _ = labelNode(kubeClient, resolvedInstanceDetails.Name, apiv1.LabelFailureDomainBetaZone, resolvedInstanceDetails.AvailabilityDomain) + _ = labelNode(kubeClient, resolvedInstanceDetails.Name, apiv1.LabelInstanceType, resolvedInstanceDetails.Shape) + _ = labelNode(kubeClient, resolvedInstanceDetails.Name, apiv1.LabelInstanceTypeStable, resolvedInstanceDetails.Shape) + _ = setNodeProviderID(kubeClient, resolvedInstanceDetails.Name, resolvedInstanceDetails.InstanceID) + + return m.staticInstancePools[resolvedInstanceDetails.PoolID], nil +} + +// GetInstancePoolTemplateNode returns a template node for the InstancePool. +func (m *InstancePoolManagerImpl) GetInstancePoolTemplateNode(ip InstancePoolNodeGroup) (*apiv1.Node, error) { + + instancePool, err := m.instancePoolCache.getInstancePool(ip.Id()) + if err != nil { + return nil, err + } + + node, err := m.buildNodeFromTemplate(instancePool) + if err != nil { + return nil, err + } + + return node, nil +} + +// GetInstancePoolSize gets the instance-pool size. +func (m *InstancePoolManagerImpl) GetInstancePoolSize(ip InstancePoolNodeGroup) (int, error) { + return m.instancePoolCache.getSize(ip.Id()) +} + +// SetInstancePoolSize sets instance-pool size. +func (m *InstancePoolManagerImpl) SetInstancePoolSize(np InstancePoolNodeGroup, size int) error { + + err := m.instancePoolCache.setSize(np.Id(), size) + if err != nil { + return err + } + + // Interface says this function should wait until node group size is updated. + + // We do not wait for the work request to finish or nodes become active on purpose. This allows + // the autoscaler to make decisions quicker especially since the autoscaler is aware of + // unregistered nodes in addition to registered nodes. + + return nil +} + +// DeleteInstances deletes the given instances. All instances must be controlled by the same instance-pool. +func (m *InstancePoolManagerImpl) DeleteInstances(instancePool InstancePoolNodeGroup, instances []OciRef) error { + klog.Infof("DeleteInstances called on instance pool %s", instancePool.Id()) + + for _, instance := range instances { + // removeInstance auto decrements instance pool size. 
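+		// (the detach call sets IsDecrementSize and IsAutoTerminate, so the instance is
+		// terminated and the pool target size shrinks in a single request)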
+ detached := m.instancePoolCache.removeInstance(instancePool, instance.InstanceID) + if !detached { + return fmt.Errorf("could not delete instance %s from instance pool %s", instance.InstanceID, instancePool.Id()) + } + } + + return nil +} + +func (m *InstancePoolManagerImpl) buildNodeFromTemplate(instancePool *core.InstancePool) (*apiv1.Node, error) { + + node := apiv1.Node{} + nodeName := fmt.Sprintf("%s-%d", "inst", 555555) + + ocidParts := strings.Split(*instancePool.Id, ".") + instanceIDPlaceholder := ocidParts[0] + "." + "instance" + "." + ocidParts[2] + "." + ocidParts[3] + ".tbd" + + annotations := make(map[string]string) + annotations[ociAnnotationCompartmentID] = *instancePool.CompartmentId + annotations[ociInstancePoolIDAnnotation] = *instancePool.Id + annotations[ociInstanceIDAnnotation] = instanceIDPlaceholder + + node.ObjectMeta = metav1.ObjectMeta{ + Name: nodeName, + Labels: map[string]string{}, + Annotations: annotations, + } + + node.Status = apiv1.NodeStatus{ + Capacity: apiv1.ResourceList{}, + } + shape, err := m.shapeGetter.GetInstancePoolShape(instancePool) + + if err != nil { + return nil, err + } + + if shape.GPU > 0 { + node.Spec.Taints = append(node.Spec.Taints, apiv1.Taint{ + Key: "nvidia.com/gpu", + Value: "", + Effect: "NoSchedule", + }) + } + + node.Status.Capacity[apiv1.ResourcePods] = *resource.NewQuantity(110, resource.DecimalSI) + node.Status.Capacity[apiv1.ResourceCPU] = *resource.NewQuantity(int64(shape.CPU), resource.DecimalSI) + node.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(int64(shape.MemoryInBytes), resource.DecimalSI) + node.Status.Capacity[ResourceGPU] = *resource.NewQuantity(int64(shape.GPU), resource.DecimalSI) + + node.Status.Allocatable = node.Status.Capacity + + availabilityDomain, err := getInstancePoolAvailabilityDomain(instancePool) + if err != nil { + return nil, err + } + + node.Labels = cloudprovider.JoinStringMaps(node.Labels, buildGenericLabelsForInstancePool(instancePool, nodeName, shape.Name, availabilityDomain)) + + node.Status.Conditions = cloudprovider.BuildReadyConditions() + return &node, nil +} + +// getInstancePoolAvailabilityDomain determines the availability of the instance pool. +// This breaks down if the customer specifies more than one placement configuration, +// so best practices should be a node pool per AD if customers care about it during scheduling. +// if there are more than 1AD defined, then we return the first one always. +func getInstancePoolAvailabilityDomain(ip *core.InstancePool) (string, error) { + if len(ip.PlacementConfigurations) == 0 { + // At least one placement configuration is required for an instance pool, so we should not get here. + return "", fmt.Errorf("instance-pool %q does not have the required placement configurations", *ip.Id) + } + + if len(ip.PlacementConfigurations) > 1 { + klog.Warningf("instance-pool %q has more than 1 placement config so picking first availability domain", *ip.Id) + } + + // Get the availability domain which is by default in the format of `Uocm:PHX-AD-1` + // and remove the hash prefix. 
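+	// e.g. "Uocm:PHX-AD-1" yields "PHX-AD-1".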
+ availabilityDomain := strings.Split(*ip.PlacementConfigurations[0].AvailabilityDomain, ":")[1] + return availabilityDomain, nil +} + +func buildGenericLabelsForInstancePool(instancePool *core.InstancePool, nodeName, shape, availabilityDomain string) map[string]string { + result := make(map[string]string) + result[kubeletapis.LabelArch] = cloudprovider.DefaultArch + result[apiv1.LabelArchStable] = cloudprovider.DefaultArch + + result[kubeletapis.LabelOS] = cloudprovider.DefaultOS + result[apiv1.LabelOSStable] = cloudprovider.DefaultOS + + parts := strings.Split(*instancePool.Id, ".") + if len(parts) == 5 { + // backward compatibility with older pod labels + result[apiv1.LabelZoneRegion] = parts[3] + result[apiv1.LabelZoneRegionStable] = parts[3] + } + + result[apiv1.LabelInstanceType] = shape + result[apiv1.LabelInstanceTypeStable] = shape + + result[apiv1.LabelZoneFailureDomain] = availabilityDomain + // backward compatibility with older pod labels + result[apiv1.LabelZoneFailureDomainStable] = availabilityDomain + + result[apiv1.LabelHostname] = nodeName + + return result +} diff --git a/cluster-autoscaler/cloudprovider/oci/oci_instance_pool_manager_test.go b/cluster-autoscaler/cloudprovider/oci/oci_instance_pool_manager_test.go new file mode 100644 index 000000000000..e4e6cc7ac9ab --- /dev/null +++ b/cluster-autoscaler/cloudprovider/oci/oci_instance_pool_manager_test.go @@ -0,0 +1,511 @@ +package oci + +import ( + "context" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core" + "reflect" + "testing" + + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common" +) + +type mockComputeManagementClient struct { + err error + getInstancePoolResponse core.GetInstancePoolResponse + getInstancePoolInstanceResponse core.GetInstancePoolInstanceResponse + listInstancePoolInstancesResponse core.ListInstancePoolInstancesResponse + updateInstancePoolResponse core.UpdateInstancePoolResponse + detachInstancePoolInstanceResponse core.DetachInstancePoolInstanceResponse +} + +type mockVirtualNetworkClient struct { + err error + getVnicResponse core.GetVnicResponse +} + +type mockComputeClient struct { + err error + listVnicAttachmentsResponse core.ListVnicAttachmentsResponse +} + +func (m *mockComputeClient) ListVnicAttachments(ctx context.Context, request core.ListVnicAttachmentsRequest) (core.ListVnicAttachmentsResponse, error) { + return m.listVnicAttachmentsResponse, m.err +} + +func (m *mockVirtualNetworkClient) GetVnic(context.Context, core.GetVnicRequest) (core.GetVnicResponse, error) { + return m.getVnicResponse, m.err +} + +func (m *mockComputeManagementClient) ListInstancePoolInstances(_ context.Context, _ core.ListInstancePoolInstancesRequest) (core.ListInstancePoolInstancesResponse, error) { + return m.listInstancePoolInstancesResponse, m.err +} + +func (m *mockComputeManagementClient) GetInstancePool(context.Context, core.GetInstancePoolRequest) (core.GetInstancePoolResponse, error) { + return m.getInstancePoolResponse, m.err +} + +func (m *mockComputeManagementClient) UpdateInstancePool(context.Context, core.UpdateInstancePoolRequest) (core.UpdateInstancePoolResponse, error) { + return m.updateInstancePoolResponse, m.err +} + +func (m *mockComputeManagementClient) GetInstancePoolInstance(context.Context, core.GetInstancePoolInstanceRequest) (core.GetInstancePoolInstanceResponse, error) { + return m.getInstancePoolInstanceResponse, m.err +} + +func (m *mockComputeManagementClient) 
DetachInstancePoolInstance(context.Context, core.DetachInstancePoolInstanceRequest) (core.DetachInstancePoolInstanceResponse, error) { + return m.detachInstancePoolInstanceResponse, m.err +} + +var computeClient = &mockComputeClient{ + err: nil, + listVnicAttachmentsResponse: core.ListVnicAttachmentsResponse{ + RawResponse: nil, + Items: []core.VnicAttachment{{ + Id: common.String("ocid1.vnic.oc1.phx.abc"), + LifecycleState: core.VnicAttachmentLifecycleStateAttached, + }}, + }, +} + +var computeManagementClient = &mockComputeManagementClient{ + err: nil, + getInstancePoolResponse: core.GetInstancePoolResponse{ + InstancePool: core.InstancePool{ + Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaa1"), + CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), + InstanceConfigurationId: common.String("ocid1.instanceconfiguration.oc1.phx.aaaaaaaa1"), + LifecycleState: core.InstancePoolLifecycleStateRunning, + Size: common.Int(2), + }, + }, + listInstancePoolInstancesResponse: core.ListInstancePoolInstancesResponse{ + RawResponse: nil, + Items: []core.InstanceSummary{{ + Id: common.String("ocid1.instance.oc1.phx.aaa1"), + AvailabilityDomain: common.String("Uocm:PHX-AD-2"), + CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), + DisplayName: common.String("inst-1ncvn-ociinstancepool"), + Shape: common.String("VM.Standard2.8"), + State: common.String(string(core.InstanceLifecycleStateRunning)), + }, { + Id: common.String("ocid1.instance.oc1.phx.aaacachemiss"), + AvailabilityDomain: common.String("Uocm:PHX-AD-2"), + CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), + DisplayName: common.String("inst-2ncvn-ociinstancepool"), + Shape: common.String("VM.Standard2.8"), + State: common.String(string(core.InstanceLifecycleStateRunning)), + }}, + }, +} + +var virtualNetworkClient = &mockVirtualNetworkClient{ + err: nil, + getVnicResponse: core.GetVnicResponse{ + RawResponse: nil, + Vnic: core.Vnic{ + Id: common.String("ocid1.vnic.oc1.phx.abyhqljsxigued23s7ywgcqlbpqfiysgnhxj672awzjluhoopzf7l7wvm6rq"), + PrivateIp: common.String("10.0.20.59"), + PublicIp: common.String("129.146.58.250"), + }, + }, +} + +func TestInstancePoolFromArgs(t *testing.T) { + + value := `1:5:ocid1.instancepool.oc1.phx.aaaaaaaah` + instanceNodePool, err := instancePoolFromArg(value) + if err != nil { + t.Fatalf("unexpected error: %+v", err) + } + + if instanceNodePool.minSize != 1 { + t.Errorf("got minSize %d ; wanted minSize 1", instanceNodePool.minSize) + } + + if instanceNodePool.maxSize != 5 { + t.Errorf("got maxSize %d ; wanted maxSize 1", instanceNodePool.maxSize) + } + + if instanceNodePool.id != "ocid1.instancepool.oc1.phx.aaaaaaaah" { + t.Errorf("got ocid %q ; wanted id \"ocid1.instancepool.oc1.phx.aaaaaaaah\"", instanceNodePool.id) + } + + value = `1:5:ocid1.nodepool.oc1.phx.aaaaaaaah` + _, err = instancePoolFromArg(value) + if err == nil { + t.Fatal("expected error processing an oke based node-pool") + } + + value = `1:5:incorrect:ocid1.instancepool.oc1.phx.aaaaaaaah` + _, err = instancePoolFromArg(value) + if err == nil { + t.Fatal("expected error of an invalid instance pool") + } +} + +func TestGetSetInstancePoolSize(t *testing.T) { + nodePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient) + nodePoolCache.targetSize["ocid1.instancepool.oc1.phx.aaaaaaaai"] = 5 + + manager := 
&InstancePoolManagerImpl{instancePoolCache: nodePoolCache} + size, err := manager.GetInstancePoolSize(InstancePoolNodeGroup{id: "ocid1.instancepool.oc1.phx.aaaaaaaai"}) + if err != nil { + t.Fatalf("unexpected error: %+v", err) + } + if size != 5 { + t.Errorf("got size %d ; wanted size 5", size) + } + + err = manager.SetInstancePoolSize(InstancePoolNodeGroup{id: "ocid1.instancepool.oc1.phx.aaaaaaaai"}, 6) + if err != nil { + t.Fatalf("unexpected error: %+v", err) + } + size, err = manager.GetInstancePoolSize(InstancePoolNodeGroup{id: "ocid1.instancepool.oc1.phx.aaaaaaaai"}) + if err != nil { + t.Fatalf("unexpected error: %+v", err) + } + if size != 6 { + t.Errorf("got size %d ; wanted size 6", size) + } + +} + +func TestGetInstancePoolForInstance(t *testing.T) { + + nodePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient) + nodePoolCache.poolCache["ocid1.instancepool.oc1.phx.aaaaaaaa1"] = &core.InstancePool{ + Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaa1"), + Size: common.Int(1), + } + + var cloudConfig = CloudConfig{} + cloudConfig.Global.CompartmentID = "compartment.oc1..aaaaaaaa7ey4sg3a6b5wnv5hlkjlkjadslkfjalskfjalsadfadsf" + + manager := &InstancePoolManagerImpl{ + cfg: &cloudConfig, + staticInstancePools: map[string]*InstancePoolNodeGroup{ + "ocid1.instancepool.oc1.phx.aaaaaaaa1": {id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"}, + "ocid1.instancepool.oc1.phx.aaaaaaaa2": {id: "ocid1.instancepool.oc1.phx.aaaaaaaa2"}, + }, + instancePoolCache: nodePoolCache, + } + + // first verify instance pool can be found when only the instance id is specified. + np, err := manager.GetInstancePoolForInstance(OciRef{InstanceID: "ocid1.instance.oc1.phx.aaa1"}) + if err != nil { + t.Fatalf("unexpected error: %+v", err) + } + + if np.Id() != "ocid1.instancepool.oc1.phx.aaaaaaaa1" { + t.Fatalf("got unexpected ocid %q ; wanted \"ocid1.instancepool.oc1.phx.aaaaaaaa1\"", np.Id()) + } + + // next, verify a valid instance can be found if it is currently missing from the cache. + np, err = manager.GetInstancePoolForInstance(OciRef{InstanceID: "ocid1.instance.oc1.phx.aaacachemiss"}) + if err != nil { + t.Fatalf("unexpected error: %+v", err) + } + + if np.Id() != "ocid1.instancepool.oc1.phx.aaaaaaaa1" { + t.Fatalf("got unexpected ocid %q ; wanted \"ocid1.instancepool.oc1.phx.aaaaaaaa1s\"", np.Id()) + } + + // next, verify an invalid instance cant be found if it is missing from the cache and pool. + _, err = manager.GetInstancePoolForInstance(OciRef{InstanceID: "ocid1.instance.oc1.phx.aaadne"}) + if err != errInstanceInstancePoolNotFound { + t.Fatalf("epected error looking for an invalid instance") + } + + // verify an invalid instance pool produces an error. + ip, err := manager.GetInstancePoolForInstance(OciRef{InstanceID: "ocid1.instance.oc1.phx.aaadne", PoolID: "ocid1.instancepool.oc1.phx.aaaaaaaadne"}) + if err != nil { + t.Fatalf("unexpected error: %+v", err) + } + if ip != nil { + t.Fatalf("expected nil looking for an instance with invalid instance & pool ids") + } + + // next verify instance pool can be found when the instance pool id is specified directly. + _, err = manager.GetInstancePoolForInstance(OciRef{PoolID: "ocid1.instancepool.oc1.phx.aaaaaaaa1"}) + if err != nil { + t.Fatalf("unexpected error: %+v", err) + } + + // next verify instance pool can be found when only the private IP is specified. 
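+	// (10.0.20.59 matches the private IP returned by the mocked GetVnic response above)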
+ np, err = manager.GetInstancePoolForInstance(OciRef{ + PrivateIPAddress: "10.0.20.59"}) + if err != nil { + t.Fatalf("unexpected error: %+v", err) + } + + if np.Id() != "ocid1.instancepool.oc1.phx.aaaaaaaa1" { + t.Fatalf("got unexpected ocid %q ; wanted \"ocid1.instancepool.oc1.phx.aaaaaaaa1\"", np.Id()) + } + + // now verify node pool can be found via lookup up by instance id in poolCache + np, err = manager.GetInstancePoolForInstance(OciRef{InstanceID: "ocid1.instance.oc1.phx.aaa1"}) + if err != nil { + t.Fatalf("unexpected error: %+v", err) + } + + if np.Id() != "ocid1.instancepool.oc1.phx.aaaaaaaa1" { + t.Fatalf("got unexpected ocid %q ; wanted \"ocid1.instancepool.oc1.phx.aaaaaaaa1\"", np.Id()) + } + +} + +func TestGetInstancePoolNodes(t *testing.T) { + + nodePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient) + nodePoolCache.poolCache["ocid1.instancepool.oc1.phx.aaaaaaaa1"] = &core.InstancePool{ + Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaa1"), + CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaa1"), + LifecycleState: core.InstancePoolLifecycleStateRunning, + } + nodePoolCache.instanceSummaryCache["ocid1.instancepool.oc1.phx.aaaaaaaa1"] = &[]core.InstanceSummary{{ + Id: common.String("ocid1.instance.oc1.phx.aaa1"), + AvailabilityDomain: common.String("PHX-AD-2"), + State: common.String(string(core.InstanceLifecycleStateRunning)), + }, { + Id: common.String("ocid1.instance.oc1.phx.aaa2"), + AvailabilityDomain: common.String("PHX-AD-1"), + State: common.String(string(core.InstanceLifecycleStateTerminating)), + }, + } + + expected := []cloudprovider.Instance{ + { + Id: "ocid1.instance.oc1.phx.aaa1", + Status: &cloudprovider.InstanceStatus{ + State: cloudprovider.InstanceRunning, + }, + }, + { + Id: "ocid1.instance.oc1.phx.aaa2", + Status: &cloudprovider.InstanceStatus{ + State: cloudprovider.InstanceDeleting, + }, + }, + } + + manager := &InstancePoolManagerImpl{instancePoolCache: nodePoolCache, cfg: &CloudConfig{}} + instances, err := manager.GetInstancePoolNodes(InstancePoolNodeGroup{id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"}) + if err != nil { + t.Fatalf("received unexpected error; %+v", err) + } + + if !reflect.DeepEqual(instances, expected) { + t.Errorf("got %+v\nwanted %+v", instances, expected) + } + + err = manager.forceRefresh() + if err != nil { + t.Fatalf("received unexpected error refreshing cache; %+v", err) + } +} + +func TestGetInstancePoolAvailabilityDomain(t *testing.T) { + testCases := map[string]struct { + np *core.InstancePool + result string + expectedErr bool + }{ + "single ad": { + np: &core.InstancePool{ + Id: common.String("id"), + LifecycleState: "", + PlacementConfigurations: []core.InstancePoolPlacementConfiguration{{ + AvailabilityDomain: common.String("hash:US-ASHBURN-1"), + PrimarySubnetId: common.String("ocid1.subnet.oc1.phx.aaaaaaaa1"), + }}, + Size: common.Int(2), + }, + result: "US-ASHBURN-1", + }, + "multi-ad": { + np: &core.InstancePool{ + Id: common.String("id"), + LifecycleState: "", + PlacementConfigurations: []core.InstancePoolPlacementConfiguration{{ + AvailabilityDomain: common.String("hash:US-ASHBURN-1"), + PrimarySubnetId: common.String("ocid1.subnet.oc1.phx.aaaaaaaa1"), + }, { + AvailabilityDomain: common.String("hash:US-ASHBURN-2"), + PrimarySubnetId: common.String("ocid1.subnet.oc1.phx.aaaaaaaa2"), + }}, + Size: common.Int(2), + }, + result: "US-ASHBURN-1", + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + ad, err := 
getInstancePoolAvailabilityDomain(tc.np) + if tc.expectedErr { + if err == nil { + t.Fatalf("expected err but not nil") + } + return + } + + if ad != tc.result { + t.Errorf("got %q ; wanted %q", ad, tc.result) + } + }) + } +} + +func TestGetInstancePoolsAndInstances(t *testing.T) { + + var computeManagementClient = &mockComputeManagementClient{ + getInstancePoolResponse: core.GetInstancePoolResponse{ + InstancePool: core.InstancePool{ + Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaa1"), + CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), + InstanceConfigurationId: common.String("ocid1.instanceconfiguration.oc1.phx.aaaaaaaa1"), + PlacementConfigurations: nil, + Size: common.Int(2), + }, + }, + listInstancePoolInstancesResponse: core.ListInstancePoolInstancesResponse{ + Items: []core.InstanceSummary{{ + Id: common.String("ocid1.instance.oc1.phx.aaa1"), + AvailabilityDomain: common.String("Uocm:PHX-AD-2"), + CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), + DisplayName: common.String("inst-1ncvn-ociinstancepool"), + Shape: common.String("VM.Standard2.8"), + State: common.String(string(core.InstanceLifecycleStateRunning)), + }, { + Id: common.String("ocid1.instance.oc1.phx.aaaterminal"), + AvailabilityDomain: common.String("Uocm:PHX-AD-2"), + CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), + DisplayName: common.String("inst-2ncvn-ociinstancepool"), + Shape: common.String("VM.Standard2.8"), + State: common.String(string(core.InstanceLifecycleStateTerminated)), + }}, + }, + } + + cloudConfig := &CloudConfig{} + cloudConfig.Global.CompartmentID = "ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + manager := &InstancePoolManagerImpl{ + cfg: cloudConfig, + staticInstancePools: map[string]*InstancePoolNodeGroup{ + "ocid1.instancepool.oc1.phx.aaaaaaaa1": {id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"}, + }, + instancePoolCache: newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient), + } + + // Populate cache(s) (twice to increase code coverage). + _ = manager.Refresh() + err := manager.Refresh() + if err != nil { + t.Fatalf("unexpected error: %+v", err) + } + err = manager.forceRefresh() + if err != nil { + t.Fatalf("unexpected error: %+v", err) + } + + instancePoolNodeGroups := manager.GetInstancePools() + if got := len(instancePoolNodeGroups); got != 1 { + t.Fatalf("expected 1 (static) instance pool, got %d", got) + } + instances, err := manager.GetInstancePoolNodes(*instancePoolNodeGroups[0]) + if err != nil { + t.Fatalf("unexpected error: %+v", err) + } + // Should not pick up terminated instance. 
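+	// (the mocked pool lists one RUNNING and one TERMINATED instance, so only the running one is expected back)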
+ if got := len(instances); got != 1 { + t.Fatalf("expected 1 instance, got %d", got) + } + + instancePoolNodeGroup, err := manager.GetInstancePoolForInstance(OciRef{InstanceID: instances[0].Id}) + if err != nil { + t.Fatalf("unexpected error: %+v", err) + } + if !reflect.DeepEqual(instancePoolNodeGroup, instancePoolNodeGroups[0]) { + t.Errorf("got %+v\nwanted %+v", instancePoolNodeGroup, instancePoolNodeGroups[0]) + } +} + +func TestDeleteInstances(t *testing.T) { + + var computeManagementClient = &mockComputeManagementClient{ + getInstancePoolResponse: core.GetInstancePoolResponse{ + InstancePool: core.InstancePool{ + Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaa1"), + CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), + InstanceConfigurationId: common.String("ocid1.instanceconfiguration.oc1.phx.aaaaaaaa1"), + LifecycleState: core.InstancePoolLifecycleStateRunning, + Size: common.Int(2), + }, + }, + listInstancePoolInstancesResponse: core.ListInstancePoolInstancesResponse{ + Items: []core.InstanceSummary{{ + Id: common.String("ocid1.instance.oc1.phx.aaa1"), + AvailabilityDomain: common.String("Uocm:PHX-AD-1"), + CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), + DisplayName: common.String("inst-1ncvn-ociinstancepool"), + Shape: common.String("VM.Standard2.16"), + State: common.String(string(core.InstanceLifecycleStateRunning)), + }, { + Id: common.String("ocid1.instance.oc1.phx.aaa2"), + AvailabilityDomain: common.String("Uocm:PHX-AD-1"), + CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), + DisplayName: common.String("inst-2ncvn-ociinstancepool"), + Shape: common.String("VM.Standard2.16"), + State: common.String(string(core.InstanceLifecycleStateRunning)), + }}, + }, + } + + cloudConfig := &CloudConfig{} + cloudConfig.Global.CompartmentID = "ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + manager := &InstancePoolManagerImpl{ + cfg: cloudConfig, + staticInstancePools: map[string]*InstancePoolNodeGroup{ + "ocid1.instancepool.oc1.phx.aaaaaaaa1": {id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"}, + }, + instancePoolCache: newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient), + } + // Populate cache(s). + manager.Refresh() + + instances, err := manager.GetInstancePoolNodes(InstancePoolNodeGroup{id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"}) + if err != nil { + t.Fatalf("unexpected error: %+v", err) + } + // Should not pick up terminated instance. 
+	if got := len(instances); got != 2 {
+		t.Fatalf("expected 2 instances, got %d", got)
+	}
+	// Check size before and after delete
+	size, err := manager.GetInstancePoolSize(InstancePoolNodeGroup{id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"})
+	if err != nil {
+		t.Fatalf("unexpected error: %+v", err)
+	}
+	if size != 2 {
+		t.Errorf("got size %d ; wanted size 2 before delete", size)
+	}
+
+	instanceToDelete := OciRef{
+		AvailabilityDomain: "PHX-AD-1",
+		Name:               "inst-2ncvn-ociinstancepool",
+		InstanceID:         "ocid1.instance.oc1.phx.aaa2",
+		PoolID:             "ocid1.instancepool.oc1.phx.aaaaaaaa1",
+	}
+	err = manager.DeleteInstances(InstancePoolNodeGroup{id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"}, []OciRef{instanceToDelete})
+	if err != nil {
+		t.Fatalf("unexpected error: %+v", err)
+	}
+	size, err = manager.GetInstancePoolSize(InstancePoolNodeGroup{id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"})
+	if err != nil {
+		t.Fatalf("unexpected error: %+v", err)
+	}
+	if size != 1 {
+		t.Errorf("got size %d ; wanted size 1 *after* delete", size)
+	}
+}
diff --git a/cluster-autoscaler/cloudprovider/oci/oci_ref.go b/cluster-autoscaler/cloudprovider/oci/oci_ref.go
new file mode 100644
index 000000000000..3b0383802acb
--- /dev/null
+++ b/cluster-autoscaler/cloudprovider/oci/oci_ref.go
@@ -0,0 +1,66 @@
+/*
+Copyright 2021 Oracle and/or its affiliates.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+*/
+
+package oci
+
+import (
+	apiv1 "k8s.io/api/core/v1"
+)
+
+// OciRef contains a reference to some entity in OCI world.
+type OciRef struct {
+	AvailabilityDomain string
+	Name               string
+	CompartmentID      string
+	InstanceID         string
+	PoolID             string
+	PrivateIPAddress   string
+	PublicIPAddress    string
+	Shape              string
+}
+
+func nodeToOciRef(n *apiv1.Node) (OciRef, error) {
+	return OciRef{
+		Name:               n.ObjectMeta.Name,
+		AvailabilityDomain: n.Labels[apiv1.LabelZoneFailureDomain],
+		CompartmentID:      n.Annotations[ociAnnotationCompartmentID],
+		InstanceID:         n.Annotations[ociInstanceIDAnnotation],
+		PoolID:             n.Annotations[ociInstancePoolIDAnnotation],
+		PrivateIPAddress:   getNodeInternalAddress(n),
+		PublicIPAddress:    getNodeExternalAddress(n),
+		Shape:              n.Labels[apiv1.LabelInstanceType],
+	}, nil
+}
+
+// getNodeInternalAddress returns the first private address of the node and an empty string if none are found.
+func getNodeInternalAddress(node *apiv1.Node) string {
+	for _, address := range node.Status.Addresses {
+		if address.Type == apiv1.NodeInternalIP {
+			return address.Address
+		}
+	}
+	return ""
+}
+
+// getNodeExternalAddress returns the first public address of the node and an empty string if none are found.
+func getNodeExternalAddress(node *apiv1.Node) string { + for _, address := range node.Status.Addresses { + if address.Type == apiv1.NodeExternalIP { + return address.Address + } + } + return "" +} diff --git a/cluster-autoscaler/cloudprovider/oci/oci_shape.go b/cluster-autoscaler/cloudprovider/oci/oci_shape.go new file mode 100644 index 000000000000..dc0ead2b6d2c --- /dev/null +++ b/cluster-autoscaler/cloudprovider/oci/oci_shape.go @@ -0,0 +1,136 @@ +/* +Copyright 2021 Oracle and/or its affiliates. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package oci + +import ( + "context" + "fmt" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core" + "k8s.io/klog/v2" +) + +// ShapeGetter returns the oci shape attributes for the pool. +type ShapeGetter interface { + GetInstancePoolShape(pool *core.InstancePool) (*Shape, error) +} + +// ShapeClient is an interface around the GetInstanceConfiguration and ListShapes calls. +type ShapeClient interface { + GetInstanceConfiguration(context.Context, core.GetInstanceConfigurationRequest) (core.GetInstanceConfigurationResponse, error) + ListShapes(context.Context, core.ListShapesRequest) (core.ListShapesResponse, error) +} + +// ShapeClientImpl is the implementation for fetching shape information. +type ShapeClientImpl struct { + // Can fetch instance configs (flexible shapes) + computeMgmtClient core.ComputeManagementClient + // Can fetch shapes directly + computeClient core.ComputeClient +} + +// GetInstanceConfiguration gets the instance configuration. +func (cc ShapeClientImpl) GetInstanceConfiguration(ctx context.Context, req core.GetInstanceConfigurationRequest) (core.GetInstanceConfigurationResponse, error) { + return cc.computeMgmtClient.GetInstanceConfiguration(ctx, req) +} + +// ListShapes lists the shapes. +func (cc ShapeClientImpl) ListShapes(ctx context.Context, req core.ListShapesRequest) (core.ListShapesResponse, error) { + return cc.computeClient.ListShapes(ctx, req) +} + +// Shape includes the resource attributes of a given shape which should be used +// for constructing node templates. +type Shape struct { + Name string + CPU float32 + GPU int + MemoryInBytes float32 +} + +// createShapeGetter creates a new oci shape getter. 
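+// Shape details are resolved from the pool's instance configuration for flexible shapes,
+// or from ListShapes for fixed shapes.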
+func createShapeGetter(shapeClient ShapeClient) ShapeGetter { + return &shapeGetterImpl{ + shapeClient: shapeClient, + cache: map[string]*Shape{}, + } +} + +type shapeGetterImpl struct { + shapeClient ShapeClient + cache map[string]*Shape +} + +func (osf *shapeGetterImpl) GetInstancePoolShape(ip *core.InstancePool) (*Shape, error) { + + shape := &Shape{} + + klog.V(5).Info("fetching shape configuration details for instance-pool " + *ip.Id) + + instanceConfig, err := osf.shapeClient.GetInstanceConfiguration(context.Background(), core.GetInstanceConfigurationRequest{ + InstanceConfigurationId: ip.InstanceConfigurationId, + }) + if err != nil { + return nil, err + } + + if instanceConfig.InstanceDetails == nil { + return nil, fmt.Errorf("instance configuration details for instance %s has not been set", *ip.Id) + } + + if instanceDetails, ok := instanceConfig.InstanceDetails.(core.ComputeInstanceDetails); ok { + // flexible shape use details or look up the static shape details below. + if instanceDetails.LaunchDetails != nil && instanceDetails.LaunchDetails.ShapeConfig != nil { + if instanceDetails.LaunchDetails.Shape != nil { + shape.Name = *instanceDetails.LaunchDetails.Shape + } + if instanceDetails.LaunchDetails.ShapeConfig.Ocpus != nil { + shape.CPU = *instanceDetails.LaunchDetails.ShapeConfig.Ocpus + // Minimum amount of memory unless explicitly set higher + shape.MemoryInBytes = *instanceDetails.LaunchDetails.ShapeConfig.Ocpus * 1024 * 1024 * 1024 + } + if instanceDetails.LaunchDetails.ShapeConfig.MemoryInGBs != nil { + shape.MemoryInBytes = *instanceDetails.LaunchDetails.ShapeConfig.MemoryInGBs * 1024 * 1024 * 1024 + } + } else { + allShapes, _ := osf.shapeClient.ListShapes(context.Background(), core.ListShapesRequest{ + CompartmentId: instanceConfig.CompartmentId, + }) + for _, nextShape := range allShapes.Items { + if *nextShape.Shape == *instanceDetails.LaunchDetails.Shape { + shape.Name = *nextShape.Shape + if nextShape.Ocpus != nil { + shape.CPU = *nextShape.Ocpus + } + if nextShape.MemoryInGBs != nil { + shape.MemoryInBytes = *nextShape.MemoryInGBs * 1024 * 1024 * 1024 + } + if nextShape.Gpus != nil { + shape.GPU = *nextShape.Gpus + } + } + } + } + } else { + return nil, fmt.Errorf("(compute) instance configuration for instance-pool %s not found", *ip.Id) + } + + // Didn't find a match + if shape.Name == "" { + return nil, fmt.Errorf("shape information for instance-pool %s not found", *ip.Id) + } + return shape, nil +} diff --git a/cluster-autoscaler/cloudprovider/oci/oci_shape_test.go b/cluster-autoscaler/cloudprovider/oci/oci_shape_test.go new file mode 100644 index 000000000000..70e985370af0 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/oci/oci_shape_test.go @@ -0,0 +1,237 @@ +package oci + +import ( + "context" + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + kubeletapis "k8s.io/kubelet/pkg/apis" + "reflect" + "strings" + "testing" + + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core" +) + +type mockShapeClient struct { + err error + listShapeResp core.ListShapesResponse + getInstanceConfigResp core.GetInstanceConfigurationResponse +} + +func (m *mockShapeClient) ListShapes(_ context.Context, _ core.ListShapesRequest) (core.ListShapesResponse, error) { + return m.listShapeResp, m.err +} + +func (m *mockShapeClient) GetInstanceConfiguration(context.Context, core.GetInstanceConfigurationRequest) 
(core.GetInstanceConfigurationResponse, error) { + return m.getInstanceConfigResp, m.err +} + +var launchDetails = core.InstanceConfigurationLaunchInstanceDetails{ + CompartmentId: nil, + DisplayName: nil, + CreateVnicDetails: nil, + Shape: common.String("VM.Standard.E3.Flex"), + ShapeConfig: &core.InstanceConfigurationLaunchInstanceShapeConfigDetails{ + Ocpus: common.Float32(8), + MemoryInGBs: common.Float32(128), + }, + SourceDetails: nil, +} +var instanceDetails = core.ComputeInstanceDetails{ + LaunchDetails: &launchDetails, +} + +var shapeClient = &mockShapeClient{ + err: nil, + listShapeResp: core.ListShapesResponse{ + Items: []core.Shape{ + { + Shape: common.String("VM.Standard2.8"), + Ocpus: common.Float32(8), + MemoryInGBs: common.Float32(120), + }, + }, + }, + getInstanceConfigResp: core.GetInstanceConfigurationResponse{ + RawResponse: nil, + InstanceConfiguration: core.InstanceConfiguration{ + CompartmentId: nil, + Id: common.String("ocid1.instanceconfiguration.oc1.phx.aaaaaaaa1"), + TimeCreated: nil, + DefinedTags: nil, + DisplayName: nil, + FreeformTags: nil, + InstanceDetails: instanceDetails, + DeferredFields: nil, + }, + Etag: nil, + OpcRequestId: nil, + }, +} + +func TestGetShape(t *testing.T) { + + testCases := map[string]struct { + shape string + expected *Shape + }{ + "flex shape": { + shape: "VM.Standard.E3.Flex", + expected: &Shape{ + Name: "VM.Standard.E3.Flex", + CPU: 8, + MemoryInBytes: float32(128) * 1024 * 1024 * 1024, + GPU: 0, + }, + }, + } + + for name, tc := range testCases { + shapeGetter := createShapeGetter(shapeClient) + + t.Run(name, func(t *testing.T) { + shape, err := shapeGetter.GetInstancePoolShape(&core.InstancePool{Id: &tc.shape, InstanceConfigurationId: common.String("ocid1.instanceconfiguration.oc1.phx.aaaaaaaa1")}) + if err != nil { + t.Fatal(err) + } + + if !reflect.DeepEqual(shape, tc.expected) { + t.Errorf("wanted %+v ; got %+v", tc.expected, shape) + } + + if !strings.Contains(tc.shape, "Flex") { + // we can't poolCache flex shapes so only check poolCache on non flex shapes + cacheShape, ok := shapeGetter.(*shapeGetterImpl).cache[tc.shape] + if !ok { + t.Error("shape not found in poolCache") + } + + if !reflect.DeepEqual(cacheShape, tc.expected) { + t.Errorf("wanted %+v ; got %+v", tc.expected, shape) + } + } + + }) + } +} + +func TestGetInstancePoolTemplateNode(t *testing.T) { + instancePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient) + instancePoolCache.poolCache["ocid1.instancepool.oc1.phx.aaaaaaaa1"] = &core.InstancePool{ + Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaa1"), + CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaa1"), + LifecycleState: core.InstancePoolLifecycleStateRunning, + PlacementConfigurations: []core.InstancePoolPlacementConfiguration{{ + AvailabilityDomain: common.String("hash:US-ASHBURN-1"), + PrimarySubnetId: common.String("ocid1.subnet.oc1.phx.aaaaaaaa1"), + }}, + } + instancePoolCache.instanceSummaryCache["ocid1.instancepool.oc1.phx.aaaaaaaa1"] = &[]core.InstanceSummary{{ + Id: common.String("ocid1.instance.oc1.phx.aaa1"), + AvailabilityDomain: common.String("PHX-AD-2"), + State: common.String(string(core.InstanceLifecycleStateRunning)), + }, + } + + cloudConfig := &CloudConfig{} + cloudConfig.Global.CompartmentID = "ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + var manager = &InstancePoolManagerImpl{ + cfg: cloudConfig, + shapeGetter: createShapeGetter(shapeClient), + staticInstancePools: 
map[string]*InstancePoolNodeGroup{ + "ocid1.instancepool.oc1.phx.aaaaaaaa1": {id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"}, + }, + instancePoolCache: instancePoolCache, + } + + instancePoolNodeGroups := manager.GetInstancePools() + + if got := len(instancePoolNodeGroups); got != 1 { + t.Fatalf("expected 1 (static) instance pool, got %d", got) + } + nodeTemplate, err := manager.GetInstancePoolTemplateNode(*instancePoolNodeGroups[0]) + if err != nil { + t.Fatalf("received unexpected error refreshing cache; %+v", err) + } + labels := nodeTemplate.GetLabels() + if labels == nil { + t.Fatalf("expected labels on node object") + } + // Double check the shape label. + if got := labels[apiv1.LabelInstanceTypeStable]; got != "VM.Standard.E3.Flex" { + t.Fatalf("expected shape label %s to be set to VM.Standard.E3.Flex: %v", apiv1.LabelInstanceTypeStable, nodeTemplate.Labels) + } + + // Also check the AD label for good measure. + if got := labels[apiv1.LabelTopologyZone]; got != "US-ASHBURN-1" { + t.Fatalf("expected AD zone label %s to be set to US-ASHBURN-1: %v", apiv1.LabelTopologyZone, nodeTemplate.Labels) + } + +} + +func TestBuildGenericLabels(t *testing.T) { + + shapeName := "VM.Standard2.8" + np := &core.InstancePool{ + Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaah"), + Size: common.Int(2), + } + + nodeName := "node1" + availabilityDomain := "US-ASHBURN-1" + + expected := map[string]string{ + kubeletapis.LabelArch: cloudprovider.DefaultArch, + apiv1.LabelArchStable: cloudprovider.DefaultArch, + kubeletapis.LabelOS: cloudprovider.DefaultOS, + apiv1.LabelOSStable: cloudprovider.DefaultOS, + apiv1.LabelZoneRegion: "phx", + apiv1.LabelZoneRegionStable: "phx", + apiv1.LabelInstanceType: shapeName, + apiv1.LabelInstanceTypeStable: shapeName, + apiv1.LabelZoneFailureDomain: availabilityDomain, + apiv1.LabelZoneFailureDomainStable: availabilityDomain, + apiv1.LabelHostname: nodeName, + } + + launchDetails := core.InstanceConfigurationLaunchInstanceDetails{ + Shape: common.String("VM.Standard2.8"), + } + + instanceDetails := core.ComputeInstanceDetails{ + LaunchDetails: &launchDetails, + } + + // For list shapes + mockShapeClient := &mockShapeClient{ + err: nil, + listShapeResp: core.ListShapesResponse{ + Items: []core.Shape{ + {Shape: common.String("VM.Standard2.4"), Ocpus: common.Float32(4), MemoryInGBs: common.Float32(60)}, + {Shape: common.String("VM.Standard2.8"), Ocpus: common.Float32(8), MemoryInGBs: common.Float32(120)}}, + }, + getInstanceConfigResp: core.GetInstanceConfigurationResponse{ + InstanceConfiguration: core.InstanceConfiguration{ + Id: common.String("ocid1.instanceconfiguration.oc1.phx.aaaaaaaa1"), + InstanceDetails: instanceDetails, + }, + }, + } + shapeGetter := createShapeGetter(mockShapeClient) + + manager := InstancePoolManagerImpl{ + shapeGetter: shapeGetter, + } + + shape, err := manager.shapeGetter.GetInstancePoolShape(np) + if err != nil { + t.Fatalf("unexpected error: %+v", err) + } + + output := buildGenericLabelsForInstancePool(np, nodeName, shape.Name, availabilityDomain) + if !reflect.DeepEqual(output, expected) { + t.Fatalf("got %+v\nwanted %+v", output, expected) + } + +} diff --git a/cluster-autoscaler/cloudprovider/oci/oci_util.go b/cluster-autoscaler/cloudprovider/oci/oci_util.go new file mode 100644 index 000000000000..20659f97566e --- /dev/null +++ b/cluster-autoscaler/cloudprovider/oci/oci_util.go @@ -0,0 +1,211 @@ +/* +Copyright 2021 Oracle and/or its affiliates. 
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package oci
+
+import (
+	"context"
+	"fmt"
+	"math"
+	"math/rand"
+	"net"
+	"net/http"
+	"time"
+
+	"github.com/pkg/errors"
+	apiv1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/klog/v2"
+	"k8s.io/kubernetes/pkg/apis/scheduling"
+)
+
+// IsRetryable returns true if the given error is retryable.
+func IsRetryable(err error) bool {
+	if err == nil {
+		return false
+	}
+
+	err = errors.Cause(err)
+
+	// Retry on network timeout errors.
+	if err, ok := err.(net.Error); ok && err.Timeout() {
+		return true
+	}
+
+	// Handle OCI service errors that are considered retryable.
+	serviceErr, ok := common.IsServiceError(err)
+	if !ok {
+		return false
+	}
+
+	switch serviceErr.GetHTTPStatusCode() {
+	case http.StatusTooManyRequests, http.StatusGatewayTimeout,
+		http.StatusInternalServerError, http.StatusBadGateway:
+		return true
+	default:
+		return false
+	}
+}
+
+func newRetryPolicy() *common.RetryPolicy {
+	return NewRetryPolicyWithMaxAttempts(uint(8))
+}
+
+// NewRetryPolicyWithMaxAttempts returns a RetryPolicy with the specified maximum number of retry attempts.
+func NewRetryPolicyWithMaxAttempts(retryAttempts uint) *common.RetryPolicy {
+	isRetryableOperation := func(r common.OCIOperationResponse) bool {
+		return IsRetryable(r.Error)
+	}
+
+	nextDuration := func(r common.OCIOperationResponse) time.Duration {
+		// Wait longer before each successive retry after a failure;
+		// the returned backoff durations follow the sequence
+		// 1s, 2s, 4s, 8s, 16s, 32s, 64s etc...
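+		// i.e. 2^(attemptNumber-1) seconds with no jitter, which for the
+		// default policy of 8 attempts adds up to roughly two minutes.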
+		return time.Duration(math.Pow(float64(2), float64(r.AttemptNumber-1))) * time.Second
+	}
+
+	policy := common.NewRetryPolicy(
+		retryAttempts, isRetryableOperation, nextDuration,
+	)
+	return &policy
+}
+
+// Note: kube-proxy currently ships without resource requests, and the Flannel
+// DaemonSet is missing a priority class.
+
+func buildCSINodePod() *apiv1.Pod {
+	priority := scheduling.SystemCriticalPriority
+	return &apiv1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      fmt.Sprintf("csi-oci-node-%d", rand.Int63()),
+			Namespace: "kube-system",
+			Labels: map[string]string{
+				"app": "csi-oci-node",
+			},
+		},
+		Spec: apiv1.PodSpec{
+			Containers: []apiv1.Container{
+				{
+					Image: "iad.ocir.io/oracle/cloud-provider-oci:latest",
+				},
+			},
+			Priority: &priority,
+		},
+		Status: apiv1.PodStatus{
+			Phase: apiv1.PodRunning,
+			Conditions: []apiv1.PodCondition{
+				{
+					Type:   apiv1.PodReady,
+					Status: apiv1.ConditionTrue,
+				},
+			},
+		},
+	}
+}
+
+func annotateNode(kubeClient kubernetes.Interface, nodeName string, key string, value string) error {
+	if nodeName == "" {
+		return errors.New("node name is required")
+	}
+	if kubeClient == nil {
+		return errors.New("kubernetes client is required")
+	}
+
+	node, err := kubeClient.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{})
+	if err != nil {
+		klog.Errorf("failed to get node %s %+v", nodeName, err)
+		return err
+	}
+
+	annotations := node.GetAnnotations()
+	if annotations == nil {
+		annotations = map[string]string{}
+		node.Annotations = annotations
+	}
+
+	if v := annotations[key]; v != value {
+		node.Annotations[key] = value
+		_, err := kubeClient.CoreV1().Nodes().Update(context.Background(), node, metav1.UpdateOptions{})
+		if err != nil {
+			klog.Errorf("failed to annotate node %s %+v", nodeName, err)
+			return err
+		}
+		klog.V(3).Infof("updated annotation %s=%s on node: %s", key, value, nodeName)
+	}
+	return nil
+}
+
+func labelNode(kubeClient kubernetes.Interface, nodeName string, key string, value string) error {
+	if nodeName == "" {
+		return errors.New("node name is required")
+	}
+	if kubeClient == nil {
+		return errors.New("kubernetes client is required")
+	}
+
+	node, err := kubeClient.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{})
+	if err != nil {
+		klog.Errorf("failed to get node %s %+v", nodeName, err)
+		return err
+	}
+
+	labels := node.GetLabels()
+	if labels == nil {
+		labels = map[string]string{}
+		node.Labels = labels
+	}
+
+	if v := labels[key]; v != value {
+		node.Labels[key] = value
+		_, err := kubeClient.CoreV1().Nodes().Update(context.Background(), node, metav1.UpdateOptions{})
+		if err != nil {
+			klog.Errorf("failed to label node %s %+v", nodeName, err)
+			return err
+		}
+		klog.V(3).Infof("updated label %s=%s on node: %s", key, value, nodeName)
+	}
+	return nil
+}
+
+func setNodeProviderID(kubeClient kubernetes.Interface, nodeName string, value string) error {
+	if nodeName == "" {
+		return errors.New("node name is required")
+	}
+	if kubeClient == nil {
+		return errors.New("kubernetes client is required")
+	}
+
+	node, err := kubeClient.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{})
+	if err != nil {
+		klog.Errorf("failed to get node %s %+v", nodeName, err)
+		return err
+	}
+
+	if node.Spec.ProviderID != value {
+		node.Spec.ProviderID = value
+		_, err := kubeClient.CoreV1().Nodes().Update(context.Background(), node, metav1.UpdateOptions{})
+		if err != nil {
+			klog.Errorf("failed to update node's provider ID %s %+v", nodeName, err)
+			return err
+		}
+		klog.V(3).Infof("updated provider ID on node: %s", nodeName)
+	}
+	return nil
+}
diff --git a/cluster-autoscaler/cloudprovider/oci/oci_util_test.go b/cluster-autoscaler/cloudprovider/oci/oci_util_test.go
new file mode 100644
index 000000000000..6de61894df34
--- /dev/null
+++ b/cluster-autoscaler/cloudprovider/oci/oci_util_test.go
@@ -0,0 +1,12 @@
+package oci
+
+import (
+	"testing"
+)
+
+func TestSetProviderID(t *testing.T) {
+	err := setNodeProviderID(nil, "", "")
+	if err == nil {
+		t.Fatal("expected error")
+	}
+}
diff --git a/hack/boilerplate/boilerplate.py b/hack/boilerplate/boilerplate.py
index d25a1a4564dd..f12dfeeffd4b 100755
--- a/hack/boilerplate/boilerplate.py
+++ b/hack/boilerplate/boilerplate.py
@@ -157,7 +157,8 @@ def file_extension(filename):
                 "cluster-autoscaler/cloudprovider/digitalocean/godo",
                 "cluster-autoscaler/cloudprovider/magnum/gophercloud",
                 "cluster-autoscaler/cloudprovider/ionoscloud/ionos-cloud-sdk-go",
-                "cluster-autoscaler/cloudprovider/hetzner/hcloud-go"]
+                "cluster-autoscaler/cloudprovider/hetzner/hcloud-go",
+                "cluster-autoscaler/cloudprovider/oci"]
 
 # list all the files contain 'DO NOT EDIT', but are not generated
 skipped_ungenerated_files = ['hack/build-ui.sh', 'hack/lib/swagger.sh',
diff --git a/hack/verify-spelling.sh b/hack/verify-spelling.sh
index 63d7f5049fda..5a3a8189d49f 100755
--- a/hack/verify-spelling.sh
+++ b/hack/verify-spelling.sh
@@ -23,4 +23,4 @@ DIR=$(dirname $0)
 go install ${DIR}/../../../github.com/client9/misspell/cmd/misspell
 
 # Spell checking
-git ls-files --full-name | grep -v -e vendor | grep -v cluster-autoscaler/cloudprovider/magnum/gophercloud| grep -v cluster-autoscaler/cloudprovider/huaweicloud/huaweicloud-sdk-go-v3 | grep -v cluster-autoscaler/cloudprovider/digitalocean/godo | grep -v cluster-autoscaler/cloudprovider/hetzner/hcloud-go | grep -v cluster-autoscaler/cloudprovider/bizflycloud/gobizfly | grep -E -v 'cluster-autoscaler/cloudprovider/brightbox/(go-cache|gobrightbox|k8ssdk|linkheader)' | xargs misspell -error -o stderr
+git ls-files --full-name | grep -v -e vendor | grep -v cluster-autoscaler/cloudprovider/magnum/gophercloud| grep -v cluster-autoscaler/cloudprovider/huaweicloud/huaweicloud-sdk-go-v3 | grep -v cluster-autoscaler/cloudprovider/digitalocean/godo | grep -v cluster-autoscaler/cloudprovider/hetzner/hcloud-go | grep -v cluster-autoscaler/cloudprovider/bizflycloud/gobizfly | grep -v cluster-autoscaler/cloudprovider/oci/oci-go-sdk | grep -E -v 'cluster-autoscaler/cloudprovider/brightbox/(go-cache|gobrightbox|k8ssdk|linkheader)' | xargs misspell -error -o stderr
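
Illustrative usage (a sketch, not part of the patch): the policy built by NewRetryPolicyWithMaxAttempts in oci_util.go can be attached to an individual oci-go-sdk request through that request's RequestMetadata field; the pool OCID and the authenticated core.ComputeManagementClient ("computeMgmtClient") below are placeholders, and the snippet assumes it runs inside the oci package.

    // Retry the GetInstancePool call up to 5 times using the package's backoff policy.
    policy := NewRetryPolicyWithMaxAttempts(5)
    req := core.GetInstancePoolRequest{
        InstancePoolId:  common.String("ocid1.instancepool.oc1.phx.example"),
        RequestMetadata: common.RequestMetadata{RetryPolicy: policy},
    }
    resp, err := computeMgmtClient.GetInstancePool(context.Background(), req)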