azurerm_kubernetes_cluster, azurerm_kubernetes_cluster_node_pool - support for the gpu_instance_profile property #23887

Merged
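In practice, the new argument is set alongside `vm_size` on a GPU node pool. A minimal usage sketch (resource names are illustrative; `gpu_instance` requires a MIG-capable GPU SKU such as the one used in the acceptance tests below):

```hcl
resource "azurerm_kubernetes_cluster_node_pool" "gpu" {
  name                  = "gpu"
  kubernetes_cluster_id = azurerm_kubernetes_cluster.example.id
  vm_size               = "Standard_ND96asr_v4" # MIG-capable GPU SKU
  node_count            = 1

  # New argument added by this PR; one of MIG1g, MIG2g, MIG3g, MIG4g, MIG7g.
  # Changing it forces the node pool to be recreated (ForceNew).
  gpu_instance = "MIG1g"
}
```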
Changes from 3 commits
@@ -152,6 +152,19 @@ func resourceKubernetesClusterNodePoolSchema() map[string]*pluginsdk.Schema {
ForceNew: true,
},

"gpu_instance": {
Type: pluginsdk.TypeString,
Optional: true,
ForceNew: true,
ValidateFunc: validation.StringInSlice([]string{
string(agentpools.GPUInstanceProfileMIGOneg),
string(agentpools.GPUInstanceProfileMIGTwog),
string(agentpools.GPUInstanceProfileMIGThreeg),
string(agentpools.GPUInstanceProfileMIGFourg),
string(agentpools.GPUInstanceProfileMIGSeveng),
}, false),
},

"kubelet_disk_type": {
Type: pluginsdk.TypeString,
Optional: true,
@@ -468,6 +481,10 @@ func resourceKubernetesClusterNodePoolCreate(d *pluginsdk.ResourceData, meta int
Count: utils.Int64(int64(count)),
}

if gpuInstanceProfile := d.Get("gpu_instance").(string); gpuInstanceProfile != "" {
profile.GpuInstanceProfile = utils.ToPtr(agentpools.GPUInstanceProfile(gpuInstanceProfile))
}

if osSku := d.Get("os_sku").(string); osSku != "" {
profile.OsSKU = utils.ToPtr(agentpools.OSSKU(osSku))
}
@@ -845,6 +862,10 @@ func resourceKubernetesClusterNodePoolRead(d *pluginsdk.ResourceData, meta inter
d.Set("kubelet_disk_type", string(*v))
}

if v := props.GpuInstanceProfile; v != nil {
d.Set("gpu_instance", string(*v))
}

if props.CreationData != nil {
d.Set("snapshot_id", props.CreationData.SourceResourceId)
}
@@ -1034,6 +1034,21 @@ func TestAccKubernetesClusterNodePool_snapshotId(t *testing.T) {
})
}

func TestAccKubernetesClusterNodePool_gpuInstance(t *testing.T) {
data := acceptance.BuildTestData(t, "azurerm_kubernetes_cluster_node_pool", "test")
r := KubernetesClusterNodePoolResource{}

data.ResourceTest(t, r, []acceptance.TestStep{
{
Config: r.gpuInstance(data),
Check: acceptance.ComposeTestCheckFunc(
check.That(data.ResourceName).ExistsInAzure(r),
),
},
data.ImportStep(),
})
}

func (t KubernetesClusterNodePoolResource) Exists(ctx context.Context, clients *clients.Client, state *pluginsdk.InstanceState) (*bool, error) {
id, err := agentpools.ParseAgentPoolID(state.ID)
if err != nil {
@@ -2706,3 +2721,38 @@ resource "azurerm_kubernetes_cluster_node_pool" "test" {
}
`, data.Locations.Primary, data.RandomInteger, data.RandomString)
}

func (KubernetesClusterNodePoolResource) gpuInstance(data acceptance.TestData) string {
return fmt.Sprintf(`
provider "azurerm" {
features {}
}

resource "azurerm_resource_group" "test" {
name = "acctestRG-aks-%[2]d"
location = "%[1]s"
}

resource "azurerm_kubernetes_cluster" "test" {
name = "acctestaks%[2]d"
location = azurerm_resource_group.test.location
resource_group_name = azurerm_resource_group.test.name
dns_prefix = "acctestaks%[2]d"
default_node_pool {
name = "default"
node_count = 1
vm_size = "Standard_D2s_v3"
}
identity {
type = "SystemAssigned"
}
}

resource "azurerm_kubernetes_cluster_node_pool" "test" {
name = "internal"
kubernetes_cluster_id = azurerm_kubernetes_cluster.test.id
vm_size = "Standard_ND96asr_v4"
Review comment (Member): Can we please use a smaller, less expensive VM? We don't need to spin up a VM with 96 vCPUs and 900 GB of memory to test this.

Reply (Contributor Author): Got it! I've changed the VM size.
gpu_instance = "MIG1g"
}
`, data.Locations.Primary, data.RandomInteger)
}
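The reviewer's request above is not reflected in the first three commits shown in this diff. As a rough sketch only, a smaller configuration could swap in a less expensive MIG-capable SKU (assuming `Standard_NC24ads_A100_v4`, an A100-based SKU that supports MIG, is available in the test region; the SKU actually used in the follow-up commit is not shown here):

```hcl
resource "azurerm_kubernetes_cluster_node_pool" "test" {
  name                  = "internal"
  kubernetes_cluster_id = azurerm_kubernetes_cluster.test.id
  vm_size               = "Standard_NC24ads_A100_v4" # assumption: smaller MIG-capable SKU
  gpu_instance          = "MIG1g"
}
```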
@@ -1043,6 +1043,21 @@ func TestAccKubernetesCluster_snapshotId(t *testing.T) {
})
}

func TestAccKubernetesCluster_gpuInstance(t *testing.T) {
data := acceptance.BuildTestData(t, "azurerm_kubernetes_cluster", "test")
r := KubernetesClusterResource{}

data.ResourceTest(t, r, []acceptance.TestStep{
{
Config: r.gpuInstance(data),
Check: acceptance.ComposeTestCheckFunc(
check.That(data.ResourceName).ExistsInAzure(r),
),
},
data.ImportStep(),
})
}

func (KubernetesClusterResource) basicAvailabilitySetConfig(data acceptance.TestData) string {
return fmt.Sprintf(`
provider "azurerm" {
@@ -3103,3 +3118,34 @@ resource "azurerm_kubernetes_cluster" "test" {
}
`, data.Locations.Primary, data.RandomInteger, data.RandomString)
}

func (KubernetesClusterResource) gpuInstance(data acceptance.TestData) string {
return fmt.Sprintf(`
provider "azurerm" {
features {}
}

resource "azurerm_resource_group" "test" {
name = "acctestRG-aks-%[2]d"
location = "%[1]s"
}

resource "azurerm_kubernetes_cluster" "test" {
name = "acctestaks%[2]d"
location = azurerm_resource_group.test.location
resource_group_name = azurerm_resource_group.test.name
dns_prefix = "acctestaks%[2]d"

default_node_pool {
name = "default"
node_count = 1
vm_size = "Standard_ND96asr_v4"
gpu_instance = "MIG1g"
}

identity {
type = "SystemAssigned"
}
}
`, data.Locations.Primary, data.RandomInteger)
}
27 changes: 27 additions & 0 deletions internal/services/containers/kubernetes_nodepool.go
@@ -109,6 +109,19 @@ func SchemaDefaultNodePool() *pluginsdk.Schema {
ForceNew: true,
},

"gpu_instance": {
Type: pluginsdk.TypeString,
Optional: true,
ForceNew: true,
ValidateFunc: validation.StringInSlice([]string{
string(managedclusters.GPUInstanceProfileMIGOneg),
string(managedclusters.GPUInstanceProfileMIGTwog),
string(managedclusters.GPUInstanceProfileMIGThreeg),
string(managedclusters.GPUInstanceProfileMIGFourg),
string(managedclusters.GPUInstanceProfileMIGSeveng),
}, false),
},

"kubelet_disk_type": {
Type: pluginsdk.TypeString,
Optional: true,
@@ -916,6 +929,10 @@ func ConvertDefaultNodePoolToAgentPool(input *[]managedclusters.ManagedClusterAg
}
}

if defaultCluster.GpuInstanceProfile != nil {
agentpool.Properties.GpuInstanceProfile = utils.ToPtr(agentpools.GPUInstanceProfile(*defaultCluster.GpuInstanceProfile))
}

return agentpool
}

@@ -1046,6 +1063,10 @@ func ExpandDefaultNodePool(d *pluginsdk.ResourceData) (*[]managedclusters.Manage
profile.CapacityReservationGroupID = utils.String(capacityReservationGroupId)
}

if gpuInstanceProfile := raw["gpu_instance"].(string); gpuInstanceProfile != "" {
profile.GpuInstanceProfile = utils.ToPtr(managedclusters.GPUInstanceProfile(gpuInstanceProfile))
}

count := raw["node_count"].(int)
maxCount := raw["max_count"].(int)
minCount := raw["min_count"].(int)
@@ -1325,6 +1346,11 @@ func FlattenDefaultNodePool(input *[]managedclusters.ManagedClusterAgentPoolProf
enableHostEncryption = *agentPool.EnableEncryptionAtHost
}

gpuInstanceProfile := ""
if agentPool.GpuInstanceProfile != nil {
gpuInstanceProfile = string(*agentPool.GpuInstanceProfile)
}

maxCount := 0
if agentPool.MaxCount != nil {
maxCount = int(*agentPool.MaxCount)
@@ -1471,6 +1497,7 @@ func FlattenDefaultNodePool(input *[]managedclusters.ManagedClusterAgentPoolProf
"enable_host_encryption": enableHostEncryption,
"custom_ca_trust_enabled": customCaTrustEnabled,
"fips_enabled": enableFIPS,
"gpu_instance": gpuInstanceProfile,
"host_group_id": hostGroupID,
"kubelet_disk_type": kubeletDiskType,
"max_count": maxCount,
2 changes: 2 additions & 0 deletions website/docs/r/kubernetes_cluster.html.markdown
@@ -398,6 +398,8 @@ A `default_node_pool` block supports the following:

* `enable_node_public_ip` - (Optional) Should nodes in this Node Pool have a Public IP Address? `temporary_name_for_rotation` must be specified when changing this property.

* `gpu_instance` - (Optional) Specifies the GPU MIG instance profile for supported GPU VM SKUs. The allowed values are `MIG1g`, `MIG2g`, `MIG3g`, `MIG4g` and `MIG7g`. Changing this forces a new resource to be created.

* `host_group_id` - (Optional) Specifies the ID of the Host Group within which this AKS Cluster should be created. Changing this forces a new resource to be created.

* `kubelet_config` - (Optional) A `kubelet_config` block as defined below. `temporary_name_for_rotation` must be specified when changing this block.
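The `gpu_instance` argument documented above applies to the `default_node_pool` block; a minimal sketch mirroring the acceptance test configuration earlier in this diff (only the relevant attributes are shown):

```hcl
default_node_pool {
  name         = "default"
  node_count   = 1
  vm_size      = "Standard_ND96asr_v4" # MIG-capable GPU SKU required
  gpu_instance = "MIG1g"               # MIG1g / MIG2g / MIG3g / MIG4g / MIG7g
}
```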
2 changes: 2 additions & 0 deletions website/docs/r/kubernetes_cluster_node_pool.html.markdown
@@ -98,6 +98,8 @@ The following arguments are supported:

~> **Note:** FIPS support is in Public Preview - more information and details on how to opt into the Preview can be found in [this article](https://docs.microsoft.com/azure/aks/use-multiple-node-pools#add-a-fips-enabled-node-pool-preview).

* `gpu_instance` - (Optional) Specifies the GPU MIG instance profile for supported GPU VM SKUs. The allowed values are `MIG1g`, `MIG2g`, `MIG3g`, `MIG4g` and `MIG7g`. Changing this forces a new resource to be created.

* `kubelet_disk_type` - (Optional) The type of disk used by kubelet. Possible values are `OS` and `Temporary`.

* `max_pods` - (Optional) The maximum number of pods that can run on each agent. Changing this forces a new resource to be created.