Skip to content

Commit

Permalink
Run Arm64 nodepool to e2e test runs (dapr#5111)
Browse files Browse the repository at this point in the history
* Add arm cluster

Signed-off-by: Addison Juarez <ajuarez@microsoft.com>

* fix typos

Signed-off-by: Addison Juarez <ajuarez@microsoft.com>

* test pipeline

Signed-off-by: Addison Juarez <ajuarez@microsoft.com>

* syntax

Signed-off-by: Addison Juarez <ajuarez@microsoft.com>

* syntax

Signed-off-by: Addison Juarez <ajuarez@microsoft.com>

* syntax

Signed-off-by: Addison Juarez <ajuarez@microsoft.com>

* syntax

Signed-off-by: Addison Juarez <ajuarez@microsoft.com>

* syntax

Signed-off-by: Addison Juarez <ajuarez@microsoft.com>

* fixes

Signed-off-by: Addison Juarez <adjuarez@microsoft.com>

* change vm

Signed-off-by: Addison Juarez <adjuarez@microsoft.com>

* fix arm prefix

Signed-off-by: Addison Juarez <adjuarez@microsoft.com>

* undo del

Signed-off-by: Addison Juarez <adjuarez@microsoft.com>

* change image caching key

Signed-off-by: Addison Juarez <adjuarez@microsoft.com>

* Support building e2e apps for arm64

Signed-off-by: Addison Juarez <adjuarez@microsoft.com>

* fix expression

Signed-off-by: Addison Juarez <adjuarez@microsoft.com>

* Change region logic

Signed-off-by: Addison Juarez <adjuarez@microsoft.com>

* change region randomness

Signed-off-by: Addison Juarez <adjuarez@microsoft.com>

* add switch statement

Signed-off-by: Addison Juarez <adjuarez@microsoft.com>

* Update messages for arm

Signed-off-by: Addison Juarez <adjuarez@microsoft.com>

Signed-off-by: Addison Juarez <ajuarez@microsoft.com>
Signed-off-by: Addison Juarez <adjuarez@microsoft.com>
Co-authored-by: Addison Juarez <ajuarez@microsoft.com>
Co-authored-by: Artur Souza <artursouza.ms@outlook.com>
  • Loading branch information
3 people authored Sep 15, 2022
1 parent 2b39f26 commit c6b1067
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 12 deletions.
16 changes: 13 additions & 3 deletions .build-tools/cmd/zz-e2e-perf.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ func (c *cmdE2EPerf) getCachedImage() (string, error) {
}

// If cache is enabled, try pulling from cache first
cachedImage := fmt.Sprintf("%s/%s-%s:%s-%s", c.flags.CacheRegistry, c.cmdType, c.flags.Name, c.flags.TargetOS, hashDir)
cachedImage := fmt.Sprintf("%s/%s-%s:%s-%s-%s", c.flags.CacheRegistry, c.cmdType, c.flags.Name, c.flags.TargetOS, c.flags.TargetArch, hashDir)
return cachedImage, nil
}

Expand Down Expand Up @@ -305,12 +305,22 @@ func (c *cmdE2EPerf) buildDockerImage(cachedImage string) error {

// Build the Docker image
fmt.Printf("Building Docker image: %s\n", destImage)
e := exec.Command("docker",
args := []string{
"build",
"-f", dockerfile,
"-t", destImage,
filepath.Join(appDir, c.flags.Name, "."),
)
}
switch c.flags.TargetArch {
case "arm64":
args = append(args, "--platform", c.flags.TargetOS+"/arm64/v8")
case "amd64":
args = append(args, "--platform", c.flags.TargetOS+"/amd64")
default:
args = append(args, "--platform", c.flags.TargetOS+"/amd64")
}
e := exec.Command("docker", args...)

e.Stdout = os.Stdout
e.Stderr = os.Stderr
err = e.Run()
Expand Down
37 changes: 28 additions & 9 deletions .github/workflows/dapr-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ on:
# Dispatch on external events
repository_dispatch:
types: [e2e-test]

env:
# Version of Go
GOVER: "1.19"
Expand All @@ -50,6 +49,7 @@ env:
HA_MODE: true
# Space-separated list of supported Azure regions: one will be picked randomly for each cluster
AZURE_REGIONS: "westus3"
AZURE_ARM_REGIONS: "eastus"
# Container registry where to cache e2e test images
DAPR_CACHE_REGISTRY: "dapre2eacr.azurecr.io"
# Whether to collect TCP dumps
Expand Down Expand Up @@ -120,22 +120,22 @@ jobs:
- name: Deploy the test cluster
if: env.TEST_PREFIX != ''
run: |
# Select two random Azure regions
# Select a random Azure region for each of the Linux amd64 and Windows clusters; the arm64 region comes from AZURE_ARM_REGIONS
REGIONS=(${{ env.AZURE_REGIONS }})
REGIONS_SIZE=${#REGIONS[@]}
REGIONS_IDX=$(($RANDOM % $REGIONS_SIZE))
REGION1=${REGIONS[$REGIONS_IDX]}
echo "REGION1=${REGION1}" >> $GITHUB_ENV
REGIONS_IDX=$(($RANDOM % $REGIONS_SIZE))
REGION2=${REGIONS[$REGIONS_IDX]}
echo "REGION2=${REGION2}" >> $GITHUB_ENV
echo "Deploying to Azure regions: Linux=${REGION1} Windows=${REGION2}"
REGION3=${{ env.AZURE_ARM_REGIONS }}
echo "REGION3=${REGION3}" >> $GITHUB_ENV
echo "Deploying to Azure regions: Linux_amd64=${REGION1} Windows=${REGION2} Linux_arm64=${REGION3}"
# Tags
DATE_TAG=$(date --iso-8601=seconds)
echo "Tags: date=${DATE_TAG}"
# Deploy both Linux and Windows cluster
# Deploy Linux arm64/amd64 and Windows clusters
# Retry the deployment twice in case of transient failures (such as capacity constraints)
success=false
for i in 1 2 3; do
Expand All @@ -147,6 +147,7 @@ jobs:
namePrefix="${{ env.TEST_PREFIX }}" \
location1=${REGION1} \
location2=${REGION2} \
location3=${REGION3} \
dateTag="${DATE_TAG}" \
diagLogAnalyticsWorkspaceResourceId="${{ secrets.AZURE_DIAG_LOG_ANALYTICS_WORKSPACE_ID }}" \
diagStorageResourceId="${{ secrets.AZURE_DIAG_STORAGE_ID }}" \
Expand All @@ -172,6 +173,7 @@ jobs:
| --- | --- | --- |
| Linux | `Dapr-E2E-${{ env.TEST_PREFIX }}l` | ${{ env.REGION1 }} |
| Windows | `Dapr-E2E-${{ env.TEST_PREFIX }}w` | ${{ env.REGION2 }} |
| Linux_arm64 | `Dapr-E2E-${{ env.TEST_PREFIX }}la` | ${{ env.REGION3 }} |
- name: Update PR comment for failure
if: failure() && env.PR_NUMBER != ''
uses: marocchino/sticky-pull-request-comment@v2.2.0
Expand All @@ -187,11 +189,12 @@ jobs:
| --- | --- | --- |
| Linux | `Dapr-E2E-${{ env.TEST_PREFIX }}l` | ${{ env.REGION1 }} |
| Windows | `Dapr-E2E-${{ env.TEST_PREFIX }}w` | ${{ env.REGION2 }} |
| Linux_arm64 | `Dapr-E2E-${{ env.TEST_PREFIX }}la` | ${{ env.REGION3 }} |
Please check the logs for details on the failure.
build:
name: Build for ${{ matrix.target_os }}
name: Build for ${{ matrix.target_os }} on ${{ matrix.target_arch }}
runs-on: ${{ matrix.os }}
env:
GOOS: ${{ matrix.target_os }}
Expand All @@ -208,6 +211,9 @@ jobs:
target_os: linux
- os: windows-2019
target_os: windows
- target_arch: arm64
target_os: linux
os: ubuntu-latest
steps:
- name: Set up for scheduled test
if: github.event_name != 'repository_dispatch'
Expand Down Expand Up @@ -286,6 +292,8 @@ jobs:
TEST_PREFIX=""
if [ "${{ env.TARGET_OS }}" == "windows" ] ; then
TEST_PREFIX="dapre2e${SUFFIX}w"
elif [ "${{ env.TARGET_ARCH }}" == "arm64" ] ; then
TEST_PREFIX="dapre2e${SUFFIX}la"
else
TEST_PREFIX="dapre2e${SUFFIX}l"
fi
Expand Down Expand Up @@ -372,7 +380,7 @@ jobs:
Please check the logs for details on the error.
test-e2e:
name: End-to-end ${{ matrix.target_os }} tests
name: End-to-end ${{ matrix.target_os }} on ${{ matrix.target_arch }} tests
needs:
- build
- deploy-infrastructure
Expand All @@ -387,7 +395,10 @@ jobs:
fail-fast: false
matrix:
target_os: [linux, windows]
target_arch: [amd64]
target_arch: [amd64, arm64]
exclude:
- target_os: windows
target_arch: arm64
steps:
- name: Set up log paths
run: |
Expand Down Expand Up @@ -458,6 +469,8 @@ jobs:
TEST_PREFIX=""
if [ "${{ env.TARGET_OS }}" == "windows" ] ; then
TEST_PREFIX="dapre2e${SUFFIX}w"
elif [ "${{ env.TARGET_ARCH }}" == "arm64" ] ; then
TEST_PREFIX="dapre2e${SUFFIX}la"
else
TEST_PREFIX="dapre2e${SUFFIX}l"
fi
Expand Down Expand Up @@ -632,3 +645,9 @@ jobs:
echo "Starting removal of resource group Dapr-E2E-${{ env.TEST_PREFIX }}w"
az group delete --no-wait --yes --name "Dapr-E2E-${{ env.TEST_PREFIX }}w" || true
shell: bash
- name: Delete Arm64 cluster
run: |
# We are not waiting for these commands to complete, and we're ignoring errors
echo "Starting removal of resource group Dapr-E2E-${{ env.TEST_PREFIX }}la"
az group delete --no-wait --yes --name "Dapr-E2E-${{ env.TEST_PREFIX }}la" || true
shell: bash
28 changes: 28 additions & 0 deletions tests/test-infra/azure-aks.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ param namePrefix string
@description('The location of the resources')
param location string = resourceGroup().location

@description('If enabled, add a ARM64 pool')
param enableArm bool = false

@description('If enabled, add a Windows pool')
param enableWindows bool = false

Expand All @@ -26,6 +29,9 @@ param linuxVMSize string = 'Standard_DS2_v2'
@description('VM size to use for Windows nodes, if enabled')
param windowsVMSize string = 'Standard_DS3_v2'

@description('VM size to use for ARM64 nodes if enabled')
param armVMSize string = 'Standard_D2ps_v5'

@description('If set, sends certain diagnostic logs to Log Analytics')
param diagLogAnalyticsWorkspaceResourceId string = ''

Expand Down Expand Up @@ -126,6 +132,28 @@ resource aks 'Microsoft.ContainerService/managedClusters@2021-07-01' = {
vnetSubnetID: aksVNet::defaultSubnet.id
tags: {}
}
] : [] , enableArm ? [
{
name: 'armpol'
osDiskSizeGB: osDiskSizeGB
enableAutoScaling: false
count: 2
vmSize: armVMSize
osType: 'Linux'
type: 'VirtualMachineScaleSets'
mode: 'User'
maxPods: 110
availabilityZones: [
'1'
'2'
'3'
]
nodeLabels: {}
nodeTaints: []
enableNodePublicIP: false
vnetSubnetID: enableWindows ? aksVNet::defaultSubnet.id : null
tags: {}
}
] : [])
networkProfile: union({
loadBalancerSku: 'standard'
Expand Down
28 changes: 28 additions & 0 deletions tests/test-infra/azure-all.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ param location1 string
@description('The location of the second set of resources')
param location2 string

@description('The location of the third set of resources')
param location3 string

@description('Optional value for the date tag for resource groups')
param dateTag string = ''

Expand Down Expand Up @@ -56,6 +59,7 @@ module linuxCluster 'azure.bicep' = {
namePrefix: '${namePrefix}l'
location: location1
enableWindows: false
enableArm : false
diagLogAnalyticsWorkspaceResourceId: diagLogAnalyticsWorkspaceResourceId
diagStorageResourceId: diagStorageResourceId
enableCosmosDB: enableCosmosDB
Expand All @@ -78,9 +82,33 @@ module windowsCluster 'azure.bicep' = {
namePrefix: '${namePrefix}w'
location: location2
enableWindows: true
enableArm : false
diagLogAnalyticsWorkspaceResourceId: diagLogAnalyticsWorkspaceResourceId
diagStorageResourceId: diagStorageResourceId
enableCosmosDB: enableCosmosDB
enableServiceBus: enableServiceBus
}
}

// Deploy the Arm cluster in the third location
// Resource group for the arm64 cluster: named 'Dapr-E2E-<namePrefix>la' and
// created in location3.
resource ArmResources 'Microsoft.Resources/resourceGroups@2020-10-01' = {
name: 'Dapr-E2E-${namePrefix}la'
location: location3
// Attach a 'date' tag only when a non-empty dateTag was supplied.
tags: dateTag != '' ? {
date: dateTag
} : {}
}
// Instantiates azure.bicep inside the ArmResources resource group with the
// ARM64 pool enabled and the Windows pool disabled. The diagnostics, Cosmos DB
// and Service Bus settings are forwarded unchanged from this template's own
// parameters.
module armCluster 'azure.bicep' = {
name: 'armCluster'
scope: ArmResources
params: {
// The 'la' suffix matches the suffix used in the resource group name.
namePrefix: '${namePrefix}la'
location: location3
enableWindows: false
enableArm : true
diagLogAnalyticsWorkspaceResourceId: diagLogAnalyticsWorkspaceResourceId
diagStorageResourceId: diagStorageResourceId
enableCosmosDB: enableCosmosDB
enableServiceBus: enableServiceBus
}
}
4 changes: 4 additions & 0 deletions tests/test-infra/azure.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ param location string = resourceGroup().location
@description('If enabled, add a Windows pool')
param enableWindows bool = false

@description('If enabled, add a ARM64 pool')
param enableArm bool = false

@description('If set, sends certain diagnostic logs to Log Analytics')
param diagLogAnalyticsWorkspaceResourceId string = ''

Expand All @@ -42,6 +45,7 @@ module aksModule './azure-aks.bicep' = {
namePrefix: namePrefix
location: location
enableWindows: enableWindows
enableArm: enableArm
diagLogAnalyticsWorkspaceResourceId: diagLogAnalyticsWorkspaceResourceId
diagStorageResourceId: diagStorageResourceId
}
Expand Down

0 comments on commit c6b1067

Please sign in to comment.