Skip to content

Commit

Permalink
test: configurable stability iterations timeout (#4022)
Browse files (browse the repository at this point in the history)
  • Loading branch information
jackfrancis authored Nov 12, 2020
1 parent 0e07cf9 commit e780eeb
Show file tree
Hide file tree
Showing 7 changed files with 32 additions and 14 deletions.
8 changes: 8 additions & 0 deletions test/e2e/cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ fi
# Default to 3 stability iterations when the caller supplied no value.
if [ -z "${STABILITY_ITERATIONS}" ]; then
  STABILITY_ITERATIONS=3
fi
# Default the stability timeout to 5 (seconds) when the caller supplied no value.
if [ -z "${STABILITY_TIMEOUT_SECONDS}" ]; then
  STABILITY_TIMEOUT_SECONDS=5
fi

if [ -n "$ADD_NODE_POOL_INPUT" ]; then
cat > ${TMP_DIR}/addpool-input.json <<END
Expand Down Expand Up @@ -182,6 +185,7 @@ docker run --rm \
-e SERVICE_MANAGEMENT_VM_DNS_SUFFIX="${SERVICE_MANAGEMENT_VM_DNS_SUFFIX}" \
-e RESOURCE_MANAGER_VM_DNS_SUFFIX="${RESOURCE_MANAGER_VM_DNS_SUFFIX}" \
-e STABILITY_ITERATIONS=${STABILITY_ITERATIONS} \
-e STABILITY_TIMEOUT_SECONDS=${STABILITY_TIMEOUT_SECONDS} \
-e ARC_CLIENT_ID=${ARC_CLIENT_ID:-$AZURE_CLIENT_ID} \
-e ARC_CLIENT_SECRET=${ARC_CLIENT_SECRET:-$AZURE_CLIENT_SECRET} \
-e ARC_SUBSCRIPTION_ID=${ARC_SUBSCRIPTION_ID:-$AZURE_SUBSCRIPTION_ID} \
Expand Down Expand Up @@ -324,6 +328,7 @@ if [ -n "$ADD_NODE_POOL_INPUT" ]; then
-e SERVICE_MANAGEMENT_VM_DNS_SUFFIX="${SERVICE_MANAGEMENT_VM_DNS_SUFFIX}" \
-e RESOURCE_MANAGER_VM_DNS_SUFFIX="${RESOURCE_MANAGER_VM_DNS_SUFFIX}" \
-e STABILITY_ITERATIONS=${STABILITY_ITERATIONS} \
-e STABILITY_TIMEOUT_SECONDS=${STABILITY_TIMEOUT_SECONDS} \
-e ARC_CLIENT_ID=${ARC_CLIENT_ID:-$AZURE_CLIENT_ID} \
-e ARC_CLIENT_SECRET=${ARC_CLIENT_SECRET:-$AZURE_CLIENT_SECRET} \
-e ARC_SUBSCRIPTION_ID=${ARC_SUBSCRIPTION_ID:-$AZURE_SUBSCRIPTION_ID} \
Expand Down Expand Up @@ -440,6 +445,7 @@ if [ "${SCALE_CLUSTER}" = "true" ]; then
-e SERVICE_MANAGEMENT_VM_DNS_SUFFIX="${SERVICE_MANAGEMENT_VM_DNS_SUFFIX}" \
-e RESOURCE_MANAGER_VM_DNS_SUFFIX="${RESOURCE_MANAGER_VM_DNS_SUFFIX}" \
-e STABILITY_ITERATIONS=${STABILITY_ITERATIONS} \
-e STABILITY_TIMEOUT_SECONDS=${STABILITY_TIMEOUT_SECONDS} \
-e ARC_CLIENT_ID=${ARC_CLIENT_ID:-$AZURE_CLIENT_ID} \
-e ARC_CLIENT_SECRET=${ARC_CLIENT_SECRET:-$AZURE_CLIENT_SECRET} \
-e ARC_SUBSCRIPTION_ID=${ARC_SUBSCRIPTION_ID:-$AZURE_SUBSCRIPTION_ID} \
Expand Down Expand Up @@ -531,6 +537,7 @@ if [ "${UPGRADE_CLUSTER}" = "true" ]; then
-e SERVICE_MANAGEMENT_VM_DNS_SUFFIX="${SERVICE_MANAGEMENT_VM_DNS_SUFFIX}" \
-e RESOURCE_MANAGER_VM_DNS_SUFFIX="${RESOURCE_MANAGER_VM_DNS_SUFFIX}" \
-e STABILITY_ITERATIONS=${STABILITY_ITERATIONS} \
-e STABILITY_TIMEOUT_SECONDS=${STABILITY_TIMEOUT_SECONDS} \
-e ARC_CLIENT_ID=${ARC_CLIENT_ID:-$AZURE_CLIENT_ID} \
-e ARC_CLIENT_SECRET=${ARC_CLIENT_SECRET:-$AZURE_CLIENT_SECRET} \
-e ARC_SUBSCRIPTION_ID=${ARC_SUBSCRIPTION_ID:-$AZURE_SUBSCRIPTION_ID} \
Expand Down Expand Up @@ -611,6 +618,7 @@ if [ "${SCALE_CLUSTER}" = "true" ]; then
-e SERVICE_MANAGEMENT_VM_DNS_SUFFIX="${SERVICE_MANAGEMENT_VM_DNS_SUFFIX}" \
-e RESOURCE_MANAGER_VM_DNS_SUFFIX="${RESOURCE_MANAGER_VM_DNS_SUFFIX}" \
-e STABILITY_ITERATIONS=${STABILITY_ITERATIONS} \
-e STABILITY_TIMEOUT_SECONDS=${STABILITY_TIMEOUT_SECONDS} \
-e ARC_CLIENT_ID=${ARC_CLIENT_ID:-$AZURE_CLIENT_ID} \
-e ARC_CLIENT_SECRET=${ARC_CLIENT_SECRET:-$AZURE_CLIENT_SECRET} \
-e ARC_SUBSCRIPTION_ID=${ARC_SUBSCRIPTION_ID:-$AZURE_SUBSCRIPTION_ID} \
Expand Down
1 change: 1 addition & 0 deletions test/e2e/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ type Config struct {
CleanUpIfFail bool `envconfig:"CLEANUP_IF_FAIL" default:"false"`
RetainSSH bool `envconfig:"RETAIN_SSH" default:"true"`
StabilityIterations int `envconfig:"STABILITY_ITERATIONS" default:"3"`
StabilityTimeoutSeconds int `envconfig:"STABILITY_TIMEOUT_SECONDS" default:"5"`
ClusterInitPodName string `envconfig:"CLUSTER_INIT_POD_NAME" default:""`
ClusterInitJobName string `envconfig:"CLUSTER_INIT_JOB_NAME" default:""`
Timeout time.Duration `envconfig:"TIMEOUT" default:"20m"`
Expand Down
20 changes: 8 additions & 12 deletions test/e2e/kubernetes/kubernetes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,14 +168,10 @@ var _ = BeforeSuite(func() {
if hasAddon, _ := eng.HasAddon("coredns"); hasAddon {
dnsAddonName = common.CoreDNSAddonName
}
stabilityCommandTimeout = 3 * time.Second
if eng.ExpandedDefinition.Properties.OrchestratorProfile.KubernetesConfig.NetworkPolicy == api.NetworkPolicyCalico {
stabilityCommandTimeout = 10 * time.Second
} else if eng.ExpandedDefinition.Properties.OrchestratorProfile.KubernetesConfig.NetworkPolicy == api.NetworkPolicyAzure {
stabilityCommandTimeout = 15 * time.Second
}
Expect(dnsAddonName).NotTo(Equal(""))

stabilityCommandTimeout = time.Duration(cfg.StabilityTimeoutSeconds) * time.Second

if !cfg.IsCustomCloudProfile() {
env, err = azure.EnvironmentFromName("AzurePublicCloud") // TODO get this programmatically
if err != nil {
Expand Down Expand Up @@ -895,7 +891,7 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu

By("Ensuring that we have stable external DNS resolution as we recycle a bunch of pods")
name := fmt.Sprintf("alpine-%s", cfg.Name)
command := fmt.Sprintf("nc -vz bbc.co.uk 80 || nc -vz google.com 443 || nc -vz microsoft.com 80")
command := fmt.Sprintf("time nc -vz bbc.co.uk 80 || nc -vz google.com 443 || nc -vz microsoft.com 80")
deploymentCommand := fmt.Sprintf("%s && while true; do sleep 1; done || echo unable to make external connections or resolve dns", command)
// Ensure across all nodes
successes, err := deployment.RunDeploymentMultipleTimes(deployment.RunLinuxDeploy, "alpine", name, deploymentCommand, deploymentReplicasCount, cfg.StabilityIterations, 1*time.Second, timeoutWhenWaitingForPodOutboundAccess, cfg.Timeout)
Expand Down Expand Up @@ -951,7 +947,7 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
It("should have stable external container networking as we recycle a bunch of pods", func() {
// Test for basic UDP networking
name := fmt.Sprintf("alpine-%s", cfg.Name)
command := fmt.Sprintf("nc -vz 8.8.8.8 53 || nc -vz 8.8.4.4 53")
command := fmt.Sprintf("time nc -vz 8.8.8.8 53 || nc -vz 8.8.4.4 53")
deploymentCommand := fmt.Sprintf("%s && while true; do sleep 1; done || echo unable to connect externally against known listeners", command)
// Ensure across all nodes
successes, err := deployment.RunDeploymentMultipleTimes(deployment.RunLinuxDeploy, "alpine", name, deploymentCommand, deploymentReplicasCount, cfg.StabilityIterations, 1*time.Second, timeoutWhenWaitingForPodOutboundAccess, cfg.Timeout)
Expand All @@ -964,7 +960,7 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu

// Use curl to test responsive DNS lookup + TCP 443 connectivity
name = fmt.Sprintf("alpine-%s", cfg.Name)
command = fmt.Sprintf("curl --head https://www.bing.com 1> /dev/null || curl --head https://google.com 1> /dev/null || curl --head https://microsoft.com 1> /dev/null")
command = fmt.Sprintf("time curl --head https://www.bing.com 1> /dev/null || curl --head https://google.com 1> /dev/null || curl --head https://microsoft.com 1> /dev/null")
deploymentCommand = fmt.Sprintf("%s && while true; do sleep 1; done || echo unable to curl externally against known endpoints", command)
// Ensure across all nodes
successes, err = deployment.RunDeploymentMultipleTimes(deployment.RunLinuxDeploy, "byrnedo/alpine-curl", name, deploymentCommand, deploymentReplicasCount, cfg.StabilityIterations, 1*time.Second, timeoutWhenWaitingForPodOutboundAccess, cfg.Timeout)
Expand All @@ -978,8 +974,8 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu

It("should have stable internal container networking as we recycle a bunch of pods", func() {
name := fmt.Sprintf("alpine-%s", cfg.Name)
command := fmt.Sprintf("nc -vz kubernetes 443 && nc -vz kubernetes.default.svc 443 && nc -vz kubernetes.default.svc.cluster.local 443")
deploymentCommand := fmt.Sprintf("%s && while true; do sleep 1; done || echo unable to reach internal kubernetes endpoints", command)
command := fmt.Sprintf("time nc -vz kubernetes 443 && nc -vz kubernetes.default.svc 443 && nc -vz kubernetes.default.svc.cluster.local 443")
deploymentCommand := fmt.Sprintf("time %s && while true; do sleep 1; done || echo unable to reach internal kubernetes endpoints", command)
// Ensure across all nodes
successes, err := deployment.RunDeploymentMultipleTimes(deployment.RunLinuxDeploy, "alpine", name, deploymentCommand, deploymentReplicasCount, cfg.StabilityIterations, 1*time.Second, timeoutWhenWaitingForPodOutboundAccess, cfg.Timeout)
Expect(err).NotTo(HaveOccurred())
Expand Down Expand Up @@ -1012,7 +1008,7 @@ var _ = Describe("Azure Container Cluster using the Kubernetes Orchestrator", fu
Expect(err).NotTo(HaveOccurred())
By("Creating another pod that will connect to the php-apache pod")
r := rand.New(rand.NewSource(time.Now().UnixNano()))
commandString := fmt.Sprintf("nc -vz %s.default.svc.cluster.local 80", longRunningApacheDeploymentName)
commandString := fmt.Sprintf("time nc -vz %s.default.svc.cluster.local 80", longRunningApacheDeploymentName)
consumerPodName := fmt.Sprintf("consumer-pod-%s-%v", cfg.Name, r.Intn(99999))
deploymentCommand := fmt.Sprintf("%s && while true; do sleep 1; done || echo unable to connect to in-cluster web listener", commandString)
// Ensure across all nodes
Expand Down
8 changes: 8 additions & 0 deletions test/e2e/kubernetes/pod/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -1030,6 +1030,14 @@ func WaitOnTerminated(name, namespace, containerName string, sleep, containerExe
}
duration := t2.Sub(t1)
if duration >= containerExecutionTimeout {
err := pod.Logs()
if err != nil {
log.Printf("Unable to print pod logs for pod %s: %s", pod.Metadata.Name, err)
}
err = pod.Describe()
if err != nil {
log.Printf("Unable to describe pod %s: %s", pod.Metadata.Name, err)
}
return false, errors.Errorf("execution time %s is greater than timeout %s\n", duration.String(), containerExecutionTimeout.String())
}
return true, nil
Expand Down
3 changes: 3 additions & 0 deletions test/e2e/test_cluster_configs/network_policy/azure.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
{
"env": {
"STABILITY_TIMEOUT_SECONDS": "15"
},
"apiModel": {
"apiVersion": "vlabs",
"properties": {
Expand Down
3 changes: 2 additions & 1 deletion test/e2e/test_cluster_configs/network_policy/calico.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"env": {
"STABILITY_ITERATIONS": "0"
"STABILITY_ITERATIONS": "0",
"STABILITY_TIMEOUT_SECONDS": "10"
},
"apiModel": {
"apiVersion": "vlabs",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
{
"env": {
"CREATE_VNET": true,
"STABILITY_ITERATIONS": "0"
"STABILITY_ITERATIONS": "0",
"STABILITY_TIMEOUT_SECONDS": "10"
},
"apiModel": {
"apiVersion": "vlabs",
Expand Down

0 comments on commit e780eeb

Please sign in to comment.