
Commit aafaf9a

[SPK-277] Fault tolerance test 1 (apache#174)
* Renamed pnf to performance
* Working fault tolerance 1
* Renamed per Pedro's suggestions, and removed the driver
* Environment variables
1 parent dd47865 commit aafaf9a

File tree: 5 files changed, +119 / -2 lines

testsAT/src/test/java/com/stratio/pnf/fault/FT_SSD_001_ExecutorDropDown_IT.java

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
package com.stratio.pnf.fault;

import com.stratio.qa.cucumber.testng.CucumberRunner;
import com.stratio.spark.tests.utils.BaseTest;
import cucumber.api.CucumberOptions;
import org.testng.annotations.Test;

@CucumberOptions(features = {
        "src/test/resources/features/pnf/fault/executors-dropsdown.feature"
})
public class FT_SSD_001_ExecutorDropDown_IT extends BaseTest {

    public FT_SSD_001_ExecutorDropDown_IT() {
    }

    @Test(enabled = true, groups = {"FT_SSD_001_ExecutorDropDown"})
    public void kafkaCoverage() throws Exception {
        new CucumberRunner(this.getClass()).runCukes();
    }
}
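For orientation: the @Test group name above is exactly what the new testng-fault.xml suite (later in this commit) includes, so the class can be run through that suite or on its own. A minimal sketch, assuming the standard Maven Surefire support for TestNG groups; the repo's actual Maven profiles are not shown in this diff:

# Hypothetical invocation; 'groups' is Surefire's standard TestNG-group filter
mvn -f testsAT/pom.xml test -Dgroups=FT_SSD_001_ExecutorDropDown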
testsAT/src/test/resources/features/pnf/fault/executors-dropsdown.feature

Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
@rest
Feature: [Stability test] Executor Dropdowns

  Background:

    Given I open a ssh connection to '${DCOS_CLI_HOST}' with user 'root' and password 'stratio'
    #Check dispatcher and spark-coverage are deployed
    Then in less than '20' seconds, checking each '10' seconds, the command output 'dcos task | grep "${SPARK_FW_NAME}\." | grep R | wc -l' contains '1'
    Then in less than '20' seconds, checking each '10' seconds, the command output 'dcos task | grep spark-coverage | grep R | wc -l' contains '1'

    #Obtain mesos master
    Given I open a ssh connection to '${DCOS_IP}' with user 'root' and password 'stratio'
    Given I run 'getent hosts leader.mesos | awk '{print $1}'' in the ssh connection and save the value in environment variable 'MESOS_MASTER'

    #Clean all drivers from spark-dispatcher
    Then I clean all the drivers in the dispatcher with name '${SPARK_FW_NAME}' in dcos host '${CLUSTER_ID}.labs.stratio.com' with mesos master '!{MESOS_MASTER}:5050' with user 'admin' and password '1234'

  Scenario: [Kafka Executor Dropdowns][01] Launch a Kafka job, then kill its executor and check that another one is launched

    #Now launch the job
    Given I set sso token using host '${CLUSTER_ID}.labs.stratio.com' with user 'admin' and password '1234'
    And I securely send requests to '${CLUSTER_ID}.labs.stratio.com:443'

    When I send a 'POST' request to '/service/${SPARK_FW_NAME}/v1/submissions/create' based on 'schemas/pf/SparkCoverage/kafka_curl.json' as 'json' with:
      | $.appResource | UPDATE | http://spark-coverage.marathon.mesos:9000/jobs/kafka-${COVERAGE_VERSION}.jar | n/a |
      | $.sparkProperties['spark.jars'] | UPDATE | http://spark-coverage.marathon.mesos:9000/jobs/kafka-${COVERAGE_VERSION}.jar | n/a |
      | $.sparkProperties['spark.mesos.executor.docker.image'] | UPDATE | ${SPARK_DOCKER_IMAGE}:${STRATIO_SPARK_VERSION} | n/a |
      | $.appArgs[0] | UPDATE | gosec1.node.paas.labs.stratio.com:9092 | n/a |

    Then the service response status must be '200' and its response must contain the text '"success" : true'

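For reference, the submission step above amounts to a Spark dispatcher REST call routed through the cluster gateway. A hedged curl equivalent (the SSO token that the test framework injects is omitted here, and the ${...} placeholders must be expanded first):

curl -sk -X POST \
  -H 'Content-Type: application/json' \
  -d @schemas/pf/SparkCoverage/kafka_curl.json \
  "https://${CLUSTER_ID}.labs.stratio.com:443/service/${SPARK_FW_NAME}/v1/submissions/create"
# a successful response contains '"success" : true' and a 'submissionId'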
    #Save the id of the launched driver
    Then I save the value from field in service response 'submissionId' in variable 'driverKafka'

    #Wait until the executor is running
    Then in less than '200' seconds, checking each '10' seconds, the command output 'curl -s !{MESOS_MASTER}:5050/frameworks | jq '.frameworks[] | select((.name | contains("AT-kafka")) and (.id | contains("!{driverKafka}"))) | .tasks | .[] | .state' | grep "TASK_RUNNING" ' contains 'RUNNING'

    #Get the host where the executor is deployed
    Then I run 'curl -s !{MESOS_MASTER}:5050/frameworks | jq '.frameworks[] | select((.name | contains("AT-kafka")) and (.id | contains("!{driverKafka}"))) | .tasks | .[] | .slave_id' | tr -d '"'' in the ssh connection and save the value in environment variable 'SLAVE_ID'
    Then I run 'curl -s !{MESOS_MASTER}:5050/slaves | jq '.slaves[] | select(.id=="!{SLAVE_ID}") | .hostname' | tr -d '"'' in the ssh connection and save the value in environment variable 'EXECUTOR_IP'

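All of the jq filters in this scenario follow one pattern: select the framework whose name contains "AT-kafka" and whose id contains the saved driver id, then drill into its tasks. A self-contained sketch against a trimmed, hypothetical /frameworks payload (the field names are the ones the steps themselves rely on):

# Demonstrates the framework/task selection on sample data:
cat <<'EOF' | jq '.frameworks[] | select((.name | contains("AT-kafka")) and (.id | contains("driver-0001"))) | .tasks | .[] | .state'
{"frameworks":[{"name":"AT-kafka-job","id":"driver-0001","tasks":[{"state":"TASK_RUNNING","slave_id":"S0"}]}]}
EOF
# prints "TASK_RUNNING"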
    #Ssh into the host, find the Spark executor docker container and kill it
    Then I run 'echo !{EXECUTOR_IP}' in the ssh connection
    Then I open a ssh connection to '!{EXECUTOR_IP}' with user 'root' and password 'stratio'

    #Wait first for a running docker container
    Then I run 'docker ps | grep ${SPARK_DOCKER_IMAGE}:${STRATIO_SPARK_VERSION} | cut -d ' ' -f 1 | while read x; do cmd=$(docker inspect $x | jq '.[]|.Args'); echo $x $cmd; done | grep org.apache.spark.executor.CoarseGrainedExecutorBackend | cut -d ' ' -f 1' in the ssh connection and save the value in environment variable 'DOCKER_ID'
    Then I run 'docker rm -f !{DOCKER_ID}' in the ssh connection

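The container-hunting one-liner above is dense; the same logic, expanded for readability (SPARK_IMAGE standing in for ${SPARK_DOCKER_IMAGE}:${STRATIO_SPARK_VERSION}):

# List ids of containers running the Spark image, print each id next to its
# entrypoint args (jq -c keeps the args on one line), and keep only the
# executor containers:
docker ps | grep "$SPARK_IMAGE" | cut -d ' ' -f 1 | while read id; do
  args=$(docker inspect "$id" | jq -c '.[].Args')
  echo "$id" "$args"
done | grep org.apache.spark.executor.CoarseGrainedExecutorBackend | cut -d ' ' -f 1

Filtering on CoarseGrainedExecutorBackend (the Spark executor main class) is what separates executor containers from a driver running the same image.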
    #Check there is one TASK_FAILED (the executor we killed)
    Then in less than '200' seconds, checking each '10' seconds, the command output 'curl -s !{MESOS_MASTER}:5050/frameworks | jq '.frameworks[] | select((.name | contains("AT-kafka")) and (.id | contains("!{driverKafka}"))) | .completed_tasks | .[] | .state' | grep FAILED | wc -l' contains '1'

    #Check a new task has been launched
    Then in less than '200' seconds, checking each '10' seconds, the command output 'curl -s !{MESOS_MASTER}:5050/frameworks | jq '.frameworks[] | select((.name | contains("AT-kafka")) and (.id | contains("!{driverKafka}"))) | .tasks | .[] | .state'' contains 'TASK_RUNNING'

    #Check we are still processing windows: take an initial count and verify it increases after a few seconds
    Given I open a ssh connection to '${DCOS_CLI_HOST}' with user 'root' and password 'stratio'
    Then I run 'dcos task log !{driverKafka} stdout --lines=1000000 | grep "###" | wc -l' in the ssh connection and save the value in environment variable 'PREVIOUS_WINDOW'
    Then in less than '100' seconds, checking each '10' seconds, the command output 'if [ $(dcos task log !{driverKafka} stdout --lines=1000000 | grep "###" | wc -l) -gt "!{PREVIOUS_WINDOW}" ]; then echo "true"; fi' contains 'true'

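The liveness check above compares two counts of the job's "###" window marker in the driver log. The same logic as a plain script (DRIVER_ID standing in for !{driverKafka}):

# Count processed-window markers, wait, count again, and require growth:
prev=$(dcos task log "$DRIVER_ID" stdout --lines=1000000 | grep '###' | wc -l)
sleep 10
curr=$(dcos task log "$DRIVER_ID" stdout --lines=1000000 | grep '###' | wc -l)
[ "$curr" -gt "$prev" ] && echo "true"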
    #Now kill the driver
    #(We send a JSON body because the cucumber step doesn't support empty POST submissions)
    Then I set sso token using host '${CLUSTER_ID}.labs.stratio.com' with user 'admin' and password '1234'
    Then I securely send requests to '${CLUSTER_ID}.labs.stratio.com:443'
    Then I send a 'POST' request to '/service/${SPARK_FW_NAME}/v1/submissions/kill/!{driverKafka}' based on 'schemas/pf/SparkCoverage/kafka_curl.json' as 'json' with:
      | $.appResource | UPDATE | http://spark-coverage.marathon.mesos:9000/jobs/kafka-${COVERAGE_VERSION}.jar | n/a |

    Then the service response status must be '200' and its response must contain the text '"success" : true'

    #Check the exit is clean
    Then in less than '200' seconds, checking each '10' seconds, the command output 'curl -s !{MESOS_MASTER}:5050/frameworks | jq '.frameworks[] | select(.name == "${SPARK_FW_NAME}") | .completed_tasks | map(select(.name | contains ("AT-kafka"))) | map(select(.id == "!{driverKafka}")) | .[] | .state' | grep "TASK_KILLED" | wc -l' contains '1'

    Then in less than '10' seconds, checking each '5' seconds, the command output 'curl -s !{MESOS_MASTER}:5050/frameworks | jq '.frameworks[] | select(.name == "${SPARK_FW_NAME}") | .completed_tasks | map(select(.name | contains ("AT-kafka"))) | map(select(.id == "!{driverKafka}")) | .[] | .statuses' | grep "TASK_RUNNING" | wc -l' contains '1'
    Then in less than '10' seconds, checking each '5' seconds, the command output 'curl -s !{MESOS_MASTER}:5050/frameworks | jq '.frameworks[] | select(.name == "${SPARK_FW_NAME}") | .completed_tasks | map(select(.name | contains ("AT-kafka"))) | map(select(.id == "!{driverKafka}")) | .[] | .statuses' | grep "TASK_FAILED" | wc -l' contains '0'
    Then in less than '10' seconds, checking each '5' seconds, the command output 'curl -s !{MESOS_MASTER}:5050/frameworks | jq '.frameworks[] | select(.name == "${SPARK_FW_NAME}") | .completed_tasks | map(select(.name | contains ("AT-kafka"))) | map(select(.id == "!{driverKafka}")) | .[] | .statuses' | grep "TASK_KILLED" | wc -l' contains '1'
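As with the submission, the kill is a dispatcher REST call; a hedged curl equivalent (SSO token again omitted, DRIVER_ID standing in for !{driverKafka}):

curl -sk -X POST \
  "https://${CLUSTER_ID}.labs.stratio.com:443/service/${SPARK_FW_NAME}/v1/submissions/kill/${DRIVER_ID}"
# expected response body contains '"success" : true'

Note that a raw curl needs no JSON body; the dummy payload in the step exists only because the cucumber step cannot send an empty POST.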

testsAT/src/test/resources/features/pnf/performance/performance-test.feature

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 @rest
-Feature: [Spark HDFS Coverage] HDFS Coverage tests
+Feature: [Spark Performance Tests] Spark performance tests

 Background:
 #Show parameters of execution

testsAT/testng-fault.xml

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd" >

<suite name="Fault Tolerance Tests" thread-count="1" preserve-order="true">

    <!-- First we launch the tests that don't touch Calico networks -->
    <test name="fault_tolerance_set_withouth_managing_calico" preserve-order="true">
        <groups>
            <run>
                <include name="FT_SSD_001_ExecutorDropDown"/>
            </run>
        </groups>

        <packages>
            <package name="com.stratio.pnf.fault.*" />
        </packages>
    </test>

</suite>
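A hedged sketch of launching the whole fault suite, assuming the standard Surefire suiteXmlFiles property and that the ${...} placeholders used in the feature are supplied as system properties (the repo's real CI wiring is not shown in this diff):

# Hypothetical invocation; property names besides surefire.suiteXmlFiles
# are assumptions drawn from the placeholders in the feature file:
mvn -f testsAT/pom.xml test \
  -Dsurefire.suiteXmlFiles=testng-fault.xml \
  -DDCOS_CLI_HOST=<cli-host> -DDCOS_IP=<master-ip> \
  -DSPARK_FW_NAME=<dispatcher-name> -DCLUSTER_ID=<cluster-id> \
  -DSPARK_DOCKER_IMAGE=<image> -DSTRATIO_SPARK_VERSION=<version> \
  -DCOVERAGE_VERSION=<coverage-version>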

testsAT/testng-pnf.xml renamed to testsAT/testng-perf.xml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 <!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd" >

-<suite name="Full TestsAT in Nightlies" thread-count="1" preserve-order="true">
+<suite name="Full Performance tests" thread-count="1" preserve-order="true">

 <test name="performance" preserve-order="true">
     <groups>
