Skip to content

Commit efbfadb

Browse files
Merge pull request #1097 from RevathiKotla/release-3.3.1
Fix Cluster partition submit script
2 parents 2addaeb + fba4caf commit efbfadb

File tree

3 files changed

+10
-9
lines changed

3 files changed

+10
-9
lines changed

ansible/roles/spark-cluster-data-products-submit/defaults/main.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ spark_cluster:
2626
driver_memory: 7g
2727
memory_fraction: 0.3
2828
storage_fraction: 0.5
29-
executor_core: 3
30-
executor_memory: 12G
31-
num_executors: 5
29+
executor_core: 5
30+
executor_memory: 19G
31+
num_executors: 23
3232

3333
analytics:
3434
home: "/tmp"
@@ -39,4 +39,4 @@ sink_topic: "{{ env }}.telemetry.sink"
3939
druid_broker_host: "{{groups['raw-broker'][0]}}"
4040
producer_env: "dev.sunbird"
4141
spark_output_temp_dir: /mount/data/analytics/tmp/
42-
reports_container: "reports"
42+
reports_container: "reports"

ansible/roles/spark-cluster-data-products-submit/templates/cluster-config.json.j2

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
"spark.sql.autoBroadcastJoinThreshold" : "-1",
1919
"spark.dynamicAllocation.enabled" :"true",
2020
"spark.shuffle.service.enabled" :"true",
21+
"spark.scheduler.mode" : "FAIR",
2122
"spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{dp_azure_account_name}} -Dreports_storage_secret={{dp_vault_azure_account_key}}",
2223
"spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{dp_azure_account_name}} -Dreports_storage_secret={{dp_vault_azure_account_key}}"
2324
}
24-
}
25+
}

ansible/roles/spark-cluster-data-products-submit/templates/submit-script.j2

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,17 +67,17 @@ if [ "$mode" = "via-partition" ]; then
6767
for i in $(seq 0 $parallelisation $endPartitions)
6868
do
6969
# add partitions to config
70-
partitionString="\"delta\":0,\"partitions\":[$(seq -s , $i `expr $i + $parallelisation - 1`)]"
70+
partitionString="\\\"delta\\\":0,\\\"partitions\\\":[$(seq -s , $i `expr $i + $parallelisation - 1`)]"
7171
if [ -z "$start_date" ]; then
7272
job_config=$(config $job)
73-
finalConfig=${job_config/'"delta":0'/$partitionString}
73+
finalConfig=${job_config/'\"delta\":0'/$partitionString}
7474
echo $finalConfig
7575
echo "Running $job by partitions."
7676
classVariable="org.ekstep.analytics.job.JobExecutor"
7777
argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\"]"
7878
else
7979
job_config=$(config $job '__endDate__')
80-
finalConfig=${job_config/'"delta":0'/$partitionString}
80+
finalConfig=${job_config/'\"delta\":0'/$partitionString}
8181
echo $finalConfig
8282
echo "Running $job by partitions via Replay-Supervisor."
8383
classVariable="org.ekstep.analytics.job.ReplaySupervisor"
@@ -109,4 +109,4 @@ else
109109
finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable}
110110
echo $finalRequestBody
111111
curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}"
112-
fi
112+
fi

0 commit comments

Comments (0)