@@ -9,6 +9,9 @@ set :shared_conf_path, "/u/apps/spark/shared/conf"
set :spark_jar_path, "hdfs://hadoop-production/user/sparkles"
set :gateway, nil
set :keep_releases, 5
# Default :branch to the locally checked-out branch unless one was already set.
# The backtick output ends with a newline; `strip` removes it so comparisons
# such as `fetch(:branch) == "master"` can match. (The previous
# `gsub("\s", "")` only removed spaces, because "\s" in a double-quoted
# string is a literal space, not the whitespace character class.)
set :branch, fetch(:branch, `git symbolic-ref --short HEAD`.strip)
# fetch(:sha), when combined with packserv, only reports the latest SHA known
# to packserv, so the SHA of the local checkout is recorded separately.
set :local_sha, `git rev-parse HEAD`.rstrip
1215
# Hostnames of the Hadoop data nodes, dn02 through dn47 (zero-padded to two digits).
DATANODES = (2..47).map { |i| format("dn%02d.chi.shopify.com", i) }.freeze
# Additional hosts listed alongside the data nodes.
OTHERNODES = %w[
  hadoop-etl1.chi.shopify.com
  spark-etl1.chi.shopify.com
  reports-reportify-etl3.chi.shopify.com
  reports-reportify-skydb4.chi.shopify.com
  platfora2.chi.shopify.com
].freeze
@@ -41,11 +44,22 @@ namespace :deploy do
4144 end
4245
4346 task :upload_to_hdfs , :roles => :uploader , :on_no_matching_servers => :continue do
44- run "hdfs dfs -copyFromLocal -f #{ release_path } /lib/spark-assembly-*.jar hdfs://hadoop-production/user/sparkles/spark-assembly-#{ fetch ( :sha ) } .jar"
47+ raw_binary_path = "./assembly/target/scala-2.10/spark-assembly-1.3.0-SNAPSHOT-hadoop2.5.0.jar"
48+ modified_binary_path = "./lib/spark-assembly-#{ fetch ( :local_sha ) } .jar"
49+ if fetch ( :branch ) == "master"
50+ run "hdfs dfs -copyFromLocal -f #{ release_path } /lib/spark-assembly-*.jar hdfs://hadoop-production/user/sparkles/spark-assembly-#{ fetch ( :sha ) } .jar"
51+ else
52+ unless File . exist? ( modified_binary_path )
53+ system ( "mvn package -DskipTests -Phadoop-2.4 -Dhadoop.version=2.5.0 -Pyarn -Phive" )
54+ system ( "mv #{ raw_binary_path } #{ modified_binary_path } " )
55+ end
56+ system ( "hdfs dfs -copyFromLocal #{ modified_binary_path } hdfs://nn01.chi.shopify.com/user/sparkles" )
57+ end
4558 end
4659
4760 task :test_spark_jar , :roles => :uploader , :on_no_master_servers => :continue do
48- run "sudo -u azkaban sh -c '. /u/virtualenvs/starscream/bin/activate && cd /u/apps/starscream/current && PYTHON_ENV=production SPARK_OPTS=\" spark.yarn.jar=hdfs://hadoop-production/user/sparkles/spark-assembly-#{ fetch ( :sha ) } .jar\" exec python shopify/tools/canary.py'"
61+ spark_yarn_jar_sha = fetch ( :branch ) == "master" ? fetch ( :sha ) : fetch ( :local_sha )
62+ run "sudo -u azkaban sh -c '. /u/virtualenvs/starscream/bin/activate && cd /u/apps/starscream/current && PYTHON_ENV=production SPARK_OPTS=\" spark.yarn.jar=hdfs://hadoop-production/user/sparkles/spark-assembly-#{ spark_yarn_jar_sha } .jar\" exec python shopify/tools/canary.py'"
4963 end
5064
5165 task :prevent_gateway do
0 commit comments