@@ -43,21 +43,26 @@ import org.apache.spark.rpc.RpcAddress
 import scala.collection.mutable

 object CoarseCookSchedulerBackend {
-  def fetchUri(uri: String): String =
-    Option(URI.create(uri).getScheme).map(_.toLowerCase) match {
-      case Some("http") => s"curl -O $uri"
-      case Some("spark-rsync") =>
-        val regex = "^spark-rsync://".r
-        val cleanURI = regex.replaceFirstIn(uri, "")
-        "RSYNC_CONNECT_PROG=" + "\"" + "knc spark-rsync@%H $SPARK_DRIVER_PULL_PORT" + "\"" +
-          s" rsync $$SPARK_DRIVER_PULL_HOST::spark/${cleanURI} ./"
-      case Some("hdfs") => s"$$HADOOP_COMMAND fs -copyToLocal $uri ."
-      case Some("rover") =>
-        val storage = "/opt/ts/services/storage-client.ts_storage_client/bin/storage"
-        s"$storage -X-Dtwosigma.logdir=$${MESOS_SANDBOX} cp $uri ."
-      case None | Some("file") => s"cp $uri ."
-      case Some(x) => sys.error(s"$x not supported yet")
-    }
+
+  // A collection of regexes for extracting information from a URI
+  private val HTTP_URI_REGEX = """http://(.*)""".r
+  private val RSYNC_URI_REGEX = """rsync://(.*)""".r
+  private val SPARK_RSYNC_URI_REGEX = """spark-rsync://(.*)""".r
+  private val HDFS_URI_REGEX = """hdfs://(.*)""".r
+
+  private[spark] def fetchURI(uri: String): String = uri.toLowerCase match {
+    case HTTP_URI_REGEX(httpURI) =>
+      s"curl -O http://$httpURI"
+    case RSYNC_URI_REGEX(file) =>
+      s"rsync $file ./"
+    case SPARK_RSYNC_URI_REGEX(file) =>
+      "RSYNC_CONNECT_PROG=\"knc spark-rsync@%H $SPARK_DRIVER_PULL_PORT\" " +
+        s"rsync $$SPARK_DRIVER_PULL_HOST::spark/$file ./"
+    case HDFS_URI_REGEX(file) =>
+      s"$$HADOOP_COMMAND fs -copyToLocal hdfs://$file ."
+    case _ =>
+      sys.error(s"$uri not supported yet")
+  }

  def apply(scheduler: TaskSchedulerImpl, sc: SparkContext, cookHost: String,
    cookPort: Int): CoarseGrainedSchedulerBackend = {
@@ -176,7 +181,7 @@ class CoarseCookSchedulerBackend(
  override def applicationAttemptId(): Option[String] = Some(applicationId())

  def createJob(numCores: Double): Job = {
-    import CoarseCookSchedulerBackend.fetchUri
+    import CoarseCookSchedulerBackend.fetchURI

    val jobId = UUID.randomUUID()
    executorUUIDWriter(jobId)
@@ -215,20 +220,20 @@ class CoarseCookSchedulerBackend(

    val keystoreUri = conf.getOption("spark.executor.keyStoreFilename")
    val keystorePull = keystoreUri.map { uri =>
-      s"${fetchUri(uri)} && mv $$(basename $uri) spark-executor-keystore"
+      s"${fetchURI(uri)} && mv $$(basename $uri) spark-executor-keystore"
    }

    val urisCommand =
      uriValues.map { uri =>
-        s"[ ! -e $$(basename $uri) ] && ${fetchUri(uri)} && tar -xvzf $$(basename $uri)" +
+        s"[ ! -e $$(basename $uri) ] && ${fetchURI(uri)} && tar -xvzf $$(basename $uri)" +
          " || (echo \"ERROR FETCHING\" && exit 1)"
      }

    val shippedTarballs: Seq[String] = conf.getOption("spark.cook.shippedTarballs")
      .fold(Seq[String]()){ tgz => tgz.split(",").map(_.trim).toList }

    val shippedTarballsCommand = shippedTarballs.map { uri =>
-      s"[ ! -e $$(basename $uri) ] && ${fetchUri(uri)} && tar -xvzf $$(basename $uri)"
+      s"[ ! -e $$(basename $uri) ] && ${fetchURI(uri)} && tar -xvzf $$(basename $uri)"
    }

    logDebug(s"command: $commandString")
@@ -244,7 +249,7 @@ class CoarseCookSchedulerBackend(
    val remoteConfFetch = if (remoteHdfsConf.nonEmpty) {
      val name = Paths.get(remoteHdfsConf).getFileName
      Seq(
-        fetchUri(remoteHdfsConf),
+        fetchURI(remoteHdfsConf),
        "mkdir HADOOP_CONF_DIR",
        s"tar --strip-components=1 -xvzf $name -C HADOOP_CONF_DIR",
        // This must be absolute because we cd into the spark directory
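Side note, not part of the commit: a minimal, self-contained Scala sketch of how the rewritten fetchURI maps a URI onto a shell fetch command. The object name, main driver, and sample URIs are invented for illustration; the regexes and match arms mirror the added lines above. Note that the regex extractors capture everything after the scheme, so the scheme is re-attached explicitly for the http and hdfs commands.

// Hypothetical sketch only: mirrors the new fetchURI so the scheme-to-command
// mapping can be exercised in isolation.
object FetchURISketch {
  private val HTTP_URI_REGEX = """http://(.*)""".r
  private val RSYNC_URI_REGEX = """rsync://(.*)""".r
  private val SPARK_RSYNC_URI_REGEX = """spark-rsync://(.*)""".r
  private val HDFS_URI_REGEX = """hdfs://(.*)""".r

  def fetchURI(uri: String): String = uri.toLowerCase match {
    case HTTP_URI_REGEX(rest) => s"curl -O http://$rest"
    case RSYNC_URI_REGEX(rest) => s"rsync $rest ./"
    case SPARK_RSYNC_URI_REGEX(rest) =>
      "RSYNC_CONNECT_PROG=\"knc spark-rsync@%H $SPARK_DRIVER_PULL_PORT\" " +
        s"rsync $$SPARK_DRIVER_PULL_HOST::spark/$rest ./"
    case HDFS_URI_REGEX(rest) => s"$$HADOOP_COMMAND fs -copyToLocal hdfs://$rest ."
    case _ => sys.error(s"$uri not supported yet")
  }

  def main(args: Array[String]): Unit = {
    // Sample URIs are made up; the printed strings are the shell fragments the
    // backend splices into the executor launch command.
    println(fetchURI("http://repo.example.com/spark-dist.tgz"))
    // -> curl -O http://repo.example.com/spark-dist.tgz
    println(fetchURI("hdfs://nn/user/spark/conf.tgz"))
    // -> $HADOOP_COMMAND fs -copyToLocal hdfs://nn/user/spark/conf.tgz .
    println(scala.util.Try(fetchURI("file:///tmp/local.tgz")))
    // -> Failure(...): the file:// and rover schemes handled by the old
    //    fetchUri no longer match and now fall through to sys.error.
  }
}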