Skip to content

Commit 07a384f

Browse files
tsudukim jeanlyn
authored and committed
[SPARK-6568] spark-shell.cmd --jars option does not accept the jar that has space in its path
escape spaces in the arguments. Author: Masayoshi TSUZUKI <tsudukim@oss.nttdata.co.jp> Author: Kousuke Saruta <sarutak@oss.nttdata.co.jp> Closes apache#5447 from tsudukim/feature/SPARK-6568-2 and squashes the following commits: 3f9a188 [Masayoshi TSUZUKI] modified some errors. ed46047 [Masayoshi TSUZUKI] avoid scalastyle errors. 1784239 [Masayoshi TSUZUKI] removed Utils.formatPath. e03f289 [Masayoshi TSUZUKI] removed testWindows from Utils.resolveURI and Utils.resolveURIs. replaced SystemUtils.IS_OS_WINDOWS to Utils.isWindows. removed Utils.formatPath from PythonRunner.scala. 84c33d0 [Masayoshi TSUZUKI] - use resolveURI in nonLocalPaths - run tests for Windows path only on Windows 016128d [Masayoshi TSUZUKI] fixed to use File.toURI() 2c62e3b [Masayoshi TSUZUKI] Merge pull request apache#1 from sarutak/SPARK-6568-2 7019a8a [Masayoshi TSUZUKI] Merge branch 'master' of https://github.com/apache/spark into feature/SPARK-6568-2 45946ee [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into SPARK-6568-2 10f1c73 [Kousuke Saruta] Added a comment 93c3c40 [Kousuke Saruta] Merge branch 'classpath-handling-fix' of github.com:sarutak/spark into SPARK-6568-2 649da82 [Kousuke Saruta] Fix classpath handling c7ba6a7 [Masayoshi TSUZUKI] [SPARK-6568] spark-shell.cmd --jars option does not accept the jar that has space in its path
1 parent d230d2f commit 07a384f

File tree

5 files changed

+89
-80
lines changed

5 files changed

+89
-80
lines changed

core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -18,9 +18,11 @@
1818
package org.apache.spark.deploy
1919

2020
import java.net.URI
21+
import java.io.File
2122

2223
import scala.collection.mutable.ArrayBuffer
2324
import scala.collection.JavaConversions._
25+
import scala.util.Try
2426

2527
import org.apache.spark.api.python.PythonUtils
2628
import org.apache.spark.util.{RedirectThread, Utils}
@@ -81,16 +83,13 @@ object PythonRunner {
8183
throw new IllegalArgumentException("Launching Python applications through " +
8284
s"spark-submit is currently only supported for local files: $path")
8385
}
84-
val windows = Utils.isWindows || testWindows
85-
var formattedPath = if (windows) Utils.formatWindowsPath(path) else path
86-
87-
// Strip the URI scheme from the path
88-
formattedPath =
89-
new URI(formattedPath).getScheme match {
90-
case null => formattedPath
91-
case Utils.windowsDrive(d) if windows => formattedPath
92-
case _ => new URI(formattedPath).getPath
93-
}
86+
// get path when scheme is file.
87+
val uri = Try(new URI(path)).getOrElse(new File(path).toURI)
88+
var formattedPath = uri.getScheme match {
89+
case null => path
90+
case "file" | "local" => uri.getPath
91+
case _ => null
92+
}
9493

9594
// Guard against malformed paths potentially throwing NPE
9695
if (formattedPath == null) {
@@ -99,7 +98,9 @@ object PythonRunner {
9998

10099
// In Windows, the drive should not be prefixed with "/"
101100
// For instance, python does not understand "/C:/path/to/sheep.py"
102-
formattedPath = if (windows) formattedPath.stripPrefix("/") else formattedPath
101+
if (Utils.isWindows && formattedPath.matches("/[a-zA-Z]:/.*")) {
102+
formattedPath = formattedPath.stripPrefix("/")
103+
}
103104
formattedPath
104105
}
105106

core/src/main/scala/org/apache/spark/util/Utils.scala

Lines changed: 12 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -1704,11 +1704,6 @@ private[spark] object Utils extends Logging {
17041704
*/
17051705
val windowsDrive = "([a-zA-Z])".r
17061706

1707-
/**
1708-
* Format a Windows path such that it can be safely passed to a URI.
1709-
*/
1710-
def formatWindowsPath(path: String): String = path.replace("\\", "/")
1711-
17121707
/**
17131708
* Indicates whether Spark is currently running unit tests.
17141709
*/
@@ -1806,37 +1801,24 @@ private[spark] object Utils extends Logging {
18061801
* If the supplied path does not contain a scheme, or is a relative path, it will be
18071802
* converted into an absolute path with a file:// scheme.
18081803
*/
1809-
def resolveURI(path: String, testWindows: Boolean = false): URI = {
1810-
1811-
// In Windows, the file separator is a backslash, but this is inconsistent with the URI format
1812-
val windows = isWindows || testWindows
1813-
val formattedPath = if (windows) formatWindowsPath(path) else path
1814-
1815-
val uri = new URI(formattedPath)
1816-
if (uri.getPath == null) {
1817-
throw new IllegalArgumentException(s"Given path is malformed: $uri")
1818-
}
1819-
1820-
Option(uri.getScheme) match {
1821-
case Some(windowsDrive(d)) if windows =>
1822-
new URI("file:/" + uri.toString.stripPrefix("/"))
1823-
case None =>
1824-
// Preserve fragments for HDFS file name substitution (denoted by "#")
1825-
// For instance, in "abc.py#xyz.py", "xyz.py" is the name observed by the application
1826-
val fragment = uri.getFragment
1827-
val part = new File(uri.getPath).toURI
1828-
new URI(part.getScheme, part.getPath, fragment)
1829-
case Some(other) =>
1830-
uri
1804+
def resolveURI(path: String): URI = {
1805+
try {
1806+
val uri = new URI(path)
1807+
if (uri.getScheme() != null) {
1808+
return uri
1809+
}
1810+
} catch {
1811+
case e: URISyntaxException =>
18311812
}
1813+
new File(path).getAbsoluteFile().toURI()
18321814
}
18331815

18341816
/** Resolve a comma-separated list of paths. */
1835-
def resolveURIs(paths: String, testWindows: Boolean = false): String = {
1817+
def resolveURIs(paths: String): String = {
18361818
if (paths == null || paths.trim.isEmpty) {
18371819
""
18381820
} else {
1839-
paths.split(",").map { p => Utils.resolveURI(p, testWindows) }.mkString(",")
1821+
paths.split(",").map { p => Utils.resolveURI(p) }.mkString(",")
18401822
}
18411823
}
18421824

@@ -1847,8 +1829,7 @@ private[spark] object Utils extends Logging {
18471829
Array.empty
18481830
} else {
18491831
paths.split(",").filter { p =>
1850-
val formattedPath = if (windows) formatWindowsPath(p) else p
1851-
val uri = new URI(formattedPath)
1832+
val uri = resolveURI(p)
18521833
Option(uri.getScheme).getOrElse("file") match {
18531834
case windowsDrive(d) if windows => false
18541835
case "local" | "file" => false

core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ package org.apache.spark.deploy
1919

2020
import org.scalatest.FunSuite
2121

22+
import org.apache.spark.util.Utils
23+
2224
class PythonRunnerSuite extends FunSuite {
2325

2426
// Test formatting a single path to be added to the PYTHONPATH
@@ -28,10 +30,14 @@ class PythonRunnerSuite extends FunSuite {
2830
assert(PythonRunner.formatPath("file:///spark.py") === "/spark.py")
2931
assert(PythonRunner.formatPath("local:/spark.py") === "/spark.py")
3032
assert(PythonRunner.formatPath("local:///spark.py") === "/spark.py")
31-
assert(PythonRunner.formatPath("C:/a/b/spark.py", testWindows = true) === "C:/a/b/spark.py")
32-
assert(PythonRunner.formatPath("/C:/a/b/spark.py", testWindows = true) === "C:/a/b/spark.py")
33-
assert(PythonRunner.formatPath("file:/C:/a/b/spark.py", testWindows = true) ===
34-
"C:/a/b/spark.py")
33+
if (Utils.isWindows) {
34+
assert(PythonRunner.formatPath("file:/C:/a/b/spark.py", testWindows = true) ===
35+
"C:/a/b/spark.py")
36+
assert(PythonRunner.formatPath("C:\\a\\b\\spark.py", testWindows = true) ===
37+
"C:/a/b/spark.py")
38+
assert(PythonRunner.formatPath("C:\\a b\\spark.py", testWindows = true) ===
39+
"C:/a b/spark.py")
40+
}
3541
intercept[IllegalArgumentException] { PythonRunner.formatPath("one:two") }
3642
intercept[IllegalArgumentException] { PythonRunner.formatPath("hdfs:s3:xtremeFS") }
3743
intercept[IllegalArgumentException] { PythonRunner.formatPath("hdfs:/path/to/some.py") }
@@ -45,14 +51,15 @@ class PythonRunnerSuite extends FunSuite {
4551
Array("/app.py", "/spark.py"))
4652
assert(PythonRunner.formatPaths("me.py,file:/you.py,local:/we.py") ===
4753
Array("me.py", "/you.py", "/we.py"))
48-
assert(PythonRunner.formatPaths("C:/a/b/spark.py", testWindows = true) ===
49-
Array("C:/a/b/spark.py"))
50-
assert(PythonRunner.formatPaths("/C:/a/b/spark.py", testWindows = true) ===
51-
Array("C:/a/b/spark.py"))
52-
assert(PythonRunner.formatPaths("C:/free.py,pie.py", testWindows = true) ===
53-
Array("C:/free.py", "pie.py"))
54-
assert(PythonRunner.formatPaths("lovely.py,C:/free.py,file:/d:/fry.py", testWindows = true) ===
55-
Array("lovely.py", "C:/free.py", "d:/fry.py"))
54+
if (Utils.isWindows) {
55+
assert(PythonRunner.formatPaths("C:\\a\\b\\spark.py", testWindows = true) ===
56+
Array("C:/a/b/spark.py"))
57+
assert(PythonRunner.formatPaths("C:\\free.py,pie.py", testWindows = true) ===
58+
Array("C:/free.py", "pie.py"))
59+
assert(PythonRunner.formatPaths("lovely.py,C:\\free.py,file:/d:/fry.py",
60+
testWindows = true) ===
61+
Array("lovely.py", "C:/free.py", "d:/fry.py"))
62+
}
5663
intercept[IllegalArgumentException] { PythonRunner.formatPaths("one:two,three") }
5764
intercept[IllegalArgumentException] { PythonRunner.formatPaths("two,three,four:five:six") }
5865
intercept[IllegalArgumentException] { PythonRunner.formatPaths("hdfs:/some.py,foo.py") }

core/src/test/scala/org/apache/spark/util/UtilsSuite.scala

Lines changed: 43 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -367,51 +367,58 @@ class UtilsSuite extends FunSuite with ResetSystemProperties with Logging {
367367
}
368368

369369
test("resolveURI") {
370-
def assertResolves(before: String, after: String, testWindows: Boolean = false): Unit = {
370+
def assertResolves(before: String, after: String): Unit = {
371371
// This should test only single paths
372372
assume(before.split(",").length === 1)
373373
// Repeated invocations of resolveURI should yield the same result
374-
def resolve(uri: String): String = Utils.resolveURI(uri, testWindows).toString
374+
def resolve(uri: String): String = Utils.resolveURI(uri).toString
375375
assert(resolve(after) === after)
376376
assert(resolve(resolve(after)) === after)
377377
assert(resolve(resolve(resolve(after))) === after)
378378
// Also test resolveURIs with single paths
379-
assert(new URI(Utils.resolveURIs(before, testWindows)) === new URI(after))
380-
assert(new URI(Utils.resolveURIs(after, testWindows)) === new URI(after))
379+
assert(new URI(Utils.resolveURIs(before)) === new URI(after))
380+
assert(new URI(Utils.resolveURIs(after)) === new URI(after))
381381
}
382-
val cwd = System.getProperty("user.dir")
382+
val rawCwd = System.getProperty("user.dir")
383+
val cwd = if (Utils.isWindows) s"/$rawCwd".replace("\\", "/") else rawCwd
383384
assertResolves("hdfs:/root/spark.jar", "hdfs:/root/spark.jar")
384385
assertResolves("hdfs:///root/spark.jar#app.jar", "hdfs:/root/spark.jar#app.jar")
385386
assertResolves("spark.jar", s"file:$cwd/spark.jar")
386-
assertResolves("spark.jar#app.jar", s"file:$cwd/spark.jar#app.jar")
387-
assertResolves("C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true)
388-
assertResolves("C:\\path\\to\\file.txt", "file:/C:/path/to/file.txt", testWindows = true)
389-
assertResolves("file:/C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true)
390-
assertResolves("file:///C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true)
391-
assertResolves("file:/C:/file.txt#alias.txt", "file:/C:/file.txt#alias.txt", testWindows = true)
392-
intercept[IllegalArgumentException] { Utils.resolveURI("file:foo") }
393-
intercept[IllegalArgumentException] { Utils.resolveURI("file:foo:baby") }
387+
assertResolves("spark.jar#app.jar", s"file:$cwd/spark.jar%23app.jar")
388+
assertResolves("path to/file.txt", s"file:$cwd/path%20to/file.txt")
389+
if (Utils.isWindows) {
390+
assertResolves("C:\\path\\to\\file.txt", "file:/C:/path/to/file.txt")
391+
assertResolves("C:\\path to\\file.txt", "file:/C:/path%20to/file.txt")
392+
}
393+
assertResolves("file:/C:/path/to/file.txt", "file:/C:/path/to/file.txt")
394+
assertResolves("file:///C:/path/to/file.txt", "file:/C:/path/to/file.txt")
395+
assertResolves("file:/C:/file.txt#alias.txt", "file:/C:/file.txt#alias.txt")
396+
assertResolves("file:foo", s"file:foo")
397+
assertResolves("file:foo:baby", s"file:foo:baby")
394398
}
395399

396400
test("resolveURIs with multiple paths") {
397-
def assertResolves(before: String, after: String, testWindows: Boolean = false): Unit = {
401+
def assertResolves(before: String, after: String): Unit = {
398402
assume(before.split(",").length > 1)
399-
assert(Utils.resolveURIs(before, testWindows) === after)
400-
assert(Utils.resolveURIs(after, testWindows) === after)
403+
assert(Utils.resolveURIs(before) === after)
404+
assert(Utils.resolveURIs(after) === after)
401405
// Repeated invocations of resolveURIs should yield the same result
402-
def resolve(uri: String): String = Utils.resolveURIs(uri, testWindows)
406+
def resolve(uri: String): String = Utils.resolveURIs(uri)
403407
assert(resolve(after) === after)
404408
assert(resolve(resolve(after)) === after)
405409
assert(resolve(resolve(resolve(after))) === after)
406410
}
407-
val cwd = System.getProperty("user.dir")
411+
val rawCwd = System.getProperty("user.dir")
412+
val cwd = if (Utils.isWindows) s"/$rawCwd".replace("\\", "/") else rawCwd
408413
assertResolves("jar1,jar2", s"file:$cwd/jar1,file:$cwd/jar2")
409414
assertResolves("file:/jar1,file:/jar2", "file:/jar1,file:/jar2")
410415
assertResolves("hdfs:/jar1,file:/jar2,jar3", s"hdfs:/jar1,file:/jar2,file:$cwd/jar3")
411-
assertResolves("hdfs:/jar1,file:/jar2,jar3,jar4#jar5",
412-
s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:$cwd/jar4#jar5")
413-
assertResolves("hdfs:/jar1,file:/jar2,jar3,C:\\pi.py#py.pi",
414-
s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py#py.pi", testWindows = true)
416+
assertResolves("hdfs:/jar1,file:/jar2,jar3,jar4#jar5,path to/jar6",
417+
s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:$cwd/jar4%23jar5,file:$cwd/path%20to/jar6")
418+
if (Utils.isWindows) {
419+
assertResolves("""hdfs:/jar1,file:/jar2,jar3,C:\pi.py#py.pi,C:\path to\jar4""",
420+
s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py%23py.pi,file:/C:/path%20to/jar4")
421+
}
415422
}
416423

417424
test("nonLocalPaths") {
@@ -426,6 +433,8 @@ class UtilsSuite extends FunSuite with ResetSystemProperties with Logging {
426433
assert(Utils.nonLocalPaths("local:/spark.jar,file:/smart.jar,family.py") === Array.empty)
427434
assert(Utils.nonLocalPaths("hdfs:/spark.jar,s3:/smart.jar") ===
428435
Array("hdfs:/spark.jar", "s3:/smart.jar"))
436+
assert(Utils.nonLocalPaths("hdfs:/spark.jar,path to/a.jar,s3:/smart.jar") ===
437+
Array("hdfs:/spark.jar", "s3:/smart.jar"))
429438
assert(Utils.nonLocalPaths("hdfs:/spark.jar,s3:/smart.jar,local.py,file:/hello/pi.py") ===
430439
Array("hdfs:/spark.jar", "s3:/smart.jar"))
431440
assert(Utils.nonLocalPaths("local.py,hdfs:/spark.jar,file:/hello/pi.py,s3:/smart.jar") ===
@@ -547,7 +556,12 @@ class UtilsSuite extends FunSuite with ResetSystemProperties with Logging {
547556
val targetDir = new File(tempDir, "target-dir")
548557
Files.write("some text", sourceFile, UTF_8)
549558

550-
val path = new Path("file://" + sourceDir.getAbsolutePath)
559+
val path =
560+
if (Utils.isWindows) {
561+
new Path("file:/" + sourceDir.getAbsolutePath.replace("\\", "/"))
562+
} else {
563+
new Path("file://" + sourceDir.getAbsolutePath)
564+
}
551565
val conf = new Configuration()
552566
val fs = Utils.getHadoopFileSystem(path.toString, conf)
553567

@@ -567,7 +581,12 @@ class UtilsSuite extends FunSuite with ResetSystemProperties with Logging {
567581
val destInnerFile = new File(destInnerDir, sourceFile.getName)
568582
assert(destInnerFile.isFile())
569583

570-
val filePath = new Path("file://" + sourceFile.getAbsolutePath)
584+
val filePath =
585+
if (Utils.isWindows) {
586+
new Path("file:/" + sourceFile.getAbsolutePath.replace("\\", "/"))
587+
} else {
588+
new Path("file://" + sourceFile.getAbsolutePath)
589+
}
571590
val testFileDir = new File(tempDir, "test-filename")
572591
val testFileName = "testFName"
573592
val testFilefs = Utils.getHadoopFileSystem(filePath.toString, conf)

repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -206,7 +206,8 @@ class SparkILoop(
206206
// e.g. file:/C:/my/path.jar -> C:/my/path.jar
207207
SparkILoop.getAddedJars.map { jar => new URI(jar).getPath.stripPrefix("/") }
208208
} else {
209-
SparkILoop.getAddedJars
209+
// We need new URI(jar).getPath here for the case that `jar` includes encoded white space (%20).
210+
SparkILoop.getAddedJars.map { jar => new URI(jar).getPath }
210211
}
211212
// work around for Scala bug
212213
val totalClassPath = addedJars.foldLeft(
@@ -1109,7 +1110,7 @@ object SparkILoop extends Logging {
11091110
if (settings.classpath.isDefault)
11101111
settings.classpath.value = sys.props("java.class.path")
11111112

1112-
getAddedJars.foreach(settings.classpath.append(_))
1113+
getAddedJars.map(jar => new URI(jar).getPath).foreach(settings.classpath.append(_))
11131114

11141115
repl process settings
11151116
}

0 commit comments

Comments
 (0)