Skip to content

Commit 71aea02

Browse files
committed
[SPARK-32467][UI] Avoid encoding URL twice on https redirect
### What changes were proposed in this pull request? When https is enabled for Spark UI, an HTTP request will be redirected as an encoded HTTPS URL: https://github.com/apache/spark/pull/10238/files#diff-f79a5ead735b3d0b34b6b94486918e1cR312 When we create the redirect url, we will call getRequestURI and getQueryString. Both two methods may return an encoded string. However, we pass them directly to the following URI constructor ``` URI(String scheme, String authority, String path, String query, String fragment) ``` As this URI constructor assumes both path and query parameters are decoded strings, it will encode them again. This makes the redirect URL encoded twice. This problem is on stage page with HTTPS enabled. The URL of "/taskTable" contains query parameter `order%5B0%5D%5Bcolumn%5D`. After encoded it becomes `order%255B0%255D%255Bcolumn%255D` and it will be decoded as `order%5B0%5D%5Bcolumn%5D` instead of `order[0][dir]`. When the parameter `order[0][dir]` is missing, there will be an excetpion from: https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala#L176 and the stage page fail to load. To fix the problem, we can try decoding the query parameters before encoding it. This is to make sure we encode the URL ### Why are the changes needed? Fix a UI issue when HTTPS is enabled ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? A new Unit test + manually test on a cluster Closes #29271 from gengliangwang/urlEncode. Authored-by: Gengliang Wang <gengliang.wang@databricks.com> Signed-off-by: Gengliang Wang <gengliang.wang@databricks.com>
1 parent 1c6dff7 commit 71aea02

File tree

2 files changed

+44
-6
lines changed

2 files changed

+44
-6
lines changed

core/src/main/scala/org/apache/spark/ui/JettyUtils.scala

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
package org.apache.spark.ui
1919

20-
import java.net.{URI, URL}
20+
import java.net.{URI, URL, URLDecoder}
2121
import java.util.EnumSet
2222
import javax.servlet.DispatcherType
2323
import javax.servlet.http._
@@ -377,8 +377,7 @@ private[spark] object JettyUtils extends Logging {
377377
if (baseRequest.isSecure) {
378378
return
379379
}
380-
val httpsURI = createRedirectURI(scheme, baseRequest.getServerName, securePort,
381-
baseRequest.getRequestURI, baseRequest.getQueryString)
380+
val httpsURI = createRedirectURI(scheme, securePort, baseRequest)
382381
response.setContentLength(0)
383382
response.sendRedirect(response.encodeRedirectURL(httpsURI))
384383
baseRequest.setHandled(true)
@@ -440,16 +439,34 @@ private[spark] object JettyUtils extends Logging {
440439
handler.addFilter(holder, "/*", EnumSet.allOf(classOf[DispatcherType]))
441440
}
442441

442+
private def decodeURL(url: String, encoding: String): String = {
443+
if (url == null) {
444+
null
445+
} else {
446+
URLDecoder.decode(url, encoding)
447+
}
448+
}
449+
443450
// Create a new URI from the arguments, handling IPv6 host encoding and default ports.
444-
private def createRedirectURI(
445-
scheme: String, server: String, port: Int, path: String, query: String) = {
451+
private def createRedirectURI(scheme: String, port: Int, request: Request): String = {
452+
val server = request.getServerName
446453
val redirectServer = if (server.contains(":") && !server.startsWith("[")) {
447454
s"[${server}]"
448455
} else {
449456
server
450457
}
451458
val authority = s"$redirectServer:$port"
452-
new URI(scheme, authority, path, query, null).toString
459+
val queryEncoding = if (request.getQueryEncoding != null) {
460+
request.getQueryEncoding
461+
} else {
462+
// By default decoding the URI as "UTF-8" should be enough for SparkUI
463+
"UTF-8"
464+
}
465+
// The request URL can be raw or encoded here. To avoid the request URL being
466+
// encoded twice, let's decode it here.
467+
val requestURI = decodeURL(request.getRequestURI, queryEncoding)
468+
val queryString = decodeURL(request.getQueryString, queryEncoding)
469+
new URI(scheme, authority, requestURI, queryString, null).toString
453470
}
454471

455472
def toVirtualHosts(connectors: String*): Array[String] = connectors.map("@" + _).toArray

core/src/test/scala/org/apache/spark/ui/UISuite.scala

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,27 @@ class UISuite extends SparkFunSuite {
262262
}
263263
}
264264

265+
test("SPARK-32467: Avoid encoding URL twice on https redirect") {
266+
val (conf, securityMgr, sslOptions) = sslEnabledConf()
267+
val serverInfo = JettyUtils.startJettyServer("0.0.0.0", 0, sslOptions, conf)
268+
try {
269+
val serverAddr = s"http://localhost:${serverInfo.boundPort}"
270+
271+
val (_, ctx) = newContext("/ctx1")
272+
serverInfo.addHandler(ctx, securityMgr)
273+
274+
TestUtils.withHttpConnection(new URL(s"$serverAddr/ctx%281%29?a%5B0%5D=b")) { conn =>
275+
assert(conn.getResponseCode() === HttpServletResponse.SC_FOUND)
276+
val location = Option(conn.getHeaderFields().get("Location"))
277+
.map(_.get(0)).orNull
278+
val expectedLocation = s"https://localhost:${serverInfo.securePort.get}/ctx(1)?a[0]=b"
279+
assert(location == expectedLocation)
280+
}
281+
} finally {
282+
stopServer(serverInfo)
283+
}
284+
}
285+
265286
test("http -> https redirect applies to all URIs") {
266287
val (conf, securityMgr, sslOptions) = sslEnabledConf()
267288
val serverInfo = JettyUtils.startJettyServer("0.0.0.0", 0, sslOptions, conf)

0 commit comments

Comments
 (0)