Skip to content

Commit 8a3815f

Browse files
attilapiros authored and dongjoon-hyun committed
[SPARK-34789][TEST] Introduce Jetty based construct for integration tests where HTTP server is used
### What changes were proposed in this pull request? Introducing a new test construct: ``` withHttpServer() { baseURL => ... } ``` Which starts and stops a Jetty server to serve files via HTTP. Moreover this PR uses this new construct in the test `Run SparkRemoteFileTest using a remote data file`. ### Why are the changes needed? Before this PR github URLs was used like "https://raw.githubusercontent.com/apache/spark/master/data/mllib/pagerank_data.txt". This connects two Spark version in an unhealthy way like connecting the "master" branch which is moving part with the committed test code which is a non-moving (as it might be even released). So this way a test running for an earlier version of Spark expects something (filename, content, path) from a the latter release and what is worse when the moving version is changed the earlier test will break. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit test. Closes #31935 from attilapiros/SPARK-34789. Authored-by: “attilapiros” <piros.attila.zsolt@gmail.com> Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
1 parent 7ff9d2e commit 8a3815f

File tree

2 files changed

+30
-6
lines changed

2 files changed

+30
-6
lines changed

core/src/main/scala/org/apache/spark/TestUtils.scala

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
package org.apache.spark
1919

2020
import java.io.{ByteArrayInputStream, File, FileInputStream, FileOutputStream}
21-
import java.net.{HttpURLConnection, URI, URL}
21+
import java.net.{HttpURLConnection, InetSocketAddress, URI, URL}
2222
import java.nio.charset.StandardCharsets
2323
import java.nio.file.{Files => JavaFiles, Paths}
2424
import java.nio.file.attribute.PosixFilePermission.{OWNER_EXECUTE, OWNER_READ, OWNER_WRITE}
@@ -41,6 +41,11 @@ import scala.util.Try
4141
import com.google.common.io.{ByteStreams, Files}
4242
import org.apache.commons.lang3.StringUtils
4343
import org.apache.log4j.PropertyConfigurator
44+
import org.eclipse.jetty.server.Handler
45+
import org.eclipse.jetty.server.Server
46+
import org.eclipse.jetty.server.handler.DefaultHandler
47+
import org.eclipse.jetty.server.handler.HandlerList
48+
import org.eclipse.jetty.server.handler.ResourceHandler
4449
import org.json4s.JsonAST.JValue
4550
import org.json4s.jackson.JsonMethods.{compact, render}
4651

@@ -364,6 +369,22 @@ private[spark] object TestUtils {
364369
}
365370
}
366371

372+
/**
 * Runs `body` with a local Jetty HTTP server that serves the files found under
 * `resBaseDir`, then shuts the server down.
 *
 * The server binds to the local canonical host name on an ephemeral port, so parallel
 * test runs cannot clash on a fixed port. The server is always stopped afterwards,
 * even when `body` throws — and, unlike the original, even when `server.start()`
 * itself fails after partially acquiring resources.
 *
 * @param resBaseDir directory whose contents are served over HTTP (defaults to ".")
 * @param body       callback invoked with the base URL of the running server
 */
def withHttpServer(resBaseDir: String = ".")(body: URL => Unit): Unit = {
  // Port 0 asks the OS for any free port, avoiding collisions between concurrent tests.
  val server = new Server(new InetSocketAddress(Utils.localCanonicalHostName, 0))
  val resHandler = new ResourceHandler()
  resHandler.setResourceBase(resBaseDir)
  // ResourceHandler serves matching files; DefaultHandler answers everything else (404s).
  val handlers = new HandlerList()
  handlers.setHandlers(Array[Handler](resHandler, new DefaultHandler()))
  server.setHandler(handlers)
  try {
    // start() is inside the try so that stop() runs even if startup fails midway,
    // releasing any partially-bound socket or thread-pool resources.
    server.start()
    // getURI is only valid once the server is started (the port is known by then).
    body(server.getURI.toURL)
  } finally {
    server.stop()
  }
}
387+
367388
/**
368389
* Wait until at least `numExecutors` executors are up, or throw `TimeoutException` if the waiting
369390
* time elapsed before `numExecutors` executors up. Exposed for testing.

resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package org.apache.spark.deploy.k8s.integrationtest
1818

1919
import io.fabric8.kubernetes.api.model.Pod
2020

21+
import org.apache.spark.TestUtils
2122
import org.apache.spark.launcher.SparkLauncher
2223

2324
private[spark] trait BasicTestsSuite { k8sSuite: KubernetesSuite =>
@@ -99,9 +100,12 @@ private[spark] trait BasicTestsSuite { k8sSuite: KubernetesSuite =>
99100
}
100101

101102
// Serves the pagerank data file from the local Spark checkout over HTTP instead of
// fetching it from a remote GitHub URL, keeping the test hermetic across Spark versions.
test("Run SparkRemoteFileTest using a remote data file", k8sTestTag) {
  assert(sys.props.contains("spark.test.home"), "spark.test.home is not set!")
  TestUtils.withHttpServer(sys.props("spark.test.home")) { baseURL =>
    // Point spark.files at the locally served copy of the data file.
    val remoteFileUrl = baseURL.toString + REMOTE_PAGE_RANK_DATA_FILE
    sparkAppConf.set("spark.files", remoteFileUrl)
    runSparkRemoteCheckAndVerifyCompletion(appArgs = Array(REMOTE_PAGE_RANK_FILE_NAME))
  }
}
106110
}
107111

@@ -110,7 +114,6 @@ private[spark] object BasicTestsSuite {
110114
// Directory inside the executor container into which `spark.files` are downloaded.
val CONTAINER_LOCAL_FILE_DOWNLOAD_PATH = "/var/spark-data/spark-files"
// Full in-container path of the downloaded pagerank data file.
val CONTAINER_LOCAL_DOWNLOADED_PAGE_RANK_DATA_FILE =
  CONTAINER_LOCAL_FILE_DOWNLOAD_PATH + "/pagerank_data.txt"
// Path of the data file, relative to spark.test.home, served by the test HTTP server.
val REMOTE_PAGE_RANK_DATA_FILE = "data/mllib/pagerank_data.txt"
// Bare file name the remote-file check looks for on the driver/executors.
val REMOTE_PAGE_RANK_FILE_NAME = "pagerank_data.txt"
}

0 commit comments

Comments
 (0)