[SPARK-13808][test-maven] Don't build assembly in dev/run-tests #11701

Closed · wants to merge 7 commits
bin/spark-class (6 changes: 3 additions & 3 deletions)
@@ -43,14 +43,14 @@ else
SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION"
fi

-if [ ! -d "$SPARK_JARS_DIR" ]; then
+if [ ! -d "$SPARK_JARS_DIR" ] && [ -z "$SPARK_TESTING" ]; then
echo "Failed to find Spark jars directory ($SPARK_JARS_DIR)." 1>&2
echo "You need to build Spark before running this program." 1>&2
exit 1
+else
+  LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"
fi

-LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"

# Add the launcher build dir to the classpath if requested.
if [ -n "$SPARK_PREPEND_CLASSES" ]; then
LAUNCH_CLASSPATH="${SPARK_HOME}/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH"
Contributor:
Somehow this is not being picked up (test is failing because the Main class is not found). Is this path correct for maven too?
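For context on the question above: the path being prepended resolves to launcher/target/scala-$SPARK_SCALA_VERSION/classes under SPARK_HOME. A small, hypothetical check (not part of this PR) of whether that directory exists for a given build, sketched in Python:

import os

# Hypothetical sanity check, not from this PR: does the launcher classes
# directory that spark-class prepends exist for the current build?
spark_home = os.environ.get("SPARK_HOME", ".")
scala_version = os.environ.get("SPARK_SCALA_VERSION", "2.11")  # assumed default
launcher_classes = os.path.join(
    spark_home, "launcher", "target", "scala-" + scala_version, "classes")
print(launcher_classes, "exists" if os.path.isdir(launcher_classes) else "is missing")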

dev/run-tests.py (13 changes: 0 additions & 13 deletions)
@@ -349,16 +349,6 @@ def build_spark_sbt(hadoop_version):
exec_sbt(profiles_and_goals)


-def build_spark_assembly_sbt(hadoop_version):
-    # Enable all of the profiles for the build:
-    build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
-    sbt_goals = ["assembly/assembly"]
-    profiles_and_goals = build_profiles + sbt_goals
-    print("[info] Building Spark assembly (w/Hive 1.2.1) using SBT with these arguments: ",
-          " ".join(profiles_and_goals))
-    exec_sbt(profiles_and_goals)


def build_apache_spark(build_tool, hadoop_version):
"""Will build Spark against Hive v1.2.1 given the passed in build tool (either `sbt` or
`maven`). Defaults to using `sbt`."""
@@ -574,9 +564,6 @@ def main():
if build_tool == "sbt":
# Note: compatibility tests only supported in sbt for now
detect_binary_inop_with_mima()
-# Since we did not build assembly/assembly before running dev/mima, we need to
-# do it here because the tests still rely on it; see SPARK-13294 for details.
-build_spark_assembly_sbt(hadoop_version)

# run the test suites
run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags)
launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
@@ -144,10 +144,27 @@ List<String> buildClassPath(String appClassPath) throws IOException {
boolean isTesting = "1".equals(getenv("SPARK_TESTING"));
if (prependClasses || isTesting) {
String scala = getScalaVersion();
List<String> projects = Arrays.asList("core", "repl", "mllib", "graphx",
"streaming", "tools", "sql/catalyst", "sql/core", "sql/hive", "sql/hive-thriftserver",
"yarn", "launcher",
"common/network-common", "common/network-shuffle", "common/network-yarn");
List<String> projects = Arrays.asList(
"common/network-common",
"common/network-shuffle",
"common/network-yarn",
"common/sketch",
"common/tags",
"common/unsafe",
"core",
"examples",
"graphx",
"launcher",
"mllib",
"repl",
"sql/catalyst",
"sql/core",
"sql/hive",
"sql/hive-thriftserver",
"streaming",
"tools",
"yarn"
);
if (prependClasses) {
if (!isTesting) {
System.err.println(
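For reference, an illustration (not code from this PR) of what the list above is used for: when SPARK_PREPEND_CLASSES or SPARK_TESTING is set, each listed module contributes its compiled classes directory to the launch classpath. A rough Python sketch of the assumed path layout:

import os

def prepend_classes_entries(spark_home, projects, scala_version):
    # One classes directory per module; the layout mirrors what the launcher
    # prepends (assumed: <module>/target/scala-<version>/classes).
    return [
        os.path.join(spark_home, project, "target",
                     "scala-" + scala_version, "classes")
        for project in projects
    ]

# Example with two of the modules listed above (paths are hypothetical).
print(prepend_classes_entries("/path/to/spark", ["core", "common/unsafe"], "2.11"))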
python/run-tests.py (27 changes: 23 additions & 4 deletions)
@@ -54,10 +54,27 @@ def print_red(text):
LOGGER = logging.getLogger()


-def run_individual_python_test(test_name, pyspark_python):
+def get_spark_dist_classpath():
+    original_working_dir = os.getcwd()
+    os.chdir(SPARK_HOME)
+    cp = subprocess_check_output(
+        ["./build/sbt", "-Phive", "export assembly/managedClasspath"], universal_newlines=True)
Contributor Author:
@vanzin, as part of your next patch will SPARK_DIST_CLASSPATH just point to the libs directory? If so, we can remove this as part of that patch.

Contributor:
Potentially. I'll make a note to take a look at this.
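As an aside on the "export assembly/managedClasspath" invocation above: sbt prints its own log lines before the exported classpath, so the parsing below keeps only the last line of the output. A hypothetical illustration (log lines and jar paths are made up):

# Made-up sbt output; only the final line is the classpath we want.
raw_output = (
    "[info] Loading project definition from /workspace/spark/project\n"
    "[info] Set current project to spark-parent\n"
    "/workspace/spark/a.jar:/workspace/spark/b.jar\n"
)
classpath = raw_output.strip().split("\n")[-1]
print(classpath)  # /workspace/spark/a.jar:/workspace/spark/b.jar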

+    cp = cp.strip().split("\n")[-1]
+    os.chdir(original_working_dir)
+    return cp


+def run_individual_python_test(test_name, pyspark_python, spark_dist_classpath):
env = dict(os.environ)
-    env.update({'SPARK_TESTING': '1', 'PYSPARK_PYTHON': which(pyspark_python),
-                'PYSPARK_DRIVER_PYTHON': which(pyspark_python)})
+    env.update({
+        # Setting SPARK_DIST_CLASSPATH is a simple way to make sure that any child processes
+        # launched by the tests have access to the correct test-time classpath.
+        'SPARK_DIST_CLASSPATH': spark_dist_classpath,
+        'SPARK_TESTING': '1',
+        'SPARK_PREPEND_CLASSES': '1',
+        'PYSPARK_PYTHON': which(pyspark_python),
+        'PYSPARK_DRIVER_PYTHON': which(pyspark_python),
+    })
LOGGER.debug("Starting test(%s): %s", pyspark_python, test_name)
start_time = time.time()
try:
@@ -175,14 +192,16 @@ def main():
priority = 100
task_queue.put((priority, (python_exec, test_goal)))

+spark_dist_classpath = get_spark_dist_classpath()

def process_queue(task_queue):
while True:
try:
(priority, (python_exec, test_goal)) = task_queue.get_nowait()
except Queue.Empty:
break
try:
-run_individual_python_test(test_goal, python_exec)
+run_individual_python_test(test_goal, python_exec, spark_dist_classpath)
finally:
task_queue.task_done()

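To round out the SPARK_DIST_CLASSPATH comment in run_individual_python_test above: because the variable is placed in the environment passed to each test, any child process the test starts inherits it. A minimal sketch, assuming a POSIX printenv and a made-up classpath:

import os
import subprocess

env = dict(os.environ)
env["SPARK_DIST_CLASSPATH"] = "/workspace/spark/a.jar:/workspace/spark/b.jar"  # hypothetical
# Any child process launched with this env (e.g. a JVM started by a test) sees the variable.
subprocess.check_call(["printenv", "SPARK_DIST_CLASSPATH"], env=env)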