Skip to content

Commit 1a97f0d

Browse files
committed
SPARK-1119 and other build improvements
1. Makes assembly and examples jar naming consistent in Maven/sbt. 2. Updates make-distribution.sh to use Maven and fixes some bugs. 3. Updates the create-release script to call the make-distribution script.
1 parent ea8cea8 commit 1a97f0d

File tree

7 files changed

+70
-41
lines changed

7 files changed

+70
-41
lines changed

assembly/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333

3434
<properties>
3535
<spark.jar.dir>scala-${scala.binary.version}</spark.jar.dir>
36-
<spark.jar.basename>${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar</spark.jar.basename>
36+
<spark.jar.basename>spark-assembly-${project.version}-hadoop${hadoop.version}.jar</spark.jar.basename>
3737
<spark.jar>${project.build.directory}/${spark.jar.dir}/${spark.jar.basename}</spark.jar>
3838
<deb.pkg.name>spark</deb.pkg.name>
3939
<deb.install.path>/usr/share/spark</deb.install.path>

bin/compute-classpath.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,9 @@ if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
5050
else
5151
# Else use spark-assembly jar from either RELEASE or assembly directory
5252
if [ -f "$FWDIR/RELEASE" ]; then
53-
ASSEMBLY_JAR=`ls "$FWDIR"/jars/spark*-assembly*.jar`
53+
ASSEMBLY_JAR=`ls "$FWDIR"/lib/spark-assembly*hadoop*.jar`
5454
else
55-
ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark*-assembly*hadoop*.jar`
55+
ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*.jar`
5656
fi
5757
CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR"
5858
fi

bin/run-example

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,15 @@ fi
4040
# Figure out the JAR file that our examples were packaged into. This includes a bit of a hack
4141
# to avoid the -sources and -doc packages that are built by publish-local.
4242
EXAMPLES_DIR="$FWDIR"/examples
43-
SPARK_EXAMPLES_JAR=""
44-
if [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar ]; then
45-
export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar`
43+
44+
if [ -f "$FWDIR/RELEASE" ]; then
45+
export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`
46+
elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then
47+
export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar`
4648
fi
49+
4750
if [[ -z $SPARK_EXAMPLES_JAR ]]; then
48-
echo "Failed to find Spark examples assembly in $FWDIR/examples/target" >&2
51+
echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" >&2
4952
echo "You need to build Spark with sbt/sbt assembly before running this program" >&2
5053
exit 1
5154
fi

dev/create-release/create-release.sh

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -83,15 +83,15 @@ rm -rf spark-$RELEASE_VERSION
8383

8484
make_binary_release() {
8585
NAME=$1
86-
MAVEN_FLAGS=$2
87-
86+
FLAGS=$2
8887
cp -r spark spark-$RELEASE_VERSION-bin-$NAME
88+
8989
cd spark-$RELEASE_VERSION-bin-$NAME
90-
export MAVEN_OPTS="-Xmx3g -XX:MaxPermSize=1g -XX:ReservedCodeCacheSize=1g"
91-
mvn $MAVEN_FLAGS -DskipTests clean package
92-
find . -name test-classes -type d | xargs rm -rf
93-
find . -name classes -type d | xargs rm -rf
90+
./make-distribution.sh $FLAGS --name $NAME --tgz
9491
cd ..
92+
cp spark-$RELEASE_VERSION-bin-$NAME/spark-$RELEASE_VERSION-bin-$NAME.tgz .
93+
rm -rf spark-$RELEASE_VERSION-bin-$NAME
94+
9595
tar cvzf spark-$RELEASE_VERSION-bin-$NAME.tgz spark-$RELEASE_VERSION-bin-$NAME
9696
echo $GPG_PASSPHRASE | gpg --passphrase-fd 0 --armour \
9797
--output spark-$RELEASE_VERSION-bin-$NAME.tgz.asc \
@@ -105,9 +105,9 @@ make_binary_release() {
105105
rm -rf spark-$RELEASE_VERSION-bin-$NAME
106106
}
107107

108-
make_binary_release "hadoop1" "-Dhadoop.version=1.0.4"
109-
make_binary_release "cdh4" "-Dhadoop.version=2.0.0-mr1-cdh4.2.0"
110-
make_binary_release "hadoop2" "-Pyarn -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0"
108+
make_binary_release "hadoop1" "--hadoop 1.0.4"
109+
make_binary_release "cdh4" "--hadoop 2.0.0-mr1-cdh4.2.0"
110+
make_binary_release "hadoop2" "--with-yarn --hadoop 2.2.0"
111111

112112
# Copy data
113113
echo "Copying release tarballs"

examples/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@
187187
<artifactId>maven-shade-plugin</artifactId>
188188
<configuration>
189189
<shadedArtifactAttached>false</shadedArtifactAttached>
190-
<outputFile>${project.build.directory}/scala-${scala.binary.version}/${project.artifactId}-assembly-${project.version}.jar</outputFile>
190+
<outputFile>${project.build.directory}/scala-${scala.binary.version}/spark-examples-${project.version}-hadoop${hadoop.version}.jar</outputFile>
191191
<artifactSet>
192192
<includes>
193193
<include>*:*</include>

make-distribution.sh

Lines changed: 47 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
# --tgz: Additionally creates spark-$VERSION-bin.tar.gz
2929
# --hadoop VERSION: Builds against specified version of Hadoop.
3030
# --with-yarn: Enables support for Hadoop YARN.
31+
# --with-hive: Enable support for reading Hive tables.
32+
# --name: A moniker for the release target. Defaults to the Hadoop version.
3133
#
3234
# Recommended deploy/testing procedure (standalone mode):
3335
# 1) Rsync / deploy the dist/ dir to one host
@@ -41,25 +43,20 @@
4143
FWDIR="$(cd `dirname $0`; pwd)"
4244
DISTDIR="$FWDIR/dist"
4345

44-
# Get version from SBT
45-
export TERM=dumb # Prevents color codes in SBT output
46-
47-
VERSIONSTRING=$($FWDIR/sbt/sbt "show version")
48-
46+
VERSION=$(mvn help:evaluate -Dexpression=project.version |grep -v "INFO")
4947
if [ $? == -1 ] ;then
50-
echo -e "You need sbt installed and available on your path."
51-
echo -e "Download sbt from http://www.scala-sbt.org/"
48+
echo -e "You need Maven installed to build Spark."
49+
echo -e "Download Maven from https://maven.apache.org."
5250
exit -1;
5351
fi
5452

55-
VERSION=$(echo "${VERSIONSTRING}" | tail -1 | cut -f 2 | sed 's/^\([a-zA-Z0-9.-]*\).*/\1/')
56-
echo "Version is ${VERSION}"
57-
5853
# Initialize defaults
5954
SPARK_HADOOP_VERSION=1.0.4
6055
SPARK_YARN=false
56+
SPARK_HIVE=false
6157
SPARK_TACHYON=false
6258
MAKE_TGZ=false
59+
NAME=none
6360

6461
# Parse arguments
6562
while (( "$#" )); do
@@ -71,23 +68,37 @@ while (( "$#" )); do
7168
--with-yarn)
7269
SPARK_YARN=true
7370
;;
71+
--with-hive)
72+
SPARK_HIVE=true
73+
;;
7474
--with-tachyon)
7575
SPARK_TACHYON=true
7676
;;
7777
--tgz)
7878
MAKE_TGZ=true
7979
;;
80+
--name)
81+
NAME="$2"
82+
shift
83+
;;
8084
esac
8185
shift
8286
done
8387

88+
if [ "$NAME" == "none" ]; then
89+
NAME=$SPARK_HADOOP_VERSION
90+
fi
91+
92+
echo "Spark version is $VERSION"
93+
8494
if [ "$MAKE_TGZ" == "true" ]; then
85-
echo "Making spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz"
95+
echo "Making spark-$VERSION-bin-$NAME.tgz"
8696
else
87-
echo "Making distribution for Spark $VERSION in $DISTDIR..."
97+
echo "Making distribution for Spark $VERSION in $DISTDIR..."
8898
fi
8999

90100
echo "Hadoop version set to $SPARK_HADOOP_VERSION"
101+
echo "Release name set to $NAME"
91102
if [ "$SPARK_YARN" == "true" ]; then
92103
echo "YARN enabled"
93104
else
@@ -100,20 +111,32 @@ else
100111
echo "Tachyon Disabled"
101112
fi
102113

103-
# Build fat JAR
104-
export SPARK_HADOOP_VERSION
105-
export SPARK_YARN
114+
# Build uber fat JAR
106115
cd $FWDIR
107116

108-
"sbt/sbt" "assembly/assembly"
117+
export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"
118+
119+
if [ "$SPARK_HIVE" == "true" ]; then
120+
MAYBE_HIVE="-Phive"
121+
else
122+
MAYBE_HIVE=""
123+
fi
124+
125+
if [ "$SPARK_YARN" == "true" ]; then
126+
mvn clean package -DskipTests -Pyarn -Dhadoop.version=$SPARK_HADOOP_VERSION \
127+
-Dyarn.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE
128+
else
129+
mvn clean package -DskipTests -Dhadoop.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE
130+
fi
109131

110132
# Make directories
111133
rm -rf "$DISTDIR"
112-
mkdir -p "$DISTDIR/jars"
134+
mkdir -p "$DISTDIR/lib"
113135
echo "Spark $VERSION built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE"
114136

115137
# Copy jars
116-
cp $FWDIR/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/jars/"
138+
cp $FWDIR/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/"
139+
cp $FWDIR/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/"
117140

118141
# Copy other things
119142
mkdir "$DISTDIR"/conf
@@ -135,25 +158,26 @@ if [ "$SPARK_TACHYON" == "true" ]; then
135158
wget "$TACHYON_URL"
136159

137160
tar xf "tachyon-${TACHYON_VERSION}-bin.tar.gz"
138-
cp "tachyon-${TACHYON_VERSION}/target/tachyon-${TACHYON_VERSION}-jar-with-dependencies.jar" "$DISTDIR/jars"
161+
cp "tachyon-${TACHYON_VERSION}/target/tachyon-${TACHYON_VERSION}-jar-with-dependencies.jar" "$DISTDIR/lib"
139162
mkdir -p "$DISTDIR/tachyon/src/main/java/tachyon/web"
140163
cp -r "tachyon-${TACHYON_VERSION}"/{bin,conf,libexec} "$DISTDIR/tachyon"
141164
cp -r "tachyon-${TACHYON_VERSION}"/src/main/java/tachyon/web/resources "$DISTDIR/tachyon/src/main/java/tachyon/web"
142165

143166
if [[ `uname -a` == Darwin* ]]; then
144167
# need to run sed differently on osx
145-
nl=$'\n'; sed -i "" -e "s|export TACHYON_JAR=\$TACHYON_HOME/target/\(.*\)|# This is set for spark's make-distribution\\$nl export TACHYON_JAR=\$TACHYON_HOME/../jars/\1|" "$DISTDIR/tachyon/libexec/tachyon-config.sh"
168+
nl=$'\n'; sed -i "" -e "s|export TACHYON_JAR=\$TACHYON_HOME/target/\(.*\)|# This is set for spark's make-distribution\\$nl export TACHYON_JAR=\$TACHYON_HOME/../lib/\1|" "$DISTDIR/tachyon/libexec/tachyon-config.sh"
146169
else
147-
sed -i "s|export TACHYON_JAR=\$TACHYON_HOME/target/\(.*\)|# This is set for spark's make-distribution\n export TACHYON_JAR=\$TACHYON_HOME/../jars/\1|" "$DISTDIR/tachyon/libexec/tachyon-config.sh"
170+
sed -i "s|export TACHYON_JAR=\$TACHYON_HOME/target/\(.*\)|# This is set for spark's make-distribution\n export TACHYON_JAR=\$TACHYON_HOME/../lib/\1|" "$DISTDIR/tachyon/libexec/tachyon-config.sh"
148171
fi
149172

150173
popd > /dev/null
151174
rm -rf $TMPD
152175
fi
153176

154177
if [ "$MAKE_TGZ" == "true" ]; then
155-
TARDIR="$FWDIR/spark-$VERSION"
178+
TARDIR_NAME=spark-$VERSION-bin-$NAME
179+
TARDIR="$FWDIR/$TARDIR_NAME"
156180
cp -r "$DISTDIR" "$TARDIR"
157-
tar -zcf "spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz" -C "$FWDIR" "spark-$VERSION"
181+
tar czf "spark-$VERSION-bin-$NAME.tgz" -C "$FWDIR" "$TARDIR_NAME"
158182
rm -rf "$TARDIR"
159183
fi

project/SparkBuild.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,9 @@ object SparkBuild extends Build {
411411
)
412412

413413
def examplesSettings = sharedSettings ++ Seq(
414-
name := "spark-examples",
414+
name := "spark-examples",
415+
jarName in assembly <<= version map {
416+
v => "spark-examples-" + v + "-hadoop" + hadoopVersion + ".jar" },
415417
libraryDependencies ++= Seq(
416418
"com.twitter" %% "algebird-core" % "0.1.11",
417419
"org.apache.hbase" % "hbase" % HBASE_VERSION excludeAll(excludeNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging),

0 commit comments

Comments
 (0)