
Commit f9c60e3

Merge pull request #9 from a-roberts/patch-6
Provide params and multithreaded Spark build
2 parents: 88dd270 + 8c57166


als/build.sh — 41 additions, 20 deletions
@@ -1,28 +1,49 @@
-#build the jni cpp code and cuda code for ALS
 #!/bin/bash
-#unamestr=`uname -m`
+
+if [ "$1" = "help" ]; then
+    echo ""
+    echo "Use this script to build the JNI .cpp code and CUDA code for ALS, optionally Spark too"
+    echo ""
+    echo "build.sh spark 2.5 -> only build Spark with profiles hardcoded here (Hadoop 2.5)"
+    echo ""
+    echo "build.sh spark 2.6 dist -> build Spark and create the distribution package (Hadoop 2.6)"
+    echo ""
+    echo "build.sh spark 2.7 dist tgz -> build Spark, create the distribution package, tarball and zip it (Hadoop 2.7)"
+    echo ""
+fi
+
 if [ -z ${JAVA_HOME} ]; then
-    echo "Please set JAVA_HOME!"
-    exit 1
-else
-    echo "use existing JAVA_HOME " $JAVA_HOME
+    echo "Please set your JAVA_HOME to point to your Java installation"
+    exit 1
 fi
 
-if [ -z ${CUDA_ROOT} ]; then
-    echo "Please set CUDA_ROOT to the cuda installation, say /usr/local/cuda !"
-    exit 1
-else
-    echo "use existing CUDA_ROOT " $CUDA_ROOT
+if [ -z ${CUDA_HOME} ]; then
+    echo "Please set your CUDA_HOME to point to your CUDA installation e.g. /usr/local/cuda"
+    exit 1
 fi
 
-echo "compile the cuda & native code"
-$CUDA_ROOT/bin/nvcc -shared -D_USE_GPU_ -I/usr/include -I$JAVA_HOME/include -I$JAVA_HOME/include/linux ../utilities.cu src/cuda/als.cu src/CuMFJNIInterface.cpp -o libGPUALS.so -Xcompiler "-fPIC" -m64 -use_fast_math -rdc=true -gencode arch=compute_35,code=sm_35 -gencode arch=compute_35,code=compute_35 -O3 -Xptxas -dlcm=ca -L{$CUDA_ROOT}/lib64 -lcublas -lcusparse
+echo "Using JAVA_HOME: $JAVA_HOME"
+echo "Using CUDA_HOME: $CUDA_HOME"
+echo "Compiling the CUDA and native code"
 
-#echo "build spark"
-#SPARK_HOME=../../Spark-MLlib/
-#cd $SPARK_HOME
-#build/mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -DskipTests clean package
+$CUDA_HOME/bin/nvcc -shared -D_USE_GPU_ -I/usr/include -I$JAVA_HOME/include -I$JAVA_HOME/include/linux ../utilities.cu src/cuda/als.cu src/CuMFJNIInterface.cpp -o libGPUALS.so -Xcompiler "-fPIC" -m64 -use_fast_math -rdc=true -gencode arch=compute_35,code=sm_35 -gencode arch=compute_35,code=compute_35 -O3 -Xptxas -dlcm=ca -L${CUDA_HOME}/lib64 -lcublas -lcusparse
 
-#echo "build spark distribution"
-#cd $SPARK_HOME
-#./dev/make-distribution.sh -Pnetlib-lgpl -Pyarn -Phadoop-2.7 -Dhadoop.version=2.7.2
+if [ "$1" = "spark" ]; then
+    SPARK_HOME=../../SparkGPU/
+    echo "Building Spark from $SPARK_HOME, will include profiles for Yarn, Hadoop, Hive, Hive-Thriftserver by default, edit this to override (e.g. for SparkR, Kinesis)"
+    cd $SPARK_HOME
+    # Prevents OoM issues on IBM Power LE and JDK 8
+    export MAVEN_OPTS="-Xmx4g"
+    PROFILES="-Pyarn -Phadoop-$2 -Phive -Phive-thriftserver"
+    # -T 1C means: run with multiple threads, one per core; this is OK for Spark
+    build/mvn -T 1C $PROFILES -DskipTests package
+    # Should we create the distribution package?
+    if [ "$3" = "dist" ]; then
+        # Should we tarball and zip it?
+        if [ "$4" = "tgz" ]; then
+            dev/make-distribution.sh $PROFILES --tgz
+        else
+            dev/make-distribution.sh $PROFILES
+        fi
+    fi
+fi
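
With these parameters in place, the script has three modes: JNI/CUDA-only, Spark build, and Spark build plus distribution. A few example invocations based on the help text above (the JAVA_HOME path is illustrative, adjust it to your installation; the Hadoop version argument is substituted directly into the -Phadoop-$2 profile name, so it must name a profile that exists in the Spark build):

    # Environment the script checks for (paths are examples):
    export JAVA_HOME=/usr/lib/jvm/java-8-openjdk
    export CUDA_HOME=/usr/local/cuda

    ./build.sh                     # build only the JNI .cpp and CUDA code (libGPUALS.so)
    ./build.sh spark 2.6           # also build Spark with the Hadoop 2.6 profile
    ./build.sh spark 2.6 dist      # ...and create the distribution package
    ./build.sh spark 2.7 dist tgz  # ...and tarball/zip the distribution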
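A small shell caveat with the guards above: the expansion in [ -z ${JAVA_HOME} ] is unquoted, which happens to behave when the variable is unset but fails with "too many arguments" if the value contains spaces. A safer equivalent, sketched here rather than taken from the commit:

    # Quote the expansion so the test always sees exactly one argument:
    if [ -z "${JAVA_HOME}" ]; then
        echo "Please set your JAVA_HOME to point to your Java installation"
        exit 1
    fi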
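The nvcc step links libGPUALS.so against cuBLAS and cuSPARSE for use over JNI. A quick post-build sanity check, assuming the library lands in the current directory (these commands are not part of the commit):

    # Confirm the shared library was produced and its CUDA dependencies resolve:
    ls -lh libGPUALS.so
    ldd libGPUALS.so | grep -E 'cublas|cusparse|not found'

    # The JVM must be able to locate the library at runtime, for example:
    export LD_LIBRARY_PATH=$PWD:$CUDA_HOME/lib64:$LD_LIBRARY_PATH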
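On the new -T 1C flag: Maven 3's parallel builder accepts either an absolute thread count or a per-core multiplier, so the build width can be tuned if one thread per core proves too aggressive on a given machine (illustrative variants, not in the commit):

    build/mvn -T 1C $PROFILES -DskipTests package    # one thread per available core, as the script uses
    build/mvn -T 2 $PROFILES -DskipTests package     # exactly two threads
    build/mvn -T 1.5C $PROFILES -DskipTests package  # 1.5 threads per core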
