
Commit a646a84 (1 parent: c659b29)

Add SPARK_HOME and remove start-master process by default

3 files changed: 11 additions, 7 deletions

README.md

Lines changed: 2 additions & 2 deletions
@@ -19,7 +19,7 @@ From the spark instance, you could reach the MongoDB instance using `mongodb` ho
 You can find a small dataset example in `/home/ubuntu/times.json` which you can load using [initDocuments.scala](spark/files/initDocuments.scala) :
 
 ```
-${HOME}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/bin/spark-shell --conf "spark.mongodb.input.uri=mongodb://mongodb:27017/spark.times" --conf "spark.mongodb.output.uri=mongodb://mongodb/spark.output" --packages org.mongodb.spark:mongo-spark-connector_2.10:1.0.0 -i ./initDocuments.scala
+${SPARK_HOME}/bin/spark-shell --conf "spark.mongodb.input.uri=mongodb://mongodb:27017/spark.times" --conf "spark.mongodb.output.uri=mongodb://mongodb/spark.output" --packages org.mongodb.spark:mongo-spark-connector_${SCALA_VERSION}:${MONGO_SPARK_VERSION} -i ./initDocuments.scala
 ```
 
@@ -28,7 +28,7 @@ For examples, please see [reduceByKey.scala](spark/files/reduceByKey.scala) to q
 Run the `spark shell` by executing:
 
 ```sh
-${HOME}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/bin/spark-shell --conf "spark.mongodb.input.uri=mongodb://mongodb:27017/spark.times" --conf "spark.mongodb.output.uri=mongodb://mongodb/spark.output" --packages org.mongodb.spark:mongo-spark-connector_2.10:1.0.0
+${SPARK_HOME}/bin/spark-shell --conf "spark.mongodb.input.uri=mongodb://mongodb:27017/spark.times" --conf "spark.mongodb.output.uri=mongodb://mongodb/spark.output" --packages org.mongodb.spark:mongo-spark-connector_${SCALA_VERSION}:${MONGO_SPARK_VERSION}
 ```
 
 You can also append `-i <file.scala>` to execute a scala file via the spark shell.
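reduceByKey.scala itself is also not shown in this diff; the pattern it names looks roughly like the sketch below. The `name` field is purely an assumed attribute of the documents in `spark.times`:

```scala
// Hypothetical reduceByKey-style aggregation over the imported collection.
import com.mongodb.spark._

// MongoSpark.load reads from spark.mongodb.input.uri (spark.times here)
val times = MongoSpark.load(sc)

// Count documents per (assumed) "name" field
val counts = times
  .map(doc => (doc.getString("name"), 1))
  .reduceByKey(_ + _)

counts.collect().foreach { case (key, n) => println(s"$key -> $n") }
```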

spark/Dockerfile

Lines changed: 3 additions & 3 deletions
@@ -13,10 +13,12 @@ ENV HOME /home/ubuntu
 ENV SPARK_VERSION 1.6.2
 ENV HADOOP_VERSION 2.6
 ENV MONGO_SPARK_VERSION 1.0.0
-ENV SCALA_VERSION 2.11
+ENV SCALA_VERSION 2.10
 
 WORKDIR ${HOME}
 
+ENV SPARK_HOME ${HOME}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}
+
 COPY files/times.json /home/ubuntu/times.json
 COPY files/readme.txt /home/ubuntu/readme.txt
 COPY files/reduceByKey.scala /home/ubuntu/reduceByKey.scala
@@ -31,5 +33,3 @@ tar xvf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
 
 RUN rm -fv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
 
-# Run single node of spark
-RUN ${HOME}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/sbin/start-master.sh

spark/files/readme.txt

Lines changed: 6 additions & 2 deletions
@@ -4,9 +4,13 @@
 mongoimport -h <mongodb ip> -d spark -c times ./times.json
 
 # Or you can just use initDocuments.scala to import using Spark itself
-${HOME}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/bin/spark-shell --conf "spark.mongodb.input.uri=mongodb://mongodb:27017/spark.times" --conf "spark.mongodb.output.uri=mongodb://mongodb/spark.output" --packages org.mongodb.spark:mongo-spark-connector_2.10:1.0.0 -i ./initDocuments.scala
+${SPARK_HOME}/bin/spark-shell --conf "spark.mongodb.input.uri=mongodb://mongodb:27017/spark.times" --conf "spark.mongodb.output.uri=mongodb://mongodb/spark.output" --packages org.mongodb.spark:mongo-spark-connector_2.10:1.0.0 -i ./initDocuments.scala
 
 # Run spark-shell
-${HOME}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/bin/spark-shell --conf "spark.mongodb.input.uri=mongodb://mongodb:27017/spark.times" --conf "spark.mongodb.output.uri=mongodb://mongodb:27107/spark.output" --packages org.mongodb.spark:mongo-spark-connector_2.10:1.0.0
+${SPARK_HOME}/bin/spark-shell --conf "spark.mongodb.input.uri=mongodb://mongodb:27017/spark.times" --conf "spark.mongodb.output.uri=mongodb://mongodb:27017/spark.output" --packages org.mongodb.spark:mongo-spark-connector_${SCALA_VERSION}:${MONGO_SPARK_VERSION}
 
 # Or you can run scala file through the shell by specifying `-i <file.scala>`
+
+# start 1 master/worker
+${SPARK_HOME}/sbin/start-master.sh
+${SPARK_HOME}/sbin/start-slave.sh spark://spark:7077
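After starting the master and worker (the last two lines above), a quick sanity check from the spark shell is counting the imported collection; a minimal sketch, assuming the shell was launched with the `spark.mongodb.input.uri` shown earlier:

```scala
import com.mongodb.spark._

// Reads from the collection configured in spark.mongodb.input.uri
val times = MongoSpark.load(sc)
println(s"documents in spark.times: ${times.count()}")
```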
