Skip to content

Commit 472ce64

Browse files
committed
Added spark deploy scripts, with a wrapper script to set AWS credentials for sampleclean.
Documented deploy process in deploy/README.md See 'sampleclean' branch of github.com/thisisdhaas/spark-ec2 for installation/setup scripts.
1 parent 4e2a868 commit 472ce64

File tree

9 files changed

+1011
-2
lines changed

9 files changed

+1011
-2
lines changed

deploy/README.md

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
This folder contains a script, `sampleclean-ec2`, for launching SampleClean
instances on Amazon EC2.

The script calls the spark-ec2 script (documented at
http://spark.apache.org/docs/latest/ec2-scripts.html), and can be called as
either:

```shell
./sampleclean-ec2 CREDENTIALS_DIRECTORY SPARK-EC2-ARG [SPARK-EC2-ARG ...]
```
or:

```shell
./sampleclean-ec2 SPARK-EC2-ARG [SPARK-EC2-ARG ...]
```

In the latter case, `CREDENTIALS_DIRECTORY` will be set to the value of the
environment variable `$AWS_CREDENTIALS_DIR` if it is set, or to the default
value `$HOME/.ssh/aws/sampleclean`.

Either way, `CREDENTIALS_DIRECTORY` must be a path to a directory containing
the AWS credentials needed for using AWS and EC2:

* A `*.csv` file containing IAM credentials for accessing AWS on your behalf.
* A `KEYNAME.pem` file containing the EC2 keypair corresponding to `KEYNAME`.

For help with `SPARK-EC2-ARGs`, run `./sampleclean-ec2 --help`.
For example, to launch a cluster with 8 slaves, then run the crowd server:

```shell
# Alternatively, use a pre-saved ami with --master-ami AMI_ID
./sampleclean-ec2 ~/.ssh/aws/sampleclean/ -s 8 -t m1.large launch sampleclean
# ... lots of output ...
./sampleclean-ec2 ~/.ssh/aws/sampleclean/ get-master sampleclean
# ... get the master's hostname ...
./sampleclean-ec2 ~/.ssh/aws/sampleclean/ login sampleclean
> workon sampleclean
> cd $PROJECT_HOME
> ./run.sh -d
# ... and the crowd server should be available at the master's hostname on port 8000 ...
```
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Cluster-wide environment template. Every {{placeholder}} below is
# substituted with a concrete value by the spark-ec2 script before this file
# is shipped to the cluster nodes; do not fill the placeholders in by hand.

# Cluster topology: hostnames of the master(s) and the slave nodes.
MASTERS="{{master_list}}"
SLAVES="{{slave_list}}"
export MASTERS SLAVES

# Local directories used for HDFS data, MapReduce scratch space, and Spark
# scratch space on each node.
HDFS_DATA_DIRS="{{hdfs_data_dirs}}"
MAPRED_LOCAL_DIRS="{{mapred_local_dirs}}"
SPARK_LOCAL_DIRS="{{spark_local_dirs}}"
export HDFS_DATA_DIRS MAPRED_LOCAL_DIRS SPARK_LOCAL_DIRS

# Which modules to set up, and the software versions to install.
MODULES="{{modules}}"
SPARK_VERSION="{{spark_version}}"
SHARK_VERSION="{{shark_version}}"
HADOOP_MAJOR_VERSION="{{hadoop_major_version}}"
export MODULES SPARK_VERSION SHARK_VERSION HADOOP_MAJOR_VERSION

# Runtime tuning: swap size (MB), Spark workers per node, and extra options
# for the Spark master daemon.
SWAP_MB="{{swap}}"
SPARK_WORKER_INSTANCES="{{spark_worker_instances}}"
SPARK_MASTER_OPTS="{{spark_master_opts}}"
export SWAP_MB SPARK_WORKER_INSTANCES SPARK_MASTER_OPTS

deploy/sampleclean-ec2.sh

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#! /bin/bash
#
# Wrapper around spark-ec2 that loads AWS credentials before delegating.
#
# Usage:
#   ./sampleclean-ec2 [CREDENTIALS_DIRECTORY] SPARK-EC2-ARG [SPARK-EC2-ARG ...]
#
# setup_env.sh exports AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY,
# AWS_EC2_KEY_NAME, and AWS_EC2_KEY_FILE. Its return status tells us whether
# it consumed $1 as the credentials directory (non-zero) or fell back to
# $AWS_CREDENTIALS_DIR / the default (zero).

# Use the first argument (if any) to set up our AWS environment variables.
# ${1:+"$1"} forwards $1 only when it is set and non-empty, so calling this
# wrapper with no arguments does not pass an empty string to setup_env.sh.
source ./setup_env.sh ${1:+"$1"}
dir_was_passed=$?

# Pass the remaining arguments to spark-ec2, along with the credentials we
# just loaded. An array (rather than a flattened string) keeps arguments
# that contain whitespace intact.
if [ "$dir_was_passed" -ne 0 ]
then
    args=("${@:2}")
else
    args=("${@:1}")
fi
./spark-ec2 -i "$AWS_EC2_KEY_FILE" -k "$AWS_EC2_KEY_NAME" "${args[@]}"

deploy/setup_env.sh

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#! /bin/bash
#
# Load AWS credentials into the environment from a credentials directory.
#
# Intended to be sourced:
#   source ./setup_env.sh [CREDENTIALS_DIRECTORY]
#
# The credentials directory must contain exactly one IAM `*.csv` file and
# exactly one EC2 `*.pem` keypair file. On success the following variables
# are exported:
#   AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY -- parsed from the .csv file
#   AWS_EC2_KEY_NAME, AWS_EC2_KEY_FILE       -- derived from the .pem file
#
# Returns 0 when the directory came from $AWS_CREDENTIALS_DIR (or the
# hard-coded default), and 1 when it was consumed from the argument list;
# callers use this to decide whether to drop $1 before forwarding arguments.

hardcoded_credentials_dir="$HOME/.ssh/aws/sampleclean"

# Require at most 1 argument.
if [ "$#" -gt "1" ]
then
    echo "USAGE: $0 [CREDENTIALS_DIRECTORY]" >&2
    exit 1
fi

no_cred_dir=""
ret=1
# Treat $1 as a credentials directory only if it is non-empty, is not an
# option flag, and is not one of the spark-ec2 action verbs.
if [ "$#" -eq "1" ] && [ -n "$1" ] && [ "${1:0:1}" != "-" ]
then
    case "$1" in
        launch|destroy|login|stop|start|get-master)
            # A spark-ec2 action, not a directory.
            ;;
        *)
            no_cred_dir="false"
            credentials_dir=$1
            ;;
    esac
fi

if [ -z "$no_cred_dir" ]
then
    # No directory in the argument list: fall back to the environment
    # variable, then to the hard-coded default.
    ret=0
    credentials_dir=${AWS_CREDENTIALS_DIR:-$hardcoded_credentials_dir}
fi

# Make sure there is exactly one .csv and exactly one .pem file in the
# directory. Globbing (instead of parsing `ls | grep`) avoids matching the
# extension in the middle of a filename and handles paths with spaces.
csv_files=("$credentials_dir"/*.csv)
pem_files=("$credentials_dir"/*.pem)
if [ "${#csv_files[@]}" -ne 1 ] || [ ! -e "${csv_files[0]}" ]
then
    echo "ERROR: credentials directory must contain exactly one .csv file" >&2
    exit 1
fi
if [ "${#pem_files[@]}" -ne 1 ] || [ ! -e "${pem_files[0]}" ]
then
    echo "ERROR: credentials directory must contain exactly one .pem file" >&2
    exit 1
fi

# Look up the secret and access keys in the AWS credentials .csv file.
# The file has a "User Name,..." header row; the data row's fields are
# (name, access key id, secret access key).
credentials_file=${csv_files[0]}
export AWS_ACCESS_KEY_ID=$(grep -v "User Name" "$credentials_file" | cut -f 2 -d ",")
export AWS_SECRET_ACCESS_KEY=$(grep -v "User Name" "$credentials_file" | cut -f 3 -d ",")

# Derive the EC2 keypair name from the .pem file's basename.
key_file=${pem_files[0]}
export AWS_EC2_KEY_NAME=$(basename "$key_file" .pem)
export AWS_EC2_KEY_FILE=$key_file

# Return whether the credentials directory was in the argument list.
return "$ret"

deploy/spark-ec2

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/bin/sh

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Run the bundled spark_ec2.py driver from this script's own directory, with
# the bundled boto library prepended to the Python path. All command-line
# arguments are forwarded untouched; "$@" (quoted) preserves arguments that
# contain whitespace, which bare $@ would split.
cd "$(dirname "$0")" || exit 1
PYTHONPATH="./third_party/boto-2.4.1.zip/boto-2.4.1:$PYTHONPATH" python ./spark_ec2.py "$@"

0 commit comments

Comments
 (0)