Skip to content

Commit fe519c2

Browse files
committed
Auto-setup HTTPS when deploying to a new cluster:
* Associates new clusters with elastic IPs and sampleclean[1|2|3].eecs.berkeley.edu domains. * Deploys SSL certificates to the master node. * Configures and runs Nginx as a reverse proxy in front of gunicorn (gunicorn doesn't handle intermediate certificates). * Defaults to self-signed certs if production credentials aren't available. * Updates deploy README to reflect changes.
1 parent 4204423 commit fe519c2

File tree

5 files changed

+80
-6
lines changed

5 files changed

+80
-6
lines changed

deploy/README.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ credentials needed for using AWS and EC2:
2727

2828
* A `*.csv` file containing IAM credentials for accessing AWS on your behalf.
2929
* A `KEYNAME.pem` file containing the EC2 keypair corresponding to `KEYNAME`.
30+
* (optional) The `sampleclean1_eecs_berkeley_edu_chained.cer` file containing
31+
the chained sampleclean*.eecs.berkeley.edu ssl certificates.
32+
* (optional) The `sampleclean1.eecs.berkeley.edu-san.key` file containing the
33+
private key for the ssl certificate.
3034

3135
For help with `SPARK-EC2-ARGs`, run `./sampleclean-ec2 --help`.
3236

@@ -64,7 +68,7 @@ To actually get code running on a cluster you've launched, you'll need to:
6468
server and run the SCDriver program:
6569
```shell
6670
cd /root/sampleclean-async
67-
./run-all-ec2.sh # pass `-s` to run the crowd server on ssl.
71+
./run-all-ec2.sh
6872
```
6973

7074
* Or to have more control (or to run things in debug mode):

deploy/sampleclean-ec2.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,4 @@ then
1212
else
1313
args=${@:1}
1414
fi
15-
./spark-ec2 -i $AWS_EC2_KEY_FILE -k $AWS_EC2_KEY_NAME $args
15+
./spark-ec2 -i $AWS_EC2_KEY_FILE -k $AWS_EC2_KEY_NAME --ssl-cert-file $SSL_CERT_FILE --ssl-key-file $SSL_KEY_FILE $args

deploy/setup_env.sh

+4
Original file line numberDiff line numberDiff line change
@@ -52,5 +52,9 @@ key_file=`ls $credentials_dir/*.pem`
5252
export AWS_EC2_KEY_NAME=$(basename "$key_file" .pem)
5353
export AWS_EC2_KEY_FILE=$key_file
5454

55+
# file paths for ssl certificates (these may not exist)
56+
export SSL_CERT_FILE="$credentials_dir/sampleclean1_eecs_berkeley_edu_chained.cer"
57+
export SSL_KEY_FILE="$credentials_dir/sampleclean1.eecs.berkeley.edu-san.key"
58+
5559
# return whether the credentials directory was in the argument list.
5660
return $ret

deploy/spark_ec2.py

+69-4
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,12 @@
4141
# A URL prefix from which to fetch AMI information
4242
AMI_PREFIX = "https://raw.github.com/mesos/spark-ec2/v2/ami-list"
4343

44+
# A list of sampleclean registered domains and AWS Elastic IPs
45+
ip_to_domain = {
46+
'23.23.183.119': 'sampleclean1.eecs.berkeley.edu',
47+
'23.23.176.121': 'sampleclean2.eecs.berkeley.edu',
48+
'23.23.183.60': 'sampleclean3.eecs.berkeley.edu',
49+
}
4450

4551
class UsageError(Exception):
4652
pass
@@ -141,7 +147,12 @@ def parse_args():
141147
parser.add_option(
142148
"--user-data", type="string", default="",
143149
help="Path to a user-data file (most AMI's interpret this as an initialization script)")
144-
150+
parser.add_option(
151+
"--ssl-cert-file", type="string", default="",
152+
help="Path to the chained ssl certificate for setting up HTTPS")
153+
parser.add_option(
154+
"--ssl-key-file", type="string", default="",
155+
help="Path to the ssl key file for setting up HTTPS")
145156

146157
(opts, args) = parser.parse_args()
147158
if len(args) != 2:
@@ -269,6 +280,47 @@ def get_spark_ami(opts):
269280

270281
return ami
271282

283+
# Associate the master node with an elastic IP (and domain name) for ssl support
284+
def assign_elastic_ip(conn, master_nodes):
285+
if len(master_nodes) != 1:
286+
print >> stderr, "More than one master: not assigning EIP!"
287+
raise ValueError()
288+
289+
all_addresses = conn.get_all_addresses()
290+
eip = None
291+
for address in all_addresses:
292+
if not address.instance_id:
293+
eip = address
294+
break
295+
296+
if not eip:
297+
print >> stderr, "No available EIPs: not assigning!"
298+
raise ValueError()
299+
300+
public_ip = eip.public_ip
301+
domain = ip_to_domain[public_ip]
302+
print "Associating master with IP address %s (%s)..." % (public_ip, domain)
303+
conn.associate_address(instance_id=master_nodes[0].id, public_ip=public_ip)
304+
master_nodes[0].public_dns_name = ("ec2-%s.compute-1.amazonaws.com"
305+
% public_ip.replace('.', '-'))
306+
307+
# TODO: add domain to variable templates so node can figure out domain name
308+
return (public_ip, domain)
309+
310+
def deploy_ssl_cert(opts, master_nodes):
311+
if os.path.exists(opts.ssl_cert_file) and os.path.exists(opts.ssl_key_file):
312+
print "SSL credentials found: deploying to master..."
313+
314+
# rsync the cert and key files
315+
master_loc = '%s@%s:' % (opts.user, master_nodes[0].public_dns_name)
316+
file_dest = master_loc + '/root/spark-ec2/sampleclean/'
317+
base_command = [
318+
'rsync', '-v', '-e', stringify_command(ssh_command(opts)),
319+
]
320+
subprocess.check_call(base_command + [opts.ssl_cert_file, file_dest])
321+
subprocess.check_call(base_command + [opts.ssl_key_file, file_dest])
322+
else:
323+
print >> stderr, "No SSL credentials found: not deploying..."
272324

273325
# Launch a cluster of the given name, by setting up its security groups,
274326
# and then starting new instances in them.
@@ -306,6 +358,8 @@ def launch_cluster(conn, opts, cluster_name):
306358

307359
# sampleclean crowd server
308360
master_group.authorize('tcp', 8000, 8000, '0.0.0.0/0')
361+
master_group.authorize('tcp', 443, 443, '0.0.0.0/0')
362+
master_group.authorize('tcp', 80, 80, '0.0.0.0/0')
309363

310364
# sampleclean web server
311365
master_group.authorize('tcp', 8082, 8082, '0.0.0.0/0')
@@ -513,7 +567,12 @@ def get_existing_cluster(conn, opts, cluster_name, die_on_error=True):
513567
# Deploy configuration files and run setup scripts on a newly launched
514568
# or started EC2 cluster.
515569
def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
570+
try:
571+
master_ip, master_domain = assign_elastic_ip(conn, master_nodes)
572+
except ValueError: # no available domain, just use the default
573+
master_ip = master_domain = None
516574
master = master_nodes[0].public_dns_name
575+
517576
if deploy_ssh_key:
518577
print "Generating cluster's SSH key on master..."
519578
key_setup = """
@@ -539,10 +598,11 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
539598

540599
# NOTE: We should clone the repository before running deploy_files to
541600
# prevent ec2-variables.sh from being overwritten
542-
ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/thisisdhaas/spark-ec2.git -b sampleclean-ampcamp")
601+
ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/thisisdhaas/spark-ec2.git -b sampleclean")
543602

544603
print "Deploying files to master..."
545604
deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, modules)
605+
deploy_ssl_cert(opts, master_nodes)
546606

547607
print "Running setup on master..."
548608
setup_spark_cluster(master, opts)
@@ -710,7 +770,7 @@ def stringify_command(parts):
710770

711771

712772
def ssh_args(opts):
713-
parts = ['-o', 'StrictHostKeyChecking=no']
773+
parts = ['-o', 'StrictHostKeyChecking=no', '-o', 'UserKnownHostsFile=/dev/null']
714774
if opts.identity_file is not None:
715775
parts += ['-i', opts.identity_file]
716776
return parts
@@ -898,7 +958,12 @@ def real_main():
898958

899959
elif action == "get-master":
900960
(master_nodes, slave_nodes) = get_existing_cluster(conn, opts, cluster_name)
901-
print master_nodes[0].public_dns_name
961+
master = master_nodes[0]
962+
print master.public_dns_name or "No assigned IP"
963+
if master.ip_address in ip_to_domain:
964+
print "(" + ip_to_domain[master.ip_address] + ")"
965+
else:
966+
print "(No associated domain)"
902967

903968
elif action == "stop":
904969
response = raw_input(

run-all-ec2.sh

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ source /root/.bash_profile
1919
workon sampleclean
2020

2121
# Start the crowd server
22+
service nginx restart
2223
pushd src/main/python/crowd_server
2324
./run.sh $@
2425
popd

0 commit comments

Comments
 (0)