# A URL prefix from which to fetch AMI information
AMI_PREFIX = "https://raw.github.com/mesos/spark-ec2/v2/ami-list"

+# A mapping from AWS Elastic IPs to the sampleclean domains registered for them
+ip_to_domain = {
+    '23.23.183.119': 'sampleclean1.eecs.berkeley.edu',
+    '23.23.176.121': 'sampleclean2.eecs.berkeley.edu',
+    '23.23.183.60': 'sampleclean3.eecs.berkeley.edu',
+}

class UsageError(Exception):
    pass
@@ -141,7 +147,12 @@ def parse_args():
    parser.add_option(
        "--user-data", type="string", default="",
        help="Path to a user-data file (most AMIs interpret this as an initialization script)")
-
+    parser.add_option(
+        "--ssl-cert-file", type="string", default="",
+        help="Path to the chained SSL certificate for setting up HTTPS")
+    parser.add_option(
+        "--ssl-key-file", type="string", default="",
+        help="Path to the SSL key file for setting up HTTPS")

    (opts, args) = parser.parse_args()
    if len(args) != 2:
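For reference, a launch invocation exercising the new options might look like the sketch below; the key pair, identity file, certificate paths, and cluster name are placeholders, and the script is assumed to be invoked as spark_ec2.py:

    ./spark_ec2.py -k my-keypair -i ~/.ssh/my-keypair.pem \
        --ssl-cert-file=/path/to/chained-cert.pem \
        --ssl-key-file=/path/to/ssl-key.pem \
        launch my-cluster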
@@ -269,6 +280,47 @@ def get_spark_ami(opts):
    return ami

+
+# Associate the master node with an Elastic IP (and domain name) for SSL support
+def assign_elastic_ip(conn, master_nodes):
+    if len(master_nodes) != 1:
+        print >> stderr, "More than one master: not assigning EIP!"
+        raise ValueError()
+
+    all_addresses = conn.get_all_addresses()  # every Elastic IP in the account
+    eip = None
+    for address in all_addresses:
+        # Take the first EIP that is unattached and has a registered domain
+        if not address.instance_id and address.public_ip in ip_to_domain:
+            eip = address
+            break
+
+    if not eip:
+        print >> stderr, "No available EIPs: not assigning!"
+        raise ValueError()
+
+    public_ip = eip.public_ip
+    domain = ip_to_domain[public_ip]
+    print "Associating master with IP address %s (%s)..." % (public_ip, domain)
+    conn.associate_address(instance_id=master_nodes[0].id, public_ip=public_ip)
+    master_nodes[0].public_dns_name = ("ec2-%s.compute-1.amazonaws.com"
+                                       % public_ip.replace('.', '-'))
+
+    # TODO: add domain to variable templates so the node can figure out its domain name
+    return (public_ip, domain)
+
+def deploy_ssl_cert(opts, master_nodes):
+    if os.path.exists(opts.ssl_cert_file) and os.path.exists(opts.ssl_key_file):
+        print "SSL credentials found: deploying to master..."
+
+        # rsync the cert and key files to the master
+        master_loc = '%s@%s:' % (opts.user, master_nodes[0].public_dns_name)
+        file_dest = master_loc + '/root/spark-ec2/sampleclean/'
+        base_command = [
+            'rsync', '-v', '-e', stringify_command(ssh_command(opts)),
+        ]
+        subprocess.check_call(base_command + [opts.ssl_cert_file, file_dest])
+        subprocess.check_call(base_command + [opts.ssl_key_file, file_dest])
+    else:
+        print >> stderr, "No SSL credentials found: not deploying..."

# Launch a cluster of the given name, by setting up its security groups,
# and then starting new instances in them.
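The EIP scan above relies on boto's EC2 address APIs (get_all_addresses, associate_address). As a minimal standalone sketch of the same selection logic — the region and connection setup are assumptions, since the launch script builds conn elsewhere from the usual AWS credentials, and ip_to_domain is the mapping defined at the top of this diff:

    import boto.ec2

    # Sketch: pick the first Elastic IP that is unattached and has a
    # registered sampleclean domain, as assign_elastic_ip does above.
    conn = boto.ec2.connect_to_region('us-east-1')
    free = [a for a in conn.get_all_addresses()
            if not a.instance_id and a.public_ip in ip_to_domain]
    if free:
        print "Would assign %s (%s)" % (free[0].public_ip, ip_to_domain[free[0].public_ip])
    else:
        print "No EIP with a registered domain is free"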
@@ -306,6 +358,8 @@ def launch_cluster(conn, opts, cluster_name):

    # sampleclean crowd server
    master_group.authorize('tcp', 8000, 8000, '0.0.0.0/0')
+    master_group.authorize('tcp', 443, 443, '0.0.0.0/0')  # HTTPS
+    master_group.authorize('tcp', 80, 80, '0.0.0.0/0')    # HTTP

    # sampleclean web server
    master_group.authorize('tcp', 8082, 8082, '0.0.0.0/0')
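Opening 443 lets the master terminate HTTPS with the certificate deployed by deploy_ssl_cert; port 80 is presumably opened as well so plain-HTTP requests can still be answered (e.g., redirected to HTTPS) rather than silently dropped.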
@@ -513,7 +567,12 @@ def get_existing_cluster(conn, opts, cluster_name, die_on_error=True):
# Deploy configuration files and run setup scripts on a newly launched
# or started EC2 cluster.
def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
+    try:
+        master_ip, master_domain = assign_elastic_ip(conn, master_nodes)
+    except ValueError:  # no available domain, just use the default
+        master_ip = master_domain = None
    master = master_nodes[0].public_dns_name
+
    if deploy_ssh_key:
        print "Generating cluster's SSH key on master..."
        key_setup = """
@@ -539,10 +598,11 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):

    # NOTE: We should clone the repository before running deploy_files to
    # prevent ec2-variables.sh from being overwritten
-    ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/thisisdhaas/spark-ec2.git -b sampleclean-ampcamp")
+    ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/thisisdhaas/spark-ec2.git -b sampleclean")

    print "Deploying files to master..."
    deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, modules)
+    deploy_ssl_cert(opts, master_nodes)

    print "Running setup on master..."
    setup_spark_cluster(master, opts)
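Note that the failure modes here are deliberately soft: if assign_elastic_ip raises ValueError (no free EIP, or none with a registered domain), the cluster simply keeps its default public address, and deploy_ssl_cert skips deployment when either credential file is missing, so a launch without SSL support still succeeds.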
@@ -710,7 +770,7 @@ def stringify_command(parts):

def ssh_args(opts):
-    parts = ['-o', 'StrictHostKeyChecking=no']
+    parts = ['-o', 'StrictHostKeyChecking=no', '-o', 'UserKnownHostsFile=/dev/null']
    if opts.identity_file is not None:
        parts += ['-i', opts.identity_file]
    return parts
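Sending known hosts to /dev/null complements StrictHostKeyChecking=no: Elastic IPs and their ec2-*.compute-1.amazonaws.com names are reused across cluster launches, so a stale ~/.ssh/known_hosts entry would otherwise abort ssh with a host-key mismatch. A quick sketch of the flags the function now produces (the opts stand-in and key path are placeholders):

    class FakeOpts(object):
        identity_file = '/path/to/key.pem'  # placeholder path

    print ' '.join(['ssh'] + ssh_args(FakeOpts()))
    # ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /path/to/key.pem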
@@ -898,7 +958,12 @@ def real_main():

    elif action == "get-master":
        (master_nodes, slave_nodes) = get_existing_cluster(conn, opts, cluster_name)
-        print master_nodes[0].public_dns_name
+        master = master_nodes[0]
+        print master.public_dns_name or "No assigned IP"
+        if master.ip_address in ip_to_domain:
+            print "(" + ip_to_domain[master.ip_address] + ")"
+        else:
+            print "(No associated domain)"

    elif action == "stop":
        response = raw_input(
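With the domain lookup in place, get-master output for a master holding one of the registered EIPs would look roughly like this (illustrative, using the DNS pattern from assign_elastic_ip):

    ec2-23-23-183-119.compute-1.amazonaws.com
    (sampleclean1.eecs.berkeley.edu)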