Skip to content

Commit

Permalink
fix(components): remove needless arguments from AWS EMR scripts (#4252)
Browse files Browse the repository at this point in the history
remove needless submit_pyspark_job arguments
  • Loading branch information
slenky committed Aug 10, 2020
1 parent 57bb7ac commit 262b288
Showing 1 changed file with 19 additions and 12 deletions.
31 changes: 19 additions & 12 deletions components/aws/emr/common/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,23 @@ def get_client(region=None):
client = boto3.client('emr', region_name=region)
return client

def create_cluster(client, cluster_name, log_s3_uri, release_label, instance_type, instance_count):
def create_cluster(client, cluster_name, log_s3_uri, release_label, instance_type, instance_count, ec2SubnetId=None, ec2KeyName=None):
"""Create a EMR cluster."""

instances = {
'MasterInstanceType': instance_type,
'SlaveInstanceType': instance_type,
'InstanceCount': instance_count,
'KeepJobFlowAliveWhenNoSteps':True,
'TerminationProtected':False
}

if ec2SubnetId is not None:
instances['Ec2SubnetId'] = ec2SubnetId

if ec2KeyName is not None:
instances['Ec2KeyName'] = ec2KeyName

response = client.run_job_flow(
Name=cluster_name,
LogUri=log_s3_uri,
Expand All @@ -45,14 +60,7 @@ def create_cluster(client, cluster_name, log_s3_uri, release_label, instance_typ
}
},
],
Instances= {
'MasterInstanceType': instance_type,
'SlaveInstanceType': instance_type,
'InstanceCount': instance_count,
'KeepJobFlowAliveWhenNoSteps':True,
'TerminationProtected':False,

},
Instances= instances,
VisibleToAllUsers=True,
JobFlowRole='EMR_EC2_DefaultRole',
ServiceRole='EMR_DefaultRole'
Expand Down Expand Up @@ -142,6 +150,5 @@ def wait_for_job(client, jobflow_id, step_id):
def submit_pyspark_job(client, jobflow_id, job_name, py_file, extra_args):
"""Submits single spark job to a running cluster"""

pyspark_args = ['spark-submit', py_file]
pyspark_args.extend(extra_args)
return submit_spark_job(client, jobflow_id, job_name, 'command-runner.jar', '', pyspark_args)
pyspark_args = [py_file, extra_args]
return submit_spark_job(client, jobflow_id, job_name, '', '', pyspark_args)

0 comments on commit 262b288

Please sign in to comment.