diff --git a/dataproc/quickstart/quickstart.py b/dataproc/quickstart/quickstart.py index fcbda8827d3b..f5709e54547b 100644 --- a/dataproc/quickstart/quickstart.py +++ b/dataproc/quickstart/quickstart.py @@ -15,6 +15,17 @@ # limitations under the License. # [START dataproc_quickstart] +"""This quickstart sample walks a user through creating a Cloud Dataproc + cluster, submitting a PySpark job from Google Cloud Storage to the + cluster, reading the output of the job and deleting the cluster, all + using the Python client library. + + Usage: + python3 quickstart.py --project_id PROJECT_ID --region REGION \ + --cluster_name CLUSTER_NAME --job_file_path GCS_JOB_FILE_PATH +""" + +import argparse import time from google.cloud import dataproc_v1 as dataproc @@ -22,18 +33,6 @@ def quickstart(project_id, region, cluster_name, job_file_path): - """This quickstart sample walks a user through creating a Cloud Dataproc - cluster, submitting a PySpark job from Google Cloud Storage to the - cluster, reading the output of the job and deleting the cluster, all - using the Python client library. - - Args: - project_id (string): Project to use for creating resources. - region (string): Region where the resources should live. - cluster_name (string): Name to use for creating a cluster. - job_file_path (string): Job in GCS to execute against the cluster. - """ - - # Create the cluster client.
cluster_client = dataproc.ClusterControllerClient(client_options={ 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region) @@ -125,4 +124,24 @@ def quickstart(project_id, region, cluster_name, job_file_path): operation.result() print('Cluster {} successfully deleted.'.format(cluster_name)) - # [END dataproc_quickstart] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + # Flags are required: an omitted one would reach quickstart() as None. + parser.add_argument('--project_id', type=str, required=True, + help='Project to use for creating resources.') + parser.add_argument('--region', type=str, required=True, + help='Region where the resources should live.') + parser.add_argument('--cluster_name', type=str, required=True, + help='Name to use for creating a cluster') + parser.add_argument('--job_file_path', type=str, required=True, + help='Job in GCS to execute against the cluster.') + + args = parser.parse_args() + quickstart(args.project_id, args.region, + args.cluster_name, args.job_file_path) +# [END dataproc_quickstart]