Skip to content

Commit 19d830f

Browse files
author
Jonathan Wayne Parrott
committed
Merge pull request #138 from hraban/bq-export-gzip-flag
Allow gzip exported bigquery CSV with -z flag
2 parents 281a551 + ff11f29 commit 19d830f

File tree

1 file changed

+16
-5
lines changed

1 file changed

+16
-5
lines changed

bigquery/api/export_data_to_cloud_storage.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@
3535
def export_table(bigquery, cloud_storage_path,
3636
project_id, dataset_id, table_id,
3737
export_format="CSV",
38-
num_retries=5):
38+
num_retries=5,
39+
compression="NONE"):
3940
"""
4041
Starts an export job
4142
@@ -47,6 +48,8 @@ def export_table(bigquery, cloud_storage_path,
4748
e.g. gs://mybucket/myfolder/
4849
export_format: format to export in;
4950
"CSV", "NEWLINE_DELIMITED_JSON", or "AVRO".
51+
compression: format to compress results with,
52+
"NONE" (default) or "GZIP".
5053
5154
Returns: an extract job resource representing the
5255
job, see https://cloud.google.com/bigquery/docs/reference/v2/jobs
@@ -66,7 +69,8 @@ def export_table(bigquery, cloud_storage_path,
6669
'tableId': table_id,
6770
},
6871
'destinationUris': [cloud_storage_path],
69-
'destinationFormat': export_format
72+
'destinationFormat': export_format,
73+
'compression': compression
7074
}
7175
}
7276
}
@@ -101,7 +105,7 @@ def poll_job(bigquery, job):
101105

102106
# [START run]
103107
def main(cloud_storage_path, project_id, dataset_id, table_id,
104-
num_retries, interval, export_format="CSV"):
108+
num_retries, interval, export_format="CSV", compression="NONE"):
105109
# [START build_service]
106110
# Grab the application's default credentials from the environment.
107111
credentials = GoogleCredentials.get_application_default()
@@ -117,7 +121,8 @@ def main(cloud_storage_path, project_id, dataset_id, table_id,
117121
dataset_id,
118122
table_id,
119123
num_retries=num_retries,
120-
export_format=export_format)
124+
export_format=export_format,
125+
compression=compression)
121126
poll_job(bigquery, job)
122127
# [END run]
123128

@@ -144,6 +149,11 @@ def main(cloud_storage_path, project_id, dataset_id, table_id,
144149
help='Number of times to retry in case of 500 error.',
145150
type=int,
146151
default=5)
152+
parser.add_argument(
153+
'-z', '--gzip',
154+
help='compress resultset with gzip',
155+
action='store_true',
156+
default=False)
147157

148158
args = parser.parse_args()
149159

@@ -153,5 +163,6 @@ def main(cloud_storage_path, project_id, dataset_id, table_id,
153163
args.dataset_id,
154164
args.table_id,
155165
args.num_retries,
156-
args.poll_interval)
166+
args.poll_interval,
167+
compression="GZIP" if args.gzip else "NONE")
157168
# [END main]

0 commit comments

Comments (0)