Skip to content

Commit 4877315

Browse files
authored
Merge pull request #3427 from tswast/bq-3416
Loading AVRO files from local filesystem.
2 parents d833259 + 7945194 commit 4877315

File tree

3 files changed

+49
-3
lines changed

3 files changed

+49
-3
lines changed

bigquery/google/cloud/bigquery/table.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -917,9 +917,6 @@ def upload_from_file(self,
917917
'configuration': {
918918
'load': {
919919
'sourceFormat': source_format,
920-
'schema': {
921-
'fields': _build_schema_resource(self._schema),
922-
},
923920
'destinationTable': {
924921
'projectId': self._dataset.project,
925922
'datasetId': self._dataset.name,
@@ -929,6 +926,12 @@ def upload_from_file(self,
929926
}
930927
}
931928

929+
if len(self._schema) > 0:
930+
load_config = metadata['configuration']['load']
931+
load_config['schema'] = {
932+
'fields': _build_schema_resource(self._schema)
933+
}
934+
932935
_configure_job_metadata(metadata, allow_jagged_rows,
933936
allow_quoted_newlines, create_disposition,
934937
encoding, field_delimiter,

bigquery/tests/data/colors.avro

308 Bytes
Binary file not shown.

bigquery/tests/system.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,49 @@ def _job_done(instance):
390390
self.assertEqual(sorted(rows, key=by_age),
391391
sorted(ROWS, key=by_age))
392392

393+
def test_load_table_from_local_avro_file_then_dump_table(self):
    """Upload a local AVRO file into a new table, then read it back.

    End-to-end check that a schema-less local AVRO load job succeeds
    and that the loaded rows round-trip through a table fetch.
    """
    TABLE_NAME = 'test_table_avro'
    # (color name, wavelength in nm) — expected table contents.
    ROWS = [
        ("violet", 400),
        ("indigo", 445),
        ("blue", 475),
        ("green", 510),
        ("yellow", 570),
        ("orange", 590),
        ("red", 650)]

    dataset = Config.CLIENT.dataset(
        _make_dataset_name('load_local_then_dump'))
    # Dataset creation can hit transient 403s; retry, then schedule cleanup.
    retry_403(dataset.create)()
    self.to_delete.append(dataset)

    table = dataset.table(TABLE_NAME)
    # Insert at the front so the table is deleted before its dataset.
    self.to_delete.insert(0, table)

    avro_path = os.path.join(WHERE, 'data', 'colors.avro')
    with open(avro_path, 'rb') as source_file:
        job = table.upload_from_file(
            source_file,
            source_format='AVRO',
            write_disposition='WRITE_TRUNCATE'
        )

    def _job_done(instance):
        # Load jobs finish asynchronously; done when state reports so.
        return instance.state.lower() == 'done'

    # Poll the job until it reports completion.
    poll = RetryInstanceState(_job_done, max_tries=8)
    poll(job.reload)()

    self.assertEqual(job.output_rows, len(ROWS))

    # Reload table to get the schema before fetching the rows.
    table.reload()
    fetched = self._fetch_single_page(table)
    sort_key = operator.itemgetter(1)
    self.assertEqual(sorted(fetched, key=sort_key),
                     sorted(ROWS, key=sort_key))
435+
393436
def test_load_table_from_storage_then_dump_table(self):
394437
import csv
395438
from google.cloud._testing import _NamedTemporaryFile

0 commit comments

Comments
 (0)