49 | 49 | import google.cloud.bigquery as bigquery |
50 | 50 | import google.cloud.bigquery.table |
51 | 51 | from google.cloud.bigquery_storage_v1 import types as bq_storage_types |
| 52 | +from google.cloud.bigquery_storage_v1 import writer as bq_storage_writer |
52 | 53 | import pandas |
53 | 54 | import pyarrow as pa |
54 | 55 |
@@ -488,31 +489,29 @@ def stream_worker(work: Iterator[pa.RecordBatch]) -> str: |
488 | 489 | stream = self._write_client.create_write_stream( |
489 | 490 | parent=parent, write_stream=requested_stream |
490 | 491 | ) |
491 | | - stream_name = stream.name |
492 | | - |
493 | | - def request_generator(): |
494 | | - current_offset = 0 |
495 | | - for batch in work: |
496 | | - request = bq_storage_types.AppendRowsRequest( |
497 | | - write_stream=stream.name, offset=current_offset |
498 | | - ) |
| 492 | + base_request = bq_storage_types.AppendRowsRequest( |
| 493 | + write_stream=stream.name, |
| 494 | + ) |
| 495 | + base_request.arrow_rows.writer_schema.serialized_schema = serialized_schema |
499 | 496 |
500 | | - request.arrow_rows.writer_schema.serialized_schema = ( |
501 | | - serialized_schema |
502 | | - ) |
503 | | - request.arrow_rows.rows.serialized_record_batch = ( |
504 | | - batch.serialize().to_pybytes() |
505 | | - ) |
| 497 | + stream_manager = bq_storage_writer.AppendRowsStream( |
| 498 | + client=self._write_client, initial_request_template=base_request |
| 499 | + ) |
| 500 | + stream_name = stream.name |
| 501 | + current_offset = 0 |
| 502 | + futures: list[bq_storage_writer.AppendRowsFuture] = [] |
| 503 | + for batch in work: |
| 504 | + request = bq_storage_types.AppendRowsRequest(offset=current_offset) |
| 505 | + request.arrow_rows.rows.serialized_record_batch = ( |
| 506 | + batch.serialize().to_pybytes() |
| 507 | + ) |
506 | 508 |
507 | | - yield request |
508 | | - current_offset += batch.num_rows |
| 509 | + futures.append(stream_manager.send(request)) |
| 510 | + current_offset += batch.num_rows |
| 511 | + for future in futures: |
| 512 | + future.result() |
509 | 513 |
510 | | - responses = self._write_client.append_rows(requests=request_generator()) |
511 | | - for resp in responses: |
512 | | - if resp.row_errors: |
513 | | - raise ValueError( |
514 | | - f"Errors in stream {stream_name}: {resp.row_errors}" |
515 | | - ) |
| 514 | + stream_manager.close() |
516 | 515 | self._write_client.finalize_write_stream(name=stream_name) |
517 | 516 | return stream_name |
518 | 517 |
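For context, a minimal self-contained sketch of the write path the new code switches to: a single `AppendRowsRequest` template carries the stream name and serialized Arrow schema, `AppendRowsStream.send()` queues one request per record batch, and the returned `AppendRowsFuture` objects are resolved before the connection is closed and the stream finalized. The helper name `stream_batches`, the `table_path` argument, and the COMMITTED stream type below are illustrative assumptions; only the `AppendRowsStream` usage itself mirrors the diff.

```python
# Illustrative sketch, not the actual method from this change: stream_batches,
# table_path, and the COMMITTED stream type are assumptions for demonstration.
from typing import Iterator

import pyarrow as pa
from google.cloud.bigquery_storage_v1 import BigQueryWriteClient
from google.cloud.bigquery_storage_v1 import types as bq_storage_types
from google.cloud.bigquery_storage_v1 import writer as bq_storage_writer


def stream_batches(
    write_client: BigQueryWriteClient,
    table_path: str,  # "projects/{project}/datasets/{dataset}/tables/{table}"
    schema: pa.Schema,
    work: Iterator[pa.RecordBatch],
) -> str:
    # Create an application-owned write stream on the destination table.
    # COMMITTED streams make rows visible as soon as each append succeeds.
    stream = write_client.create_write_stream(
        parent=table_path,
        write_stream=bq_storage_types.WriteStream(
            type_=bq_storage_types.WriteStream.Type.COMMITTED
        ),
    )

    # The request template holds the per-connection fields: the target stream
    # and the serialized Arrow schema that describes every batch that follows.
    base_request = bq_storage_types.AppendRowsRequest(write_stream=stream.name)
    base_request.arrow_rows.writer_schema.serialized_schema = (
        schema.serialize().to_pybytes()
    )

    stream_manager = bq_storage_writer.AppendRowsStream(
        client=write_client, initial_request_template=base_request
    )

    # Each send() returns an AppendRowsFuture; offsets are cumulative row counts.
    futures = []
    offset = 0
    for batch in work:
        request = bq_storage_types.AppendRowsRequest(offset=offset)
        request.arrow_rows.rows.serialized_record_batch = (
            batch.serialize().to_pybytes()
        )
        futures.append(stream_manager.send(request))
        offset += batch.num_rows

    # Resolving each future surfaces any append error before finalizing.
    for future in futures:
        future.result()

    stream_manager.close()
    write_client.finalize_write_stream(name=stream.name)
    return stream.name
```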