@@ -418,7 +418,8 @@ class WriteQueue {
418418
419419 ARROW_ASSIGN_OR_RAISE (
420420 writer_, write_options.format ()->MakeWriter (std::move (destination), schema_,
421- write_options.file_write_options ));
421+ write_options.file_write_options ,
422+ {write_options.filesystem , path}));
422423 return Status::OK ();
423424 }
424425
@@ -445,15 +446,15 @@ struct WriteState {
445446 std::unordered_map<std::string, std::unique_ptr<WriteQueue>> queues;
446447};
447448
448- Status WriteNextBatch (WriteState& state, const std::shared_ptr<Fragment>& fragment,
449+ Status WriteNextBatch (WriteState* state, const std::shared_ptr<Fragment>& fragment,
449450 std::shared_ptr<RecordBatch> batch) {
450- ARROW_ASSIGN_OR_RAISE (auto groups, state. write_options .partitioning ->Partition (batch));
451+ ARROW_ASSIGN_OR_RAISE (auto groups, state-> write_options .partitioning ->Partition (batch));
451452 batch.reset (); // drop to hopefully conserve memory
452453
453- if (groups.batches .size () > static_cast <size_t >(state. write_options .max_partitions )) {
454+ if (groups.batches .size () > static_cast <size_t >(state-> write_options .max_partitions )) {
454455 return Status::Invalid (" Fragment would be written into " , groups.batches .size (),
455456 " partitions. This exceeds the maximum of " ,
456- state. write_options .max_partitions );
457+ state-> write_options .max_partitions );
457458 }
458459
459460 std::unordered_set<WriteQueue*> need_flushed;
@@ -462,20 +463,20 @@ Status WriteNextBatch(WriteState& state, const std::shared_ptr<Fragment>& fragme
462463 and_ (std::move (groups.expressions [i]), fragment->partition_expression ());
463464 auto batch = std::move (groups.batches [i]);
464465
465- ARROW_ASSIGN_OR_RAISE (auto part,
466- state. write_options .partitioning ->Format (partition_expression));
466+ ARROW_ASSIGN_OR_RAISE (
467+ auto part, state-> write_options .partitioning ->Format (partition_expression));
467468
468469 WriteQueue* queue;
469470 {
470471 // lookup the queue to which batch should be appended
471- auto queues_lock = state. mutex .Lock ();
472+ auto queues_lock = state-> mutex .Lock ();
472473
473474 queue = internal::GetOrInsertGenerated (
474- &state. queues , std::move (part),
475+ &state-> queues , std::move (part),
475476 [&](const std::string& emplaced_part) {
476477 // lookup in `queues` also failed,
477478 // generate a new WriteQueue
478- size_t queue_index = state. queues .size () - 1 ;
479+ size_t queue_index = state-> queues .size () - 1 ;
479480
480481 return internal::make_unique<WriteQueue>(emplaced_part, queue_index,
481482 batch->schema ());
@@ -489,12 +490,12 @@ Status WriteNextBatch(WriteState& state, const std::shared_ptr<Fragment>& fragme
489490
490491 // flush all touched WriteQueues
491492 for (auto queue : need_flushed) {
492- RETURN_NOT_OK (queue->Flush (state. write_options ));
493+ RETURN_NOT_OK (queue->Flush (state-> write_options ));
493494 }
494495 return Status::OK ();
495496}
496497
497- Status WriteInternal (const ScanOptions& scan_options, WriteState& state,
498+ Status WriteInternal (const ScanOptions& scan_options, WriteState* state,
498499 ScanTaskVector scan_tasks) {
499500 // Store a mapping from partitions (represented by their formatted partition expressions)
500501 // to a WriteQueue which flushes batches into that partition's output file. In principle
@@ -544,7 +545,7 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio
544545#pragma warning(disable : 4996)
545546#endif
546547
547- // TODO: (ARROW-11782/ARROW-12288) Remove calls to Scan()
548+ // TODO(ARROW-11782/ARROW-12288) Remove calls to Scan()
548549 ARROW_ASSIGN_OR_RAISE (auto scan_task_it, scanner->Scan ());
549550 ARROW_ASSIGN_OR_RAISE (ScanTaskVector scan_tasks, scan_task_it.ToVector ());
550551
@@ -555,11 +556,14 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio
555556#endif
556557
557558 WriteState state (write_options);
558- RETURN_NOT_OK (WriteInternal (*scanner->options (), state, std::move (scan_tasks)));
559+ RETURN_NOT_OK (WriteInternal (*scanner->options (), & state, std::move (scan_tasks)));
559560
560561 auto task_group = scanner->options ()->TaskGroup ();
561562 for (const auto & part_queue : state.queues ) {
562- task_group->Append ([&] { return part_queue.second ->writer ()->Finish (); });
563+ task_group->Append ([&] {
564+ RETURN_NOT_OK (write_options.writer_pre_finish (part_queue.second ->writer ().get ()));
565+ return part_queue.second ->writer ()->Finish ();
566+ });
563567 }
564568 return task_group->Finish ();
565569}
0 commit comments