@@ -34,14 +34,11 @@ use crate::utils;
3434use crate :: serde:: protobuf:: ShuffleWritePartition ;
3535use crate :: serde:: scheduler:: { PartitionLocation , PartitionStats } ;
3636use async_trait:: async_trait;
37- use datafusion:: arrow:: array:: {
38- Array , ArrayBuilder , ArrayRef , StringBuilder , StructBuilder , UInt32Builder ,
39- UInt64Builder ,
40- } ;
37+ use datafusion:: arrow:: array:: * ;
4138use datafusion:: arrow:: compute:: take;
4239use datafusion:: arrow:: datatypes:: { DataType , Field , Schema , SchemaRef } ;
43- use datafusion:: arrow:: ipc:: reader :: FileReader ;
44- use datafusion:: arrow:: ipc:: writer :: FileWriter ;
40+ use datafusion:: arrow:: io :: ipc:: read :: FileReader ;
41+ use datafusion:: arrow:: io :: ipc:: write :: FileWriter ;
4542use datafusion:: arrow:: record_batch:: RecordBatch ;
4643use datafusion:: error:: { DataFusionError , Result } ;
4744use datafusion:: physical_plan:: hash_utils:: create_hashes;
@@ -244,7 +241,7 @@ impl ShuffleWriterExec {
244241 . collect :: < Result < Vec < Arc < dyn Array > > > > ( ) ?;
245242
246243 let output_batch =
247- RecordBatch :: try_new ( input_batch. schema ( ) , columns) ?;
244+ RecordBatch :: try_new ( input_batch. schema ( ) . clone ( ) , columns) ?;
248245
249246 // write non-empty batch out
250247
@@ -356,18 +353,18 @@ impl ExecutionPlan for ShuffleWriterExec {
356353
357354 // build metadata result batch
358355 let num_writers = part_loc. len ( ) ;
359- let mut partition_builder = UInt32Builder :: new ( num_writers) ;
360- let mut path_builder = StringBuilder :: new ( num_writers) ;
361- let mut num_rows_builder = UInt64Builder :: new ( num_writers) ;
362- let mut num_batches_builder = UInt64Builder :: new ( num_writers) ;
363- let mut num_bytes_builder = UInt64Builder :: new ( num_writers) ;
356+ let mut partition_builder = UInt32Vec :: with_capacity ( num_writers) ;
357+ let mut path_builder = MutableUtf8Array :: with_capacity ( num_writers) ;
358+ let mut num_rows_builder = UInt64Vec :: with_capacity ( num_writers) ;
359+ let mut num_batches_builder = UInt64Vec :: with_capacity ( num_writers) ;
360+ let mut num_bytes_builder = UInt64Vec :: with_capacity ( num_writers) ;
364361
365362 for loc in & part_loc {
366- path_builder. append_value ( loc. path . clone ( ) ) ? ;
367- partition_builder. append_value ( loc. partition_id as u32 ) ? ;
368- num_rows_builder. append_value ( loc. num_rows ) ? ;
369- num_batches_builder. append_value ( loc. num_batches ) ? ;
370- num_bytes_builder. append_value ( loc. num_bytes ) ? ;
363+ path_builder. push ( Some ( loc. path . clone ( ) ) ) ;
364+ partition_builder. push ( Some ( loc. partition_id as u32 ) ) ;
365+ num_rows_builder. push ( Some ( loc. num_rows ) ) ;
366+ num_batches_builder. push ( Some ( loc. num_batches ) ) ;
367+ num_bytes_builder. push ( Some ( loc. num_bytes ) ) ;
371368 }
372369
373370 // build arrays
@@ -428,17 +425,17 @@ fn result_schema() -> SchemaRef {
428425 ] ) )
429426}
430427
431- struct ShuffleWriter {
428+ struct ShuffleWriter < ' a > {
432429 path : String ,
433- writer : FileWriter < File > ,
430+ writer : FileWriter < ' a , File > ,
434431 num_batches : u64 ,
435432 num_rows : u64 ,
436433 num_bytes : u64 ,
437434}
438435
439- impl ShuffleWriter {
436+ impl < ' a > ShuffleWriter < ' a > {
440437 fn new ( path : & str , schema : & Schema ) -> Result < Self > {
441- let file = File :: create ( path)
438+ let mut file = File :: create ( path)
442439 . map_err ( |e| {
443440 BallistaError :: General ( format ! (
444441 "Failed to create partition file at {}: {:?}" ,
@@ -451,7 +448,7 @@ impl ShuffleWriter {
451448 num_rows : 0 ,
452449 num_bytes : 0 ,
453450 path : path. to_owned ( ) ,
454- writer : FileWriter :: try_new ( file, schema) ?,
451+ writer : FileWriter :: try_new ( & mut file, schema) ?,
455452 } )
456453 }
457454
@@ -480,7 +477,7 @@ impl ShuffleWriter {
480477#[ cfg( test) ]
481478mod tests {
482479 use super :: * ;
483- use datafusion:: arrow:: array:: { StringArray , StructArray , UInt32Array , UInt64Array } ;
480+ use datafusion:: arrow:: array:: { Utf8Array , StructArray , UInt32Array , UInt64Array } ;
484481 use datafusion:: physical_plan:: coalesce_partitions:: CoalescePartitionsExec ;
485482 use datafusion:: physical_plan:: expressions:: Column ;
486483 use datafusion:: physical_plan:: limit:: GlobalLimitExec ;
0 commit comments