 //! partition is re-partitioned and streamed to disk in Arrow IPC format. Future stages of the query
 //! will use the ShuffleReaderExec to read these results.
 
+use std::fs::File;
 use std::iter::Iterator;
 use std::path::PathBuf;
 use std::sync::{Arc, Mutex};
@@ -43,11 +44,11 @@ use datafusion::arrow::record_batch::RecordBatch;
 use datafusion::error::{DataFusionError, Result};
 use datafusion::physical_plan::hash_join::create_hashes;
 use datafusion::physical_plan::{
-    DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream,
+    DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, SQLMetric,
 };
 use futures::StreamExt;
+use hashbrown::HashMap;
 use log::info;
-use std::fs::File;
 use uuid::Uuid;
 
 /// ShuffleWriterExec represents a section of a query plan that has consistent partitioning and
@@ -66,6 +67,22 @@ pub struct ShuffleWriterExec {
     work_dir: String,
     /// Optional shuffle output partitioning
     shuffle_output_partitioning: Option<Partitioning>,
+    /// Shuffle write metrics
+    metrics: ShuffleWriteMetrics,
+}
+
+#[derive(Debug, Clone)]
+struct ShuffleWriteMetrics {
+    /// Time spent writing batches to shuffle files
+    write_time: Arc<SQLMetric>,
+}
+
+impl ShuffleWriteMetrics {
+    fn new() -> Self {
+        Self {
+            write_time: SQLMetric::time_nanos(),
+        }
+    }
 }
 
 impl ShuffleWriterExec {
@@ -83,6 +100,7 @@ impl ShuffleWriterExec {
             plan,
             work_dir,
             shuffle_output_partitioning,
+            metrics: ShuffleWriteMetrics::new(),
         })
     }
 
@@ -150,12 +168,16 @@ impl ExecutionPlan for ShuffleWriterExec {
                 info!("Writing results to {}", path);
 
                 // stream results to disk
-                let stats = utils::write_stream_to_disk(&mut stream, path)
-                    .await
-                    .map_err(|e| DataFusionError::Execution(format!("{:?}", e)))?;
+                let stats = utils::write_stream_to_disk(
+                    &mut stream,
+                    path,
+                    self.metrics.write_time.clone(),
+                )
+                .await
+                .map_err(|e| DataFusionError::Execution(format!("{:?}", e)))?;
 
                 info!(
-                    "Executed partition {} in {} seconds. Statistics: {:?}",
+                    "Executed partition {} in {} seconds. Statistics: {}",
                     partition,
                     now.elapsed().as_secs(),
                     stats
@@ -231,6 +253,7 @@ impl ExecutionPlan for ShuffleWriterExec {
                             RecordBatch::try_new(input_batch.schema(), columns)?;
 
                         // write batch out
+                        let start = Instant::now();
                         match &mut writers[num_output_partition] {
                             Some(w) => {
                                 w.write(&output_batch)?;
@@ -251,6 +274,7 @@ impl ExecutionPlan for ShuffleWriterExec {
                                 writers[num_output_partition] = Some(writer);
                             }
                         }
+                        self.metrics.write_time.add_elapsed(start);
                     }
                 }
 
@@ -310,6 +334,12 @@ impl ExecutionPlan for ShuffleWriterExec {
         }
     }
 
+    fn metrics(&self) -> HashMap<String, SQLMetric> {
+        let mut metrics = HashMap::new();
+        metrics.insert("writeTime".to_owned(), (*self.metrics.write_time).clone());
+        metrics
+    }
+
     fn fmt_as(
         &self,
         t: DisplayFormatType,
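
The pattern this patch uses is simple: create a shared `SQLMetric` once, fold elapsed time into it around each write, and export it under a string key from `metrics()`. Below is a minimal standalone sketch of that pattern — not part of this commit — assuming the same datafusion API surface the diff itself relies on (`SQLMetric::time_nanos`, `SQLMetric::add_elapsed`); the `WriterMetrics` struct and the sleep stand-in for the IPC write are illustrative only.

```rust
// Minimal sketch of the write-time metric pattern shown in the diff above.
// Assumes the datafusion version used by this patch, which exposes
// `SQLMetric::time_nanos()` (returning Arc<SQLMetric>) and `add_elapsed()`.
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Instant;

use datafusion::physical_plan::SQLMetric;

struct WriterMetrics {
    // Accumulates nanoseconds spent in write calls, like ShuffleWriteMetrics above.
    write_time: Arc<SQLMetric>,
}

fn main() {
    let metrics = WriterMetrics {
        write_time: SQLMetric::time_nanos(),
    };

    // Time a unit of "write" work and fold the elapsed time into the metric,
    // mirroring the instrumentation around the IPC writer in the diff.
    let start = Instant::now();
    std::thread::sleep(std::time::Duration::from_millis(5)); // stand-in for w.write(&batch)
    metrics.write_time.add_elapsed(start);

    // Expose the metric under a string key, as ShuffleWriterExec::metrics() does.
    let mut exported: HashMap<String, SQLMetric> = HashMap::new();
    exported.insert("writeTime".to_owned(), (*metrics.write_time).clone());
    println!("{:?}", exported.get("writeTime"));
}
```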