@@ -30,9 +30,12 @@ use std::task::{Context, Poll};
3030
3131use arrow:: array:: ArrayData ;
3232use arrow:: datatypes:: { Schema , SchemaRef } ;
33+ use arrow:: ipc:: writer:: IpcWriteOptions ;
3334use arrow:: ipc:: { reader:: StreamReader , writer:: StreamWriter } ;
35+ use arrow:: ipc:: { CompressionType , MetadataVersion } ;
3436use arrow:: record_batch:: RecordBatch ;
3537
38+ use datafusion_common:: config:: SpillCompression ;
3639use datafusion_common:: { exec_datafusion_err, DataFusionError , HashSet , Result } ;
3740use datafusion_common_runtime:: SpawnedTask ;
3841use datafusion_execution:: disk_manager:: RefCountedTempFile ;
@@ -194,7 +197,8 @@ pub fn spill_record_batch_by_size(
194197) -> Result < ( ) > {
195198 let mut offset = 0 ;
196199 let total_rows = batch. num_rows ( ) ;
197- let mut writer = IPCStreamWriter :: new ( & path, schema. as_ref ( ) ) ?;
200+ let mut writer =
201+ IPCStreamWriter :: new ( & path, schema. as_ref ( ) , SpillCompression :: Uncompressed ) ?;
198202
199203 while offset < total_rows {
200204 let length = std:: cmp:: min ( total_rows - offset, batch_size_rows) ;
@@ -292,15 +296,28 @@ struct IPCStreamWriter {
292296
293297impl IPCStreamWriter {
294298 /// Create new writer
295- pub fn new ( path : & Path , schema : & Schema ) -> Result < Self > {
299+ pub fn new (
300+ path : & Path ,
301+ schema : & Schema ,
302+ compression_type : SpillCompression ,
303+ ) -> Result < Self > {
296304 let file = File :: create ( path) . map_err ( |e| {
297305 exec_datafusion_err ! ( "Failed to create partition file at {path:?}: {e:?}" )
298306 } ) ?;
307+
308+ // TODO what should be default metadata version & alignment?
309+ let metadata_version = MetadataVersion :: V5 ;
310+ let alignment = 8 ;
311+ let mut write_options =
312+ IpcWriteOptions :: try_new ( alignment, false , metadata_version) ?;
313+ write_options = write_options. try_with_compression ( compression_type. into ( ) ) ?;
314+
315+ let writer = StreamWriter :: try_new_with_options ( file, schema, write_options) ?;
299316 Ok ( Self {
300317 num_batches : 0 ,
301318 num_rows : 0 ,
302319 num_bytes : 0 ,
303- writer : StreamWriter :: try_new ( file , schema ) ? ,
320+ writer,
304321 } )
305322 }
306323
@@ -362,7 +379,12 @@ mod tests {
362379 // Construct SpillManager
363380 let env = Arc :: new ( RuntimeEnv :: default ( ) ) ;
364381 let metrics = SpillMetrics :: new ( & ExecutionPlanMetricsSet :: new ( ) , 0 ) ;
365- let spill_manager = SpillManager :: new ( env, metrics, Arc :: clone ( & schema) ) ;
382+ let spill_manager = SpillManager :: new (
383+ env,
384+ metrics,
385+ Arc :: clone ( & schema) ,
386+ SpillCompression :: Uncompressed ,
387+ ) ;
366388
367389 let spill_file = spill_manager
368390 . spill_record_batch_and_finish ( & [ batch1, batch2] , "Test" ) ?
@@ -426,7 +448,12 @@ mod tests {
426448 // Construct SpillManager
427449 let env = Arc :: new ( RuntimeEnv :: default ( ) ) ;
428450 let metrics = SpillMetrics :: new ( & ExecutionPlanMetricsSet :: new ( ) , 0 ) ;
429- let spill_manager = SpillManager :: new ( env, metrics, Arc :: clone ( & dict_schema) ) ;
451+ let spill_manager = SpillManager :: new (
452+ env,
453+ metrics,
454+ Arc :: clone ( & dict_schema) ,
455+ SpillCompression :: Uncompressed ,
456+ ) ;
430457
431458 let num_rows = batch1. num_rows ( ) + batch2. num_rows ( ) ;
432459 let spill_file = spill_manager
@@ -454,7 +481,12 @@ mod tests {
454481 let schema = batch1. schema ( ) ;
455482 let env = Arc :: new ( RuntimeEnv :: default ( ) ) ;
456483 let metrics = SpillMetrics :: new ( & ExecutionPlanMetricsSet :: new ( ) , 0 ) ;
457- let spill_manager = SpillManager :: new ( env, metrics, Arc :: clone ( & schema) ) ;
484+ let spill_manager = SpillManager :: new (
485+ env,
486+ metrics,
487+ Arc :: clone ( & schema) ,
488+ SpillCompression :: Uncompressed ,
489+ ) ;
458490
459491 let spill_file = spill_manager
460492 . spill_record_batch_by_size ( & batch1, "Test Spill" , 1 ) ?
@@ -608,7 +640,12 @@ mod tests {
608640 Field :: new( "b" , DataType :: Utf8 , false ) ,
609641 ] ) ) ;
610642
611- let spill_manager = SpillManager :: new ( env, metrics, Arc :: clone ( & schema) ) ;
643+ let spill_manager = SpillManager :: new (
644+ env,
645+ metrics,
646+ Arc :: clone ( & schema) ,
647+ SpillCompression :: Uncompressed ,
648+ ) ;
612649
613650 let batch = RecordBatch :: try_new (
614651 schema,
@@ -665,8 +702,12 @@ mod tests {
665702 Field :: new( "b" , DataType :: Utf8 , false ) ,
666703 ] ) ) ;
667704
668- let spill_manager =
669- Arc :: new ( SpillManager :: new ( env, metrics, Arc :: clone ( & schema) ) ) ;
705+ let spill_manager = Arc :: new ( SpillManager :: new (
706+ env,
707+ metrics,
708+ Arc :: clone ( & schema) ,
709+ SpillCompression :: Uncompressed ,
710+ ) ) ;
670711 let mut in_progress_file = spill_manager. create_in_progress_file ( "Test" ) ?;
671712
672713 let batch1 = RecordBatch :: try_new (
@@ -712,8 +753,12 @@ mod tests {
712753 Field :: new( "b" , DataType :: Utf8 , false ) ,
713754 ] ) ) ;
714755
715- let spill_manager =
716- Arc :: new ( SpillManager :: new ( env, metrics, Arc :: clone ( & schema) ) ) ;
756+ let spill_manager = Arc :: new ( SpillManager :: new (
757+ env,
758+ metrics,
759+ Arc :: clone ( & schema) ,
760+ SpillCompression :: Uncompressed ,
761+ ) ) ;
717762
718763 // Test write empty batch with interface `InProgressSpillFile` and `append_batch()`
719764 let mut in_progress_file = spill_manager. create_in_progress_file ( "Test" ) ?;
@@ -758,7 +803,12 @@ mod tests {
758803 // Construct SpillManager
759804 let env = Arc :: new ( RuntimeEnv :: default ( ) ) ;
760805 let metrics = SpillMetrics :: new ( & ExecutionPlanMetricsSet :: new ( ) , 0 ) ;
761- let spill_manager = SpillManager :: new ( env, metrics, Arc :: clone ( & schema) ) ;
806+ let spill_manager = SpillManager :: new (
807+ env,
808+ metrics,
809+ Arc :: clone ( & schema) ,
810+ SpillCompression :: Uncompressed ,
811+ ) ;
762812 let batches: [ _ ; 10 ] = std:: array:: from_fn ( |_| batch. clone ( ) ) ;
763813
764814 let spill_file_1 = spill_manager
0 commit comments