@@ -484,7 +484,7 @@ impl SessionContext {
484484 } ;
485485 let options = ListingOptions {
486486 format : file_format,
487- collect_stat : false ,
487+ collect_stat : self . copied_config ( ) . collect_statistics ,
488488 file_extension : file_extension. to_owned ( ) ,
489489 target_partitions : self . copied_config ( ) . target_partitions ,
490490 table_partition_cols : cmd. table_partition_cols . clone ( ) ,
@@ -1085,6 +1085,8 @@ pub const REPARTITION_AGGREGATIONS: &str = "repartition_aggregations";
10851085pub const REPARTITION_WINDOWS : & str = "repartition_windows" ;
10861086/// Session Configuration entry name for 'PARQUET_PRUNING'
10871087pub const PARQUET_PRUNING : & str = "parquet_pruning" ;
1088+ /// Session Configuration entry name for 'COLLECT_STATISTICS'; used as the string-map key under which the `collect_statistics` setting is stored and looked up
1089+ pub const COLLECT_STATISTICS : & str = "collect_statistics" ;
10881090
10891091/// Map that holds opaque objects indexed by their type.
10901092///
@@ -1142,6 +1144,8 @@ pub struct SessionConfig {
11421144 pub repartition_windows : bool ,
11431145 /// Should DataFusion parquet reader using the predicate to prune data
11441146 pub parquet_pruning : bool ,
1147+ /// Should DataFusion collect statistics after listing files
1148+ pub collect_statistics : bool ,
11451149 /// Configuration options
11461150 pub config_options : Arc < RwLock < ConfigOptions > > ,
11471151 /// Opaque extensions.
@@ -1160,6 +1164,7 @@ impl Default for SessionConfig {
11601164 repartition_aggregations : true ,
11611165 repartition_windows : true ,
11621166 parquet_pruning : true ,
1167+ collect_statistics : false ,
11631168 config_options : Arc :: new ( RwLock :: new ( ConfigOptions :: new ( ) ) ) ,
11641169 // Assume no extensions by default.
11651170 extensions : HashMap :: with_capacity_and_hasher (
@@ -1262,6 +1267,12 @@ impl SessionConfig {
12621267 self
12631268 }
12641269
1270+ /// Enables or disables the collection of statistics after listing files: stores `enabled` in the `collect_statistics` field and returns the updated config so calls can be chained
1271+ pub fn with_collect_statistics ( mut self , enabled : bool ) -> Self {
1272+ self . collect_statistics = enabled;
1273+ self
1274+ }
1275+
12651276 /// Get the currently configured batch size
12661277 pub fn batch_size ( & self ) -> usize {
12671278 self . config_options
@@ -1305,6 +1316,11 @@ impl SessionConfig {
13051316 PARQUET_PRUNING . to_owned ( ) ,
13061317 format ! ( "{}" , self . parquet_pruning) ,
13071318 ) ;
1319+ map. insert (
1320+ COLLECT_STATISTICS . to_owned ( ) ,
1321+ format ! ( "{}" , self . collect_statistics) ,
1322+ ) ;
1323+
13081324 map
13091325 }
13101326
@@ -1764,6 +1780,9 @@ impl TaskContext {
17641780 . with_parquet_pruning (
17651781 props. get ( PARQUET_PRUNING ) . unwrap ( ) . parse ( ) . unwrap ( ) ,
17661782 )
1783+ . with_collect_statistics (
1784+ props. get ( COLLECT_STATISTICS ) . unwrap ( ) . parse ( ) . unwrap ( ) ,
1785+ )
17671786 }
17681787 }
17691788 TaskProperties :: SessionConfig ( session_config) => session_config. clone ( ) ,
0 commit comments