Skip to content

Commit 6e56897

Browse files
authored
Add setting for statistics collection (#3846)
* Add setting for statistics collection
* fmt
1 parent a0d6b2f commit 6e56897

File tree

1 file changed

+20
-1
lines changed

1 file changed

+20
-1
lines changed

datafusion/core/src/execution/context.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,7 @@ impl SessionContext {
484484
};
485485
let options = ListingOptions {
486486
format: file_format,
487-
collect_stat: false,
487+
collect_stat: self.copied_config().collect_statistics,
488488
file_extension: file_extension.to_owned(),
489489
target_partitions: self.copied_config().target_partitions,
490490
table_partition_cols: cmd.table_partition_cols.clone(),
@@ -1085,6 +1085,8 @@ pub const REPARTITION_AGGREGATIONS: &str = "repartition_aggregations";
10851085
pub const REPARTITION_WINDOWS: &str = "repartition_windows";
10861086
/// Session Configuration entry name for 'PARQUET_PRUNING'
10871087
pub const PARQUET_PRUNING: &str = "parquet_pruning";
1088+
/// Session Configuration entry name for 'COLLECT_STATISTICS'
1089+
pub const COLLECT_STATISTICS: &str = "collect_statistics";
10881090

10891091
/// Map that holds opaque objects indexed by their type.
10901092
///
@@ -1142,6 +1144,8 @@ pub struct SessionConfig {
11421144
pub repartition_windows: bool,
11431145
/// Should the DataFusion parquet reader use the predicate to prune data
11441146
pub parquet_pruning: bool,
1147+
/// Should DataFusion collect statistics after listing files
1148+
pub collect_statistics: bool,
11451149
/// Configuration options
11461150
pub config_options: Arc<RwLock<ConfigOptions>>,
11471151
/// Opaque extensions.
@@ -1160,6 +1164,7 @@ impl Default for SessionConfig {
11601164
repartition_aggregations: true,
11611165
repartition_windows: true,
11621166
parquet_pruning: true,
1167+
collect_statistics: false,
11631168
config_options: Arc::new(RwLock::new(ConfigOptions::new())),
11641169
// Assume no extensions by default.
11651170
extensions: HashMap::with_capacity_and_hasher(
@@ -1262,6 +1267,12 @@ impl SessionConfig {
12621267
self
12631268
}
12641269

1270+
/// Enables or disables the collection of statistics after listing files
1271+
pub fn with_collect_statistics(mut self, enabled: bool) -> Self {
1272+
self.collect_statistics = enabled;
1273+
self
1274+
}
1275+
12651276
/// Get the currently configured batch size
12661277
pub fn batch_size(&self) -> usize {
12671278
self.config_options
@@ -1305,6 +1316,11 @@ impl SessionConfig {
13051316
PARQUET_PRUNING.to_owned(),
13061317
format!("{}", self.parquet_pruning),
13071318
);
1319+
map.insert(
1320+
COLLECT_STATISTICS.to_owned(),
1321+
format!("{}", self.collect_statistics),
1322+
);
1323+
13081324
map
13091325
}
13101326

@@ -1764,6 +1780,9 @@ impl TaskContext {
17641780
.with_parquet_pruning(
17651781
props.get(PARQUET_PRUNING).unwrap().parse().unwrap(),
17661782
)
1783+
.with_collect_statistics(
1784+
props.get(COLLECT_STATISTICS).unwrap().parse().unwrap(),
1785+
)
17671786
}
17681787
}
17691788
TaskProperties::SessionConfig(session_config) => session_config.clone(),

0 commit comments

Comments
 (0)