Skip to content

Commit 308a442

Browse files
committed
Consolidate remaining parquet config options into ConfigOptions
1 parent 6d44791 commit 308a442

File tree

12 files changed

+175
-105
lines changed

12 files changed

+175
-105
lines changed

datafusion-examples/examples/flight_server.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,12 @@ impl FlightService for FlightServiceImpl {
6767
) -> Result<Response<SchemaResult>, Status> {
6868
let request = request.into_inner();
6969

70-
let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default()));
70+
let ctx = SessionContext::new();
71+
let format = Arc::new(ParquetFormat::new(ctx.config_options()));
72+
let listing_options = ListingOptions::new(format);
7173
let table_path =
7274
ListingTableUrl::parse(&request.path[0]).map_err(to_tonic_err)?;
7375

74-
let ctx = SessionContext::new();
7576
let schema = listing_options
7677
.infer_schema(&ctx.state(), &table_path)
7778
.await

datafusion-examples/examples/parquet_sql_multiple_files.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ async fn main() -> Result<()> {
3232
let testdata = datafusion::test_util::parquet_test_data();
3333

3434
// Configure listing options
35-
let file_format = ParquetFormat::default().with_enable_pruning(true);
35+
let file_format = ParquetFormat::new(ctx.config_options());
3636
let listing_options = ListingOptions {
3737
file_extension: FileType::PARQUET.get_ext(),
3838
format: Arc::new(file_format),

datafusion/core/src/config.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,16 @@ pub const OPT_PARQUET_REORDER_FILTERS: &str =
6060
pub const OPT_PARQUET_ENABLE_PAGE_INDEX: &str =
6161
"datafusion.execution.parquet.enable_page_index";
6262

63+
/// Configuration option "datafusion.execution.parquet.pruning"
///
/// Boolean option: when true, the parquet reader attempts to skip entire row
/// groups based on the predicate in the query.
64+
pub const OPT_PARQUET_ENABLE_PRUNING: &str = "datafusion.execution.parquet.pruning";
65+
66+
/// Configuration option "datafusion.execution.parquet.skip_metadata"
///
/// Boolean option: when true, the parquet reader skips the optional embedded
/// metadata that may be in the file schema, which can help avoid schema
/// conflicts between files whose schemas differ only in metadata.
67+
pub const OPT_PARQUET_SKIP_METADATA: &str = "datafusion.execution.parquet.skip_metadata";
68+
69+
/// Configuration option "datafusion.execution.parquet.metadata_size_hint"
///
/// When set, the parquet reader tries to fetch the last `size_hint` bytes of
/// the parquet file optimistically instead of issuing two separate reads.
70+
pub const OPT_PARQUET_METADATA_SIZE_HINT: &str =
71+
"datafusion.execution.parquet.metadata_size_hint";
72+
6373
/// Configuration option "datafusion.optimizer.skip_failed_rules"
6474
pub const OPT_OPTIMIZER_SKIP_FAILED_RULES: &str =
6575
"datafusion.optimizer.skip_failed_rules";
@@ -237,6 +247,28 @@ impl BuiltInConfigs {
237247
to reduce the number of rows decoded.",
238248
false,
239249
),
250+
ConfigDefinition::new_bool(
251+
OPT_PARQUET_ENABLE_PRUNING,
252+
"If true, the parquet reader attempts to skip entire row groups based \
253+
on the predicate in the query.",
254+
true,
255+
),
256+
ConfigDefinition::new_bool(
257+
OPT_PARQUET_SKIP_METADATA,
258+
"If true, the parquet reader skips the optional embedded metadata that may be in \
259+
the file Schema. This setting can help avoid schema conflicts when querying \
260+
multiple parquet files with schemas containing compatible types but different metadata.",
261+
true,
262+
),
263+
// The size hint is a byte count, so it is registered as an optional unsigned
// integer (unset by default) rather than as a boolean.
ConfigDefinition::new(
264+
OPT_PARQUET_METADATA_SIZE_HINT,
265+
"If specified, the parquet reader will try and fetch the last `size_hint` \
266+
bytes of the parquet file optimistically. If not specified, two reads are required: \
267+
One read to fetch the 8-byte parquet footer and \
268+
another to fetch the metadata length encoded in the footer.",
269+
DataType::UInt64,
270+
ScalarValue::UInt64(None),
271+
),
240272
ConfigDefinition::new_bool(
241273
OPT_OPTIMIZER_SKIP_FAILED_RULES,
242274
"When set to true, the logical plan optimizer will produce warning \

0 commit comments

Comments
 (0)