-
Notifications
You must be signed in to change notification settings - Fork 1.8k
fix: Remove datafusion.execution.parquet.cache_metadata config
#17062
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
62363a1
dae15bb
ff2c726
a6c53d6
df29cd7
ed628f8
b77de35
0338633
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -447,19 +447,16 @@ impl FileFormat for ParquetFormat { | |
|
|
||
| let mut source = ParquetSource::new(self.options.clone()); | ||
|
|
||
| // Use the CachedParquetFileReaderFactory when metadata caching is enabled | ||
| if self.options.global.cache_metadata { | ||
| if let Some(metadata_cache) = | ||
| state.runtime_env().cache_manager.get_file_metadata_cache() | ||
| { | ||
| let store = state | ||
| .runtime_env() | ||
| .object_store(conf.object_store_url.clone())?; | ||
| let cached_parquet_read_factory = | ||
| Arc::new(CachedParquetFileReaderFactory::new(store, metadata_cache)); | ||
| source = | ||
| source.with_parquet_file_reader_factory(cached_parquet_read_factory); | ||
| } | ||
| // Use the CachedParquetFileReaderFactory | ||
| if let Some(metadata_cache) = | ||
| state.runtime_env().cache_manager.get_file_metadata_cache() | ||
|
||
| { | ||
| let store = state | ||
| .runtime_env() | ||
| .object_store(conf.object_store_url.clone())?; | ||
| let cached_parquet_read_factory = | ||
| Arc::new(CachedParquetFileReaderFactory::new(store, metadata_cache)); | ||
| source = source.with_parquet_file_reader_factory(cached_parquet_read_factory); | ||
| } | ||
|
|
||
| if let Some(metadata_size_hint) = metadata_size_hint { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@nuno-faria this change seems to be brought by
CachedParquetFileReaderFactoryyou added. I just changed the test to reflect the correct behaviour after using the factory as the default now. Just making sure if it looks correct?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's right, when caching by default the page index is always read, even if the query does not require it. FYI @alamb
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that is best practice anyways and probably what people want