Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions datafusion/datasource-parquet/src/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ use object_store::ObjectStore;
/// ```no_run
/// # use std::sync::Arc;
/// # use arrow::datatypes::Schema;
/// # use datafusion_datasource::file_scan_config::FileScanConfig;
/// # use datafusion_datasource::file_scan_config::{FileScanConfig, FileScanConfigBuilder};
/// # use datafusion_datasource::PartitionedFile;
/// # use datafusion_datasource::source::DataSourceExec;
///
Expand All @@ -183,9 +183,9 @@ use object_store::ObjectStore;
/// .iter()
/// .map(|file_group| {
/// // create a new exec by copying the existing exec's source config
/// let new_config = base_config
/// .clone()
/// .with_file_groups(vec![file_group.clone()]);
/// let new_config = FileScanConfigBuilder::from(base_config.clone())
/// .with_file_groups(vec![file_group.clone()])
/// .build();
///
/// (DataSourceExec::from_data_source(new_config))
/// })
Expand Down
149 changes: 1 addition & 148 deletions datafusion/datasource/src/file_scan_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ use log::{debug, warn};
/// The base configurations for a [`DataSourceExec`], the physical plan for
/// any given file format.
///
/// Use [`Self::build`] to create a [`DataSourceExec`] from a ``FileScanConfig`.
/// Use [`DataSourceExec::from_data_source`] to create a [`DataSourceExec`] from a [`FileScanConfig`].
///
/// # Example
/// ```
Expand Down Expand Up @@ -669,65 +669,6 @@ impl DataSource for FileScanConfig {
}

impl FileScanConfig {
/// Create a new [`FileScanConfig`] with default settings for scanning files.
///
/// See example on [`FileScanConfig`]
///
/// No file groups are added by default. See [`Self::with_file`], [`Self::with_file_group`] and
/// [`Self::with_file_groups`].
///
/// # Parameters:
/// * `object_store_url`: See [`Self::object_store_url`]
/// * `file_schema`: See [`Self::file_schema`]
#[allow(deprecated)] // `new` will be removed same time as `with_source`
pub fn new(
object_store_url: ObjectStoreUrl,
file_schema: SchemaRef,
file_source: Arc<dyn FileSource>,
) -> Self {
let statistics = Statistics::new_unknown(&file_schema);
let file_source = file_source
.with_statistics(statistics.clone())
.with_schema(Arc::clone(&file_schema));
Self {
object_store_url,
file_schema,
file_groups: vec![],
constraints: Constraints::default(),
projection: None,
limit: None,
table_partition_cols: vec![],
output_ordering: vec![],
file_compression_type: FileCompressionType::UNCOMPRESSED,
new_lines_in_values: false,
file_source: Arc::clone(&file_source),
batch_size: None,
expr_adapter_factory: None,
}
}

/// Set the file source
#[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically speaking we should leave these methods until 53.0.0 per https://datafusion.apache.org/contributor-guide/api-health.html#deprecation-guidelines

However, given they are pretty low level (and thus I don't expect many actual users of them) and we are trying to work on this code for other reasons, I think it is fine to remove them early

pub fn with_source(mut self, file_source: Arc<dyn FileSource>) -> Self {
self.file_source =
file_source.with_statistics(Statistics::new_unknown(&self.file_schema));
self
}

/// Set the table constraints of the files
#[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")]
pub fn with_constraints(mut self, constraints: Constraints) -> Self {
self.constraints = constraints;
self
}

/// Set the statistics of the files
#[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")]
pub fn with_statistics(mut self, statistics: Statistics) -> Self {
self.file_source = self.file_source.with_statistics(statistics);
self
}

fn projection_indices(&self) -> Vec<usize> {
match &self.projection {
Some(proj) => proj.clone(),
Expand Down Expand Up @@ -788,88 +729,6 @@ impl FileScanConfig {
self.constraints.project(&indexes).unwrap_or_default()
}

/// Set the projection of the files
#[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")]
pub fn with_projection(mut self, projection: Option<Vec<usize>>) -> Self {
self.projection = projection;
self
}

/// Set the limit of the files
#[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")]
pub fn with_limit(mut self, limit: Option<usize>) -> Self {
self.limit = limit;
self
}

/// Add a file as a single group
///
/// See [Self::file_groups] for more information.
#[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")]
#[allow(deprecated)]
pub fn with_file(self, file: PartitionedFile) -> Self {
self.with_file_group(FileGroup::new(vec![file]))
}

/// Add the file groups
///
/// See [Self::file_groups] for more information.
#[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")]
pub fn with_file_groups(mut self, mut file_groups: Vec<FileGroup>) -> Self {
self.file_groups.append(&mut file_groups);
self
}

/// Add a new file group
///
/// See [Self::file_groups] for more information
#[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")]
pub fn with_file_group(mut self, file_group: FileGroup) -> Self {
self.file_groups.push(file_group);
self
}

/// Set the partitioning columns of the files
#[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")]
pub fn with_table_partition_cols(mut self, table_partition_cols: Vec<Field>) -> Self {
self.table_partition_cols = table_partition_cols
.into_iter()
.map(|f| Arc::new(f) as FieldRef)
.collect();
self
}

/// Set the output ordering of the files
#[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")]
pub fn with_output_ordering(mut self, output_ordering: Vec<LexOrdering>) -> Self {
self.output_ordering = output_ordering;
self
}

/// Set the file compression type
#[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")]
pub fn with_file_compression_type(
mut self,
file_compression_type: FileCompressionType,
) -> Self {
self.file_compression_type = file_compression_type;
self
}

/// Set the new_lines_in_values property
#[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")]
pub fn with_newlines_in_values(mut self, new_lines_in_values: bool) -> Self {
self.new_lines_in_values = new_lines_in_values;
self
}

/// Set the batch_size property
#[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")]
pub fn with_batch_size(mut self, batch_size: Option<usize>) -> Self {
self.batch_size = batch_size;
self
}

/// Specifies whether newlines in (quoted) values are supported.
///
/// Parsing newlines in quoted values may be affected by execution behaviour such as
Expand Down Expand Up @@ -1099,12 +958,6 @@ impl FileScanConfig {
.collect())
}

/// Returns a new [`DataSourceExec`] to scan the files specified by this config
#[deprecated(since = "47.0.0", note = "use DataSourceExec::from_data_source instead")]
pub fn build(self) -> Arc<DataSourceExec> {
DataSourceExec::from_data_source(self)
}

/// Write the data_type based on file_source
fn fmt_file_source(&self, t: DisplayFormatType, f: &mut Formatter) -> FmtResult {
write!(f, ", file_type={}", self.file_source.file_type())?;
Expand Down
4 changes: 0 additions & 4 deletions datafusion/datasource/src/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -287,10 +287,6 @@ impl ExecutionPlan for DataSourceExec {
Some(self.data_source.metrics().clone_inner())
}

fn statistics(&self) -> Result<Statistics> {
self.data_source.statistics()
}

fn partition_statistics(&self, partition: Option<usize>) -> Result<Statistics> {
if let Some(partition) = partition {
let mut statistics = Statistics::new_unknown(&self.schema());
Expand Down