diff --git a/datafusion/common/src/stats.rs b/datafusion/common/src/stats.rs
index a10e05a55c64..6cefef8d0eb5 100644
--- a/datafusion/common/src/stats.rs
+++ b/datafusion/common/src/stats.rs
@@ -221,7 +221,7 @@ pub struct Statistics {
     /// Total bytes of the table rows.
     pub total_byte_size: Precision<usize>,
     /// Statistics on a column level. It contains a [`ColumnStatistics`] for
-    /// each field in the schema of the the table to which the [`Statistics`] refer.
+    /// each field in the schema of the table to which the [`Statistics`] refer.
     pub column_statistics: Vec<ColumnStatistics>,
 }
diff --git a/datafusion/core/src/datasource/file_format/mod.rs b/datafusion/core/src/datasource/file_format/mod.rs
index 72dc289d4b64..5ee0f7186703 100644
--- a/datafusion/core/src/datasource/file_format/mod.rs
+++ b/datafusion/core/src/datasource/file_format/mod.rs
@@ -49,7 +49,7 @@ use object_store::{ObjectMeta, ObjectStore};
 
 /// This trait abstracts all the file format specific implementations
 /// from the [`TableProvider`]. This helps code re-utilization across
-/// providers that support the the same file formats.
+/// providers that support the same file formats.
 ///
 /// [`TableProvider`]: crate::datasource::provider::TableProvider
 #[async_trait]
diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs
index 54fe6e8406fd..0740a8d2cdbc 100644
--- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs
+++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs
@@ -392,7 +392,7 @@ fn adjust_input_keys_ordering(
         let expr = proj.expr();
         // For Projection, we need to transform the requirements to the columns before the Projection
         // And then to push down the requirements
-        // Construct a mapping from new name to the the orginal Column
+        // Construct a mapping from new name to the orginal Column
         let new_required = map_columns_before_projection(&requirements.data, expr);
         if new_required.len() == requirements.data.len() {
             requirements.children[0].data = new_required;
diff --git a/datafusion/core/src/physical_optimizer/output_requirements.rs b/datafusion/core/src/physical_optimizer/output_requirements.rs
index bd71b3e8ed80..bf010a5e39d8 100644
--- a/datafusion/core/src/physical_optimizer/output_requirements.rs
+++ b/datafusion/core/src/physical_optimizer/output_requirements.rs
@@ -216,7 +216,7 @@ impl PhysicalOptimizerRule for OutputRequirements {
     }
 }
 
-/// This functions adds ancillary `OutputRequirementExec` to the the physical plan, so that
+/// This functions adds ancillary `OutputRequirementExec` to the physical plan, so that
 /// global requirements are not lost during optimization.
 fn require_top_ordering(plan: Arc<dyn ExecutionPlan>) -> Result<Arc<dyn ExecutionPlan>> {
     let (new_plan, is_changed) = require_top_ordering_helper(plan)?;
diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs
index e8f3bf01ecaa..ab5611597472 100644
--- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs
+++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs
@@ -322,7 +322,7 @@ fn try_swapping_with_output_req(
     projection: &ProjectionExec,
     output_req: &OutputRequirementExec,
 ) -> Result<Option<Arc<dyn ExecutionPlan>>> {
-    // If the projection does not narrow the the schema, we should not try to push it down:
+    // If the projection does not narrow the schema, we should not try to push it down:
     if projection.expr().len() >= projection.input().schema().fields().len() {
         return Ok(None);
     }
@@ -372,7 +372,7 @@ fn try_swapping_with_coalesce_partitions(
     projection: &ProjectionExec,
 ) -> Result<Option<Arc<dyn ExecutionPlan>>> {
-    // If the projection does not narrow the the schema, we should not try to push it down:
+    // If the projection does not narrow the schema, we should not try to push it down:
     if projection.expr().len() >= projection.input().schema().fields().len() {
         return Ok(None);
     }
@@ -387,7 +387,7 @@ fn try_swapping_with_filter(
     projection: &ProjectionExec,
     filter: &FilterExec,
 ) -> Result<Option<Arc<dyn ExecutionPlan>>> {
-    // If the projection does not narrow the the schema, we should not try to push it down:
+    // If the projection does not narrow the schema, we should not try to push it down:
     if projection.expr().len() >= projection.input().schema().fields().len() {
         return Ok(None);
     }
@@ -412,7 +412,7 @@ fn try_swapping_with_repartition(
     projection: &ProjectionExec,
     repartition: &RepartitionExec,
 ) -> Result<Option<Arc<dyn ExecutionPlan>>> {
-    // If the projection does not narrow the the schema, we should not try to push it down.
+    // If the projection does not narrow the schema, we should not try to push it down.
     if projection.expr().len() >= projection.input().schema().fields().len() {
         return Ok(None);
     }
@@ -454,7 +454,7 @@ fn try_swapping_with_sort(
     projection: &ProjectionExec,
     sort: &SortExec,
 ) -> Result<Option<Arc<dyn ExecutionPlan>>> {
-    // If the projection does not narrow the the schema, we should not try to push it down.
+    // If the projection does not narrow the schema, we should not try to push it down.
     if projection.expr().len() >= projection.input().schema().fields().len() {
         return Ok(None);
     }
@@ -1082,7 +1082,7 @@ fn join_table_borders(
     (far_right_left_col_ind, far_left_right_col_ind)
 }
 
-/// Tries to update the equi-join `Column`'s of a join as if the the input of
+/// Tries to update the equi-join `Column`'s of a join as if the input of
 /// the join was replaced by a projection.
 fn update_join_on(
     proj_left_exprs: &[(Column, String)],
@@ -1152,7 +1152,7 @@ fn new_columns_for_join_on(
     (new_columns.len() == hash_join_on.len()).then_some(new_columns)
 }
 
-/// Tries to update the column indices of a [`JoinFilter`] as if the the input of
+/// Tries to update the column indices of a [`JoinFilter`] as if the input of
 /// the join was replaced by a projection.
 fn update_join_filter(
     projection_left_exprs: &[(Column, String)],
diff --git a/datafusion/physical-expr/src/binary_map.rs b/datafusion/physical-expr/src/binary_map.rs
index b661f0a74148..6c3a452a8611 100644
--- a/datafusion/physical-expr/src/binary_map.rs
+++ b/datafusion/physical-expr/src/binary_map.rs
@@ -280,7 +280,7 @@ where
     /// # Returns
     ///
    /// The payload value for the entry, either the existing value or
-    /// the the newly inserted value
+    /// the newly inserted value
     ///
     /// # Safety:
     ///
diff --git a/datafusion/physical-plan/src/aggregates/order/mod.rs b/datafusion/physical-plan/src/aggregates/order/mod.rs
index 4f1914b12c96..556103e1e222 100644
--- a/datafusion/physical-plan/src/aggregates/order/mod.rs
+++ b/datafusion/physical-plan/src/aggregates/order/mod.rs
@@ -40,7 +40,7 @@ pub(crate) enum GroupOrdering {
 }
 
 impl GroupOrdering {
-    /// Create a `GroupOrdering` for the the specified ordering
+    /// Create a `GroupOrdering` for the specified ordering
     pub fn try_new(
         input_schema: &Schema,
         mode: &InputOrderMode,
diff --git a/datafusion/sql/src/expr/arrow_cast.rs b/datafusion/sql/src/expr/arrow_cast.rs
index 9a0d61f41c01..a75cdf9e3c6b 100644
--- a/datafusion/sql/src/expr/arrow_cast.rs
+++ b/datafusion/sql/src/expr/arrow_cast.rs
@@ -76,7 +76,7 @@ pub fn create_arrow_cast(mut args: Vec<Expr>, schema: &DFSchema) -> Result<Expr>
 /// Parses `str` into a `DataType`.
 ///
-/// `parse_data_type` is the the reverse of [`DataType`]'s `Display`
+/// `parse_data_type` is the reverse of [`DataType`]'s `Display`
 /// impl, and maintains the invariant that
 /// `parse_data_type(data_type.to_string()) == data_type`
 ///
diff --git a/datafusion/sqllogictest/test_files/create_function.slt b/datafusion/sqllogictest/test_files/create_function.slt
index baa40ac64afc..4f0c53c36ca1 100644
--- a/datafusion/sqllogictest/test_files/create_function.slt
+++ b/datafusion/sqllogictest/test_files/create_function.slt
@@ -47,7 +47,7 @@ select abs(-1);
 statement ok
 DROP FUNCTION abs;
 
-# now the the query errors
+# now the query errors
 query error Invalid function 'abs'.
 select abs(-1);
diff --git a/datafusion/sqllogictest/test_files/limit.slt b/datafusion/sqllogictest/test_files/limit.slt
index 92093ba13eba..0d98c41d0028 100644
--- a/datafusion/sqllogictest/test_files/limit.slt
+++ b/datafusion/sqllogictest/test_files/limit.slt
@@ -320,7 +320,7 @@ SELECT COUNT(*) FROM (SELECT a FROM t1 LIMIT 3 OFFSET 11);
 0
 
 # The aggregate does not need to be computed because the input statistics are exact and
-# the number of rows is less than or equal to the the "fetch+skip" value (LIMIT+OFFSET).
+# the number of rows is less than or equal to the "fetch+skip" value (LIMIT+OFFSET).
 query TT
 EXPLAIN SELECT COUNT(*) FROM (SELECT a FROM t1 LIMIT 3 OFFSET 8);
 ----
diff --git a/dev/changelog/13.0.0.md b/dev/changelog/13.0.0.md
index 0f35903e2600..14b42a052ef9 100644
--- a/dev/changelog/13.0.0.md
+++ b/dev/changelog/13.0.0.md
@@ -87,7 +87,7 @@
 - Optimizer rule 'projection_push_down' failed due to unexpected error: Error during planning: Aggregate schema has wrong number of fields. Expected 3 got 8 [\#3704](https://github.com/apache/arrow-datafusion/issues/3704)
 - Optimizer regressions in `unwrap_cast_in_comparison` [\#3690](https://github.com/apache/arrow-datafusion/issues/3690)
 - Internal error when evaluating a predicate = "The type of Dictionary\(Int16, Utf8\) = Int64 of binary physical should be same" [\#3685](https://github.com/apache/arrow-datafusion/issues/3685)
-- Specialized regexp_replace should early-abort when the the input arrays are empty [\#3647](https://github.com/apache/arrow-datafusion/issues/3647)
+- Specialized regexp_replace should early-abort when the input arrays are empty [\#3647](https://github.com/apache/arrow-datafusion/issues/3647)
 - Internal error: Failed to coerce types Decimal128\(10, 2\) and Boolean in BETWEEN expression [\#3646](https://github.com/apache/arrow-datafusion/issues/3646)
 - Internal error: Failed to coerce types Decimal128\(10, 2\) and Boolean in BETWEEN expression [\#3645](https://github.com/apache/arrow-datafusion/issues/3645)
 - Type coercion error: The type of Boolean AND Decimal128\(10, 2\) of binary physical should be same [\#3644](https://github.com/apache/arrow-datafusion/issues/3644)
diff --git a/dev/changelog/7.0.0.md b/dev/changelog/7.0.0.md
index e63c2a4455c9..4d2606d7bfbe 100644
--- a/dev/changelog/7.0.0.md
+++ b/dev/changelog/7.0.0.md
@@ -56,7 +56,7 @@
 - Keep all datafusion's packages up to date with Dependabot [\#1472](https://github.com/apache/arrow-datafusion/issues/1472)
 - ExecutionContext support init ExecutionContextState with `new(state: Arc>)` method [\#1439](https://github.com/apache/arrow-datafusion/issues/1439)
 - support the decimal scalar value [\#1393](https://github.com/apache/arrow-datafusion/issues/1393)
-- Documentation for using scalar functions with the the DataFrame API [\#1364](https://github.com/apache/arrow-datafusion/issues/1364)
+- Documentation for using scalar functions with the DataFrame API [\#1364](https://github.com/apache/arrow-datafusion/issues/1364)
 - Support `boolean == boolean` and `boolean != boolean` operators [\#1159](https://github.com/apache/arrow-datafusion/issues/1159)
 - Support DataType::Decimal\(15, 2\) in TPC-H benchmark [\#174](https://github.com/apache/arrow-datafusion/issues/174)
 - Make `MemoryStream` public [\#150](https://github.com/apache/arrow-datafusion/issues/150)
diff --git a/docs/source/contributor-guide/communication.md b/docs/source/contributor-guide/communication.md
index 8678aa534baf..7b5e71bc3a1c 100644
--- a/docs/source/contributor-guide/communication.md
+++ b/docs/source/contributor-guide/communication.md
@@ -44,7 +44,7 @@ request one in the `Arrow Rust` channel of the [Arrow Rust Discord server](https
 
 ## Mailing list
 
 We also use arrow.apache.org's `dev@` mailing list for release coordination and occasional design discussions. Other
-than the the release process, most DataFusion mailing list traffic will link to a GitHub issue or PR for discussion.
+than the release process, most DataFusion mailing list traffic will link to a GitHub issue or PR for discussion.
 ([subscribe](mailto:dev-subscribe@arrow.apache.org), [unsubscribe](mailto:dev-unsubscribe@arrow.apache.org),
 [archives](https://lists.apache.org/list.html?dev@arrow.apache.org)).
diff --git a/docs/source/library-user-guide/adding-udfs.md b/docs/source/library-user-guide/adding-udfs.md
index f433e026e0a2..ad210724103d 100644
--- a/docs/source/library-user-guide/adding-udfs.md
+++ b/docs/source/library-user-guide/adding-udfs.md
@@ -204,7 +204,7 @@ let df = ctx.sql(&sql).await.unwrap();
 
 ## Adding a Window UDF
 
-Scalar UDFs are functions that take a row of data and return a single value. Window UDFs are similar, but they also have access to the rows around them. Access to the the proximal rows is helpful, but adds some complexity to the implementation.
+Scalar UDFs are functions that take a row of data and return a single value. Window UDFs are similar, but they also have access to the rows around them. Access to the proximal rows is helpful, but adds some complexity to the implementation.
 
 For example, we will declare a user defined window function that computes a moving average.