Merge branch 'main' into fast_sort_with_inlined_fast_key

Dandandan · web-flow · commit 82ab9df377dd · 2025-07-01T20:39:52.000+02:00
diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs
@@ -32,7 +32,6 @@ use std::mem::{size_of, size_of_val};
 use std::str::FromStr;
 use std::sync::Arc;
 
-use crate::arrow_datafusion_err;
 use crate::cast::{
     as_decimal128_array, as_decimal256_array, as_dictionary_array,
     as_fixed_size_binary_array, as_fixed_size_list_array,
@@ -41,6 +40,7 @@ use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_
 use crate::format::DEFAULT_CAST_OPTIONS;
 use crate::hash_utils::create_hashes;
 use crate::utils::SingleRowListArrayBuilder;
+use crate::{_internal_datafusion_err, arrow_datafusion_err};
 use arrow::array::{
     types::{IntervalDayTime, IntervalMonthDayNano},
     *,
@@ -1849,10 +1849,6 @@ impl ScalarValue {
     /// Returns an error if the iterator is empty or if the
     /// [`ScalarValue`]s are not all the same type
     ///
-    /// # Panics
-    ///
-    /// Panics if `self` is a dictionary with invalid key type
-    ///
     /// # Example
     /// ```
     /// use datafusion_common::ScalarValue;
@@ -3343,6 +3339,16 @@ impl ScalarValue {
         arr1 == &right
     }
 
+    /// Compare `self` with `other` and return an `Ordering`.
+    ///
+    /// This is the same as [`PartialOrd`] except that it returns
+    /// `Err` if the values cannot be compared, e.g., they have incompatible data types.
+    pub fn try_cmp(&self, other: &Self) -> Result<Ordering> {
+        self.partial_cmp(other).ok_or_else(|| {
+            _internal_datafusion_err!("Uncomparable values: {self:?}, {other:?}")
+        })
+    }
+
     /// Estimate size if bytes including `Self`. For values with internal containers such as `String`
     /// includes the allocated size (`capacity`) rather than the current length (`len`)
     pub fn size(&self) -> usize {
@@ -4761,6 +4767,32 @@ mod tests {
         Ok(())
     }
 
+    #[test]
+    fn test_try_cmp() {
+        assert_eq!(
+            ScalarValue::try_cmp(
+                &ScalarValue::Int32(Some(1)),
+                &ScalarValue::Int32(Some(2))
+            )
+            .unwrap(),
+            Ordering::Less
+        );
+        assert_eq!(
+            ScalarValue::try_cmp(&ScalarValue::Int32(None), &ScalarValue::Int32(Some(2)))
+                .unwrap(),
+            Ordering::Less
+        );
+        assert_starts_with(
+            ScalarValue::try_cmp(
+                &ScalarValue::Int32(Some(1)),
+                &ScalarValue::Int64(Some(2)),
+            )
+            .unwrap_err()
+            .message(),
+            "Uncomparable values: Int32(1), Int64(2)",
+        );
+    }
+
     #[test]
     fn scalar_decimal_test() -> Result<()> {
         let decimal_value = ScalarValue::Decimal128(Some(123), 10, 1);
@@ -7669,4 +7701,15 @@ mod tests {
         ];
         assert!(scalars.iter().all(|s| s.is_null()));
     }
+
+    // `err.to_string()` depends on backtrace being present (may have backtrace appended)
+    // `err.strip_backtrace()` also depends on backtrace being present (may have "This was likely caused by ..." stripped)
+    fn assert_starts_with(actual: impl AsRef<str>, expected_prefix: impl AsRef<str>) {
+        let actual = actual.as_ref();
+        let expected_prefix = expected_prefix.as_ref();
+        assert!(
+            actual.starts_with(expected_prefix),
+            "Expected '{actual}' to start with '{expected_prefix}'"
+        );
+    }
 }
diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs
@@ -22,7 +22,7 @@ pub mod memory;
 pub mod proxy;
 pub mod string_utils;
 
-use crate::error::{_exec_datafusion_err, _internal_datafusion_err, _internal_err};
+use crate::error::{_exec_datafusion_err, _internal_err};
 use crate::{DataFusionError, Result, ScalarValue};
 use arrow::array::{
     cast::AsArray, Array, ArrayRef, FixedSizeListArray, LargeListArray, ListArray,
@@ -120,14 +120,13 @@ pub fn compare_rows(
         let result = match (lhs.is_null(), rhs.is_null(), sort_options.nulls_first) {
             (true, false, false) | (false, true, true) => Ordering::Greater,
             (true, false, true) | (false, true, false) => Ordering::Less,
-            (false, false, _) => if sort_options.descending {
-                rhs.partial_cmp(lhs)
-            } else {
-                lhs.partial_cmp(rhs)
+            (false, false, _) => {
+                if sort_options.descending {
+                    rhs.try_cmp(lhs)?
+                } else {
+                    lhs.try_cmp(rhs)?
+                }
             }
-            .ok_or_else(|| {
-                _internal_datafusion_err!("Column array shouldn't be empty")
-            })?,
             (true, true, _) => continue,
         };
         if result != Ordering::Equal {
diff --git a/datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs
@@ -31,7 +31,6 @@ use datafusion_execution::memory_pool::{
 };
 use datafusion_expr::display_schema;
 use datafusion_physical_plan::spill::get_record_batch_memory_size;
-use itertools::Itertools;
 use std::time::Duration;
 
 use datafusion_execution::{memory_pool::FairSpillPool, runtime_env::RuntimeEnvBuilder};
@@ -73,43 +72,6 @@ async fn sort_query_fuzzer_runner() {
     fuzzer.run().await.unwrap();
 }
 
-/// Reproduce the bug with specific seeds from the
-/// [failing test case](https://github.com/apache/datafusion/issues/16452).
-#[tokio::test(flavor = "multi_thread")]
-async fn test_reproduce_sort_query_issue_16452() {
-    // Seeds from the failing test case
-    let init_seed = 10313160656544581998u64;
-    let query_seed = 15004039071976572201u64;
-    let config_seed_1 = 11807432710583113300u64;
-    let config_seed_2 = 759937414670321802u64;
-
-    let random_seed = 1u64; // Use a fixed seed to ensure consistent behavior
-
-    let mut test_generator = SortFuzzerTestGenerator::new(
-        2000,
-        3,
-        "sort_fuzz_table".to_string(),
-        get_supported_types_columns(random_seed),
-        false,
-        random_seed,
-    );
-
-    let mut results = vec![];
-
-    for config_seed in [config_seed_1, config_seed_2] {
-        let r = test_generator
-            .fuzzer_run(init_seed, query_seed, config_seed)
-            .await
-            .unwrap();
-
-        results.push(r);
-    }
-
-    for (lhs, rhs) in results.iter().tuple_windows() {
-        check_equality_of_batches(lhs, rhs).unwrap();
-    }
-}
-
 /// SortQueryFuzzer holds the runner configuration for executing sort query fuzz tests. The fuzzing details are managed inside `SortFuzzerTestGenerator`.
 ///
 /// It defines:
@@ -466,7 +428,7 @@ impl SortFuzzerTestGenerator {
             .collect();
 
         let mut order_by_clauses = Vec::new();
-        for col in selected_columns {
+        for col in &selected_columns {
             let mut clause = col.name.clone();
             if rng.random_bool(0.5) {
                 let order = if rng.random_bool(0.5) { "ASC" } else { "DESC" };
@@ -501,7 +463,12 @@ impl SortFuzzerTestGenerator {
         let limit_clause = limit.map_or(String::new(), |l| format!(" LIMIT {l}"));
 
         let query = format!(
-            "SELECT * FROM {} ORDER BY {}{}",
+            "SELECT {} FROM {} ORDER BY {}{}",
+            selected_columns
+                .iter()
+                .map(|col| col.name.clone())
+                .collect::<Vec<_>>()
+                .join(", "),
             self.table_name,
             order_by_clauses.join(", "),
             limit_clause
diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs
@@ -52,7 +52,7 @@ async fn csv_query_array_agg_distinct() -> Result<()> {
 
     // workaround lack of Ord of ScalarValue
     let cmp = |a: &ScalarValue, b: &ScalarValue| {
-        a.partial_cmp(b).expect("Can compare ScalarValues")
+        a.try_cmp(b).expect("Can compare ScalarValues")
     };
     scalars.sort_by(cmp);
     assert_eq!(
diff --git a/datafusion/functions-aggregate-common/src/min_max.rs b/datafusion/functions-aggregate-common/src/min_max.rs
@@ -291,10 +291,9 @@ fn min_max_batch_generic(array: &ArrayRef, ordering: Ordering) -> Result<ScalarV
             extreme = current;
             continue;
         }
-        if let Some(cmp) = extreme.partial_cmp(&current) {
-            if cmp == ordering {
-                extreme = current;
-            }
+        let cmp = extreme.try_cmp(&current)?;
+        if cmp == ordering {
+            extreme = current;
         }
     }
 
diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs
@@ -461,6 +461,7 @@ impl Accumulator for DistinctArrayAggAccumulator {
         }
 
         if let Some(opts) = self.sort_options {
+            let mut delayed_cmp_err = Ok(());
             values.sort_by(|a, b| {
                 if a.is_null() {
                     return match opts.nulls_first {
@@ -475,10 +476,15 @@ impl Accumulator for DistinctArrayAggAccumulator {
                     };
                 }
                 match opts.descending {
-                    true => b.partial_cmp(a).unwrap_or(Ordering::Equal),
-                    false => a.partial_cmp(b).unwrap_or(Ordering::Equal),
+                    true => b.try_cmp(a),
+                    false => a.try_cmp(b),
                 }
+                .unwrap_or_else(|err| {
+                    delayed_cmp_err = Err(err);
+                    Ordering::Equal
+                })
             });
+            delayed_cmp_err?;
         };
 
         let arr = ScalarValue::new_list(&values, &self.datatype, true);
diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_predicates.rs b/datafusion/optimizer/src/simplify_expressions/simplify_predicates.rs
@@ -204,17 +204,16 @@ fn find_most_restrictive_predicate(
 
             if let Some(scalar) = scalar_value {
                 if let Some(current_best) = best_value {
-                    if let Some(comparison) = scalar.partial_cmp(current_best) {
-                        let is_better = if find_greater {
-                            comparison == std::cmp::Ordering::Greater
-                        } else {
-                            comparison == std::cmp::Ordering::Less
-                        };
-
-                        if is_better {
-                            best_value = Some(scalar);
-                            most_restrictive_idx = idx;
-                        }
+                    let comparison = scalar.try_cmp(current_best)?;
+                    let is_better = if find_greater {
+                        comparison == std::cmp::Ordering::Greater
+                    } else {
+                        comparison == std::cmp::Ordering::Less
+                    };
+
+                    if is_better {
+                        best_value = Some(scalar);
+                        most_restrictive_idx = idx;
                     }
                 } else {
                     best_value = Some(scalar);
diff --git a/datafusion/physical-plan/src/topk/mod.rs b/datafusion/physical-plan/src/topk/mod.rs
@@ -18,8 +18,8 @@
 //! TopK: Combination of Sort / LIMIT
 
 use arrow::{
-    array::Array,
-    compute::interleave_record_batch,
+    array::{Array, AsArray},
+    compute::{interleave_record_batch, prep_null_mask_filter, FilterBuilder},
     row::{RowConverter, Rows, SortField},
 };
 use datafusion_expr::{ColumnarValue, Operator};
@@ -203,7 +203,7 @@ impl TopK {
         let baseline = self.metrics.baseline.clone();
         let _timer = baseline.elapsed_compute().timer();
 
-        let sort_keys: Vec<ArrayRef> = self
+        let mut sort_keys: Vec<ArrayRef> = self
             .expr
             .iter()
             .map(|expr| {
@@ -212,15 +212,56 @@ impl TopK {
             })
             .collect::<Result<Vec<_>>>()?;
 
+        let mut selected_rows = None;
+
+        if let Some(filter) = self.filter.as_ref() {
+            // If a filter is provided, evaluate it on this batch to find the candidate rows
+            let filter = filter.current()?;
+            let filtered = filter.evaluate(&batch)?;
+            let num_rows = batch.num_rows();
+            let array = filtered.into_array(num_rows)?;
+            let mut filter = array.as_boolean().clone();
+            let true_count = filter.true_count();
+            if true_count == 0 {
+                // no rows pass the filter, so there is nothing to insert; skip this batch
+                return Ok(());
+            }
+            // only update the keys / rows if the filter does not match all rows
+            if true_count < num_rows {
+                // `set_indices` below walks only the value bits, so nulls must first be turned
+                // into `false` to keep the indices correct. `FilterBuilder` performs the same
+                // preparation itself, so doing it here adds no overhead.
+                if filter.nulls().is_some() {
+                    filter = prep_null_mask_filter(&filter);
+                }
+
+                let filter_predicate = FilterBuilder::new(&filter);
+                let filter_predicate = if sort_keys.len() > 1 {
+                    // Optimize filter when it has multiple sort keys
+                    filter_predicate.optimize().build()
+                } else {
+                    filter_predicate.build()
+                };
+                selected_rows = Some(filter);
+                sort_keys = sort_keys
+                    .iter()
+                    .map(|key| filter_predicate.filter(key).map_err(|x| x.into()))
+                    .collect::<Result<Vec<_>>>()?;
+            }
+        };
         // reuse existing `Rows` to avoid reallocations
         let rows = &mut self.scratch_rows;
         rows.clear();
         self.row_converter.append(rows, &sort_keys)?;
 
         let mut batch_entry = self.heap.register_batch(batch.clone());
 
-        let replacements =
-            self.find_new_topk_items(0..sort_keys[0].len(), &mut batch_entry);
+        let replacements = match selected_rows {
+            Some(filter) => {
+                self.find_new_topk_items(filter.values().set_indices(), &mut batch_entry)
+            }
+            None => self.find_new_topk_items(0..sort_keys[0].len(), &mut batch_entry),
+        };
 
         if replacements > 0 {
             self.metrics.row_replacements.add(replacements);
diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs
@@ -21,8 +21,9 @@ use datafusion_expr::planner::{
 };
 use sqlparser::ast::{
     AccessExpr, BinaryOperator, CastFormat, CastKind, DataType as SQLDataType,
-    DictionaryField, Expr as SQLExpr, ExprWithAlias as SQLExprWithAlias, MapEntry,
-    StructField, Subscript, TrimWhereField, Value, ValueWithSpan,
+    DictionaryField, Expr as SQLExpr, ExprWithAlias as SQLExprWithAlias,
+    FunctionArguments, MapEntry, StructField, Subscript, TrimWhereField, Value,
+    ValueWithSpan,
 };
 
 use datafusion_common::{
@@ -476,7 +477,21 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             ),
 
             SQLExpr::Function(function) => {
-                self.sql_function_to_expr(function, schema, planner_context)
+                // workaround for https://github.com/apache/datafusion-sqlparser-rs/issues/1909
+                if matches!(function.args, FunctionArguments::None)
+                    && function.name.0.len() > 1
+                    && function.name.0.iter().all(|part| part.as_ident().is_some())
+                {
+                    let ids = function
+                        .name
+                        .0
+                        .iter()
+                        .map(|part| part.as_ident().expect("just checked").clone())
+                        .collect();
+                    self.sql_compound_identifier_to_expr(ids, schema, planner_context)
+                } else {
+                    self.sql_function_to_expr(function, schema, planner_context)
+                }
             }
 
             SQLExpr::Rollup(exprs) => {
diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt

Original file line number	Diff line number	Diff line change
`@@ -291,10 +291,9 @@ fn min_max_batch_generic(array: &ArrayRef, ordering: Ordering) -> Result<ScalarV`
`291`	`291`	`extreme = current;`
`292`	`292`	`continue;`
`293`	`293`	`}`
`294`		`- if let Some(cmp) = extreme.partial_cmp(&current) {`
`295`		`- if cmp == ordering {`
`296`		`- extreme = current;`
`297`		`- }`
	`294`	`+ let cmp = extreme.try_cmp(&current)?;`
	`295`	`+ if cmp == ordering {`
	`296`	`+ extreme = current;`
`298`	`297`	`}`
`299`	`298`	`}`
`300`	`299`