Skip to content

Commit 5becc5b

Browse files
committed
temporary fix
1 parent f55bb79 commit 5becc5b

File tree

2 files changed

+8
-50
lines changed

2 files changed

+8
-50
lines changed

datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@ async fn sort_query_fuzzer_runner() {
7373
fuzzer.run().await.unwrap();
7474
}
7575

76-
/// Reproduce the bug with specific seeds from the failing test case
76+
/// Reproduce the bug with specific seeds from the
77+
/// [failing test case](https://github.com/apache/datafusion/issues/16452).
7778
#[tokio::test(flavor = "multi_thread")]
7879
async fn test_reproduce_sort_query_issue_16452() {
7980
// Seeds from the failing test case
@@ -82,7 +83,6 @@ async fn test_reproduce_sort_query_issue_16452() {
8283
let config_seed_1 = 11807432710583113300u64;
8384
let config_seed_2 = 759937414670321802u64;
8485

85-
// Use a fixed seed to replicate the original behavior more closely
8686
let random_seed = 1u64; // Use a fixed seed to ensure consistent behavior
8787

8888
println!("Creating test generator with same config as original runner...");
@@ -106,8 +106,6 @@ async fn test_reproduce_sort_query_issue_16452() {
106106
results.push(r);
107107
}
108108

109-
dbg!(results.len());
110-
111109
for (lhs, rhs) in results.iter().tuple_windows() {
112110
check_equality_of_batches(lhs, rhs).unwrap();
113111
}
@@ -620,6 +618,7 @@ impl SortFuzzerTestGenerator {
620618
let with_mem_limit = !query_str.contains("LIMIT") && self.set_memory_limit;
621619

622620
let ctx = self.generate_random_config(config_seed, with_mem_limit)?;
621+
623622
let df = ctx.sql(&query_str).await.unwrap();
624623
let results = df.collect().await.unwrap();
625624

datafusion/physical-plan/src/topk/mod.rs

Lines changed: 5 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
//! TopK: Combination of Sort / LIMIT
1919
2020
use arrow::{
21-
array::{Array, AsArray},
22-
compute::{interleave_record_batch, prep_null_mask_filter, FilterBuilder},
21+
array::Array,
22+
compute::interleave_record_batch,
2323
row::{RowConverter, Rows, SortField},
2424
};
2525
use datafusion_expr::{ColumnarValue, Operator};
@@ -203,7 +203,7 @@ impl TopK {
203203
let baseline = self.metrics.baseline.clone();
204204
let _timer = baseline.elapsed_compute().timer();
205205

206-
let mut sort_keys: Vec<ArrayRef> = self
206+
let sort_keys: Vec<ArrayRef> = self
207207
.expr
208208
.iter()
209209
.map(|expr| {
@@ -212,56 +212,15 @@ impl TopK {
212212
})
213213
.collect::<Result<Vec<_>>>()?;
214214

215-
let mut selected_rows = None;
216-
217-
if let Some(filter) = self.filter.as_ref() {
218-
// If a filter is provided, update it with the new rows
219-
let filter = filter.current()?;
220-
let filtered = filter.evaluate(&batch)?;
221-
let num_rows = batch.num_rows();
222-
let array = filtered.into_array(num_rows)?;
223-
let mut filter = array.as_boolean().clone();
224-
let true_count = filter.true_count();
225-
if true_count == 0 {
226-
// no rows in this batch pass the filter, so there is nothing to update
227-
return Ok(());
228-
}
229-
// only update the keys / rows if the filter does not match all rows
230-
if true_count < num_rows {
231-
// Indices in `set_indices` should be correct if filter contains nulls
232-
// So we prepare the filter here. Note this is also done in the `FilterBuilder`
233-
// so there is no overhead to do this here.
234-
if filter.nulls().is_some() {
235-
filter = prep_null_mask_filter(&filter);
236-
}
237-
238-
let filter_predicate = FilterBuilder::new(&filter);
239-
let filter_predicate = if sort_keys.len() > 1 {
240-
// Optimize filter when it has multiple sort keys
241-
filter_predicate.optimize().build()
242-
} else {
243-
filter_predicate.build()
244-
};
245-
selected_rows = Some(filter);
246-
sort_keys = sort_keys
247-
.iter()
248-
.map(|key| filter_predicate.filter(key).map_err(|x| x.into()))
249-
.collect::<Result<Vec<_>>>()?;
250-
}
251-
};
252215
// reuse existing `Rows` to avoid reallocations
253216
let rows = &mut self.scratch_rows;
254217
rows.clear();
255218
self.row_converter.append(rows, &sort_keys)?;
256219

257220
let mut batch_entry = self.heap.register_batch(batch.clone());
258221

259-
let replacements = match selected_rows {
260-
Some(filter) => {
261-
self.find_new_topk_items(filter.values().set_indices(), &mut batch_entry)
262-
}
263-
None => self.find_new_topk_items(0..sort_keys[0].len(), &mut batch_entry),
264-
};
222+
let replacements =
223+
self.find_new_topk_items(0..sort_keys[0].len(), &mut batch_entry);
265224

266225
if replacements > 0 {
267226
self.metrics.row_replacements.add(replacements);

0 commit comments

Comments
 (0)