Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[minor]: Fix rank calculation bug when empty order by is seen #8567

Merged
merged 3 commits into from
Dec 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions datafusion/physical-expr/src/window/rank.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,16 @@ impl PartitionEvaluator for RankEvaluator {
// There is no argument, values are order by column values (where rank is calculated)
let range_columns = values;
let last_rank_data = get_row_at_idx(range_columns, row_idx)?;
let empty = self.state.last_rank_data.is_empty();
if empty || self.state.last_rank_data != last_rank_data {
self.state.last_rank_data = last_rank_data;
let new_rank_encountered =
if let Some(state_last_rank_data) = &self.state.last_rank_data {
// if rank data changes, new rank is encountered
state_last_rank_data != &last_rank_data
} else {
// First rank seen
true
};
if new_rank_encountered {
self.state.last_rank_data = Some(last_rank_data);
self.state.last_rank_boundary += self.state.current_group_count;
self.state.current_group_count = 1;
self.state.n_rank += 1;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-expr/src/window/window_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ pub enum WindowFn {
#[derive(Debug, Clone, Default)]
pub struct RankState {
/// The last values for rank as these values change, we increase n_rank
pub last_rank_data: Vec<ScalarValue>,
pub last_rank_data: Option<Vec<ScalarValue>>,
/// The index where last_rank_boundary is started
pub last_rank_boundary: usize,
/// Keep the number of entries in current rank
Expand Down
30 changes: 18 additions & 12 deletions datafusion/sqllogictest/test_files/window.slt
Original file line number Diff line number Diff line change
Expand Up @@ -3801,19 +3801,25 @@ select rank() over (order by 1) rnk from (select 1 a union all select 2 a) x
1
1

# support scalar value in ORDER BY
query I
select dense_rank() over () rnk from (select 1 a union all select 2 a) x
----
1
1

# support scalar value in both ORDER BY and PARTITION BY, RANK function
# TODO: fix the test, some issue in RANK
#query IIIIII
#select rank() over (partition by 1 order by 1) rnk,
# rank() over (partition by a, 1 order by 1) rnk1,
# rank() over (partition by a, 1 order by a, 1) rnk2,
# rank() over (partition by 1) rnk3,
# rank() over (partition by null) rnk4,
# rank() over (partition by 1, null, a) rnk5
#from (select 1 a union all select 2 a) x
#----
#1 1 1 1 1 1
#1 1 1 1 1 1
query IIIIII
select rank() over (partition by 1 order by 1) rnk,
rank() over (partition by a, 1 order by 1) rnk1,
rank() over (partition by a, 1 order by a, 1) rnk2,
rank() over (partition by 1) rnk3,
rank() over (partition by null) rnk4,
rank() over (partition by 1, null, a) rnk5
from (select 1 a union all select 2 a) x
----
1 1 1 1 1 1
1 1 1 1 1 1

# support scalar value in both ORDER BY and PARTITION BY, ROW_NUMBER function
query IIIIII
Expand Down