Skip to content

Commit 82ffd82

Browse files
goldmedalCopilot
andauthored
Allow to set up the default null ordering (#3)
* add default_null_ordering config * add test for different config * Update datafusion/sql/src/planner.rs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * update doc * fix sqllogictest --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent ae82756 commit 82ffd82

File tree

9 files changed

+149
-5
lines changed

9 files changed

+149
-5
lines changed

datafusion/common/src/config.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,11 @@ config_namespace! {
259259

260260
/// Specifies the recursion depth limit when parsing complex SQL Queries
261261
pub recursion_limit: usize, default = 50
262+
263+
/// Specifies the default null ordering for query results.
264+
/// By default, `asc_reverse` is used to follow Postgres's behavior.
265+
/// postgres rule: <https://www.postgresql.org/docs/current/queries-order.html>
266+
pub default_null_ordering: String, default = "asc_reverse".to_string()
262267
}
263268
}
264269

datafusion/core/src/execution/session_state.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,10 @@ impl SessionState {
597597
.enable_options_value_normalization,
598598
support_varchar_with_length: sql_parser_options.support_varchar_with_length,
599599
collect_spans: sql_parser_options.collect_spans,
600+
default_null_ordering: sql_parser_options
601+
.default_null_ordering
602+
.as_str()
603+
.into(),
600604
}
601605
}
602606

datafusion/sql/src/expr/order_by.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
102102
expr_vec.push(Sort::new(
103103
expr,
104104
asc,
105-
// When asc is true, by default nulls last to be consistent with postgres
106-
// postgres rule: https://www.postgresql.org/docs/current/queries-order.html
107-
nulls_first.unwrap_or(!asc),
105+
nulls_first.unwrap_or(self.options.default_null_ordering.eval(asc)),
108106
))
109107
}
110108
Ok(expr_vec)

datafusion/sql/src/planner.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
//! [`SqlToRel`]: SQL Query Planner (produces [`LogicalPlan`] from SQL AST)
1919
use std::collections::HashMap;
20+
use std::str::FromStr;
2021
use std::sync::Arc;
2122
use std::vec;
2223

@@ -54,6 +55,8 @@ pub struct ParserOptions {
5455
pub enable_options_value_normalization: bool,
5556
/// Whether to collect spans
5657
pub collect_spans: bool,
58+
/// Default null ordering for sorting expressions.
59+
pub default_null_ordering: NullOrdering,
5760
}
5861

5962
impl ParserOptions {
@@ -74,6 +77,9 @@ impl ParserOptions {
7477
support_varchar_with_length: true,
7578
enable_options_value_normalization: false,
7679
collect_spans: false,
80+
// By default, `asc_reverse` is used to follow Postgres's behavior.
81+
// postgres rule: https://www.postgresql.org/docs/current/queries-order.html
82+
default_null_ordering: NullOrdering::AscReverse,
7783
}
7884
}
7985

@@ -122,6 +128,12 @@ impl ParserOptions {
122128
self.collect_spans = value;
123129
self
124130
}
131+
132+
/// Sets the `default_null_ordering` option.
133+
pub fn with_default_null_ordering(mut self, value: NullOrdering) -> Self {
134+
self.default_null_ordering = value;
135+
self
136+
}
125137
}
126138

127139
impl Default for ParserOptions {
@@ -139,10 +151,60 @@ impl From<&SqlParserOptions> for ParserOptions {
139151
enable_options_value_normalization: options
140152
.enable_options_value_normalization,
141153
collect_spans: options.collect_spans,
154+
default_null_ordering: options.default_null_ordering.as_str().into(),
142155
}
143156
}
144157
}
145158

159+
/// Default placement of NULL values for sort expressions that do not specify
/// a null placement explicitly.
///
/// [`NullOrdering::eval`] combines a variant with the sort direction to
/// produce the `nulls_first` flag:
///
/// | variant       | ascending   | descending  |
/// |---------------|-------------|-------------|
/// | `AscReverse`  | nulls last  | nulls first |
/// | `DescReverse` | nulls first | nulls last  |
/// | `NullsFirst`  | nulls first | nulls first |
/// | `NullsLast`   | nulls last  | nulls last  |
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NullOrdering {
    /// Nulls appear last for ascending sorts and first for descending sorts.
    AscReverse,
    /// Nulls appear first for ascending sorts and last for descending sorts.
    DescReverse,
    /// Nulls appear first, regardless of the sort direction.
    NullsFirst,
    /// Nulls appear last, regardless of the sort direction.
    NullsLast,
}

impl NullOrdering {
    /// Resolves this null ordering for a concrete sort direction.
    ///
    /// `asc` is `true` for an ascending sort and `false` for a descending one.
    ///
    /// # Returns
    /// * `true` if nulls should appear first.
    /// * `false` if nulls should appear last.
    pub fn eval(&self, asc: bool) -> bool {
        match self {
            // Direction-dependent variants: the result flips with `asc`.
            Self::AscReverse => !asc,
            Self::DescReverse => asc,
            // Fixed variants: the sort direction is irrelevant.
            Self::NullsFirst => true,
            Self::NullsLast => false,
        }
    }
}
187+
188+
impl FromStr for NullOrdering {
189+
type Err = DataFusionError;
190+
191+
fn from_str(s: &str) -> Result<Self> {
192+
match s {
193+
"asc_reverse" => Ok(Self::AscReverse),
194+
"desc_reverse" => Ok(Self::DescReverse),
195+
"nulls_first" => Ok(Self::NullsFirst),
196+
"nulls_last" => Ok(Self::NullsLast),
197+
_ => plan_err!("Unknown null ordering: {s}"),
198+
}
199+
}
200+
}
201+
202+
impl From<&str> for NullOrdering {
203+
fn from(s: &str) -> Self {
204+
Self::from_str(s).unwrap_or(Self::AscReverse)
205+
}
206+
}
207+
146208
/// Ident Normalizer
147209
#[derive(Debug)]
148210
pub struct IdentNormalizer {

datafusion/sql/src/statement.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1345,7 +1345,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
13451345
)
13461346
.unwrap();
13471347
let asc = order_by_expr.asc.unwrap_or(true);
1348-
let nulls_first = order_by_expr.nulls_first.unwrap_or(!asc);
1348+
let nulls_first = order_by_expr
1349+
.nulls_first
1350+
.unwrap_or(self.options.default_null_ordering.eval(asc));
13491351

13501352
SortExpr::new(ordered_expr, asc, nulls_first)
13511353
})

datafusion/sql/tests/sql_integration.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ use datafusion_expr::{
3636
use datafusion_functions::{string, unicode};
3737
use datafusion_sql::{
3838
parser::DFParser,
39-
planner::{ParserOptions, SqlToRel},
39+
planner::{NullOrdering, ParserOptions, SqlToRel},
4040
};
4141

4242
use crate::common::{CustomExprPlanner, CustomTypePlanner, MockSessionState};
@@ -94,6 +94,7 @@ fn parse_decimals() {
9494
support_varchar_with_length: false,
9595
enable_options_value_normalization: false,
9696
collect_spans: false,
97+
default_null_ordering: NullOrdering::AscReverse,
9798
},
9899
);
99100
}
@@ -150,6 +151,7 @@ fn parse_ident_normalization() {
150151
support_varchar_with_length: false,
151152
enable_options_value_normalization: false,
152153
collect_spans: false,
154+
default_null_ordering: NullOrdering::AscReverse,
153155
},
154156
);
155157
if plan.is_ok() {

datafusion/sqllogictest/test_files/information_schema.slt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ datafusion.optimizer.repartition_windows true
259259
datafusion.optimizer.skip_failed_rules false
260260
datafusion.optimizer.top_down_join_key_reordering true
261261
datafusion.sql_parser.collect_spans false
262+
datafusion.sql_parser.default_null_ordering asc_reverse
262263
datafusion.sql_parser.dialect generic
263264
datafusion.sql_parser.enable_ident_normalization true
264265
datafusion.sql_parser.enable_options_value_normalization false
@@ -356,6 +357,7 @@ datafusion.optimizer.repartition_windows true Should DataFusion repartition data
356357
datafusion.optimizer.skip_failed_rules false When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail
357358
datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys
358359
datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected and recorded in the logical plan nodes.
360+
datafusion.sql_parser.default_null_ordering asc_reverse Specifies the default null ordering for query results By default, `asc_reverse` is used to follow Postgres's behavior. postgres rule: <https://www.postgresql.org/docs/current/queries-order.html>
359361
datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks.
360362
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
361363
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.

datafusion/sqllogictest/test_files/order.slt

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,74 @@ NULL three
9494
1 one
9595
2 two
9696

97+
statement ok
98+
set datafusion.sql_parser.default_null_ordering = 'desc_reverse';
99+
100+
# test asc with `desc_reverse` null ordering
101+
102+
query IT
103+
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num
104+
----
105+
NULL three
106+
1 one
107+
2 two
108+
109+
# test desc with `desc_reverse` null ordering
110+
111+
query IT
112+
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC
113+
----
114+
2 two
115+
1 one
116+
NULL three
117+
118+
statement ok
119+
set datafusion.sql_parser.default_null_ordering = 'nulls_first';
120+
121+
# test asc with `nulls_first` null ordering
122+
123+
query IT
124+
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num
125+
----
126+
NULL three
127+
1 one
128+
2 two
129+
130+
# test desc with `nulls_first` null ordering
131+
132+
query IT
133+
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC
134+
----
135+
NULL three
136+
2 two
137+
1 one
138+
139+
140+
statement ok
141+
set datafusion.sql_parser.default_null_ordering = 'nulls_last';
142+
143+
# test asc with `nulls_last` null ordering
144+
145+
query IT
146+
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num
147+
----
148+
1 one
149+
2 two
150+
NULL three
151+
152+
# test desc with `nulls_last` null ordering
153+
154+
query IT
155+
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC
156+
----
157+
2 two
158+
1 one
159+
NULL three
160+
161+
# reset to default null ordering
162+
statement ok
163+
set datafusion.sql_parser.default_null_ordering = 'asc_reverse';
164+
97165
# sort
98166

99167
statement ok

docs/source/user-guide/configs.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,3 +129,4 @@ Environment variables are read during `SessionConfig` initialisation so they mus
129129
| datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. |
130130
| datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected and recorded in the logical plan nodes. |
131131
| datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries |
132+
| datafusion.sql_parser.default_null_ordering | asc_reverse | Specifies the default null ordering for query results. By default, `asc_reverse` is used to follow Postgres's behavior. postgres rule: <https://www.postgresql.org/docs/current/queries-order.html> |

0 commit comments

Comments
 (0)