Skip to content

Commit d9e170e

Browse files
yoavcloudayman-sigma
authored and committed
Add support for Snowflake column aliases that use SQL keywords (apache#1632)
1 parent ed103fb commit d9e170e

File tree

4 files changed

+163
-56
lines changed

4 files changed

+163
-56
lines changed

src/dialect/mod.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -820,6 +820,20 @@ pub trait Dialect: Debug + Any {
820820
fn supports_set_stmt_without_operator(&self) -> bool {
821821
false
822822
}
823+
824+
/// Returns true if the specified keyword should be parsed as a select item alias.
825+
/// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided
826+
/// to enable looking ahead if needed.
827+
fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool {
828+
explicit || !keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw)
829+
}
830+
831+
/// Returns true if the specified keyword should be parsed as a table factor alias.
832+
/// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided
833+
/// to enable looking ahead if needed.
834+
fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool {
835+
explicit || !keywords::RESERVED_FOR_TABLE_ALIAS.contains(kw)
836+
}
823837
}
824838

825839
/// This represents the operators for which precedence must be defined

src/dialect/snowflake.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,51 @@ impl Dialect for SnowflakeDialect {
251251
fn supports_partiql(&self) -> bool {
252252
true
253253
}
254+
255+
fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
256+
explicit
257+
|| match kw {
258+
// The following keywords can be considered an alias as long as
259+
// they are not followed by other tokens that may change their meaning
260+
// e.g. `SELECT * EXCEPT (col1) FROM tbl`
261+
Keyword::EXCEPT
262+
// e.g. `SELECT 1 LIMIT 5`
263+
| Keyword::LIMIT
264+
// e.g. `SELECT 1 OFFSET 5 ROWS`
265+
| Keyword::OFFSET
266+
// e.g. `INSERT INTO t SELECT 1 RETURNING *`
267+
| Keyword::RETURNING if !matches!(parser.peek_token_ref().token, Token::Comma | Token::EOF) =>
268+
{
269+
false
270+
}
271+
272+
// `FETCH` can be considered an alias as long as it's not followed by `FIRST` or `NEXT`
273+
// which would give it a different meaning, for example: `SELECT 1 FETCH FIRST 10 ROWS` - not an alias
274+
Keyword::FETCH
275+
if parser.peek_keyword(Keyword::FIRST) || parser.peek_keyword(Keyword::NEXT) =>
276+
{
277+
false
278+
}
279+
280+
// Reserved keywords by the Snowflake dialect, which seem to be less restrictive
281+
// than what is listed in `keywords::RESERVED_FOR_COLUMN_ALIAS`. The following
282+
// keywords were tested with this statement: `SELECT 1 <KW>`.
283+
Keyword::FROM
284+
| Keyword::GROUP
285+
| Keyword::HAVING
286+
| Keyword::INTERSECT
287+
| Keyword::INTO
288+
| Keyword::MINUS
289+
| Keyword::ORDER
290+
| Keyword::SELECT
291+
| Keyword::UNION
292+
| Keyword::WHERE
293+
| Keyword::WITH => false,
294+
295+
// Any other word is considered an alias
296+
_ => true,
297+
}
298+
}
254299
}
255300

256301
fn parse_file_staging_command(kw: Keyword, parser: &mut Parser) -> Result<Statement, ParserError> {

src/parser/mod.rs

Lines changed: 75 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -8848,38 +8848,76 @@ impl<'a> Parser<'a> {
88488848
Ok(IdentWithAlias { ident, alias })
88498849
}
88508850

8851-
/// Parse `AS identifier` (or simply `identifier` if it's not a reserved keyword)
8852-
/// Some examples with aliases: `SELECT 1 foo`, `SELECT COUNT(*) AS cnt`,
8853-
/// `SELECT ... FROM t1 foo, t2 bar`, `SELECT ... FROM (...) AS bar`
8851+
/// Optionally parses an alias for a select list item
8852+
fn maybe_parse_select_item_alias(&mut self) -> Result<Option<Ident>, ParserError> {
8853+
fn validator(explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
8854+
parser.dialect.is_select_item_alias(explicit, kw, parser)
8855+
}
8856+
self.parse_optional_alias_inner(None, validator)
8857+
}
8858+
8859+
/// Optionally parses an alias for a table like in `... FROM generate_series(1, 10) AS t (col)`.
8860+
/// In this case, the alias is allowed to optionally name the columns in the table, in
8861+
/// addition to the table itself.
8862+
pub fn maybe_parse_table_alias(&mut self) -> Result<Option<TableAlias>, ParserError> {
8863+
fn validator(explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
8864+
parser.dialect.is_table_factor_alias(explicit, kw, parser)
8865+
}
8866+
match self.parse_optional_alias_inner(None, validator)? {
8867+
Some(name) => {
8868+
let columns = self.parse_table_alias_column_defs()?;
8869+
Ok(Some(TableAlias { name, columns }))
8870+
}
8871+
None => Ok(None),
8872+
}
8873+
}
8874+
8875+
/// Wrapper for parse_optional_alias_inner, left for backwards-compatibility
8876+
/// but new flows should use the context-specific methods such as `maybe_parse_select_item_alias`
8877+
/// and `maybe_parse_table_alias`.
88548878
pub fn parse_optional_alias(
88558879
&mut self,
88568880
reserved_kwds: &[Keyword],
88578881
) -> Result<Option<Ident>, ParserError> {
8882+
fn validator(_explicit: bool, _kw: &Keyword, _parser: &mut Parser) -> bool {
8883+
false
8884+
}
8885+
self.parse_optional_alias_inner(Some(reserved_kwds), validator)
8886+
}
8887+
8888+
/// Parses an optional alias after a SQL element such as a select list item
8889+
/// or a table name.
8890+
///
8891+
/// This method accepts an optional list of reserved keywords or a function
8892+
/// to call to validate if a keyword should be parsed as an alias, to allow
8893+
/// callers to customize the parsing logic based on their context.
8894+
fn parse_optional_alias_inner<F>(
8895+
&mut self,
8896+
reserved_kwds: Option<&[Keyword]>,
8897+
validator: F,
8898+
) -> Result<Option<Ident>, ParserError>
8899+
where
8900+
F: Fn(bool, &Keyword, &mut Parser) -> bool,
8901+
{
88588902
let after_as = self.parse_keyword(Keyword::AS);
8903+
88598904
let next_token = self.next_token();
88608905
match next_token.token {
8861-
// Accept any identifier after `AS` (though many dialects have restrictions on
8862-
// keywords that may appear here). If there's no `AS`: don't parse keywords,
8863-
// which may start a construct allowed in this position, to be parsed as aliases.
8864-
// (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword,
8865-
// not an alias.)
8866-
Token::Word(w) if after_as || !reserved_kwds.contains(&w.keyword) => {
8906+
// By default, a word located after the `AS` keyword is always considered an alias; without
8907+
// `AS`, it is an alias only if a reserved-keyword list was supplied and does not contain it.
8908+
Token::Word(w)
8909+
if after_as || reserved_kwds.is_some_and(|x| !x.contains(&w.keyword)) =>
8910+
{
88678911
Ok(Some(w.into_ident(next_token.span)))
88688912
}
8869-
// MSSQL supports single-quoted strings as aliases for columns
8870-
// We accept them as table aliases too, although MSSQL does not.
8871-
//
8872-
// Note, that this conflicts with an obscure rule from the SQL
8873-
// standard, which we don't implement:
8874-
// https://crate.io/docs/sql-99/en/latest/chapters/07.html#character-string-literal-s
8875-
// "[Obscure Rule] SQL allows you to break a long <character
8876-
// string literal> up into two or more smaller <character string
8877-
// literal>s, split by a <separator> that includes a newline
8878-
// character. When it sees such a <literal>, your DBMS will
8879-
// ignore the <separator> and treat the multiple strings as
8880-
// a single <literal>."
8913+
// This pattern allows for customizing the acceptance of words as aliases based on the caller's
8914+
// context, such as to what SQL element this word is a potential alias of (select item alias, table name
8915+
// alias, etc.) or dialect-specific logic that goes beyond a simple list of reserved keywords.
8916+
Token::Word(w) if validator(after_as, &w.keyword, self) => {
8917+
Ok(Some(w.into_ident(next_token.span)))
8918+
}
8919+
// For backwards-compatibility, we accept quoted strings as aliases regardless of the context.
88818920
Token::SingleQuotedString(s) => Ok(Some(Ident::with_quote('\'', s))),
8882-
// Support for MySql dialect double-quoted string, `AS "HOUR"` for example
88838921
Token::DoubleQuotedString(s) => Ok(Some(Ident::with_quote('\"', s))),
88848922
_ => {
88858923
if after_as {
@@ -8891,23 +8929,6 @@ impl<'a> Parser<'a> {
88918929
}
88928930
}
88938931

8894-
/// Parse `AS identifier` when the AS is describing a table-valued object,
8895-
/// like in `... FROM generate_series(1, 10) AS t (col)`. In this case
8896-
/// the alias is allowed to optionally name the columns in the table, in
8897-
/// addition to the table itself.
8898-
pub fn parse_optional_table_alias(
8899-
&mut self,
8900-
reserved_kwds: &[Keyword],
8901-
) -> Result<Option<TableAlias>, ParserError> {
8902-
match self.parse_optional_alias(reserved_kwds)? {
8903-
Some(name) => {
8904-
let columns = self.parse_table_alias_column_defs()?;
8905-
Ok(Some(TableAlias { name, columns }))
8906-
}
8907-
None => Ok(None),
8908-
}
8909-
}
8910-
89118932
pub fn parse_optional_group_by(&mut self) -> Result<Option<GroupByExpr>, ParserError> {
89128933
if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) {
89138934
let expressions = if self.parse_keyword(Keyword::ALL) {
@@ -10909,7 +10930,7 @@ impl<'a> Parser<'a> {
1090910930
let name = self.parse_object_name(false)?;
1091010931
self.expect_token(&Token::LParen)?;
1091110932
let args = self.parse_optional_args()?;
10912-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
10933+
let alias = self.maybe_parse_table_alias()?;
1091310934
Ok(TableFactor::Function {
1091410935
lateral: true,
1091510936
name,
@@ -10922,7 +10943,7 @@ impl<'a> Parser<'a> {
1092210943
self.expect_token(&Token::LParen)?;
1092310944
let expr = self.parse_expr()?;
1092410945
self.expect_token(&Token::RParen)?;
10925-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
10946+
let alias = self.maybe_parse_table_alias()?;
1092610947
Ok(TableFactor::TableFunction { expr, alias })
1092710948
} else if self.consume_token(&Token::LParen) {
1092810949
// A left paren introduces either a derived table (i.e., a subquery)
@@ -10971,7 +10992,7 @@ impl<'a> Parser<'a> {
1097110992
#[allow(clippy::if_same_then_else)]
1097210993
if !table_and_joins.joins.is_empty() {
1097310994
self.expect_token(&Token::RParen)?;
10974-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
10995+
let alias = self.maybe_parse_table_alias()?;
1097510996
Ok(TableFactor::NestedJoin {
1097610997
table_with_joins: Box::new(table_and_joins),
1097710998
alias,
@@ -10984,7 +11005,7 @@ impl<'a> Parser<'a> {
1098411005
// (B): `table_and_joins` (what we found inside the parentheses)
1098511006
// is a nested join `(foo JOIN bar)`, not followed by other joins.
1098611007
self.expect_token(&Token::RParen)?;
10987-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
11008+
let alias = self.maybe_parse_table_alias()?;
1098811009
Ok(TableFactor::NestedJoin {
1098911010
table_with_joins: Box::new(table_and_joins),
1099011011
alias,
@@ -10998,9 +11019,7 @@ impl<'a> Parser<'a> {
1099811019
// [AS alias])`) as well.
1099911020
self.expect_token(&Token::RParen)?;
1100011021

11001-
if let Some(outer_alias) =
11002-
self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?
11003-
{
11022+
if let Some(outer_alias) = self.maybe_parse_table_alias()? {
1100411023
// Snowflake also allows specifying an alias *after* parens
1100511024
// e.g. `FROM (mytable) AS alias`
1100611025
match &mut table_and_joins.relation {
@@ -11053,7 +11072,7 @@ impl<'a> Parser<'a> {
1105311072
// SELECT * FROM VALUES (1, 'a'), (2, 'b') AS t (col1, col2)
1105411073
// where there are no parentheses around the VALUES clause.
1105511074
let values = SetExpr::Values(self.parse_values(false)?);
11056-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
11075+
let alias = self.maybe_parse_table_alias()?;
1105711076
Ok(TableFactor::Derived {
1105811077
lateral: false,
1105911078
subquery: Box::new(Query {
@@ -11079,7 +11098,7 @@ impl<'a> Parser<'a> {
1107911098
self.expect_token(&Token::RParen)?;
1108011099

1108111100
let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]);
11082-
let alias = match self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS) {
11101+
let alias = match self.maybe_parse_table_alias() {
1108311102
Ok(Some(alias)) => Some(alias),
1108411103
Ok(None) => None,
1108511104
Err(e) => return Err(e),
@@ -11116,7 +11135,7 @@ impl<'a> Parser<'a> {
1111611135
let columns = self.parse_comma_separated(Parser::parse_json_table_column_def)?;
1111711136
self.expect_token(&Token::RParen)?;
1111811137
self.expect_token(&Token::RParen)?;
11119-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
11138+
let alias = self.maybe_parse_table_alias()?;
1112011139
Ok(TableFactor::JsonTable {
1112111140
json_expr,
1112211141
json_path,
@@ -11161,7 +11180,7 @@ impl<'a> Parser<'a> {
1116111180
}
1116211181
}
1116311182

11164-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
11183+
let alias = self.maybe_parse_table_alias()?;
1116511184

1116611185
// MSSQL-specific table hints:
1116711186
let mut with_hints = vec![];
@@ -11339,7 +11358,7 @@ impl<'a> Parser<'a> {
1133911358
} else {
1134011359
Vec::new()
1134111360
};
11342-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
11361+
let alias = self.maybe_parse_table_alias()?;
1134311362
Ok(TableFactor::OpenJsonTable {
1134411363
json_expr,
1134511364
json_path,
@@ -11438,7 +11457,7 @@ impl<'a> Parser<'a> {
1143811457

1143911458
self.expect_token(&Token::RParen)?;
1144011459

11441-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
11460+
let alias = self.maybe_parse_table_alias()?;
1144211461

1144311462
Ok(TableFactor::MatchRecognize {
1144411463
table: Box::new(table),
@@ -11682,7 +11701,7 @@ impl<'a> Parser<'a> {
1168211701
) -> Result<TableFactor, ParserError> {
1168311702
let subquery = self.parse_query()?;
1168411703
self.expect_token(&Token::RParen)?;
11685-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
11704+
let alias = self.maybe_parse_table_alias()?;
1168611705
Ok(TableFactor::Derived {
1168711706
lateral: match lateral {
1168811707
Lateral => true,
@@ -11776,7 +11795,7 @@ impl<'a> Parser<'a> {
1177611795
};
1177711796

1177811797
self.expect_token(&Token::RParen)?;
11779-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
11798+
let alias = self.maybe_parse_table_alias()?;
1178011799
Ok(TableFactor::Pivot {
1178111800
table: Box::new(table),
1178211801
aggregate_functions,
@@ -11798,7 +11817,7 @@ impl<'a> Parser<'a> {
1179811817
self.expect_keyword_is(Keyword::IN)?;
1179911818
let columns = self.parse_parenthesized_column_list(Mandatory, false)?;
1180011819
self.expect_token(&Token::RParen)?;
11801-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
11820+
let alias = self.maybe_parse_table_alias()?;
1180211821
Ok(TableFactor::Unpivot {
1180311822
table: Box::new(table),
1180411823
value,
@@ -12624,7 +12643,7 @@ impl<'a> Parser<'a> {
1262412643
})
1262512644
}
1262612645
expr => self
12627-
.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS)
12646+
.maybe_parse_select_item_alias()
1262812647
.map(|alias| match alias {
1262912648
Some(alias) => SelectItem::ExprWithAlias { expr, alias },
1263012649
None => SelectItem::UnnamedExpr(expr),

tests/sqlparser_snowflake.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3029,3 +3029,32 @@ fn parse_ls_and_rm() {
30293029

30303030
snowflake().verified_stmt(r#"LIST @"STAGE_WITH_QUOTES""#);
30313031
}
3032+
3033+
#[test]
3034+
fn test_sql_keywords_as_select_item_aliases() {
3035+
// Some keywords that should be parsed as an alias
3036+
let unreserved_kws = vec!["CLUSTER", "FETCH", "RETURNING", "LIMIT", "EXCEPT"];
3037+
for kw in unreserved_kws {
3038+
snowflake()
3039+
.one_statement_parses_to(&format!("SELECT 1 {kw}"), &format!("SELECT 1 AS {kw}"));
3040+
}
3041+
3042+
// Some keywords that should not be parsed as an alias
3043+
let reserved_kws = vec![
3044+
"FROM",
3045+
"GROUP",
3046+
"HAVING",
3047+
"INTERSECT",
3048+
"INTO",
3049+
"ORDER",
3050+
"SELECT",
3051+
"UNION",
3052+
"WHERE",
3053+
"WITH",
3054+
];
3055+
for kw in reserved_kws {
3056+
assert!(snowflake()
3057+
.parse_sql_statements(&format!("SELECT 1 {kw}"))
3058+
.is_err());
3059+
}
3060+
}

0 commit comments

Comments
 (0)