Skip to content

Commit 1ff1908

Browse files
author
Alexander Beedie
committed
Add support for IS [NOT] [form] NORMALIZED
1 parent 36db176 commit 1ff1908

File tree

6 files changed

+181
-12
lines changed

6 files changed

+181
-12
lines changed

src/ast/mod.rs

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ pub use self::trigger::{
8383

8484
pub use self::value::{
8585
escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString,
86-
TrimWhereField, Value,
86+
NormalizationForm, TrimWhereField, Value,
8787
};
8888

8989
use crate::ast::helpers::stmt_data_loading::{
@@ -653,6 +653,12 @@ pub enum Expr {
653653
IsDistinctFrom(Box<Expr>, Box<Expr>),
654654
/// `IS NOT DISTINCT FROM` operator
655655
IsNotDistinctFrom(Box<Expr>, Box<Expr>),
656+
/// `<expr> IS [ NOT ] [ form ] NORMALIZED`
657+
IsNormalized {
658+
expr: Box<Expr>,
659+
form: Option<NormalizationForm>,
660+
negated: bool,
661+
},
656662
/// `[ NOT ] IN (val1, val2, ...)`
657663
InList {
658664
expr: Box<Expr>,
@@ -1118,7 +1124,7 @@ impl fmt::Display for LambdaFunction {
11181124
/// `OneOrManyWithParens` implements `Deref<Target = [T]>` and `IntoIterator`,
11191125
/// so you can call slice methods on it and iterate over items
11201126
/// # Examples
1121-
/// Acessing as a slice:
1127+
/// Accessing as a slice:
11221128
/// ```
11231129
/// # use sqlparser::ast::OneOrManyWithParens;
11241130
/// let one = OneOrManyWithParens::One("a");
@@ -1419,6 +1425,24 @@ impl fmt::Display for Expr {
14191425
if *regexp { "REGEXP" } else { "RLIKE" },
14201426
pattern
14211427
),
1428+
Expr::IsNormalized {
1429+
expr,
1430+
form,
1431+
negated,
1432+
} => {
1433+
let not_ = if *negated { "NOT " } else { "" };
1434+
if form.is_none() {
1435+
write!(f, "{} IS {}NORMALIZED", expr, not_)
1436+
} else {
1437+
write!(
1438+
f,
1439+
"{} IS {}{} NORMALIZED",
1440+
expr,
1441+
not_,
1442+
form.as_ref().unwrap()
1443+
)
1444+
}
1445+
}
14221446
Expr::SimilarTo {
14231447
negated,
14241448
expr,
@@ -7749,7 +7773,7 @@ where
77497773
/// ```sql
77507774
/// EXPLAIN (ANALYZE, VERBOSE TRUE, FORMAT TEXT) SELECT * FROM my_table;
77517775
///
7752-
/// VACCUM (VERBOSE, ANALYZE ON, PARALLEL 10) my_table;
7776+
/// VACUUM (VERBOSE, ANALYZE ON, PARALLEL 10) my_table;
77537777
/// ```
77547778
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
77557779
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]

src/ast/spans.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1324,6 +1324,12 @@ impl Spanned for Expr {
13241324
escape_char: _,
13251325
any: _,
13261326
} => expr.span().union(&pattern.span()),
1327+
Expr::RLike { .. } => Span::empty(),
1328+
Expr::IsNormalized {
1329+
expr,
1330+
form: _,
1331+
negated: _,
1332+
} => expr.span(),
13271333
Expr::SimilarTo {
13281334
negated: _,
13291335
expr,
@@ -1359,7 +1365,6 @@ impl Spanned for Expr {
13591365
Expr::Array(array) => array.span(),
13601366
Expr::MatchAgainst { .. } => Span::empty(),
13611367
Expr::JsonAccess { value, path } => value.span().union(&path.span()),
1362-
Expr::RLike { .. } => Span::empty(),
13631368
Expr::AnyOp {
13641369
left,
13651370
compare_op: _,

src/ast/value.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,27 @@ impl fmt::Display for DateTimeField {
270270
}
271271
}
272272

273+
/// A Unicode normalization form, as written in
/// `<expr> IS [ NOT ] [ form ] NORMALIZED`.
///
/// The Unicode Standard defines four normalization forms; see
/// [Unicode Normalization Forms](https://unicode.org/reports/tr15/).
/// The type is a plain tag with no payload, so it is `Copy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum NormalizationForm {
    /// Canonical decomposition, followed by canonical composition.
    NFC,
    /// Canonical decomposition.
    NFD,
    /// Compatibility decomposition, followed by canonical composition.
    NFKC,
    /// Compatibility decomposition.
    NFKD,
}

impl fmt::Display for NormalizationForm {
    /// Writes the form exactly as it is spelled in SQL (`NFC`, `NFD`,
    /// `NFKC` or `NFKD`).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // The names are constant strings, so no format machinery is needed.
        let name = match self {
            NormalizationForm::NFC => "NFC",
            NormalizationForm::NFD => "NFD",
            NormalizationForm::NFKC => "NFKC",
            NormalizationForm::NFKD => "NFKD",
        };
        f.write_str(name)
    }
}
293+
273294
pub struct EscapeQuotedString<'a> {
274295
string: &'a str,
275296
quote: char,

src/keywords.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,8 +287,8 @@ define_keywords!(
287287
ENCODING,
288288
ENCRYPTION,
289289
END,
290-
END_EXEC = "END-EXEC",
291290
ENDPOINT,
291+
END_EXEC = "END-EXEC",
292292
END_FRAME,
293293
END_PARTITION,
294294
ENFORCED,
@@ -539,6 +539,7 @@ define_keywords!(
539539
NOORDER,
540540
NOREPLICATION,
541541
NORMALIZE,
542+
NORMALIZED,
542543
NOSCAN,
543544
NOSUPERUSER,
544545
NOT,

src/parser/mod.rs

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3183,9 +3183,15 @@ impl<'a> Parser<'a> {
31833183
{
31843184
let expr2 = self.parse_expr()?;
31853185
Ok(Expr::IsNotDistinctFrom(Box::new(expr), Box::new(expr2)))
3186+
} else if let Ok((form, negated)) = self.parse_unicode_is_normalized() {
3187+
Ok(Expr::IsNormalized {
3188+
expr: Box::new(expr),
3189+
form,
3190+
negated,
3191+
})
31863192
} else {
31873193
self.expected(
3188-
"[NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS",
3194+
"[NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS",
31893195
self.peek_token(),
31903196
)
31913197
}
@@ -3850,7 +3856,7 @@ impl<'a> Parser<'a> {
38503856
/// If the current token is the `expected` keyword, consume the token.
38513857
/// Otherwise, return an error.
38523858
///
3853-
// todo deprecate infavor of expected_keyword_is
3859+
// todo deprecate in favor of expected_keyword_is
38543860
pub fn expect_keyword(&mut self, expected: Keyword) -> Result<TokenWithSpan, ParserError> {
38553861
if self.parse_keyword(expected) {
38563862
Ok(self.get_current_token().clone())
@@ -8452,6 +8458,42 @@ impl<'a> Parser<'a> {
84528458
}
84538459
}
84548460

8461+
/// Parse a literal unicode normalization clause
8462+
pub fn parse_unicode_is_normalized(
8463+
&mut self,
8464+
) -> Result<(Option<NormalizationForm>, bool), ParserError> {
8465+
let neg = self.parse_keyword(Keyword::NOT);
8466+
if self.parse_keyword(Keyword::NORMALIZED) {
8467+
return Ok((None, neg));
8468+
}
8469+
let index = self.index;
8470+
let next_token = self.next_token();
8471+
let normalized_form = if let Token::Word(Word {
8472+
value: ref s,
8473+
quote_style: None,
8474+
keyword: Keyword::NoKeyword,
8475+
}) = next_token.token
8476+
{
8477+
match s.to_uppercase().as_str() {
8478+
"NFC" => Some(NormalizationForm::NFC),
8479+
"NFD" => Some(NormalizationForm::NFD),
8480+
"NFKC" => Some(NormalizationForm::NFKC),
8481+
"NFKD" => Some(NormalizationForm::NFKD),
8482+
_ => {
8483+
self.index = index;
8484+
return self.expected("unicode normalization", next_token);
8485+
}
8486+
}
8487+
} else {
8488+
None
8489+
};
8490+
if self.parse_keyword(Keyword::NORMALIZED) {
8491+
return Ok((normalized_form, neg));
8492+
}
8493+
self.index = index;
8494+
self.expected("unicode normalization", self.peek_token())
8495+
}
8496+
84558497
pub fn parse_enum_values(&mut self) -> Result<Vec<EnumMember>, ParserError> {
84568498
self.expect_token(&Token::LParen)?;
84578499
let values = self.parse_comma_separated(|parser| {
@@ -8957,7 +8999,7 @@ impl<'a> Parser<'a> {
89578999
}
89589000
}
89599001

8960-
/// Parse a table object for insetion
9002+
/// Parse a table object for insertion
89619003
/// e.g. `some_database.some_table` or `FUNCTION some_table_func(...)`
89629004
pub fn parse_table_object(&mut self) -> Result<TableObject, ParserError> {
89639005
if self.dialect.supports_insert_table_function() && self.parse_keyword(Keyword::FUNCTION) {
@@ -11867,7 +11909,7 @@ impl<'a> Parser<'a> {
1186711909
} else {
1186811910
let mut name = self.parse_grantee_name()?;
1186911911
if self.consume_token(&Token::Colon) {
11870-
// Redshift supports namespace prefix for extenrnal users and groups:
11912+
// Redshift supports namespace prefix for external users and groups:
1187111913
// <Namespace>:<GroupName> or <Namespace>:<UserName>
1187211914
// https://docs.aws.amazon.com/redshift/latest/mgmt/redshift-iam-access-control-native-idp.html
1187311915
let ident = self.parse_identifier()?;
@@ -12863,7 +12905,7 @@ impl<'a> Parser<'a> {
1286312905
Ok(WithFill { from, to, step })
1286412906
}
1286512907

12866-
// Parse a set of comma seperated INTERPOLATE expressions (ClickHouse dialect)
12908+
// Parse a set of comma separated INTERPOLATE expressions (ClickHouse dialect)
1286712909
// that follow the INTERPOLATE keyword in an ORDER BY clause with the WITH FILL modifier
1286812910
pub fn parse_interpolations(&mut self) -> Result<Option<Interpolate>, ParserError> {
1286912911
if !self.parse_keyword(Keyword::INTERPOLATE) {
@@ -14372,7 +14414,7 @@ mod tests {
1437214414
assert_eq!(
1437314415
ast,
1437414416
Err(ParserError::ParserError(
14375-
"Expected: [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column: 16"
14417+
"Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: a at Line: 1, Column: 16"
1437614418
.to_string()
1437714419
))
1437814420
);

tests/sqlparser_common.rs

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9296,6 +9296,46 @@ fn parse_is_boolean() {
92969296
verified_expr(sql)
92979297
);
92989298

9299+
let sql = "a IS NORMALIZED";
9300+
assert_eq!(
9301+
IsNormalized {
9302+
expr: Box::new(Identifier(Ident::new("a"))),
9303+
form: None,
9304+
negated: false,
9305+
},
9306+
verified_expr(sql)
9307+
);
9308+
9309+
let sql = "a IS NOT NORMALIZED";
9310+
assert_eq!(
9311+
IsNormalized {
9312+
expr: Box::new(Identifier(Ident::new("a"))),
9313+
form: None,
9314+
negated: true,
9315+
},
9316+
verified_expr(sql)
9317+
);
9318+
9319+
let sql = "a IS NFKC NORMALIZED";
9320+
assert_eq!(
9321+
IsNormalized {
9322+
expr: Box::new(Identifier(Ident::new("a"))),
9323+
form: Some(NormalizationForm::NFKC),
9324+
negated: false,
9325+
},
9326+
verified_expr(sql)
9327+
);
9328+
9329+
let sql = "a IS NOT NFKD NORMALIZED";
9330+
assert_eq!(
9331+
IsNormalized {
9332+
expr: Box::new(Identifier(Ident::new("a"))),
9333+
form: Some(NormalizationForm::NFKD),
9334+
negated: true,
9335+
},
9336+
verified_expr(sql)
9337+
);
9338+
92999339
let sql = "a IS UNKNOWN";
93009340
assert_eq!(
93019341
IsUnknown(Box::new(Identifier(Ident::new("a")))),
@@ -9314,14 +9354,50 @@ fn parse_is_boolean() {
93149354
verified_stmt("SELECT f FROM foo WHERE field IS FALSE");
93159355
verified_stmt("SELECT f FROM foo WHERE field IS NOT FALSE");
93169356

9357+
verified_stmt("SELECT f FROM foo WHERE field IS NORMALIZED");
9358+
verified_stmt("SELECT f FROM foo WHERE field IS NFC NORMALIZED");
9359+
verified_stmt("SELECT f FROM foo WHERE field IS NFD NORMALIZED");
9360+
verified_stmt("SELECT f FROM foo WHERE field IS NOT NORMALIZED");
9361+
verified_stmt("SELECT f FROM foo WHERE field IS NOT NFKC NORMALIZED");
9362+
93179363
verified_stmt("SELECT f FROM foo WHERE field IS UNKNOWN");
93189364
verified_stmt("SELECT f FROM foo WHERE field IS NOT UNKNOWN");
93199365

93209366
let sql = "SELECT f from foo where field is 0";
93219367
let res = parse_sql_statements(sql);
93229368
assert_eq!(
93239369
ParserError::ParserError(
9324-
"Expected: [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: 0"
9370+
"Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: 0"
9371+
.to_string()
9372+
),
9373+
res.unwrap_err()
9374+
);
9375+
9376+
let sql = "SELECT s, s IS XYZ NORMALIZED FROM foo";
9377+
let res = parse_sql_statements(sql);
9378+
assert_eq!(
9379+
ParserError::ParserError(
9380+
"Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: XYZ"
9381+
.to_string()
9382+
),
9383+
res.unwrap_err()
9384+
);
9385+
9386+
let sql = "SELECT s, s IS NFKC FROM foo";
9387+
let res = parse_sql_statements(sql);
9388+
assert_eq!(
9389+
ParserError::ParserError(
9390+
"Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: NFKC"
9391+
.to_string()
9392+
),
9393+
res.unwrap_err()
9394+
);
9395+
9396+
let sql = "SELECT s, s IS TRIM(' NFKC ') FROM foo";
9397+
let res = parse_sql_statements(sql);
9398+
assert_eq!(
9399+
ParserError::ParserError(
9400+
"Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: TRIM"
93259401
.to_string()
93269402
),
93279403
res.unwrap_err()

0 commit comments

Comments
 (0)