10 changes: 5 additions & 5 deletions src/ast/mod.rs
@@ -993,16 +993,16 @@ impl fmt::Display for Statement {
             }
             match hive_distribution {
                 HiveDistributionStyle::PARTITIONED { columns } => {
-                    write!(f, " PARTITIONED BY ({})", display_comma_separated(&columns))?;
+                    write!(f, " PARTITIONED BY ({})", display_comma_separated(columns))?;
                 }
                 HiveDistributionStyle::CLUSTERED {
                     columns,
                     sorted_by,
                     num_buckets,
                 } => {
-                    write!(f, " CLUSTERED BY ({})", display_comma_separated(&columns))?;
+                    write!(f, " CLUSTERED BY ({})", display_comma_separated(columns))?;
                     if !sorted_by.is_empty() {
-                        write!(f, " SORTED BY ({})", display_comma_separated(&sorted_by))?;
+                        write!(f, " SORTED BY ({})", display_comma_separated(sorted_by))?;
                     }
                     if *num_buckets > 0 {
                         write!(f, " INTO {} BUCKETS", num_buckets)?;
@@ -1016,8 +1016,8 @@ impl fmt::Display for Statement {
                     write!(
                         f,
                         " SKEWED BY ({})) ON ({})",
-                        display_comma_separated(&columns),
-                        display_comma_separated(&on)
+                        display_comma_separated(columns),
+                        display_comma_separated(on)
                     )?;
                     if *stored_as_directories {
                         write!(f, " STORED AS DIRECTORIES")?;
8 changes: 8 additions & 0 deletions src/ast/operator.rs
@@ -80,6 +80,10 @@ pub enum BinaryOperator {
     PGBitwiseXor,
     PGBitwiseShiftLeft,
     PGBitwiseShiftRight,
+    PGRegexMatch,
+    PGRegexIMatch,
+    PGRegexNotMatch,
+    PGRegexNotIMatch,
 }

 impl fmt::Display for BinaryOperator {
@@ -110,6 +114,10 @@ impl fmt::Display for BinaryOperator {
             BinaryOperator::PGBitwiseXor => "#",
             BinaryOperator::PGBitwiseShiftLeft => "<<",
             BinaryOperator::PGBitwiseShiftRight => ">>",
+            BinaryOperator::PGRegexMatch => "~",
+            BinaryOperator::PGRegexIMatch => "~*",
+            BinaryOperator::PGRegexNotMatch => "!~",
+            BinaryOperator::PGRegexNotIMatch => "!~*",
         })
     }
 }
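The `fmt::Display` impl above maps each new variant back to its SQL spelling, which is what makes round-trip serialization of parsed statements work. A quick sanity-check sketch (assuming only the `sqlparser::ast::BinaryOperator` re-export path used elsewhere in this crate):

use sqlparser::ast::BinaryOperator;

fn main() {
    // Each variant prints as the operator PostgreSQL expects.
    assert_eq!(BinaryOperator::PGRegexMatch.to_string(), "~");
    assert_eq!(BinaryOperator::PGRegexIMatch.to_string(), "~*");
    assert_eq!(BinaryOperator::PGRegexNotMatch.to_string(), "!~");
    assert_eq!(BinaryOperator::PGRegexNotIMatch.to_string(), "!~*");
}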
10 changes: 9 additions & 1 deletion src/parser.rs
@@ -102,7 +102,7 @@ impl<'a> Parser<'a> {

     /// Parse a SQL statement and produce an Abstract Syntax Tree (AST)
     pub fn parse_sql(dialect: &dyn Dialect, sql: &str) -> Result<Vec<Statement>, ParserError> {
-        let mut tokenizer = Tokenizer::new(dialect, &sql);
+        let mut tokenizer = Tokenizer::new(dialect, sql);
         let tokens = tokenizer.tokenize()?;
         let mut parser = Parser::new(tokens, dialect);
         let mut stmts = Vec::new();
@@ -835,6 +835,10 @@ impl<'a> Parser<'a> {
             Token::Sharp if dialect_of!(self is PostgreSqlDialect) => {
                 Some(BinaryOperator::PGBitwiseXor)
             }
+            Token::Tilde => Some(BinaryOperator::PGRegexMatch),
+            Token::TildeAsterisk => Some(BinaryOperator::PGRegexIMatch),
+            Token::ExclamationMarkTilde => Some(BinaryOperator::PGRegexNotMatch),
+            Token::ExclamationMarkTildeAsterisk => Some(BinaryOperator::PGRegexNotIMatch),
             Token::Word(w) => match w.keyword {
                 Keyword::AND => Some(BinaryOperator::And),
                 Keyword::OR => Some(BinaryOperator::Or),
@@ -993,6 +997,10 @@ impl<'a> Parser<'a> {
             | Token::Gt
             | Token::GtEq
             | Token::DoubleEq
+            | Token::Tilde
+            | Token::TildeAsterisk
+            | Token::ExclamationMarkTilde
+            | Token::ExclamationMarkTildeAsterisk
             | Token::Spaceship => Ok(20),
             Token::Pipe => Ok(21),
             Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22),
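Note the precedence choice in the second hunk: the four regex tokens are slotted at precedence 20 alongside the comparison operators, so `a ~ 'x' AND a !~ 'y'` groups as `(a ~ 'x') AND (a !~ 'y')`. A minimal end-to-end sketch of the behavior these hunks enable, using the public `Parser::parse_sql` entry point shown above (the round-trip assertion relies on the `Display` impls from this PR):

use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    // The regex operators now tokenize and parse as ordinary binary expressions...
    let sql = "SELECT name FROM t WHERE name ~* '^a' AND name !~ 'z$'";
    let stmts = Parser::parse_sql(&GenericDialect {}, sql).expect("should parse");
    // ...and serialize back to the same canonical SQL.
    assert_eq!(stmts[0].to_string(), sql);
}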
6 changes: 3 additions & 3 deletions src/test_utils.rs
@@ -64,7 +64,7 @@ impl TestedDialects {
     }

     pub fn parse_sql_statements(&self, sql: &str) -> Result<Vec<Statement>, ParserError> {
-        self.one_of_identical_results(|dialect| Parser::parse_sql(dialect, &sql))
+        self.one_of_identical_results(|dialect| Parser::parse_sql(dialect, sql))
         // To fail the `ensure_multiple_dialects_are_tested` test:
         // Parser::parse_sql(&**self.dialects.first().unwrap(), sql)
     }
@@ -75,11 +75,11 @@ impl TestedDialects {
     /// tree as parsing `canonical`, and that serializing it back to string
     /// results in the `canonical` representation.
     pub fn one_statement_parses_to(&self, sql: &str, canonical: &str) -> Statement {
-        let mut statements = self.parse_sql_statements(&sql).unwrap();
+        let mut statements = self.parse_sql_statements(sql).unwrap();
         assert_eq!(statements.len(), 1);

         if !canonical.is_empty() && sql != canonical {
-            assert_eq!(self.parse_sql_statements(&canonical).unwrap(), statements);
+            assert_eq!(self.parse_sql_statements(canonical).unwrap(), statements);
         }

         let only_statement = statements.pop().unwrap();
66 changes: 64 additions & 2 deletions src/tokenizer.rs
@@ -108,8 +108,14 @@ pub enum Token {
     RArrow,
     /// Sharp `#` used for PostgreSQL Bitwise XOR operator
     Sharp,
-    /// Tilde `~` used for PostgreSQL Bitwise NOT operator
+    /// Tilde `~` used for PostgreSQL Bitwise NOT operator or case-sensitive regular expression match operator
     Tilde,
+    /// `~*`, a case-insensitive regular expression match operator in PostgreSQL
+    TildeAsterisk,
+    /// `!~`, a case-sensitive regular expression non-match operator in PostgreSQL
+    ExclamationMarkTilde,
+    /// `!~*`, a case-insensitive regular expression non-match operator in PostgreSQL
+    ExclamationMarkTildeAsterisk,
     /// `<<`, a bitwise shift left operator in PostgreSQL
     ShiftLeft,
     /// `>>`, a bitwise shift right operator in PostgreSQL
@@ -171,6 +177,9 @@ impl fmt::Display for Token {
             Token::ExclamationMark => f.write_str("!"),
             Token::DoubleExclamationMark => f.write_str("!!"),
             Token::Tilde => f.write_str("~"),
+            Token::TildeAsterisk => f.write_str("~*"),
+            Token::ExclamationMarkTilde => f.write_str("!~"),
+            Token::ExclamationMarkTildeAsterisk => f.write_str("!~*"),
             Token::AtSign => f.write_str("@"),
             Token::ShiftLeft => f.write_str("<<"),
             Token::ShiftRight => f.write_str(">>"),
@@ -486,6 +495,14 @@ impl<'a> Tokenizer<'a> {
                 match chars.peek() {
                     Some('=') => self.consume_and_return(chars, Token::Neq),
                     Some('!') => self.consume_and_return(chars, Token::DoubleExclamationMark),
+                    Some('~') => {
+                        chars.next();
+                        match chars.peek() {
+                            Some('*') => self
+                                .consume_and_return(chars, Token::ExclamationMarkTildeAsterisk),
+                            _ => Ok(Some(Token::ExclamationMarkTilde)),
+                        }
+                    }
                     _ => Ok(Some(Token::ExclamationMark)),
                 }
             }
@@ -535,7 +552,13 @@ impl<'a> Tokenizer<'a> {
                     comment,
                 })))
             }
-            '~' => self.consume_and_return(chars, Token::Tilde),
+            '~' => {
+                chars.next(); // consume the '~'
+                match chars.peek() {
+                    Some('*') => self.consume_and_return(chars, Token::TildeAsterisk),
+                    _ => Ok(Some(Token::Tilde)),
+                }
+            }
             '#' => self.consume_and_return(chars, Token::Sharp),
             '@' => self.consume_and_return(chars, Token::AtSign),
             other => self.consume_and_return(chars, Token::Char(other)),
@@ -1111,6 +1134,45 @@ mod tests {
         compare(expected, tokens);
     }

+    #[test]
+    fn tokenize_pg_regex_match() {
+        let sql = "SELECT col ~ '^a', col ~* '^a', col !~ '^a', col !~* '^a'";
+        let dialect = GenericDialect {};
+        let mut tokenizer = Tokenizer::new(&dialect, sql);
+        let tokens = tokenizer.tokenize().unwrap();
+        let expected = vec![
+            Token::make_keyword("SELECT"),
+            Token::Whitespace(Whitespace::Space),
+            Token::make_word("col", None),
+            Token::Whitespace(Whitespace::Space),
+            Token::Tilde,
+            Token::Whitespace(Whitespace::Space),
+            Token::SingleQuotedString("^a".into()),
+            Token::Comma,
+            Token::Whitespace(Whitespace::Space),
+            Token::make_word("col", None),
+            Token::Whitespace(Whitespace::Space),
+            Token::TildeAsterisk,
+            Token::Whitespace(Whitespace::Space),
+            Token::SingleQuotedString("^a".into()),
+            Token::Comma,
+            Token::Whitespace(Whitespace::Space),
+            Token::make_word("col", None),
+            Token::Whitespace(Whitespace::Space),
+            Token::ExclamationMarkTilde,
+            Token::Whitespace(Whitespace::Space),
+            Token::SingleQuotedString("^a".into()),
+            Token::Comma,
+            Token::Whitespace(Whitespace::Space),
+            Token::make_word("col", None),
+            Token::Whitespace(Whitespace::Space),
+            Token::ExclamationMarkTildeAsterisk,
+            Token::Whitespace(Whitespace::Space),
+            Token::SingleQuotedString("^a".into()),
+        ];
+        compare(expected, tokens);
Review comment (Contributor): 👍 this is quite cool
+    }

     fn compare(expected: Vec<Token>, actual: Vec<Token>) {
         //println!("------------------------------");
         //println!("tokens = {:?}", actual);
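The tokenizer resolves the longest operator first: on `!` it peeks for `~`, and then for `*`, so `!~*` can never split into `!` followed by `~*`. A small sketch exercising that guarantee through the same public tokenizer API the test above uses:

use sqlparser::dialect::GenericDialect;
use sqlparser::tokenizer::{Token, Tokenizer};

fn main() {
    let dialect = GenericDialect {};
    let mut tokenizer = Tokenizer::new(&dialect, "col !~* '^a'");
    let tokens = tokenizer.tokenize().unwrap();
    // "!~*" comes out as a single token, not ExclamationMark + TildeAsterisk.
    assert!(tokens.contains(&Token::ExclamationMarkTildeAsterisk));
    assert!(!tokens.contains(&Token::ExclamationMark));
}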
6 changes: 3 additions & 3 deletions tests/sqlparser_common.rs
@@ -102,7 +102,7 @@ fn parse_insert_sqlite() {
     let dialect = SQLiteDialect {};

     let check = |sql: &str, expected_action: Option<SqliteOnConflict>| match Parser::parse_sql(
-        &dialect, &sql,
+        &dialect, sql,
     )
     .unwrap()
     .pop()
@@ -340,7 +340,7 @@ fn parse_column_aliases() {
     }

     // alias without AS is parsed correctly:
-    one_statement_parses_to("SELECT a.col + 1 newname FROM foo AS a", &sql);
+    one_statement_parses_to("SELECT a.col + 1 newname FROM foo AS a", sql);
 }

 #[test]
@@ -2685,7 +2685,7 @@ fn parse_multiple_statements() {
     let res = parse_sql_statements(&(sql1.to_owned() + ";" + sql2_kw + sql2_rest));
     assert_eq!(
         vec![
-            one_statement_parses_to(&sql1, ""),
+            one_statement_parses_to(sql1, ""),
             one_statement_parses_to(&(sql2_kw.to_owned() + sql2_rest), ""),
         ],
         res.unwrap()
22 changes: 22 additions & 0 deletions tests/sqlparser_postgres.rs
@@ -647,6 +647,28 @@ fn parse_pg_postfix_factorial() {
     }
 }

+#[test]
+fn parse_pg_regex_match_ops() {
+    let pg_regex_match_ops = &[
+        ("~", BinaryOperator::PGRegexMatch),
+        ("~*", BinaryOperator::PGRegexIMatch),
+        ("!~", BinaryOperator::PGRegexNotMatch),
+        ("!~*", BinaryOperator::PGRegexNotIMatch),
+    ];
+
+    for (str_op, op) in pg_regex_match_ops {
+        let select = pg().verified_only_select(&format!("SELECT 'abc' {} '^a'", &str_op));
+        assert_eq!(
+            SelectItem::UnnamedExpr(Expr::BinaryOp {
+                left: Box::new(Expr::Value(Value::SingleQuotedString("abc".into()))),
+                op: op.clone(),
+                right: Box::new(Expr::Value(Value::SingleQuotedString("^a".into()))),
+            }),
+            select.projection[0]
+        );
+    }
+}
+
 fn pg() -> TestedDialects {
     TestedDialects {
         dialects: vec![Box::new(PostgreSqlDialect {})],
4 changes: 2 additions & 2 deletions tests/sqlparser_snowflake.rs
@@ -38,7 +38,7 @@ fn test_snowflake_create_table() {
 fn test_snowflake_single_line_tokenize() {
     let sql = "CREATE TABLE# this is a comment \ntable_1";
     let dialect = SnowflakeDialect {};
-    let mut tokenizer = Tokenizer::new(&dialect, &sql);
+    let mut tokenizer = Tokenizer::new(&dialect, sql);
     let tokens = tokenizer.tokenize().unwrap();

     let expected = vec![
@@ -55,7 +55,7 @@ fn test_snowflake_single_line_tokenize() {
     assert_eq!(expected, tokens);

     let sql = "CREATE TABLE// this is a comment \ntable_1";
-    let mut tokenizer = Tokenizer::new(&dialect, &sql);
+    let mut tokenizer = Tokenizer::new(&dialect, sql);
     let tokens = tokenizer.tokenize().unwrap();

     let expected = vec![