Skip to content

Commit

Permalink
sql-pretty: add pretty printing crate
Browse files Browse the repository at this point in the history
This is copied from github.com/mjibson/mzfmt but now integrated directly
into our repo. Refactor the parser tests to reuse the datadriven
statements for verification here. That will prevent future syntax
changes from lagging behind in their pretty implementation.

Integration into `SHOW` and other commands in future commits.
  • Loading branch information
maddyblue committed Oct 10, 2023
1 parent adf410d commit c7ec88e
Show file tree
Hide file tree
Showing 11 changed files with 1,022 additions and 118 deletions.
37 changes: 36 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ members = [
"src/sql",
"src/sql-lexer",
"src/sql-parser",
"src/sql-pretty",
"src/sqllogictest",
"src/stash",
"src/stash-debug",
Expand Down
3 changes: 3 additions & 0 deletions deny.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ skip = [
# our crates use `bitflags 1.3.2`
# TODO: fork `tower-http` and swap to use older bitflags
{ name = "bitflags", version = "1.3.2" },

# `pretty` explicitly chose to use this older version.
{ name = "arrayvec", version = "0.5.2" },
]

# Use `tracing` instead.
Expand Down
6 changes: 4 additions & 2 deletions src/sql-parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ publish = false

[dependencies]
bytesize = "1.1.0"
datadriven = { version = "0.6.0", optional = true }
enum-kinds = "0.5.1"
itertools = "0.10.5"
mz-ore = { path = "../ore", default-features = false, features = ["stack"] }
Expand All @@ -17,12 +18,12 @@ phf = { version = "0.11.1", features = ["uncased"] }
serde = { version = "1.0.152", features = ["derive"] }
tracing = "0.1.37"
uncased = "0.9.7"
unicode-width = { version = "0.1.10", optional = true }
workspace-hack = { version = "0.0.0", path = "../workspace-hack", optional = true }

[dev-dependencies]
datadriven = "0.6.0"
mz-ore = { path = "../ore", default-features = false, features = ["test"] }
unicode-width = "0.1.10"
mz-sql-parser = { path = ".", features = ["test"] }

[build-dependencies]
anyhow = "1.0.66"
Expand All @@ -31,6 +32,7 @@ mz-walkabout = { path = "../walkabout", default-features = false }

[features]
default = ["workspace-hack"]
test = ["datadriven", "unicode-width"]

[package.metadata.cargo-udeps.ignore]
normal = ["workspace-hack"]
115 changes: 115 additions & 0 deletions src/sql-parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,118 @@

pub mod ast;
pub mod parser;

#[cfg(feature = "test")]
pub fn datadriven_testcase(tc: &datadriven::TestCase) -> String {
use crate::ast::display::AstDisplay;
use crate::ast::{Expr, Statement};
use datadriven::TestCase;
use mz_ore::collections::CollectionExt;
use mz_ore::fmt::FormatBuffer;
use unicode_width::UnicodeWidthStr;

fn render_error(sql: &str, e: parser::ParserError) -> String {
let mut s = format!("error: {}\n", e.message);

// Do our best to emulate psql in rendering a caret pointing at the
// offending character in the query. This makes it possible to detect
// incorrect error positions by visually scanning the test files.
let end = sql.len();
let line_start = sql[..e.pos].rfind('\n').map(|p| p + 1).unwrap_or(0);
let line_end = sql[e.pos..].find('\n').map(|p| e.pos + p).unwrap_or(end);
writeln!(s, "{}", &sql[line_start..line_end]);
for _ in 0..sql[line_start..e.pos].width() {
write!(s, " ");
}
writeln!(s, "^");

s
}

fn parse_statement(tc: &TestCase) -> String {
let input = tc.input.strip_suffix('\n').unwrap_or(&tc.input);
match parser::parse_statements(input) {
Ok(s) => {
if s.len() != 1 {
return "expected exactly one statement\n".to_string();
}
let stmt = s.into_element().ast;
for printed in [stmt.to_ast_string(), stmt.to_ast_string_stable()] {
let mut parsed = match parser::parse_statements(&printed) {
Ok(parsed) => parsed.into_element().ast,
Err(err) => panic!("reparse failed: {}: {}\n", stmt, err),
};
match (&mut parsed, &stmt) {
// DECLARE remembers the original SQL. Erase that here so it can differ if
// needed (for example, quoting identifiers vs not). This is ok because we
// still compare that the resulting ASTs are identical, and it's valid for
// those to come from different original strings.
(Statement::Declare(parsed), Statement::Declare(stmt)) => {
parsed.sql = stmt.sql.clone();
}
_ => {}
}
if parsed != stmt {
panic!(
"reparse comparison failed:\n{:?}\n!=\n{:?}\n{printed}\n",
stmt, parsed
);
}
}
if tc.args.get("roundtrip").is_some() {
format!("{}\n", stmt)
} else {
// TODO(justin): it would be nice to have a middle-ground between this
// all-on-one-line and {:#?}'s huge number of lines.
format!("{}\n=>\n{:?}\n", stmt, stmt)
}
}
Err(e) => render_error(input, e.error),
}
}

fn parse_scalar(tc: &TestCase) -> String {
let input = tc.input.trim();
match parser::parse_expr(input) {
Ok(s) => {
for printed in [s.to_ast_string(), s.to_ast_string_stable()] {
match parser::parse_expr(&printed) {
Ok(parsed) => {
// TODO: We always coerce the double colon operator into a Cast expr instead
// of keeping it as an Op (see parse_pg_cast). Expr::Cast always prints
// itself as double colon. We're thus unable to perfectly roundtrip
// `CAST(..)`. We could fix this by keeping "::" as a binary operator and
// teaching func.rs how to handle it, similar to how that file handles "~~"
// (without the parser converting that operator directly into an
// Expr::Like).
if !matches!(parsed, Expr::Cast { .. }) {
if parsed != s {
panic!(
"reparse comparison failed: {input} != {s}\n{:?}\n!=\n{:?}\n{printed}\n",
s, parsed
);
}
}
}
Err(err) => panic!("reparse failed: {printed}: {err}\n{s:?}"),
}
}

if tc.args.get("roundtrip").is_some() {
format!("{}\n", s)
} else {
// TODO(justin): it would be nice to have a middle-ground between this
// all-on-one-line and {:#?}'s huge number of lines.
format!("{:?}\n", s)
}
}
Err(e) => render_error(input, e),
}
}

match tc.directive.as_str() {
"parse-statement" => parse_statement(tc),
"parse-scalar" => parse_scalar(tc),
dir => panic!("unhandled directive {}", dir),
}
}
120 changes: 5 additions & 115 deletions src/sql-parser/tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,131 +89,21 @@
use std::error::Error;
use std::iter;

use datadriven::walk;
use itertools::Itertools;
use mz_ore::collections::CollectionExt;
use mz_ore::fmt::FormatBuffer;
use mz_sql_parser::ast::display::AstDisplay;
use mz_sql_parser::ast::visit::Visit;
use mz_sql_parser::ast::visit_mut::{self, VisitMut};
use mz_sql_parser::ast::{AstInfo, Expr, Ident, Raw, RawDataType, RawItemName, Statement};
use mz_sql_parser::ast::{AstInfo, Expr, Ident, Raw, RawDataType, RawItemName};
use mz_sql_parser::datadriven_testcase;
use mz_sql_parser::parser::{
self, parse_statements, parse_statements_with_limit, ParserError, MAX_STATEMENT_BATCH_SIZE,
self, parse_statements, parse_statements_with_limit, MAX_STATEMENT_BATCH_SIZE,
};
use unicode_width::UnicodeWidthStr;

#[mz_ore::test]
#[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
fn datadriven() {
use datadriven::{walk, TestCase};

fn render_error(sql: &str, e: ParserError) -> String {
let mut s = format!("error: {}\n", e.message);

// Do our best to emulate psql in rendering a caret pointing at the
// offending character in the query. This makes it possible to detect
// incorrect error positions by visually scanning the test files.
let end = sql.len();
let line_start = sql[..e.pos].rfind('\n').map(|p| p + 1).unwrap_or(0);
let line_end = sql[e.pos..].find('\n').map(|p| e.pos + p).unwrap_or(end);
writeln!(s, "{}", &sql[line_start..line_end]);
for _ in 0..sql[line_start..e.pos].width() {
write!(s, " ");
}
writeln!(s, "^");

s
}

fn parse_statement(tc: &TestCase) -> String {
let input = tc.input.strip_suffix('\n').unwrap_or(&tc.input);
match parser::parse_statements(input) {
Ok(s) => {
if s.len() != 1 {
return "expected exactly one statement\n".to_string();
}
let stmt = s.into_element().ast;
for printed in [stmt.to_ast_string(), stmt.to_ast_string_stable()] {
let mut parsed = match parser::parse_statements(&printed) {
Ok(parsed) => parsed.into_element().ast,
Err(err) => panic!("reparse failed: {}: {}\n", stmt, err),
};
match (&mut parsed, &stmt) {
// DECLARE remembers the original SQL. Erase that here so it can differ if
// needed (for example, quoting identifiers vs not). This is ok because we
// still compare that the resulting ASTs are identical, and it's valid for
// those to come from different original strings.
(Statement::Declare(parsed), Statement::Declare(stmt)) => {
parsed.sql = stmt.sql.clone();
}
_ => {}
}
if parsed != stmt {
panic!(
"reparse comparison failed:\n{:?}\n!=\n{:?}\n{printed}\n",
stmt, parsed
);
}
}
if tc.args.get("roundtrip").is_some() {
format!("{}\n", stmt)
} else {
// TODO(justin): it would be nice to have a middle-ground between this
// all-on-one-line and {:#?}'s huge number of lines.
format!("{}\n=>\n{:?}\n", stmt, stmt)
}
}
Err(e) => render_error(input, e.error),
}
}

fn parse_scalar(tc: &TestCase) -> String {
let input = tc.input.trim();
match parser::parse_expr(input) {
Ok(s) => {
for printed in [s.to_ast_string(), s.to_ast_string_stable()] {
match parser::parse_expr(&printed) {
Ok(parsed) => {
// TODO: We always coerce the double colon operator into a Cast expr instead
// of keeping it as an Op (see parse_pg_cast). Expr::Cast always prints
// itself as double colon. We're thus unable to perfectly roundtrip
// `CAST(..)`. We could fix this by keeping "::" as a binary operator and
// teaching func.rs how to handle it, similar to how that file handles "~~"
// (without the parser converting that operator directly into an
// Expr::Like).
if !matches!(parsed, Expr::Cast { .. }) {
if parsed != s {
panic!(
"reparse comparison failed: {input} != {s}\n{:?}\n!=\n{:?}\n{printed}\n",
s, parsed
);
}
}
}
Err(err) => panic!("reparse failed: {printed}: {err}\n{s:?}"),
}
}

if tc.args.get("roundtrip").is_some() {
format!("{}\n", s)
} else {
// TODO(justin): it would be nice to have a middle-ground between this
// all-on-one-line and {:#?}'s huge number of lines.
format!("{:?}\n", s)
}
}
Err(e) => render_error(input, e),
}
}

walk("tests/testdata", |f| {
f.run(|test_case| -> String {
match test_case.directive.as_str() {
"parse-statement" => parse_statement(test_case),
"parse-scalar" => parse_scalar(test_case),
dir => panic!("unhandled directive {}", dir),
}
})
});
walk("tests/testdata", |f| f.run(datadriven_testcase));
}

#[mz_ore::test]
Expand Down
Loading

0 comments on commit c7ec88e

Please sign in to comment.