From 373a69485269c18c3eabc6c2d945b1869e6454c0 Mon Sep 17 00:00:00 2001 From: Matt Jibson Date: Sun, 8 Oct 2023 07:37:54 +0000 Subject: [PATCH] sql-pretty: add pretty printing crate This is copied from github.com/mjibson/mzfmt but now integrated directly into our repo. Refactor the parser tests to reuse the datadriven statements for verification here. That will prevent future syntax changes from lagging behind in their pretty implementation. Integration into `SHOW` and other commands in future commits. --- Cargo.lock | 36 +- Cargo.toml | 1 + deny.toml | 3 + src/sql-parser/Cargo.toml | 12 +- src/sql-parser/src/lib.rs | 114 ++++ src/sql-parser/tests/sqlparser_common.rs | 120 +--- src/sql-pretty/Cargo.toml | 22 + src/sql-pretty/src/lib.rs | 665 +++++++++++++++++++++++ src/sql-pretty/tests/parser.rs | 128 +++++ 9 files changed, 979 insertions(+), 122 deletions(-) create mode 100644 src/sql-pretty/Cargo.toml create mode 100644 src/sql-pretty/src/lib.rs create mode 100644 src/sql-pretty/tests/parser.rs diff --git a/Cargo.lock b/Cargo.lock index 1bf6c5f44f889..435ff42037065 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -98,6 +98,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + [[package]] name = "arrayvec" version = "0.7.2" @@ -5190,6 +5196,17 @@ dependencies = [ "workspace-hack", ] +[[package]] +name = "mz-sql-pretty" +version = "0.0.0" +dependencies = [ + "datadriven", + "mz-ore", + "mz-sql-parser", + "pretty", + "workspace-hack", +] + [[package]] name = "mz-sqllogictest" version = "0.0.1" @@ -6592,6 +6609,17 @@ dependencies = [ "treeline", ] +[[package]] +name = "pretty" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b55c4d17d994b637e2f4daf6e5dc5d660d209d5642377d675d7a1c3ab69fa579" +dependencies = [ + "arrayvec 0.5.2", + "typed-arena", + "unicode-width", +] + [[package]] name = "pretty-hex" version = "0.3.0" @@ -8808,6 +8836,12 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "typed-arena" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" + [[package]] name = "typed-builder" version = "0.10.0" @@ -8989,7 +9023,7 @@ name = "vte" version = "0.10.1" source = "git+https://github.com/MaterializeInc/vte?rev=45670c47cebd7af050def2f80a307bdeec7caba3#45670c47cebd7af050def2f80a307bdeec7caba3" dependencies = [ - "arrayvec", + "arrayvec 0.7.2", "utf8parse", "vte_generate_state_changes", ] diff --git a/Cargo.toml b/Cargo.toml index 4c8bba8db13e2..d697f06699116 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,6 +69,7 @@ members = [ "src/sql", "src/sql-lexer", "src/sql-parser", + "src/sql-pretty", "src/sqllogictest", "src/stash", "src/stash-debug", diff --git a/deny.toml b/deny.toml index 1e1aadbad0476..21958ee5c506f 100644 --- a/deny.toml +++ b/deny.toml @@ -58,6 +58,9 @@ skip = [ # our crates use `bitflags 1.3.2` # TODO: fork `tower-http` and swap to use older bitflags { name = "bitflags", version = "1.3.2" }, + + # `pretty` explicitly chose to use this older version. + { name = "arrayvec", version = "0.5.2" }, ] # Use `tracing` instead. diff --git a/src/sql-parser/Cargo.toml b/src/sql-parser/Cargo.toml index 8e4228b4dfa4b..5180909ce1c15 100644 --- a/src/sql-parser/Cargo.toml +++ b/src/sql-parser/Cargo.toml @@ -9,20 +9,20 @@ publish = false [dependencies] bytesize = "1.1.0" +datadriven = "0.6.0" enum-kinds = "0.5.1" itertools = "0.10.5" -mz-ore = { path = "../ore", default-features = false, features = ["stack"] } +mz-ore = { path = "../ore", default-features = false, features = [ + "stack", + "test", +] } mz-sql-lexer = { path = "../sql-lexer" } phf = { version = "0.11.1", features = ["uncased"] } serde = { version = "1.0.152", features = ["derive"] } tracing = "0.1.37" uncased = "0.9.7" -workspace-hack = { version = "0.0.0", path = "../workspace-hack", optional = true } - -[dev-dependencies] -datadriven = "0.6.0" -mz-ore = { path = "../ore", default-features = false, features = ["test"] } unicode-width = "0.1.10" +workspace-hack = { version = "0.0.0", path = "../workspace-hack", optional = true } [build-dependencies] anyhow = "1.0.66" diff --git a/src/sql-parser/src/lib.rs b/src/sql-parser/src/lib.rs index 7adf50891e784..16f7f0f57d70f 100644 --- a/src/sql-parser/src/lib.rs +++ b/src/sql-parser/src/lib.rs @@ -105,3 +105,117 @@ pub mod ast; pub mod parser; + +pub fn datadriven_testcase(tc: &datadriven::TestCase) -> String { + use crate::ast::display::AstDisplay; + use crate::ast::{Expr, Statement}; + use datadriven::TestCase; + use mz_ore::collections::CollectionExt; + use mz_ore::fmt::FormatBuffer; + use unicode_width::UnicodeWidthStr; + + fn render_error(sql: &str, e: parser::ParserError) -> String { + let mut s = format!("error: {}\n", e.message); + + // Do our best to emulate psql in rendering a caret pointing at the + // offending character in the query. This makes it possible to detect + // incorrect error positions by visually scanning the test files. + let end = sql.len(); + let line_start = sql[..e.pos].rfind('\n').map(|p| p + 1).unwrap_or(0); + let line_end = sql[e.pos..].find('\n').map(|p| e.pos + p).unwrap_or(end); + writeln!(s, "{}", &sql[line_start..line_end]); + for _ in 0..sql[line_start..e.pos].width() { + write!(s, " "); + } + writeln!(s, "^"); + + s + } + + fn parse_statement(tc: &TestCase) -> String { + let input = tc.input.strip_suffix('\n').unwrap_or(&tc.input); + match parser::parse_statements(input) { + Ok(s) => { + if s.len() != 1 { + return "expected exactly one statement\n".to_string(); + } + let stmt = s.into_element().ast; + for printed in [stmt.to_ast_string(), stmt.to_ast_string_stable()] { + let mut parsed = match parser::parse_statements(&printed) { + Ok(parsed) => parsed.into_element().ast, + Err(err) => panic!("reparse failed: {}: {}\n", stmt, err), + }; + match (&mut parsed, &stmt) { + // DECLARE remembers the original SQL. Erase that here so it can differ if + // needed (for example, quoting identifiers vs not). This is ok because we + // still compare that the resulting ASTs are identical, and it's valid for + // those to come from different original strings. + (Statement::Declare(parsed), Statement::Declare(stmt)) => { + parsed.sql = stmt.sql.clone(); + } + _ => {} + } + if parsed != stmt { + panic!( + "reparse comparison failed:\n{:?}\n!=\n{:?}\n{printed}\n", + stmt, parsed + ); + } + } + if tc.args.get("roundtrip").is_some() { + format!("{}\n", stmt) + } else { + // TODO(justin): it would be nice to have a middle-ground between this + // all-on-one-line and {:#?}'s huge number of lines. + format!("{}\n=>\n{:?}\n", stmt, stmt) + } + } + Err(e) => render_error(input, e.error), + } + } + + fn parse_scalar(tc: &TestCase) -> String { + let input = tc.input.trim(); + match parser::parse_expr(input) { + Ok(s) => { + for printed in [s.to_ast_string(), s.to_ast_string_stable()] { + match parser::parse_expr(&printed) { + Ok(parsed) => { + // TODO: We always coerce the double colon operator into a Cast expr instead + // of keeping it as an Op (see parse_pg_cast). Expr::Cast always prints + // itself as double colon. We're thus unable to perfectly roundtrip + // `CAST(..)`. We could fix this by keeping "::" as a binary operator and + // teaching func.rs how to handle it, similar to how that file handles "~~" + // (without the parser converting that operator directly into an + // Expr::Like). + if !matches!(parsed, Expr::Cast { .. }) { + if parsed != s { + panic!( + "reparse comparison failed: {input} != {s}\n{:?}\n!=\n{:?}\n{printed}\n", + s, parsed + ); + } + } + } + Err(err) => panic!("reparse failed: {printed}: {err}\n{s:?}"), + } + } + + if tc.args.get("roundtrip").is_some() { + format!("{}\n", s) + } else { + // TODO(justin): it would be nice to have a middle-ground between this + // all-on-one-line and {:#?}'s huge number of lines. + format!("{:?}\n", s) + } + } + Err(e) => render_error(input, e), + } + } + + match tc.directive.as_str() { + "parse-statement" => parse_statement(tc), + "parse-scalar" => parse_scalar(tc), + dir => panic!("unhandled directive {}", dir), + } +} diff --git a/src/sql-parser/tests/sqlparser_common.rs b/src/sql-parser/tests/sqlparser_common.rs index b195d3644fe1b..c2b485484a16e 100644 --- a/src/sql-parser/tests/sqlparser_common.rs +++ b/src/sql-parser/tests/sqlparser_common.rs @@ -89,131 +89,21 @@ use std::error::Error; use std::iter; +use datadriven::walk; use itertools::Itertools; -use mz_ore::collections::CollectionExt; -use mz_ore::fmt::FormatBuffer; use mz_sql_parser::ast::display::AstDisplay; use mz_sql_parser::ast::visit::Visit; use mz_sql_parser::ast::visit_mut::{self, VisitMut}; -use mz_sql_parser::ast::{AstInfo, Expr, Ident, Raw, RawDataType, RawItemName, Statement}; +use mz_sql_parser::ast::{AstInfo, Expr, Ident, Raw, RawDataType, RawItemName}; +use mz_sql_parser::datadriven_testcase; use mz_sql_parser::parser::{ - self, parse_statements, parse_statements_with_limit, ParserError, MAX_STATEMENT_BATCH_SIZE, + self, parse_statements, parse_statements_with_limit, MAX_STATEMENT_BATCH_SIZE, }; -use unicode_width::UnicodeWidthStr; #[mz_ore::test] #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux` fn datadriven() { - use datadriven::{walk, TestCase}; - - fn render_error(sql: &str, e: ParserError) -> String { - let mut s = format!("error: {}\n", e.message); - - // Do our best to emulate psql in rendering a caret pointing at the - // offending character in the query. This makes it possible to detect - // incorrect error positions by visually scanning the test files. - let end = sql.len(); - let line_start = sql[..e.pos].rfind('\n').map(|p| p + 1).unwrap_or(0); - let line_end = sql[e.pos..].find('\n').map(|p| e.pos + p).unwrap_or(end); - writeln!(s, "{}", &sql[line_start..line_end]); - for _ in 0..sql[line_start..e.pos].width() { - write!(s, " "); - } - writeln!(s, "^"); - - s - } - - fn parse_statement(tc: &TestCase) -> String { - let input = tc.input.strip_suffix('\n').unwrap_or(&tc.input); - match parser::parse_statements(input) { - Ok(s) => { - if s.len() != 1 { - return "expected exactly one statement\n".to_string(); - } - let stmt = s.into_element().ast; - for printed in [stmt.to_ast_string(), stmt.to_ast_string_stable()] { - let mut parsed = match parser::parse_statements(&printed) { - Ok(parsed) => parsed.into_element().ast, - Err(err) => panic!("reparse failed: {}: {}\n", stmt, err), - }; - match (&mut parsed, &stmt) { - // DECLARE remembers the original SQL. Erase that here so it can differ if - // needed (for example, quoting identifiers vs not). This is ok because we - // still compare that the resulting ASTs are identical, and it's valid for - // those to come from different original strings. - (Statement::Declare(parsed), Statement::Declare(stmt)) => { - parsed.sql = stmt.sql.clone(); - } - _ => {} - } - if parsed != stmt { - panic!( - "reparse comparison failed:\n{:?}\n!=\n{:?}\n{printed}\n", - stmt, parsed - ); - } - } - if tc.args.get("roundtrip").is_some() { - format!("{}\n", stmt) - } else { - // TODO(justin): it would be nice to have a middle-ground between this - // all-on-one-line and {:#?}'s huge number of lines. - format!("{}\n=>\n{:?}\n", stmt, stmt) - } - } - Err(e) => render_error(input, e.error), - } - } - - fn parse_scalar(tc: &TestCase) -> String { - let input = tc.input.trim(); - match parser::parse_expr(input) { - Ok(s) => { - for printed in [s.to_ast_string(), s.to_ast_string_stable()] { - match parser::parse_expr(&printed) { - Ok(parsed) => { - // TODO: We always coerce the double colon operator into a Cast expr instead - // of keeping it as an Op (see parse_pg_cast). Expr::Cast always prints - // itself as double colon. We're thus unable to perfectly roundtrip - // `CAST(..)`. We could fix this by keeping "::" as a binary operator and - // teaching func.rs how to handle it, similar to how that file handles "~~" - // (without the parser converting that operator directly into an - // Expr::Like). - if !matches!(parsed, Expr::Cast { .. }) { - if parsed != s { - panic!( - "reparse comparison failed: {input} != {s}\n{:?}\n!=\n{:?}\n{printed}\n", - s, parsed - ); - } - } - } - Err(err) => panic!("reparse failed: {printed}: {err}\n{s:?}"), - } - } - - if tc.args.get("roundtrip").is_some() { - format!("{}\n", s) - } else { - // TODO(justin): it would be nice to have a middle-ground between this - // all-on-one-line and {:#?}'s huge number of lines. - format!("{:?}\n", s) - } - } - Err(e) => render_error(input, e), - } - } - - walk("tests/testdata", |f| { - f.run(|test_case| -> String { - match test_case.directive.as_str() { - "parse-statement" => parse_statement(test_case), - "parse-scalar" => parse_scalar(test_case), - dir => panic!("unhandled directive {}", dir), - } - }) - }); + walk("tests/testdata", |f| f.run(datadriven_testcase)); } #[mz_ore::test] diff --git a/src/sql-pretty/Cargo.toml b/src/sql-pretty/Cargo.toml new file mode 100644 index 0000000000000..2e1c5d9c84b93 --- /dev/null +++ b/src/sql-pretty/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "mz-sql-pretty" +description = "The pretty printer for Materialize's SQL dialect." +version = "0.0.0" +edition.workspace = true +rust-version.workspace = true +publish = false + +[dependencies] +mz-sql-parser = { path = "../sql-parser" } +pretty = "0.12.3" +workspace-hack = { version = "0.0.0", path = "../workspace-hack", optional = true } + +[dev-dependencies] +datadriven = "0.6.0" +mz-ore = { path = "../ore", default-features = false, features = ["test"] } + +[features] +default = ["workspace-hack"] + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] diff --git a/src/sql-pretty/src/lib.rs b/src/sql-pretty/src/lib.rs new file mode 100644 index 0000000000000..ba6d2cdf844d7 --- /dev/null +++ b/src/sql-pretty/src/lib.rs @@ -0,0 +1,665 @@ +// Copyright Materialize, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +// BEGIN LINT CONFIG +// DO NOT EDIT. Automatically generated by bin/gen-lints. +// Have complaints about the noise? See the note in misc/python/materialize/cli/gen-lints.py first. +#![allow(unknown_lints)] +#![allow(clippy::style)] +#![allow(clippy::complexity)] +#![allow(clippy::large_enum_variant)] +#![allow(clippy::mutable_key_type)] +#![allow(clippy::stable_sort_primitive)] +#![allow(clippy::map_entry)] +#![allow(clippy::box_default)] +#![allow(clippy::drain_collect)] +#![warn(clippy::bool_comparison)] +#![warn(clippy::clone_on_ref_ptr)] +#![warn(clippy::no_effect)] +#![warn(clippy::unnecessary_unwrap)] +#![warn(clippy::dbg_macro)] +#![warn(clippy::todo)] +#![warn(clippy::wildcard_dependencies)] +#![warn(clippy::zero_prefixed_literal)] +#![warn(clippy::borrowed_box)] +#![warn(clippy::deref_addrof)] +#![warn(clippy::double_must_use)] +#![warn(clippy::double_parens)] +#![warn(clippy::extra_unused_lifetimes)] +#![warn(clippy::needless_borrow)] +#![warn(clippy::needless_question_mark)] +#![warn(clippy::needless_return)] +#![warn(clippy::redundant_pattern)] +#![warn(clippy::redundant_slicing)] +#![warn(clippy::redundant_static_lifetimes)] +#![warn(clippy::single_component_path_imports)] +#![warn(clippy::unnecessary_cast)] +#![warn(clippy::useless_asref)] +#![warn(clippy::useless_conversion)] +#![warn(clippy::builtin_type_shadow)] +#![warn(clippy::duplicate_underscore_argument)] +#![warn(clippy::double_neg)] +#![warn(clippy::unnecessary_mut_passed)] +#![warn(clippy::wildcard_in_or_patterns)] +#![warn(clippy::crosspointer_transmute)] +#![warn(clippy::excessive_precision)] +#![warn(clippy::overflow_check_conditional)] +#![warn(clippy::as_conversions)] +#![warn(clippy::match_overlapping_arm)] +#![warn(clippy::zero_divided_by_zero)] +#![warn(clippy::must_use_unit)] +#![warn(clippy::suspicious_assignment_formatting)] +#![warn(clippy::suspicious_else_formatting)] +#![warn(clippy::suspicious_unary_op_formatting)] +#![warn(clippy::mut_mutex_lock)] +#![warn(clippy::print_literal)] +#![warn(clippy::same_item_push)] +#![warn(clippy::useless_format)] +#![warn(clippy::write_literal)] +#![warn(clippy::redundant_closure)] +#![warn(clippy::redundant_closure_call)] +#![warn(clippy::unnecessary_lazy_evaluations)] +#![warn(clippy::partialeq_ne_impl)] +#![warn(clippy::redundant_field_names)] +#![warn(clippy::transmutes_expressible_as_ptr_casts)] +#![warn(clippy::unused_async)] +#![warn(clippy::disallowed_methods)] +#![warn(clippy::disallowed_macros)] +#![warn(clippy::disallowed_types)] +#![warn(clippy::from_over_into)] +// END LINT CONFIG + +use mz_sql_parser::ast::display::AstDisplay; +use mz_sql_parser::ast::*; +use mz_sql_parser::parser::{parse_statements, ParserStatementError}; +use pretty::*; + +const TAB: isize = 4; + +pub fn to_doc(v: &Statement) -> RcDoc { + match v { + Statement::Select(v) => doc_select_statement(v), + Statement::Insert(v) => doc_insert(v), + Statement::CreateView(v) => doc_create_view(v), + Statement::CreateMaterializedView(v) => doc_create_materialized_view(v), + _ => doc_display_pass(v), + } +} + +pub fn to_pretty(stmt: &Statement, width: usize) -> String { + let mut w = Vec::new(); + to_doc(stmt).render(width, &mut w).unwrap(); + let mut s = String::from_utf8(w).unwrap(); + s.push(';'); + s +} + +pub fn pretty_strs(str: &str, width: usize) -> Result, ParserStatementError> { + let stmts = parse_statements(str)?; + Ok(stmts.iter().map(|s| to_pretty(&s.ast, width)).collect()) +} + +pub fn pretty_str(str: &str, width: usize) -> Result { + Ok(pretty_strs(str, width)?.join("\n\n")) +} + +// Use when we don't know what to do. +fn doc_display<'a, T: AstDisplay>(v: &T, _debug: &str) -> RcDoc<'a, ()> { + #[cfg(test)] + eprintln!( + "UNKNOWN PRETTY TYPE in {}: {}, {}", + _debug, + std::any::type_name::(), + v.to_ast_string() + ); + doc_display_pass(v) +} + +// Use when the AstDisplay trait is what we want. +fn doc_display_pass<'a, T: AstDisplay>(v: &T) -> RcDoc<'a, ()> { + RcDoc::text(v.to_ast_string()) +} + +fn nest<'a>(title: RcDoc<'a>, v: RcDoc<'a>) -> RcDoc<'a> { + RcDoc::intersperse([title, v], Doc::line()) + .nest(TAB) + .group() +} + +fn nest_title(title: S, v: RcDoc) -> RcDoc +where + S: Into, +{ + nest(RcDoc::text(title.into()), v) +} + +fn title_comma_separate<'a, F, T, S>(title: S, f: F, v: &'a [T]) -> RcDoc<'a, ()> +where + F: Fn(&'a T) -> RcDoc<'a, ()>, + S: Into, +{ + let title = RcDoc::text(title.into()); + if v.is_empty() { + title + } else { + nest(title, comma_separate(f, v)) + } +} + +fn comma_separate<'a, F, T>(f: F, v: &'a [T]) -> RcDoc<'a, ()> +where + F: Fn(&'a T) -> RcDoc<'a, ()>, +{ + let docs = v.iter().map(f).collect(); + comma_separated(docs) +} + +fn comma_separated(v: Vec) -> RcDoc { + RcDoc::intersperse(v, RcDoc::concat([RcDoc::text(","), RcDoc::line()])).group() +} + +fn bracket, B: Into>(left: A, d: RcDoc, right: B) -> RcDoc { + bracket_doc( + RcDoc::text(left.into()), + d, + RcDoc::text(right.into()), + RcDoc::line_(), + ) +} + +fn bracket_doc<'a>(left: RcDoc<'a>, d: RcDoc<'a>, right: RcDoc<'a>, line: RcDoc<'a>) -> RcDoc<'a> { + RcDoc::concat([ + left, + RcDoc::concat([line.clone(), d]).nest(TAB), + line, + right, + ]) + .group() +} + +// + +fn doc_create_view(v: &CreateViewStatement) -> RcDoc { + let mut docs = vec![]; + docs.push(RcDoc::text(format!( + "CREATE{}{} VIEW{}", + if v.if_exists == IfExistsBehavior::Replace { + " OR REPLACE" + } else { + "" + }, + if v.temporary { " TEMPORARY" } else { "" }, + if v.if_exists == IfExistsBehavior::Skip { + " IF NOT EXISTS" + } else { + "" + }, + ))); + docs.push(doc_view_definition(&v.definition)); + RcDoc::intersperse(docs, Doc::line()).nest(TAB).group() +} + +fn doc_create_materialized_view(v: &CreateMaterializedViewStatement) -> RcDoc { + let mut docs = vec![]; + docs.push(RcDoc::text(format!( + "CREATE{} MATERIALIZED VIEW{} {}", + if v.if_exists == IfExistsBehavior::Replace { + " OR REPLACE" + } else { + "" + }, + if v.if_exists == IfExistsBehavior::Skip { + " IF NOT EXISTS" + } else { + "" + }, + v.name, + ))); + if !v.columns.is_empty() { + docs.push(bracket( + "(", + comma_separate(doc_display_pass, &v.columns), + ")", + )); + } + if let Some(cluster) = &v.in_cluster { + docs.push(RcDoc::text(format!("IN CLUSTER {cluster}"))); + } + docs.push(nest_title("AS", doc_query(&v.query))); + RcDoc::intersperse(docs, Doc::line()).nest(TAB).group() +} + +fn doc_view_definition(v: &ViewDefinition) -> RcDoc { + let mut docs = vec![RcDoc::text(v.name.to_string())]; + if !v.columns.is_empty() { + docs.push(bracket( + "(", + comma_separate(doc_display_pass, &v.columns), + ")", + )); + } + docs.push(nest_title("AS", doc_query(&v.query))); + RcDoc::intersperse(docs, Doc::line()).group() +} + +fn doc_insert(v: &InsertStatement) -> RcDoc { + let mut first = vec![RcDoc::text(format!("INSERT INTO {}", v.table_name))]; + if !v.columns.is_empty() { + first.push(bracket( + "(", + comma_separate(doc_display_pass, &v.columns), + ")", + )); + } + let sources = match &v.source { + InsertSource::Query(query) => doc_query(query), + _ => doc_display(&v.source, "insert source"), + }; + let mut doc = RcDoc::intersperse( + [ + RcDoc::intersperse(first, Doc::line()).nest(TAB).group(), + sources, + ], + Doc::line(), + ) + .nest(TAB) + .group(); + if !v.returning.is_empty() { + doc = nest( + doc, + nest_title("RETURNING", comma_separate(doc_select_item, &v.returning)), + ) + } + doc +} + +fn doc_select_statement(v: &SelectStatement) -> RcDoc { + let mut doc = doc_query(&v.query); + if let Some(as_of) = &v.as_of { + doc = RcDoc::intersperse([doc, doc_display_pass(as_of)], Doc::line()) + .nest(TAB) + .group(); + } + doc.group() +} + +fn doc_order_by(v: &[OrderByExpr]) -> RcDoc { + title_comma_separate("ORDER BY", doc_order_by_expr, v) +} + +fn doc_order_by_expr(v: &OrderByExpr) -> RcDoc { + let doc = doc_expr(&v.expr); + let doc = match v.asc { + Some(true) => nest(doc, RcDoc::text("ASC")), + Some(false) => nest(doc, RcDoc::text("DESC")), + None => doc, + }; + match v.nulls_last { + Some(true) => nest(doc, RcDoc::text("NULLS LAST")), + Some(false) => nest(doc, RcDoc::text("NULLS FIRST")), + None => doc, + } +} + +fn doc_query(v: &Query) -> RcDoc { + let mut docs = vec![]; + if !v.ctes.is_empty() { + match &v.ctes { + CteBlock::Simple(ctes) => docs.push(title_comma_separate("WITH", doc_cte, ctes)), + CteBlock::MutuallyRecursive(mutrec) => { + let mut doc = RcDoc::text("WITH MUTUALLY RECURSIVE"); + if !mutrec.options.is_empty() { + doc = nest( + doc, + bracket("(", comma_separate(doc_display_pass, &mutrec.options), ")"), + ); + } + docs.push(nest( + doc, + comma_separate(doc_mutually_recursive, &mutrec.ctes), + )); + } + } + } + docs.push(doc_set_expr(&v.body)); + if !v.order_by.is_empty() { + docs.push(doc_order_by(&v.order_by)); + } + + let offset = if let Some(offset) = &v.offset { + vec![RcDoc::concat([nest_title("OFFSET", doc_expr(offset))])] + } else { + vec![] + }; + + if let Some(limit) = &v.limit { + if limit.with_ties { + docs.extend(offset); + docs.push(RcDoc::concat([ + RcDoc::text("FETCH FIRST "), + doc_expr(&limit.quantity), + RcDoc::text(" ROWS WITH TIES"), + ])); + } else { + docs.push(nest_title("LIMIT", doc_expr(&limit.quantity))); + docs.extend(offset); + } + } else { + docs.extend(offset); + } + + RcDoc::intersperse(docs, Doc::line()).group() +} + +fn doc_cte(v: &Cte) -> RcDoc { + RcDoc::concat([ + RcDoc::text(format!("{} AS", v.alias)), + RcDoc::line(), + bracket("(", doc_query(&v.query), ")"), + ]) +} + +fn doc_mutually_recursive(v: &CteMutRec) -> RcDoc { + let mut docs = Vec::new(); + if !v.columns.is_empty() { + docs.push(bracket( + "(", + comma_separate(doc_display_pass, &v.columns), + ")", + )); + } + docs.push(bracket("AS (", doc_query(&v.query), ")")); + nest( + doc_display_pass(&v.name), + RcDoc::intersperse(docs, Doc::line()).group(), + ) +} + +fn doc_set_expr(v: &SetExpr) -> RcDoc { + match v { + SetExpr::Select(v) => doc_select(v), + SetExpr::Query(v) => bracket("(", doc_query(v), ")"), + SetExpr::SetOperation { + op, + all, + left, + right, + } => { + let all_str = if *all { " ALL" } else { "" }; + RcDoc::concat([ + doc_set_expr(left), + RcDoc::line(), + RcDoc::concat([ + RcDoc::text(format!("{}{}", op, all_str)), + RcDoc::line(), + doc_set_expr(right), + ]) + .nest(TAB) + .group(), + ]) + } + SetExpr::Values(v) => doc_values(v), + SetExpr::Show(v) => doc_display(v, "SHOW"), + SetExpr::Table(v) => nest(RcDoc::text("TABLE"), doc_display_pass(v)), + } + .group() +} + +fn doc_values(v: &Values) -> RcDoc { + let rows = + v.0.iter() + .map(|row| bracket("(", comma_separate(doc_expr, row), ")")) + .collect(); + RcDoc::concat([RcDoc::text("VALUES"), RcDoc::line(), comma_separated(rows)]) + .nest(TAB) + .group() +} + +fn doc_table_with_joins(v: &TableWithJoins) -> RcDoc { + let mut docs = vec![doc_table_factor(&v.relation)]; + for j in &v.joins { + docs.push(doc_join(j)); + } + RcDoc::intersperse(docs, Doc::line()).nest(TAB).group() +} + +fn doc_join(v: &Join) -> RcDoc { + let (constraint, name) = match &v.join_operator { + JoinOperator::Inner(constraint) => (constraint, "JOIN"), + JoinOperator::FullOuter(constraint) => (constraint, "FULL JOIN"), + JoinOperator::LeftOuter(constraint) => (constraint, "LEFT JOIN"), + JoinOperator::RightOuter(constraint) => (constraint, "RIGHT JOIN"), + _ => return doc_display(v, "join operator"), + }; + let constraint = match constraint { + JoinConstraint::On(expr) => nest_title("ON", doc_expr(expr)), + JoinConstraint::Using { columns, alias } => { + let mut doc = bracket("USING(", comma_separate(doc_display_pass, columns), ")"); + if let Some(alias) = alias { + doc = nest(doc, nest_title("AS", doc_display_pass(alias))); + } + doc + } + _ => return doc_display(v, "join constrant"), + }; + RcDoc::intersperse( + [RcDoc::text(name), doc_table_factor(&v.relation), constraint], + Doc::line(), + ) + .nest(TAB) + .group() +} + +fn doc_table_factor(v: &TableFactor) -> RcDoc { + match v { + TableFactor::Derived { + lateral, + subquery, + alias, + } => { + if *lateral { + return doc_display(v, "table factor lateral"); + } + let mut docs = vec![bracket("(", doc_query(subquery), ")")]; + if let Some(alias) = alias { + docs.push(RcDoc::text(format!("AS {}", alias))); + } + RcDoc::intersperse(docs, Doc::line()).nest(TAB).group() + } + TableFactor::NestedJoin { join, alias } => { + let mut doc = bracket("(", doc_table_with_joins(join), ")"); + if let Some(alias) = alias { + doc = RcDoc::intersperse([doc, RcDoc::text(format!("AS {}", alias))], Doc::line()) + .nest(TAB) + .group() + } + doc + } + TableFactor::Table { .. } => doc_display_pass(v), + _ => doc_display(v, "table factor variant"), + } +} + +fn doc_select(v: &Select) -> RcDoc { + let mut docs = vec![]; + docs.push(title_comma_separate( + format!( + "SELECT{}", + if let Some(distinct) = &v.distinct { + format!(" {}", distinct.to_ast_string()) + } else { + "".into() + } + ), + doc_select_item, + &v.projection, + )); + if !v.from.is_empty() { + docs.push(title_comma_separate("FROM", doc_table_with_joins, &v.from)); + } + if let Some(selection) = &v.selection { + docs.push(nest_title("WHERE", doc_expr(selection))); + } + if !v.group_by.is_empty() { + docs.push(title_comma_separate("GROUP BY", doc_expr, &v.group_by)); + } + if let Some(having) = &v.having { + docs.push(nest_title("HAVING", doc_expr(having))); + } + if !v.options.is_empty() { + docs.push(bracket( + "OPTIONS (", + comma_separate(doc_display_pass, &v.options), + ")", + )); + } + RcDoc::intersperse(docs, Doc::line()).group() +} + +fn doc_select_item(v: &SelectItem) -> RcDoc { + match v { + SelectItem::Expr { expr, alias } => { + let mut doc = doc_expr(expr); + if let Some(alias) = alias { + doc = nest( + doc, + RcDoc::concat([RcDoc::text("AS "), doc_display_pass(alias)]), + ); + } + doc + } + SelectItem::Wildcard => doc_display_pass(v), + } +} + +fn doc_expr(v: &Expr) -> RcDoc { + match v { + Expr::Op { op, expr1, expr2 } => { + if let Some(expr2) = expr2 { + RcDoc::concat([ + doc_expr(expr1), + RcDoc::line(), + RcDoc::text(format!("{} ", op)), + doc_expr(expr2).nest(TAB), + ]) + } else { + RcDoc::concat([RcDoc::text(format!("{} ", op)), doc_expr(expr1)]) + } + } + Expr::Cast { expr, data_type } => bracket( + "CAST(", + RcDoc::concat([ + doc_expr(expr), + RcDoc::line(), + RcDoc::text(format!("AS {}", data_type)), + ]) + .nest(TAB), + ")", + ), + Expr::Nested(ast) => bracket("(", doc_expr(ast), ")"), + Expr::Function(fun) => doc_function(fun), + Expr::Subquery(ast) => bracket("(", doc_query(ast), ")"), + Expr::Identifier(_) + | Expr::Value(_) + | Expr::QualifiedWildcard(_) + | Expr::WildcardAccess(_) + | Expr::FieldAccess { .. } => doc_display_pass(v), + Expr::And { left, right } => bracket_doc( + doc_expr(left), + RcDoc::text("AND"), + doc_expr(right), + RcDoc::line(), + ), + Expr::Or { left, right } => bracket_doc( + doc_expr(left), + RcDoc::text("OR"), + doc_expr(right), + RcDoc::line(), + ), + Expr::Exists(s) => bracket("EXISTS (", doc_query(s), ")"), + Expr::IsExpr { + expr, + negated, + construct, + } => bracket_doc( + doc_expr(expr), + RcDoc::text(if *negated { "IS NOT" } else { "IS" }), + doc_display_pass(construct), + RcDoc::line(), + ), + Expr::Not { expr } => RcDoc::concat([RcDoc::text("NOT"), RcDoc::line(), doc_expr(expr)]), + Expr::Between { + expr, + negated, + low, + high, + } => RcDoc::intersperse( + [ + doc_expr(expr), + RcDoc::text(if *negated { "NOT BETWEEN" } else { "BETWEEN" }), + RcDoc::intersperse( + [doc_expr(low), RcDoc::text("AND"), doc_expr(high)], + RcDoc::line(), + ) + .group(), + ], + RcDoc::line(), + ), + Expr::InSubquery { + expr, + subquery, + negated, + } => RcDoc::intersperse( + [ + doc_expr(expr), + RcDoc::text(if *negated { "NOT IN (" } else { "IN (" }), + doc_query(subquery), + RcDoc::text(")"), + ], + RcDoc::line(), + ), + Expr::InList { + expr, + list, + negated, + } => RcDoc::intersperse( + [ + doc_expr(expr), + RcDoc::text(if *negated { "NOT IN (" } else { "IN (" }), + comma_separate(doc_expr, list), + RcDoc::text(")"), + ], + RcDoc::line(), + ), + Expr::Row { exprs } => bracket("ROW(", comma_separate(doc_expr, exprs), ")"), + _ => doc_display(v, "expr variant"), + } + .group() +} + +fn doc_function(v: &Function) -> RcDoc { + match &v.args { + FunctionArgs::Star => doc_display_pass(v), + FunctionArgs::Args { args, order_by } => { + if args.is_empty() { + // Nullary, don't allow newline between parens, so just delegate. + doc_display_pass(v) + } else { + if v.filter.is_some() || v.over.is_some() || !order_by.is_empty() { + return doc_display(v, "function filter or over or order by"); + } + let mut name = format!("{}(", v.name.to_ast_string()); + if v.distinct { + name.push_str("DISTINCT"); + } + bracket(name, comma_separate(doc_expr, args), ")") + } + } + } +} diff --git a/src/sql-pretty/tests/parser.rs b/src/sql-pretty/tests/parser.rs new file mode 100644 index 0000000000000..b898cc40d6313 --- /dev/null +++ b/src/sql-pretty/tests/parser.rs @@ -0,0 +1,128 @@ +// Copyright Materialize, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +// BEGIN LINT CONFIG +// DO NOT EDIT. Automatically generated by bin/gen-lints. +// Have complaints about the noise? See the note in misc/python/materialize/cli/gen-lints.py first. +#![allow(unknown_lints)] +#![allow(clippy::style)] +#![allow(clippy::complexity)] +#![allow(clippy::large_enum_variant)] +#![allow(clippy::mutable_key_type)] +#![allow(clippy::stable_sort_primitive)] +#![allow(clippy::map_entry)] +#![allow(clippy::box_default)] +#![allow(clippy::drain_collect)] +#![warn(clippy::bool_comparison)] +#![warn(clippy::clone_on_ref_ptr)] +#![warn(clippy::no_effect)] +#![warn(clippy::unnecessary_unwrap)] +#![warn(clippy::dbg_macro)] +#![warn(clippy::todo)] +#![warn(clippy::wildcard_dependencies)] +#![warn(clippy::zero_prefixed_literal)] +#![warn(clippy::borrowed_box)] +#![warn(clippy::deref_addrof)] +#![warn(clippy::double_must_use)] +#![warn(clippy::double_parens)] +#![warn(clippy::extra_unused_lifetimes)] +#![warn(clippy::needless_borrow)] +#![warn(clippy::needless_question_mark)] +#![warn(clippy::needless_return)] +#![warn(clippy::redundant_pattern)] +#![warn(clippy::redundant_slicing)] +#![warn(clippy::redundant_static_lifetimes)] +#![warn(clippy::single_component_path_imports)] +#![warn(clippy::unnecessary_cast)] +#![warn(clippy::useless_asref)] +#![warn(clippy::useless_conversion)] +#![warn(clippy::builtin_type_shadow)] +#![warn(clippy::duplicate_underscore_argument)] +#![warn(clippy::double_neg)] +#![warn(clippy::unnecessary_mut_passed)] +#![warn(clippy::wildcard_in_or_patterns)] +#![warn(clippy::crosspointer_transmute)] +#![warn(clippy::excessive_precision)] +#![warn(clippy::overflow_check_conditional)] +#![warn(clippy::as_conversions)] +#![warn(clippy::match_overlapping_arm)] +#![warn(clippy::zero_divided_by_zero)] +#![warn(clippy::must_use_unit)] +#![warn(clippy::suspicious_assignment_formatting)] +#![warn(clippy::suspicious_else_formatting)] +#![warn(clippy::suspicious_unary_op_formatting)] +#![warn(clippy::mut_mutex_lock)] +#![warn(clippy::print_literal)] +#![warn(clippy::same_item_push)] +#![warn(clippy::useless_format)] +#![warn(clippy::write_literal)] +#![warn(clippy::redundant_closure)] +#![warn(clippy::redundant_closure_call)] +#![warn(clippy::unnecessary_lazy_evaluations)] +#![warn(clippy::partialeq_ne_impl)] +#![warn(clippy::redundant_field_names)] +#![warn(clippy::transmutes_expressible_as_ptr_casts)] +#![warn(clippy::unused_async)] +#![warn(clippy::disallowed_methods)] +#![warn(clippy::disallowed_macros)] +#![warn(clippy::disallowed_types)] +#![warn(clippy::from_over_into)] +// END LINT CONFIG + +use datadriven::walk; +use mz_sql_parser::ast::display::AstDisplay; +use mz_sql_parser::datadriven_testcase; +use mz_sql_parser::parser::parse_statements; +use mz_sql_pretty::to_pretty; + +// Use the parser's datadriven tests to get a comprehensive set of SQL statements. Assert they all +// generate identical ASTs when pretty printed. Output the same output as the parser so datadriven +// is happy. (Having the datadriven parser be exported would be nice here too.) +#[mz_ore::test] +#[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux` +fn test_parser() { + walk("../sql-parser/tests/testdata", |f| { + f.run(|tc| -> String { + if tc.directive == "parse-statement" { + verify_pretty_print(&tc.input); + } + datadriven_testcase(tc) + }) + }); +} + +fn verify_pretty_print(stmt: &str) { + let original = match parse_statements(stmt) { + Ok(stmt) => match stmt.into_iter().next() { + Some(stmt) => stmt, + None => return, + }, + Err(_) => return, + }; + for n in &[1, 40, 1000000] { + let n = *n; + let pretty1 = to_pretty(&original.ast, n); + let prettied = parse_statements(&pretty1) + .unwrap_or_else(|_| panic!("could not parse: {pretty1}, original: {stmt}")) + .into_iter() + .next() + .unwrap(); + let pretty2 = to_pretty(&prettied.ast, n); + assert_eq!(pretty1, pretty2); + assert_eq!( + original.ast.to_ast_string_stable(), + prettied.ast.to_ast_string_stable(), + "\noriginal: {stmt}", + ); + // Everything should always squash to a single line. + if n > (stmt.len() * 2) { + assert_eq!(pretty1.lines().count(), 1, "{}: {}", n, pretty1); + } + } +}