From b9bbd19c896f81ef3592e952974c77c944ac40af Mon Sep 17 00:00:00 2001 From: gvozdvmozgu Date: Mon, 8 Jul 2024 14:02:45 -0700 Subject: [PATCH] feat: add `RF04` rule --- Cargo.lock | 243 +++++++++++++++++- crates/lib/Cargo.toml | 8 +- crates/lib/src/core/config.rs | 21 +- crates/lib/src/core/dialects/init.rs | 28 +- crates/lib/src/core/rules/base.rs | 1 + crates/lib/src/lib.rs | 1 - crates/lib/src/rules/references.rs | 7 +- crates/lib/src/rules/references/RF04.rs | 126 +++++++++ crates/lib/src/utils/identifers.rs | 2 +- .../fixtures/rules/std_rule_cases/RF04.yml | 152 +++++++++++ crates/lib/tests/rules.rs | 113 ++++++++ 11 files changed, 682 insertions(+), 20 deletions(-) create mode 100644 crates/lib/src/rules/references/RF04.rs create mode 100644 crates/lib/test/fixtures/rules/std_rule_cases/RF04.yml create mode 100644 crates/lib/tests/rules.rs diff --git a/Cargo.lock b/Cargo.lock index b97172eae..23116847c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -27,6 +27,7 @@ dependencies = [ "const-random", "getrandom", "once_cell", + "serde", "version_check", "zerocopy", ] @@ -40,6 +41,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anes" version = "0.1.6" @@ -153,6 +169,12 @@ dependencies = [ "rustc-demangle", ] +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bit-set" version = "0.5.3" @@ -253,6 +275,19 @@ version = "1.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "serde", + "windows-targets 0.52.6", +] + [[package]] name = "ciborium" version = "0.2.2" @@ -433,6 +468,12 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + [[package]] name = "cpp_demangle" version = "0.4.3" @@ -518,6 +559,41 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +[[package]] +name = "darling" +version = "0.20.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83b2eb4d90d12bdda5ed17de686c2acb4c57914f8f921b8da7e112b5a36f3fe1" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622687fe0bac72a04e5599029151f5796111b90f1baaa9b544d807a5e31cd120" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178" +dependencies = [ + "darling_core", + "quote", + "syn", +] + [[package]] name = "debugid" version = "0.8.0" @@ -527,6 +603,16 @@ dependencies = [ "uuid", ] +[[package]] +name = "deranged" +version = "0.3.11" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", + "serde", +] + [[package]] name = "diff" version = "0.1.13" @@ -649,6 +735,12 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "fuchsia-cprng" version = "0.1.1" @@ -690,6 +782,12 @@ dependencies = [ "crunchy", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + [[package]] name = "hashbrown" version = "0.14.5" @@ -708,12 +806,58 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "indenter" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", + "serde", +] + [[package]] name = "indexmap" version = "2.2.6" @@ -721,7 +865,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.14.5", + "serde", ] [[package]] @@ -744,7 +889,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "321f0f839cd44a4686e9504b0a62b4d69a50b62072144c71c68f5873c167b8d9" dependencies = [ "ahash", - "indexmap", + "indexmap 2.2.6", "is-terminal", "itoa", "log", @@ -952,6 +1097,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "num-format" version = "0.4.4" @@ -1120,6 +1271,12 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "pprof" version = "0.13.0" @@ -1452,13 +1609,43 @@ dependencies = [ "syn", ] 
+[[package]] +name = "serde_with" +version = "3.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e73139bc5ec2d45e6c5fd85be5a46949c1c39a4c18e56915f5eb4c12f975e377" +dependencies = [ + "base64", + "chrono", + "hex", + "indexmap 1.9.3", + "indexmap 2.2.6", + "serde", + "serde_derive", + "serde_json", + "serde_with_macros", + "time", +] + +[[package]] +name = "serde_with_macros" +version = "3.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b80d3d6b56b64335c0180e5ffde23b3c5e08c14c585b51a15bd0e95393f46703" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "serde_yaml" version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap", + "indexmap 2.2.6", "itoa", "ryu", "serde", @@ -1542,7 +1729,7 @@ dependencies = [ "expect-test", "fancy-regex", "glob", - "indexmap", + "indexmap 2.2.6", "itertools 0.13.0", "lazy-regex", "nohash-hasher", @@ -1554,9 +1741,11 @@ dependencies = [ "rustc-hash", "serde", "serde_json", + "serde_with", "serde_yaml", "slyce", "smol_str", + "strum", "strum_macros", "tempdir", "tracing", @@ -1598,6 +1787,12 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" + [[package]] name = "strum_macros" version = "0.26.4" @@ -1717,6 +1912,37 @@ dependencies = [ "tikv-jemalloc-sys", ] +[[package]] +name = "time" +version = "0.3.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +dependencies = [ + "deranged", + 
"itoa", + "num-conv", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tiny-keccak" version = "2.0.2" @@ -1983,6 +2209,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.6", +] + [[package]] name = "windows-sys" version = "0.48.0" diff --git a/crates/lib/Cargo.toml b/crates/lib/Cargo.toml index 5031d8724..a10952890 100644 --- a/crates/lib/Cargo.toml +++ b/crates/lib/Cargo.toml @@ -10,6 +10,10 @@ crate-type = ["cdylib", "rlib"] bench = false doctest = false +[[test]] +name = "rules" +harness = false + [[bench]] name = "parsing" harness = false @@ -41,7 +45,7 @@ anstyle = "1.0" tracing = "0.1.40" pretty_assertions = "1.4.0" slyce = "0.3.1" -ahash = { version = "0.8.11", features = ["compile-time-rng"] } +ahash = { version = "0.8.11", features = ["compile-time-rng", "serde"] } lazy-regex = "3.1.0" anymap = { package = "anymap3", version = "1.0" } rayon = "1.10.0" @@ -50,6 +54,7 @@ serde_yaml = "0.9.34" nohash-hasher = "0.2.0" rustc-hash = "2.0.0" strum_macros = "0.26.4" +strum = "0.26.3" [dev-dependencies] criterion = "0.5" @@ -57,5 +62,6 @@ expect-test = "1.5" glob = "0.3" serde_json = "1" tempdir = "0.3" +serde_with = "3.8" 
[target.'cfg(unix)'.dependencies] pprof = { version = "0.13", features = ["flamegraph", "criterion"] } diff --git a/crates/lib/src/core/config.rs b/crates/lib/src/core/config.rs index 4ac4223d4..1b170fff9 100644 --- a/crates/lib/src/core/config.rs +++ b/crates/lib/src/core/config.rs @@ -5,6 +5,7 @@ use std::str::FromStr; use ahash::AHashMap; use configparser::ini::Ini; use itertools::Itertools; +use serde::Deserialize; use super::dialects::base::Dialect; use crate::core::dialects::init::{dialect_readout, dialect_selector, get_default_dialect}; @@ -122,7 +123,7 @@ pub fn split_comma_separated_string(raw_str: &str) -> Value { #[derive(Debug, PartialEq, Clone)] pub struct FluffConfig { pub(crate) indentation: FluffConfigIndentation, - pub(crate) raw: AHashMap, + pub raw: AHashMap, extra_config_path: Option, _configs: AHashMap>, pub(crate) dialect: Dialect, @@ -522,7 +523,8 @@ impl ConfigLoader { } } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Deserialize)] +#[serde(untagged)] pub enum Value { Int(i32), Bool(bool), @@ -538,8 +540,12 @@ impl Value { matches!(self, Value::None) } - pub fn as_array(&self) -> Option<&[Value]> { - if let Self::Array(v) = self { Some(v) } else { None } + pub fn as_array(&self) -> Option> { + match self { + Self::Array(v) => Some(v.clone()), + v @ Self::String(_) => Some(vec![v.clone()]), + _ => None, + } } } @@ -567,6 +573,13 @@ impl Value { } } + pub fn map(&self, f: impl Fn(&Self) -> T) -> Option { + if self == &Value::None { + return None; + } + + Some(f(self)) + } pub fn as_map(&self) -> Option<&AHashMap> { if let Self::Map(map) = self { Some(map) } else { None } } diff --git a/crates/lib/src/core/dialects/init.rs b/crates/lib/src/core/dialects/init.rs index 23709568b..5df558f5a 100644 --- a/crates/lib/src/core/dialects/init.rs +++ b/crates/lib/src/core/dialects/init.rs @@ -1,14 +1,26 @@ +use std::str::FromStr; + use super::base::Dialect; +#[derive(strum_macros::EnumString)] +#[strum(serialize_all = 
"snake_case")] +pub enum DialectKind { + Ansi, + Bigquery, + Postgres, + Snowflake, + Clickhouse, + Sparksql, +} + pub fn dialect_selector(s: &str) -> Option { - match s { - "ansi" => Some(crate::dialects::ansi::ansi_dialect()), - "bigquery" => Some(crate::dialects::bigquery::bigquery_dialect()), - "postgres" => Some(crate::dialects::postgres::postgres_dialect()), - "snowflake" => Some(crate::dialects::snowflake::snowflake_dialect()), - "clickhouse" => Some(crate::dialects::clickhouse::clickhouse_dialect()), - "sparksql" => Some(crate::dialects::sparksql::sparksql_dialect()), - _ => None, + match DialectKind::from_str(s).ok()? { + DialectKind::Ansi => Some(crate::dialects::ansi::ansi_dialect()), + DialectKind::Bigquery => Some(crate::dialects::bigquery::bigquery_dialect()), + DialectKind::Postgres => Some(crate::dialects::postgres::postgres_dialect()), + DialectKind::Snowflake => Some(crate::dialects::snowflake::snowflake_dialect()), + DialectKind::Clickhouse => Some(crate::dialects::clickhouse::clickhouse_dialect()), + DialectKind::Sparksql => Some(crate::dialects::sparksql::sparksql_dialect()), } } diff --git a/crates/lib/src/core/rules/base.rs b/crates/lib/src/core/rules/base.rs index 82a8c6713..2c9f5ca77 100644 --- a/crates/lib/src/core/rules/base.rs +++ b/crates/lib/src/core/rules/base.rs @@ -394,6 +394,7 @@ pub struct RuleManifest { pub rule_class: ErasedRule, } +#[derive(Clone)] pub struct RulePack { pub(crate) rules: Vec, _reference_map: AHashMap<&'static str, AHashSet<&'static str>>, diff --git a/crates/lib/src/lib.rs b/crates/lib/src/lib.rs index c2d864878..a2750bb8a 100644 --- a/crates/lib/src/lib.rs +++ b/crates/lib/src/lib.rs @@ -2,7 +2,6 @@ #![feature(let_chains)] #![allow(non_snake_case, clippy::module_inception, clippy::type_complexity)] #![deny(unused_qualifications)] - pub mod api; pub mod cli; pub mod core; diff --git a/crates/lib/src/rules/references.rs b/crates/lib/src/rules/references.rs index 255717e82..e7284ebc4 100644 --- 
a/crates/lib/src/rules/references.rs
+++ b/crates/lib/src/rules/references.rs
@@ -2,9 +2,14 @@ use crate::core::rules::base::ErasedRule;
 
 pub mod RF01;
 pub mod RF03;
+pub mod RF04;
 
 pub fn rules() -> Vec<ErasedRule> {
     use crate::core::rules::base::Erased as _;
 
-    vec![RF01::RuleRF01.erased(), RF03::RuleRF03::default().erased()]
+    vec![
+        RF01::RuleRF01.erased(),
+        RF03::RuleRF03::default().erased(),
+        RF04::RuleRF04::default().erased(),
+    ]
 }
diff --git a/crates/lib/src/rules/references/RF04.rs b/crates/lib/src/rules/references/RF04.rs
new file mode 100644
index 000000000..93bff19b9
--- /dev/null
+++ b/crates/lib/src/rules/references/RF04.rs
@@ -0,0 +1,163 @@
+use itertools::Itertools;
+use regex::Regex;
+
+use crate::core::rules::base::{CloneRule, ErasedRule, LintResult, Rule};
+use crate::core::rules::context::RuleContext;
+use crate::core::rules::crawlers::{Crawler, SegmentSeekerCrawler};
+use crate::utils::identifers::identifiers_policy_applicable;
+
+/// Rule RF04 (`references.keywords`): keywords should not be used as identifiers.
+#[derive(Debug, Clone, Default)]
+pub struct RuleRF04 {
+    /// Policy name selecting which unquoted (naked) identifiers are checked.
+    unquoted_identifiers_policy: String,
+    /// Policy name selecting which quoted identifiers are checked; `None` checks none.
+    quoted_identifiers_policy: Option<String>,
+    /// Lower-cased identifiers that are never reported.
+    ignore_words: Vec<String>,
+    /// Identifiers matching any of these patterns are never reported.
+    ignore_words_regex: Vec<Regex>,
+}
+
+impl Rule for RuleRF04 {
+    /// Builds a configured copy of the rule from its configuration section.
+    ///
+    /// # Errors
+    ///
+    /// Returns the regex compiler's message when an `ignore_words_regex` entry is not a
+    /// valid regular expression.
+    ///
+    /// # Panics
+    ///
+    /// Panics when an expected key is missing from `config` or holds a value of an
+    /// unexpected type.
+    fn load_from_config(
+        &self,
+        config: &ahash::AHashMap<String, crate::core::config::Value>,
+    ) -> Result<ErasedRule, String> {
+        let ignore_words = config["ignore_words"]
+            .map(|it| {
+                it.as_array()
+                    .unwrap()
+                    .iter()
+                    .map(|it| it.as_string().unwrap().to_lowercase())
+                    .collect_vec()
+            })
+            .unwrap_or_default();
+
+        // A malformed user-supplied pattern is reported as a configuration error instead
+        // of aborting the process.
+        let ignore_words_regex = config["ignore_words_regex"]
+            .map(|it| {
+                it.as_array()
+                    .unwrap()
+                    .iter()
+                    .map(|it| Regex::new(it.as_string().unwrap()).map_err(|err| err.to_string()))
+                    .collect::<Result<Vec<_>, _>>()
+            })
+            .transpose()?
+            .unwrap_or_default();
+
+        Ok(RuleRF04 {
+            unquoted_identifiers_policy: config["unquoted_identifiers_policy"]
+                .as_string()
+                .unwrap()
+                .to_owned(),
+            quoted_identifiers_policy: config["quoted_identifiers_policy"]
+                .map(|it| it.as_string().unwrap().to_string()),
+            ignore_words,
+            ignore_words_regex,
+        }
+        .erased())
+    }
+
+    /// Dotted name under which the rule is configured.
+    fn name(&self) -> &'static str {
+        "references.keywords"
+    }
+
+    /// One-line summary of the rule.
+    fn description(&self) -> &'static str {
+        "Keywords should not be used as identifiers."
+    }
+
+    /// Markdown explanation with an anti-pattern and a best-practice example.
+    fn long_description(&self) -> &'static str {
+        r#"
+**Anti-pattern**
+
+In this example, `SUM` (a built-in function) is used as an alias.
+
+```sql
+SELECT
+    sum.a
+FROM foo AS sum
+```
+
+**Best practice**
+
+Avoid using keywords as the name of an alias.
+
+```sql
+SELECT
+    vee.a
+FROM foo AS vee
+```
+"#
+    }
+
+    /// Reports the identifier when it is a keyword and its policy applies.
+    ///
+    /// Naked identifiers are compared against the dialect's unreserved keywords. Quoted
+    /// identifiers have their delimiters removed first and are compared against both the
+    /// unreserved and the reserved keywords.
+    fn eval(&self, context: RuleContext) -> Vec<LintResult> {
+        let raw_segment = context.segment.raw();
+
+        if raw_segment.len() == 1
+            || self.ignore_words.contains(&raw_segment.to_lowercase())
+            || self.ignore_words_regex.iter().any(|regex| regex.is_match(&raw_segment))
+        {
+            return vec![LintResult::new(None, Vec::new(), None, None, None)];
+        }
+
+        let is_naked_keyword = context.segment.is_type("naked_identifier")
+            && identifiers_policy_applicable(
+                &self.unquoted_identifiers_policy,
+                &context.parent_stack,
+            )
+            && context
+                .dialect
+                .sets("unreserved_keywords")
+                .contains(raw_segment.to_uppercase().as_str());
+
+        // Both keyword lookups are grouped behind the type and policy checks: `&&` binds
+        // tighter than `||`, so an ungrouped reserved-keyword lookup would run for every
+        // identifier.
+        let is_quoted_keyword = context.segment.is_type("quoted_identifier")
+            && self.quoted_identifiers_policy.as_deref().map_or(false, |policy| {
+                identifiers_policy_applicable(policy, &context.parent_stack)
+            })
+            && {
+                // Drop the delimiters char-wise so that a multi-byte character can never
+                // put a slice index inside a code point.
+                let mut inner = raw_segment.chars();
+                inner.next();
+                inner.next_back();
+                let unquoted = inner.as_str().to_uppercase();
+
+                context.dialect.sets("unreserved_keywords").contains(unquoted.as_str())
+                    || context.dialect.sets("reserved_keywords").contains(unquoted.as_str())
+            };
+
+        if is_naked_keyword || is_quoted_keyword {
+            vec![LintResult::new(Some(context.segment.clone()), Vec::new(), None, None, None)]
+        } else {
+            Vec::new()
+        }
+    }
+
+    /// Visits only naked and quoted identifier segments.
+    fn crawl_behaviour(&self) -> Crawler {
+        SegmentSeekerCrawler::new(["naked_identifier", "quoted_identifier"].into()).into()
+    }
+}
diff --git a/crates/lib/src/utils/identifers.rs b/crates/lib/src/utils/identifers.rs
index 7644f7753..4aea7d370 100644
--- a/crates/lib/src/utils/identifers.rs
+++ b/crates/lib/src/utils/identifers.rs
@@
-1,6 +1,6 @@ use crate::core::parser::segments::base::ErasedSegment; -pub fn identifiers_policy_applicable(policy: &'static str, parent_stack: &[ErasedSegment]) -> bool { +pub fn identifiers_policy_applicable(policy: &str, parent_stack: &[ErasedSegment]) -> bool { match policy { "all" => true, "none" => false, diff --git a/crates/lib/test/fixtures/rules/std_rule_cases/RF04.yml b/crates/lib/test/fixtures/rules/std_rule_cases/RF04.yml new file mode 100644 index 000000000..deabbd8d5 --- /dev/null +++ b/crates/lib/test/fixtures/rules/std_rule_cases/RF04.yml @@ -0,0 +1,152 @@ +rule: RF04 + +test_pass_valid_identifier: + pass_str: CREATE TABLE artist(artist_name TEXT) + +test_fail_keyword_as_identifier_column: + fail_str: CREATE TABLE artist(create TEXT) + +test_fail_keyword_as_identifier_column_alias: + fail_str: SELECT 1 as parameter + +test_fail_keyword_as_identifier_table_alias: + fail_str: SELECT x FROM tbl AS parameter + +test_pass_valid_identifier_not_alias: + # should pass on default config as not alias + pass_str: SELECT parameter + +test_fail_keyword_as_identifier_not_alias_all: + fail_str: SELECT parameter + configs: + rules: + references.keywords: + unquoted_identifiers_policy: all + +test_pass_valid_identifier_table_alias_column_alias_config: + pass_str: SELECT x FROM tbl AS parameter + configs: + rules: + references.keywords: + unquoted_identifiers_policy: column_aliases + +test_fail_keyword_as_identifier_column_alias_config: + fail_str: SELECT x AS date FROM tbl AS parameter + configs: + rules: + references.keywords: + unquoted_identifiers_policy: column_aliases + +test_pass_valid_quoted_identifier: + pass_str: CREATE TABLE [artist]([artist_name] TEXT) + configs: + rules: + references.keywords: + quoted_identifiers_policy: aliases + core: + dialect: tsql + +test_fail_keyword_as_quoted_identifier_column: + fail_str: CREATE TABLE "artist"("create" TEXT) + configs: + rules: + references.keywords: + quoted_identifiers_policy: aliases + 
+test_pass_keyword_as_quoted_identifier_column_none_policy: + pass_str: CREATE TABLE "artist"("create" TEXT) + configs: + rules: + references.keywords: + quoted_identifiers_policy: none + +test_fail_keyword_as_quoted_identifier_column_alias: + fail_str: SELECT 1 as [parameter] + configs: + rules: + references.keywords: + quoted_identifiers_policy: aliases + core: + dialect: tsql + +test_fail_keyword_as_quoted_identifier_table_alias: + fail_str: SELECT [x] FROM [tbl] AS [parameter] + configs: + rules: + references.keywords: + quoted_identifiers_policy: aliases + core: + dialect: tsql + +test_pass_valid_quoted_identifier_not_alias: + # should pass on default config as not alias + pass_str: SELECT [parameter] + configs: + rules: + references.keywords: + quoted_identifiers_policy: aliases + core: + dialect: tsql + +test_fail_keyword_as_quoted_identifier_not_alias_all: + fail_str: SELECT [parameter] + configs: + rules: + references.keywords: + quoted_identifiers_policy: all + core: + dialect: tsql + +test_pass_valid_quoted_identifier_table_alias_column_alias_config: + pass_str: SELECT [x] FROM [tbl] AS [parameter] + configs: + rules: + references.keywords: + quoted_identifiers_policy: column_aliases + core: + dialect: tsql + +test_fail_keyword_as_quoted_identifier_column_alias_config: + fail_str: SELECT [x] AS [date] FROM [tbl] AS [parameter] + configs: + rules: + references.keywords: + quoted_identifiers_policy: column_aliases + core: + dialect: tsql + +test_pass_ignore_word1: + pass_str: CREATE TABLE artist(create TEXT) + configs: + rules: + references.keywords: + ignore_words: create + + +test_pass_ignore_word2: + pass_str: SELECT col1 AS date FROM table1 + configs: + rules: + references.keywords: + ignore_words: date + +test_pass_ignore_words_regex1: + pass_str: CREATE TABLE artist(create TEXT) + configs: + rules: + references.keywords: + ignore_words_regex: ^cr + + +test_pass_ignore_words_regex2: + pass_str: SELECT col1 AS date FROM table1 + configs: + rules: + 
references.keywords:
+        ignore_words_regex: ^da
+
+test_pass_one_character_identifier:
+  pass_str: SELECT d.col1 FROM table1 d
+  configs:
+    core:
+      dialect: snowflake
diff --git a/crates/lib/tests/rules.rs b/crates/lib/tests/rules.rs
new file mode 100644
index 000000000..524e178de
--- /dev/null
+++ b/crates/lib/tests/rules.rs
@@ -0,0 +1,140 @@
+use std::str::FromStr;
+
+use ahash::AHashMap;
+use glob::glob;
+use serde::Deserialize;
+use serde_with::{serde_as, KeyValueMap};
+use sqruff_lib::core::config::{FluffConfig, Value};
+use sqruff_lib::core::dialects::init::DialectKind;
+use sqruff_lib::core::linter::linter::Linter;
+
+/// One YAML fixture: the rule under test plus its named cases.
+#[serde_as]
+#[derive(Debug, Deserialize)]
+struct TestFile {
+    /// Code of the rule the cases exercise; used as the linter's `rule_allowlist`.
+    rule: String,
+    /// Every other top-level key of the file, one entry per case name.
+    #[serde_as(as = "KeyValueMap<_>")]
+    #[serde(flatten)]
+    cases: Vec<TestCase>,
+}
+
+/// A single named case of a fixture file.
+#[derive(Debug, Deserialize)]
+struct TestCase {
+    /// Case name, taken from the YAML key.
+    #[serde(rename = "$key$")]
+    name: String,
+    /// Expected outcome together with the SQL to lint.
+    #[serde(flatten)]
+    kind: TestCaseKind,
+    /// Optional configuration overrides for this case.
+    #[serde(default)]
+    configs: AHashMap<String, Value>,
+}
+
+/// What a case expects from the linter.
+///
+/// Untagged variants are tried in declaration order and unknown keys are ignored, so
+/// `Fix` has to come first: otherwise a case carrying both strings would always be
+/// read as `Pass`.
+#[derive(Debug, Deserialize)]
+#[serde(untagged)]
+enum TestCaseKind {
+    // The payload is not read until fix cases are implemented.
+    #[allow(dead_code)]
+    Fix {
+        pass_str: String,
+        fail_str: String,
+    },
+    Pass {
+        pass_str: String,
+    },
+    Fail {
+        fail_str: String,
+    },
+}
+
+/// Lints every case of every fixture under `test/fixtures/rules/std_rule_cases` and
+/// asserts the expected outcome. Cases whose dialect `DialectKind` cannot parse are skipped.
+// FIXME: Simplify config handling. It's quite chaotic right now.
+fn main() {
+    let mut linter = Linter::new(FluffConfig::default(), None, None);
+    let mut core = AHashMap::new();
+    core.insert("core".to_string(), Value::Map(<_>::default()));
+
+    for path in glob("test/fixtures/rules/std_rule_cases/*.yml").unwrap() {
+        let path = path.unwrap();
+        let input = std::fs::read_to_string(path).unwrap();
+
+        let file: TestFile = serde_yaml::from_str(&input).unwrap();
+        core.get_mut("core").unwrap().as_map_mut().unwrap().insert(
+            "rule_allowlist".into(),
+            Value::Array(vec![Value::String(file.rule.clone().into())]),
+        );
+
+        linter.config_mut().raw.extend(core.clone());
+
+        for case in file.cases {
+            let dialect_name = case
+                .configs
+                .get("core")
+                .and_then(|it| it.as_map())
+                .and_then(|it| it.get("dialect"))
+                .and_then(|it| it.as_string())
+                .unwrap_or("ansi");
+
+            let dialect = DialectKind::from_str(dialect_name);
+            let message = if dialect.is_err() {
+                format!(" ignored, dialect {dialect_name} is not supported")
+            } else {
+                String::new()
+            };
+
+            println!("test {}::{}{message}", file.rule, case.name);
+
+            if dialect.is_err() {
+                continue;
+            }
+
+            let has_config = !case.configs.is_empty();
+
+            if has_config {
+                *linter.config_mut() = FluffConfig::new(case.configs, None, None);
+                linter.config_mut().raw.extend(core.clone());
+            }
+
+            let rule_pack = linter.get_rulepack().rules();
+
+            match case.kind {
+                TestCaseKind::Pass { pass_str } => {
+                    let f = linter.lint_string_wrapped(&pass_str, None, None, rule_pack);
+                    assert_eq!(
+                        &f.paths[0].files[0].violations,
+                        &[],
+                        "{}::{} reported violations for a passing query",
+                        file.rule,
+                        case.name
+                    );
+                }
+                TestCaseKind::Fail { fail_str } => {
+                    let f = linter.lint_string_wrapped(&fail_str, None, None, rule_pack);
+                    assert_ne!(
+                        &f.paths[0].files[0].violations,
+                        &[],
+                        "{}::{} reported no violation for a failing query",
+                        file.rule,
+                        case.name
+                    );
+                }
+                TestCaseKind::Fix { .. } => unimplemented!(),
+            }
+
+            if has_config {
+                *linter.config_mut() = FluffConfig::default();
+                linter.config_mut().raw.extend(core.clone());
+            }
+        }
+    }
+}