From 2df6247e61f699eda1cabdcc73f05446efc71405 Mon Sep 17 00:00:00 2001 From: gvozdvmozgu Date: Tue, 9 Jul 2024 13:58:14 -0700 Subject: [PATCH] feat: add `RF06` rule --- crates/lib/src/core/parser/parsers.rs | 4 +- crates/lib/src/rules/references.rs | 2 + crates/lib/src/rules/references/RF06.rs | 230 +++++++ .../fixtures/rules/std_rule_cases/RF06.yml | 625 ++++++++++++++++++ crates/lib/tests/rules.rs | 22 +- 5 files changed, 869 insertions(+), 14 deletions(-) create mode 100644 crates/lib/src/rules/references/RF06.rs create mode 100644 crates/lib/test/fixtures/rules/std_rule_cases/RF06.yml diff --git a/crates/lib/src/core/parser/parsers.rs b/crates/lib/src/core/parser/parsers.rs index a3568bd9f..df6bae427 100644 --- a/crates/lib/src/core/parser/parsers.rs +++ b/crates/lib/src/core/parser/parsers.rs @@ -170,8 +170,8 @@ impl Matchable for StringParser { #[derive(Debug, Clone)] pub struct RegexParser { - template: Regex, - anti_template: Option, + pub(crate) template: Regex, + pub(crate) anti_template: Option, factory: fn(&dyn Segment) -> ErasedSegment, cache_key: MatchableCacheKey, } diff --git a/crates/lib/src/rules/references.rs b/crates/lib/src/rules/references.rs index 5a4664803..5f1bc5f9b 100644 --- a/crates/lib/src/rules/references.rs +++ b/crates/lib/src/rules/references.rs @@ -4,6 +4,7 @@ pub mod RF01; pub mod RF03; pub mod RF04; pub mod RF05; +pub mod RF06; pub fn rules() -> Vec { use crate::core::rules::base::Erased as _; @@ -13,5 +14,6 @@ pub fn rules() -> Vec { RF03::RuleRF03::default().erased(), RF04::RuleRF04::default().erased(), RF05::RuleRF05::default().erased(), + RF06::RuleRF06::default().erased(), ] } diff --git a/crates/lib/src/rules/references/RF06.rs b/crates/lib/src/rules/references/RF06.rs new file mode 100644 index 000000000..bf6c067cb --- /dev/null +++ b/crates/lib/src/rules/references/RF06.rs @@ -0,0 +1,230 @@ +use regex::Regex; + +use crate::core::config::Value; +use crate::core::parser::parsers::RegexParser; +use crate::core::parser::segments::base::{CodeSegmentNewArgs, IdentifierSegment}; +use crate::core::rules::base::{Erased, ErasedRule, LintFix, LintResult, Rule, RuleGroups}; +use crate::core::rules::context::RuleContext; +use crate::core::rules::crawlers::{Crawler, SegmentSeekerCrawler}; +use crate::utils::functional::context::FunctionalContext; + +#[derive(Default, Debug, Clone)] +pub struct RuleRF06 { + prefer_quoted_identifiers: bool, + prefer_quoted_keywords: bool, + ignore_words: Vec, + ignore_words_regex: Vec, + force_enable: bool, +} + +impl Rule for RuleRF06 { + fn groups(&self) -> &'static [RuleGroups] { + &[RuleGroups::All, RuleGroups::References] + } + + fn load_from_config( + &self, + config: &ahash::AHashMap, + ) -> Result { + Ok(Self { + prefer_quoted_identifiers: config["prefer_quoted_identifiers"].as_bool().unwrap(), + prefer_quoted_keywords: config["prefer_quoted_keywords"].as_bool().unwrap(), + ignore_words: config["ignore_words"] + .map(|it| { + it.as_array() + .unwrap() + .iter() + .map(|it| it.as_string().unwrap().to_lowercase()) + .collect() + }) + .unwrap_or_default(), + ignore_words_regex: config["ignore_words_regex"] + .map(|it| { + it.as_array() + .unwrap() + .iter() + .map(|it| Regex::new(it.as_string().unwrap()).unwrap()) + .collect() + }) + .unwrap_or_default(), + force_enable: config["force_enable"].as_bool().unwrap(), + } + .erased()) + } + + fn name(&self) -> &'static str { + "references.quoting" + } + + fn description(&self) -> &'static str { + "Unnecessary quoted identifier." + } + + fn long_description(&self) -> &'static str { + r#" +**Anti-pattern** + +In this example, a valid unquoted identifier, that is also not a reserved keyword, is needlessly quoted. + +```sql +SELECT 123 as "foo" +``` + +**Best practice** + +Use unquoted identifiers where possible. + +```sql +SELECT 123 as foo +``` + +When `prefer_quoted_identifiers = True`, the quotes are always necessary, no matter if the identifier is valid, a reserved keyword, or contains special characters. + +> **Note** +> Note due to different quotes being used by different dialects supported by `SQLFluff`, and those quotes meaning different things in different contexts, this mode is not `sqlfluff fix` compatible. + +**Anti-pattern** + +In this example, a valid unquoted identifier, that is also not a reserved keyword, is required to be quoted. + +```sql +SELECT 123 as foo +``` + +**Best practice** + +Use quoted identifiers. + +```sql +SELECT 123 as "foo" -- For ANSI, ... +-- or +SELECT 123 as `foo` -- For BigQuery, MySql, ... +```"# + } + + fn eval(&self, context: RuleContext) -> Vec { + if matches!(context.dialect.name, "postgres" | "snowflake") && !self.force_enable { + return Vec::new(); + } + + if FunctionalContext::new(context.clone()) + .parent_stack() + .any(Some(|it| ["password_auth", "execute_as_clause"].iter().any(|ty| it.is_type(ty)))) + { + return Vec::new(); + } + + let identifier_is_quoted = + !lazy_regex::regex_is_match!(r#"^[^"\'\[].+[^"\'\]]$"#, context.segment.raw().as_ref()); + + let identifier_contents = context.segment.raw(); + let identifier_contents = if identifier_is_quoted { + identifier_contents + .get(1..identifier_contents.len() - 1) + .map(ToOwned::to_owned) + .unwrap_or_default() + } else { + identifier_contents.to_string() + }; + + let identifier_is_keyword = context + .dialect + .sets("reserved_keywords") + .contains(identifier_contents.to_uppercase().as_str()) + || context + .dialect + .sets("unreserved_keywords") + .contains(identifier_contents.to_uppercase().as_str()); + + let context_policy = + if self.prefer_quoted_identifiers { "naked_identifier" } else { "quoted_identifier" }; + + if self.ignore_words.contains(&identifier_contents.to_lowercase()) { + return Vec::new(); + } + + if self.ignore_words_regex.iter().any(|regex| regex.is_match(identifier_contents.as_ref())) + { + return Vec::new(); + } + + if self.prefer_quoted_keywords && identifier_is_keyword { + return if !identifier_is_quoted { + vec![LintResult::new( + context.segment.into(), + Vec::new(), + None, + Some(format!("Missing quoted keyword identifier {identifier_contents}.")), + None, + )] + } else { + Vec::new() + }; + } + + if !context.segment.is_type(context_policy) + || context.segment.raw().eq_ignore_ascii_case("quoted_identifier") + || context.segment.raw().eq_ignore_ascii_case("naked_identifier") + { + return Vec::new(); + } + + if self.prefer_quoted_identifiers { + return vec![LintResult::new( + context.segment.into(), + Vec::new(), + None, + Some(format!("Missing quoted identifier {identifier_contents}.")), + None, + )]; + } + + let owned = context.dialect.grammar("NakedIdentifierSegment"); + + let naked_identifier_parser = owned.as_any().downcast_ref::().unwrap(); + + if is_full_match(naked_identifier_parser.template.as_str(), &identifier_contents) + && naked_identifier_parser.anti_template.as_ref().map_or(true, |anti_template| { + !is_full_match(anti_template.as_str(), &identifier_contents) + }) + { + return vec![LintResult::new( + context.segment.clone().into(), + vec![LintFix::replace( + context.segment.clone(), + vec![IdentifierSegment::create( + &identifier_contents, + None, + CodeSegmentNewArgs { + code_type: "naked_identifier", + instance_types: vec![], + trim_start: None, + trim_chars: None, + source_fixes: None, + }, + )], + None, + )], + None, + Some(format!("Unnecessary quoted identifier {}.", context.segment.raw())), + None, + )]; + } + + Vec::new() + } + + fn is_fix_compatible(&self) -> bool { + true + } + + fn crawl_behaviour(&self) -> Crawler { + SegmentSeekerCrawler::new(["quoted_identifier", "naked_identifier"].into()).into() + } +} + +fn is_full_match(pattern: &str, text: &str) -> bool { + let full_pattern = format!("(?i)^{}$", pattern); // Adding (?i) for case insensitivity + let regex = fancy_regex::Regex::new(&full_pattern).unwrap(); + regex.is_match(text).unwrap() +} diff --git a/crates/lib/test/fixtures/rules/std_rule_cases/RF06.yml b/crates/lib/test/fixtures/rules/std_rule_cases/RF06.yml new file mode 100644 index 000000000..f7d67b4c9 --- /dev/null +++ b/crates/lib/test/fixtures/rules/std_rule_cases/RF06.yml @@ -0,0 +1,625 @@ +rule: RF06 + +test_pass_column_reference: + pass_str: | + SELECT 123 AS foo; + +test_fail_column_reference: + fail_str: | + SELECT 123 AS "foo"; + fix_str: | + SELECT 123 AS foo; + +test_fail_column_reference_tsql: + fail_str: | + SELECT 123 AS [foo]; + fix_str: | + SELECT 123 AS foo; + configs: + core: + dialect: tsql + +test_pass_table_reference: + pass_str: | + SELECT foo + FROM bar; + +test_fail_table_reference: + fail_str: | + SELECT foo + FROM "bar"; + fix_str: | + SELECT foo + FROM bar; + +test_fail_table_reference_tsql: + fail_str: | + SELECT foo + FROM [bar]; + fix_str: | + SELECT foo + FROM bar; + configs: + core: + dialect: tsql + +test_pass_multiple_references: + pass_str: | + SELECT foo + FROM bar.baz; + +test_fail_multiple_references: + fail_str: | + SELECT foo + FROM "bar"."baz"; + fix_str: | + SELECT foo + FROM bar.baz; + +test_fail_multiple_references_tsql: + fail_str: | + SELECT foo + FROM [bar].[baz]; + fix_str: | + SELECT foo + FROM bar.baz; + configs: + core: + dialect: tsql + +test_pass_whitespace: + pass_str: | + SELECT 123 AS "I cannot be unquoted" + +test_pass_whitespace_tsql: + pass_str: | + SELECT 123 AS [I cannot be unquoted] + configs: + core: + dialect: tsql + +test_pass_special_symbols: + pass_str: | + SELECT 123 AS "I-c@nn0t-be~un-quoted" + +test_pass_special_symbols_tsql: + pass_str: | + SELECT 123 AS [I-c@nn0t-be~un-quoted] + configs: + core: + dialect: tsql + +test_pass_reserved_keyword: + pass_str: | + SELECT 123 AS "SELECT" + +test_pass_reserved_keyword_tsql: + pass_str: | + SELECT 123 AS [SELECT] + configs: + core: + dialect: tsql + +test_pass_column_reference_prefer_quoted_ansi: + pass_str: | + SELECT 123 AS "foo"; + configs: + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_pass_column_reference_prefer_quoted_tsql: + pass_str: | + SELECT 123 AS [foo]; + configs: + core: + dialect: tsql + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_fail_column_reference_prefer_quoted_ansi: + fail_str: | + SELECT 123 AS foo; + configs: + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_pass_table_reference_prefer_quoted_ansi: + pass_str: | + SELECT "foo" + FROM "bar"; + configs: + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_pass_table_reference_prefer_quoted_tsql: + pass_str: | + SELECT [foo] + FROM [bar]; + configs: + core: + dialect: tsql + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_fail_table_reference_prefer_quoted_ansi: + fail_str: | + SELECT "foo" + FROM bar; + configs: + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_fail_table_reference_prefer_quoted_tsql: + fail_str: | + SELECT [foo] + FROM bar; + configs: + core: + dialect: tsql + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_pass_multiple_references_prefer_quoted_ansi: + pass_str: | + SELECT "foo" + FROM "bar"."baz"; + configs: + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_pass_multiple_references_prefer_quoted_tsql: + pass_str: | + SELECT [foo] + FROM [bar].[baz]; + configs: + core: + dialect: tsql + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_fail_multiple_references_prefer_quoted_ansi: + fail_str: | + SELECT "foo" + FROM bar.baz; + configs: + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_fail_multiple_references_prefer_quoted_tsql: + fail_str: | + SELECT [foo] + FROM bar.baz; + configs: + core: + dialect: tsql + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_pass_whitespace_prefer_quoted_ansi: + pass_str: | + SELECT 123 AS "I cannot be unquoted" + configs: + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_pass_whitespace_prefer_quoted_tsql: + pass_str: | + SELECT 123 AS [I cannot be unquoted] + configs: + core: + dialect: tsql + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_pass_special_symbols_prefer_quoted_ansi: + pass_str: | + SELECT 123 AS "I-c@nn0t-be~un-quoted" + configs: + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_pass_special_symbols_prefer_quoted_tsql: + pass_str: | + SELECT 123 AS [I-c@nn0t-be~un-quoted] + configs: + core: + dialect: tsql + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_pass_reserved_keyword_prefer_quoted_ansi: + pass_str: | + SELECT 123 AS "SELECT" + configs: + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_pass_reserved_keyword_prefer_quoted_tsql: + pass_str: | + SELECT 123 AS [SELECT] + configs: + core: + dialect: tsql + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_pass_column_reference_prefer_quoted_backticks: + pass_str: | + SELECT 123 AS `foo`; + configs: + core: + dialect: bigquery + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_fail_column_reference_prefer_quoted_backticks: + fail_str: | + SELECT 123 AS foo; + configs: + core: + dialect: bigquery + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_pass_table_reference_prefer_quoted_backticks: + pass_str: | + SELECT `foo` + FROM `bar`; + configs: + core: + dialect: bigquery + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_fail_table_reference_prefer_quoted_backticks: + fail_str: | + SELECT `foo` + FROM bar; + configs: + core: + dialect: bigquery + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_pass_multiple_references_prefer_quoted_backticks: + pass_str: | + SELECT `foo` + FROM `bar`.`baz`; + configs: + core: + dialect: bigquery + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_fail_multiple_references_prefer_quoted_backticks: + fail_str: | + SELECT `foo` + FROM bar.baz; + configs: + core: + dialect: bigquery + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_pass_whitespace_prefer_quoted_backticks: + pass_str: | + SELECT 123 AS `I cannot be unquoted` + configs: + core: + dialect: bigquery + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_pass_special_symbols_prefer_quoted_backticks: + pass_str: | + SELECT 123 AS `I-c@nn0t-be~un-quoted` + configs: + core: + dialect: bigquery + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_pass_reserved_keyword_prefer_quoted_backticks: + pass_str: | + SELECT 123 AS `SELECT` + configs: + core: + dialect: bigquery + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_pass_datetime_redshift: + pass_str: | + SELECT "datetime" + configs: + core: + dialect: redshift + +test_pass_uppivot_bigquery: + pass_str: | + SELECT + * + FROM model + UNPIVOT( + (A, B) + FOR year + IN ((C, D) AS 'year_2011', (E, F) AS 'year_2012')); + configs: + core: + dialect: bigquery + +test_pass_quoted_identifier_keyword_tsql: + pass_str: | + SET QUOTED_IDENTIFIER ON + GO + configs: + core: + dialect: tsql + +test_pass_create_user_quoted_password_exasol: + pass_str: | + CREATE USER user_1 IDENTIFIED BY "h12_xhz"; + configs: + core: + dialect: exasol + +test_fail_create_quoted_user_exasol: + fail_str: | + CREATE USER "USER1" IDENTIFIED BY "h12_xhz"; + fix_str: | + CREATE USER USER1 IDENTIFIED BY "h12_xhz"; + configs: + core: + dialect: exasol + +test_pass_ignore_lists: + pass_str: + SELECT 123 AS "foo"; + configs: + rules: + references.quoting: + ignore_words: foo + +test_pass_ignore_lists_tsql: + pass_str: + SELECT 123 AS [foo]; + configs: + core: + dialect: tsql + rules: + references.quoting: + ignore_words: foo + +test_pass_ignore_lists_mixed_case: + pass_str: + SELECT 123 AS "Foo"; + configs: + rules: + references.quoting: + ignore_words: foo + +test_pass_ignore_lists_mixed_case_tsql: + pass_str: + SELECT 123 AS [Foo]; + configs: + core: + dialect: tsql + rules: + references.quoting: + ignore_words: foo + +test_pass_ignore_words_regex: + pass_str: + SELECT 123 AS "foo"; + configs: + rules: + references.quoting: + ignore_words_regex: ^fo + +test_pass_ignore_words_regex_tsql: + pass_str: + SELECT 123 AS [foo]; + configs: + core: + dialect: tsql + rules: + references.quoting: + ignore_words_regex: ^fo + +test_pass_ignore_words_regex_mixed_case: + pass_str: + SELECT 123 AS "Foo"; + configs: + rules: + references.quoting: + ignore_words_regex: ^Fo + +test_pass_ignore_words_regex_mixed_case_tsql: + pass_str: + SELECT 123 AS [Foo]; + configs: + core: + dialect: tsql + rules: + references.quoting: + ignore_words_regex: ^Fo + +test_pass_ignore_if: + pass_str: + DROP TABLE IF EXISTS "example"; + configs: + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_pass_ignore_if_tsql: + pass_str: + DROP TABLE IF EXISTS [example]; + configs: + core: + dialect: tsql + rules: + references.quoting: + prefer_quoted_identifiers: true + +test_fail_insert_overwrite_directory: + fail_str: | + INSERT OVERWRITE DIRECTORY '/tmp/destination' + USING PARQUET + OPTIONS ("col1" = "1", "col2" = "2", "col3" = 'test', "user" = "a person") + SELECT a FROM test_table; + fix_str: | + INSERT OVERWRITE DIRECTORY '/tmp/destination' + USING PARQUET + OPTIONS (col1 = "1", col2 = "2", col3 = 'test', "user" = "a person") + SELECT a FROM test_table; + configs: + core: + dialect: sparksql + +test_pass_insert_overwrite_directory: + pass_str: | + INSERT OVERWRITE DIRECTORY '/tmp/destination' + USING PARQUET + OPTIONS (col1 = "1", col2 = "2", col3 = 'test', "user" = "a person") + SELECT a FROM test_table; + configs: + core: + dialect: sparksql + +test_fail_quoted_column_ansi: + fail_str: | + SELECT d."date" + FROM d + fix_str: | + SELECT d.date + FROM d + +test_fail_quoted_column_tsql: + fail_str: | + SELECT d.[date] + FROM d + fix_str: | + SELECT d.date + FROM d + configs: + core: + dialect: tsql + +test_pass_quoted_column_snowflake: + # The rule is disabled by default in Snowflake. + pass_str: | + SELECT d."date" + FROM d + configs: + core: + dialect: snowflake + +test_fail_quoted_column_snowflake_force_enable: + fail_str: | + SELECT d."date" + FROM d + fix_str: | + SELECT d.date + FROM d + configs: + core: + dialect: snowflake + rules: + references.quoting: + force_enable: true + +test_pass_quoted_column_postgres: + # The rule is disabled by default in Postgres. + pass_str: | + SELECT d."date" + FROM d + configs: + core: + dialect: postgres + +test_fail_quoted_column_postgres_force_enable: + fail_str: | + SELECT d."date" + FROM d + fix_str: | + SELECT d.date + FROM d + configs: + core: + dialect: postgres + rules: + references.quoting: + force_enable: true + +test_pass_prefer_quoted_keywords_athena: + pass_str: SELECT 1 AS "metadata" + configs: + rules: + references.quoting: + prefer_quoted_keywords: true + core: + dialect: athena + +test_fail_prefer_quoted_keywords_athena: + fail_str: SELECT 1 AS metadata + configs: + rules: + references.quoting: + prefer_quoted_keywords: true + core: + dialect: athena + +test_pass_tsql_execute_as: + # 'login_test' should remain quoted. + pass_str: | + CREATE TRIGGER connection_limit_trigger + ON ALL SERVER WITH EXECUTE AS 'login_test' + FOR LOGON + AS + PRINT 'Database Created.' + SELECT 1 + GO + configs: + rules: + references.quoting: + prefer_quoted_identifiers: false + core: + dialect: tsql + +test_pass_exasol_password_literals: + # password literals should remain quoted. + pass_str: | + ALTER USER user_1 IDENTIFIED BY "h22_xhz" REPLACE "h12_xhz"; + configs: + rules: + references.quoting: + prefer_quoted_identifiers: false + core: + dialect: exasol diff --git a/crates/lib/tests/rules.rs b/crates/lib/tests/rules.rs index d630b7070..2eee1cfe0 100644 --- a/crates/lib/tests/rules.rs +++ b/crates/lib/tests/rules.rs @@ -54,17 +54,9 @@ struct TestCase { #[derive(Debug, Deserialize)] #[serde(untagged)] enum TestCaseKind { - Pass { - pass_str: String, - }, - Fail { - fail_str: String, - }, - #[allow(dead_code)] - Fix { - pass_str: String, - fail_str: String, - }, + Pass { pass_str: String }, + Fix { fail_str: String, fix_str: String }, + Fail { fail_str: String }, } // FIXME: Simplify FluffConfig handling. It's quite chaotic right now. @@ -140,7 +132,13 @@ fn main() { let f = linter.lint_string_wrapped(&fail_str, None, None, rule_pack); assert_ne!(&f.paths[0].files[0].violations, &[]); } - TestCaseKind::Fix { .. } => unimplemented!(), + TestCaseKind::Fix { fail_str, fix_str } => { + let f = + linter.lint_string_wrapped(&fail_str, None, Some(true), rule_pack).paths[0] + .files[0] + .fix_string(); + assert_eq!(f, fix_str); + } } if has_config {