Skip to content

Commit

Permalink
feat: add RF06 rule
Browse files Browse the repository at this point in the history
  • Loading branch information
gvozdvmozgu authored and benfdking committed Jul 10, 2024
1 parent e77e7ca commit 2df6247
Show file tree
Hide file tree
Showing 5 changed files with 869 additions and 14 deletions.
4 changes: 2 additions & 2 deletions crates/lib/src/core/parser/parsers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,8 @@ impl Matchable for StringParser {

/// Parser described by a regular-expression `template` plus an optional `anti_template`.
///
/// NOTE(review): this is a diff view of the struct — the two private field declarations are the
/// pre-commit lines and the two `pub(crate)` declarations are their replacements; only one pair
/// exists in the real file.
#[derive(Debug, Clone)]
pub struct RegexParser {
// Pre-commit declarations (private to this module).
template: Regex,
anti_template: Option<Regex>,
// Post-commit declarations: crate-visible so rule RF06 can read both patterns when deciding
// whether an identifier's contents would be valid without quotes.
pub(crate) template: Regex,
pub(crate) anti_template: Option<Regex>,
// NOTE(review): presumably builds the output segment from the matched segment — confirm
// against the rest of parsers.rs, which is not visible here.
factory: fn(&dyn Segment) -> ErasedSegment,
cache_key: MatchableCacheKey,
}
Expand Down
2 changes: 2 additions & 0 deletions crates/lib/src/rules/references.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ pub mod RF01;
pub mod RF03;
pub mod RF04;
pub mod RF05;
pub mod RF06;

pub fn rules() -> Vec<ErasedRule> {
use crate::core::rules::base::Erased as _;
Expand All @@ -13,5 +14,6 @@ pub fn rules() -> Vec<ErasedRule> {
RF03::RuleRF03::default().erased(),
RF04::RuleRF04::default().erased(),
RF05::RuleRF05::default().erased(),
RF06::RuleRF06::default().erased(),
]
}
230 changes: 230 additions & 0 deletions crates/lib/src/rules/references/RF06.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
use regex::Regex;

use crate::core::config::Value;
use crate::core::parser::parsers::RegexParser;
use crate::core::parser::segments::base::{CodeSegmentNewArgs, IdentifierSegment};
use crate::core::rules::base::{Erased, ErasedRule, LintFix, LintResult, Rule, RuleGroups};
use crate::core::rules::context::RuleContext;
use crate::core::rules::crawlers::{Crawler, SegmentSeekerCrawler};
use crate::utils::functional::context::FunctionalContext;

/// Rule RF06 (`references.quoting`): reports identifiers whose quoting does not match the
/// configured policy.
#[derive(Clone, Debug, Default)]
pub struct RuleRF06 {
    /// When `true`, naked identifiers are reported as missing quotes; when `false`, quoted
    /// identifiers that would also be valid unquoted are reported (and fixed).
    prefer_quoted_identifiers: bool,
    /// When `true`, an identifier that is a reserved or unreserved keyword of the dialect is
    /// reported if it is not quoted.
    prefer_quoted_keywords: bool,
    /// Identifier contents (stored lowercased) that this rule never reports.
    ignore_words: Vec<String>,
    /// Identifier contents matching any of these patterns are never reported.
    ignore_words_regex: Vec<Regex>,
    /// The rule returns no results for the `postgres` and `snowflake` dialects unless this is
    /// set.
    force_enable: bool,
}

impl Rule for RuleRF06 {
fn groups(&self) -> &'static [RuleGroups] {
&[RuleGroups::All, RuleGroups::References]
}

fn load_from_config(
&self,
config: &ahash::AHashMap<String, Value>,
) -> Result<ErasedRule, String> {
Ok(Self {
prefer_quoted_identifiers: config["prefer_quoted_identifiers"].as_bool().unwrap(),
prefer_quoted_keywords: config["prefer_quoted_keywords"].as_bool().unwrap(),
ignore_words: config["ignore_words"]
.map(|it| {
it.as_array()
.unwrap()
.iter()
.map(|it| it.as_string().unwrap().to_lowercase())
.collect()
})
.unwrap_or_default(),
ignore_words_regex: config["ignore_words_regex"]
.map(|it| {
it.as_array()
.unwrap()
.iter()
.map(|it| Regex::new(it.as_string().unwrap()).unwrap())
.collect()
})
.unwrap_or_default(),
force_enable: config["force_enable"].as_bool().unwrap(),
}
.erased())
}

fn name(&self) -> &'static str {
"references.quoting"
}

fn description(&self) -> &'static str {
"Unnecessary quoted identifier."
}

fn long_description(&self) -> &'static str {
r#"
**Anti-pattern**
In this example, a valid unquoted identifier, that is also not a reserved keyword, is needlessly quoted.
```sql
SELECT 123 as "foo"
```
**Best practice**
Use unquoted identifiers where possible.
```sql
SELECT 123 as foo
```
When `prefer_quoted_identifiers = True`, the quotes are always necessary, no matter if the identifier is valid, a reserved keyword, or contains special characters.
> **Note**
> Note due to different quotes being used by different dialects supported by `SQLFluff`, and those quotes meaning different things in different contexts, this mode is not `sqlfluff fix` compatible.
**Anti-pattern**
In this example, a valid unquoted identifier, that is also not a reserved keyword, is required to be quoted.
```sql
SELECT 123 as foo
```
**Best practice**
Use quoted identifiers.
```sql
SELECT 123 as "foo" -- For ANSI, ...
-- or
SELECT 123 as `foo` -- For BigQuery, MySql, ...
```"#
}

fn eval(&self, context: RuleContext) -> Vec<LintResult> {
if matches!(context.dialect.name, "postgres" | "snowflake") && !self.force_enable {
return Vec::new();
}

if FunctionalContext::new(context.clone())
.parent_stack()
.any(Some(|it| ["password_auth", "execute_as_clause"].iter().any(|ty| it.is_type(ty))))
{
return Vec::new();
}

let identifier_is_quoted =
!lazy_regex::regex_is_match!(r#"^[^"\'\[].+[^"\'\]]$"#, context.segment.raw().as_ref());

let identifier_contents = context.segment.raw();
let identifier_contents = if identifier_is_quoted {
identifier_contents
.get(1..identifier_contents.len() - 1)
.map(ToOwned::to_owned)
.unwrap_or_default()
} else {
identifier_contents.to_string()
};

let identifier_is_keyword = context
.dialect
.sets("reserved_keywords")
.contains(identifier_contents.to_uppercase().as_str())
|| context
.dialect
.sets("unreserved_keywords")
.contains(identifier_contents.to_uppercase().as_str());

let context_policy =
if self.prefer_quoted_identifiers { "naked_identifier" } else { "quoted_identifier" };

if self.ignore_words.contains(&identifier_contents.to_lowercase()) {
return Vec::new();
}

if self.ignore_words_regex.iter().any(|regex| regex.is_match(identifier_contents.as_ref()))
{
return Vec::new();
}

if self.prefer_quoted_keywords && identifier_is_keyword {
return if !identifier_is_quoted {
vec![LintResult::new(
context.segment.into(),
Vec::new(),
None,
Some(format!("Missing quoted keyword identifier {identifier_contents}.")),
None,
)]
} else {
Vec::new()
};
}

if !context.segment.is_type(context_policy)
|| context.segment.raw().eq_ignore_ascii_case("quoted_identifier")
|| context.segment.raw().eq_ignore_ascii_case("naked_identifier")
{
return Vec::new();
}

if self.prefer_quoted_identifiers {
return vec![LintResult::new(
context.segment.into(),
Vec::new(),
None,
Some(format!("Missing quoted identifier {identifier_contents}.")),
None,
)];
}

let owned = context.dialect.grammar("NakedIdentifierSegment");

let naked_identifier_parser = owned.as_any().downcast_ref::<RegexParser>().unwrap();

if is_full_match(naked_identifier_parser.template.as_str(), &identifier_contents)
&& naked_identifier_parser.anti_template.as_ref().map_or(true, |anti_template| {
!is_full_match(anti_template.as_str(), &identifier_contents)
})
{
return vec![LintResult::new(
context.segment.clone().into(),
vec![LintFix::replace(
context.segment.clone(),
vec![IdentifierSegment::create(
&identifier_contents,
None,
CodeSegmentNewArgs {
code_type: "naked_identifier",
instance_types: vec![],
trim_start: None,
trim_chars: None,
source_fixes: None,
},
)],
None,
)],
None,
Some(format!("Unnecessary quoted identifier {}.", context.segment.raw())),
None,
)];
}

Vec::new()
}

fn is_fix_compatible(&self) -> bool {
true
}

fn crawl_behaviour(&self) -> Crawler {
SegmentSeekerCrawler::new(["quoted_identifier", "naked_identifier"].into()).into()
}
}

/// Returns `true` when `pattern` matches the whole of `text`, ignoring case.
///
/// The pattern is wrapped in a non-capturing group before the `^`/`$` anchors are added.
/// Without the group, a pattern containing a top-level alternation such as `a|b` would expand
/// to `^a|b$`, which anchors each alternative on one side only and therefore also accepts
/// partial matches.
///
/// # Panics
/// Panics if the anchored pattern is not a valid regular expression or if the regex engine
/// reports an error while matching.
fn is_full_match(pattern: &str, text: &str) -> bool {
    // `(?i)` makes the whole expression case-insensitive.
    let anchored = format!("(?i)^(?:{pattern})$");
    let regex = fancy_regex::Regex::new(&anchored).expect("pattern must be a valid regex");
    regex.is_match(text).expect("regex evaluation failed")
}
Loading

0 comments on commit 2df6247

Please sign in to comment.