Skip to content

Implement lint for regex::Regex compilation inside a loop #13412

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5873,6 +5873,7 @@ Released 2018-09-13
[`ref_in_deref`]: https://rust-lang.github.io/rust-clippy/master/index.html#ref_in_deref
[`ref_option_ref`]: https://rust-lang.github.io/rust-clippy/master/index.html#ref_option_ref
[`ref_patterns`]: https://rust-lang.github.io/rust-clippy/master/index.html#ref_patterns
[`regex_creation_in_loops`]: https://rust-lang.github.io/rust-clippy/master/index.html#regex_creation_in_loops
[`regex_macro`]: https://rust-lang.github.io/rust-clippy/master/index.html#regex_macro
[`renamed_function_params`]: https://rust-lang.github.io/rust-clippy/master/index.html#renamed_function_params
[`repeat_once`]: https://rust-lang.github.io/rust-clippy/master/index.html#repeat_once
Expand Down
1 change: 1 addition & 0 deletions clippy_lints/src/declared_lints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,7 @@ pub static LINTS: &[&crate::LintInfo] = &[
crate::ref_patterns::REF_PATTERNS_INFO,
crate::reference::DEREF_ADDROF_INFO,
crate::regex::INVALID_REGEX_INFO,
crate::regex::REGEX_CREATION_IN_LOOPS_INFO,
crate::regex::TRIVIAL_REGEX_INFO,
crate::repeat_vec_with_capacity::REPEAT_VEC_WITH_CAPACITY_INFO,
crate::reserve_after_initialization::RESERVE_AFTER_INITIALIZATION_INFO,
Expand Down
65 changes: 63 additions & 2 deletions clippy_lints/src/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use clippy_utils::source::SpanRangeExt;
use clippy_utils::{def_path_def_ids, path_def_id, paths};
use rustc_ast::ast::{LitKind, StrStyle};
use rustc_hir::def_id::DefIdMap;
use rustc_hir::{BorrowKind, Expr, ExprKind};
use rustc_hir::{BorrowKind, Expr, ExprKind, OwnerId};
use rustc_lint::{LateContext, LateLintPass};
use rustc_session::impl_lint_pass;
use rustc_span::{BytePos, Span};
Expand Down Expand Up @@ -55,6 +55,44 @@ declare_clippy_lint! {
"trivial regular expressions"
}

declare_clippy_lint! {
/// ### What it does
///
/// Checks for [regex](https://crates.io/crates/regex) compilation inside a loop with a literal.
///
/// ### Why is this bad?
///
/// Compiling a regex is a much more expensive operation than using one, and a compiled regex can be used multiple times.
/// This is documented as an antipattern [on the regex documentation](https://docs.rs/regex/latest/regex/#avoid-re-compiling-regexes-especially-in-a-loop)
///
/// ### Example
/// ```no_run
/// # let haystacks = [""];
/// # const MY_REGEX: &str = "a.b";
/// for haystack in haystacks {
/// let regex = regex::Regex::new(MY_REGEX).unwrap();
/// if regex.is_match(haystack) {
/// // Perform operation
/// }
/// }
/// ```
/// can be replaced with
/// ```no_run
/// # let haystacks = [""];
/// # const MY_REGEX: &str = "a.b";
/// let regex = regex::Regex::new(MY_REGEX).unwrap();
/// for haystack in haystacks {
/// if regex.is_match(haystack) {
/// // Perform operation
/// }
/// }
/// ```
#[clippy::version = "1.83.0"]
pub REGEX_CREATION_IN_LOOPS,
perf,
"regular expression compilation performed in a loop"
}

#[derive(Copy, Clone)]
enum RegexKind {
Unicode,
Expand All @@ -66,9 +104,10 @@ enum RegexKind {
#[derive(Default)]
pub struct Regex {
definitions: DefIdMap<RegexKind>,
loop_stack: Vec<(OwnerId, Span)>,
}

impl_lint_pass!(Regex => [INVALID_REGEX, TRIVIAL_REGEX]);
impl_lint_pass!(Regex => [INVALID_REGEX, TRIVIAL_REGEX, REGEX_CREATION_IN_LOOPS]);

impl<'tcx> LateLintPass<'tcx> for Regex {
fn check_crate(&mut self, cx: &LateContext<'tcx>) {
Expand Down Expand Up @@ -96,12 +135,34 @@ impl<'tcx> LateLintPass<'tcx> for Regex {
&& let Some(def_id) = path_def_id(cx, fun)
&& let Some(regex_kind) = self.definitions.get(&def_id)
{
if let Some(&(loop_item_id, loop_span)) = self.loop_stack.last()
&& loop_item_id == fun.hir_id.owner
&& (matches!(arg.kind, ExprKind::Lit(_)) || const_str(cx, arg).is_some())
{
span_lint_and_help(
cx,
REGEX_CREATION_IN_LOOPS,
fun.span,
"compiling a regex in a loop",
Some(loop_span),
"move the regex construction outside this loop",
);
}

match regex_kind {
RegexKind::Unicode => check_regex(cx, arg, true),
RegexKind::UnicodeSet => check_set(cx, arg, true),
RegexKind::Bytes => check_regex(cx, arg, false),
RegexKind::BytesSet => check_set(cx, arg, false),
}
} else if let ExprKind::Loop(block, _, _, span) = expr.kind {
self.loop_stack.push((block.hir_id.owner, span));
}
}

fn check_expr_post(&mut self, _: &LateContext<'tcx>, expr: &'tcx Expr<'_>) {
if matches!(expr.kind, ExprKind::Loop(..)) {
self.loop_stack.pop();
}
}
}
Expand Down
30 changes: 29 additions & 1 deletion tests/ui/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
clippy::needless_borrow,
clippy::needless_borrows_for_generic_args
)]
#![warn(clippy::invalid_regex, clippy::trivial_regex)]
#![warn(clippy::invalid_regex, clippy::trivial_regex, clippy::regex_creation_in_loops)]

extern crate regex;

Expand Down Expand Up @@ -118,7 +118,35 @@ fn trivial_regex() {
let _ = BRegex::new(r"\b{start}word\b{end}");
}

fn regex_creation_in_loops() {
loop {
static STATIC_REGEX: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| Regex::new("a.b").unwrap());

let regex = Regex::new("a.b");
//~^ ERROR: compiling a regex in a loop
let regex = BRegex::new("a.b");
//~^ ERROR: compiling a regex in a loop
#[allow(clippy::regex_creation_in_loops)]
let allowed_regex = Regex::new("a.b");

if true {
let regex = Regex::new("a.b");
//~^ ERROR: compiling a regex in a loop
}

for _ in 0..10 {
let nested_regex = Regex::new("a.b");
//~^ ERROR: compiling a regex in a loop
}
}

for i in 0..10 {
let dependant_regex = Regex::new(&format!("{i}"));
}
}

fn main() {
syntax_error();
trivial_regex();
regex_creation_in_loops();
}
52 changes: 51 additions & 1 deletion tests/ui/regex.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -195,5 +195,55 @@ LL | let binary_trivial_empty = BRegex::new("^$");
|
= help: consider using `str::is_empty`

error: aborting due to 24 previous errors
error: compiling a regex in a loop
--> tests/ui/regex.rs:125:21
|
LL | let regex = Regex::new("a.b");
| ^^^^^^^^^^
|
help: move the regex construction outside this loop
--> tests/ui/regex.rs:122:5
|
LL | loop {
| ^^^^
= note: `-D clippy::regex-creation-in-loops` implied by `-D warnings`
= help: to override `-D warnings` add `#[allow(clippy::regex_creation_in_loops)]`

error: compiling a regex in a loop
--> tests/ui/regex.rs:127:21
|
LL | let regex = BRegex::new("a.b");
| ^^^^^^^^^^^
|
help: move the regex construction outside this loop
--> tests/ui/regex.rs:122:5
|
LL | loop {
| ^^^^

error: compiling a regex in a loop
--> tests/ui/regex.rs:133:25
|
LL | let regex = Regex::new("a.b");
| ^^^^^^^^^^
|
help: move the regex construction outside this loop
--> tests/ui/regex.rs:122:5
|
LL | loop {
| ^^^^

error: compiling a regex in a loop
--> tests/ui/regex.rs:138:32
|
LL | let nested_regex = Regex::new("a.b");
| ^^^^^^^^^^
|
help: move the regex construction outside this loop
--> tests/ui/regex.rs:137:9
|
LL | for _ in 0..10 {
| ^^^^^^^^^^^^^^

error: aborting due to 28 previous errors

Loading