Skip to content

Commit a7bfccb

Browse files
committed
[ruff] Offer fixes for RUF039 in more cases
1 parent db3dcd8 commit a7bfccb

9 files changed

+632
-27
lines changed

crates/ruff_linter/resources/test/fixtures/ruff/RUF039.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,16 @@
5353
regex.splititer(both, non_literal)
5454
regex.subf(f, lambda _: r'means', '"format"')
5555
regex.subfn(fn, f'''a$1n't''', lambda: "'function'")
56+
57+
58+
# https://github.com/astral-sh/ruff/issues/16713
59+
re.compile("\a\f\n\r\t\u27F2\U0001F0A1\v\x41") # with unsafe fix
60+
re.compile("\b") # without fix
61+
re.compile("\"") # without fix
62+
re.compile("\'") # without fix
63+
re.compile('\"') # without fix
64+
re.compile('\'') # without fix
65+
re.compile("\\") # without fix
66+
re.compile("\101") # without fix
67+
re.compile("a\
68+
b") # without fix

crates/ruff_linter/resources/test/fixtures/ruff/RUF039_concat.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,20 @@
9191
br''br""br''
9292
)
9393
regex.subfn(br'I\s\nee*d\s[O0o]me\x20\Qoffe\E, ' br'b')
94+
95+
96+
# https://github.com/astral-sh/ruff/issues/16713
97+
re.compile(
98+
"["
99+
"\U0001F600-\U0001F64F" # emoticons
100+
"\U0001F300-\U0001F5FF" # symbols & pictographs
101+
"\U0001F680-\U0001F6FF" # transport & map symbols
102+
"\U0001F1E0-\U0001F1FF" # flags (iOS)
103+
"\U00002702-\U000027B0"
104+
"\U000024C2-\U0001F251"
105+
"\u200d" # zero width joiner
106+
"\u200c" # zero width non-joiner
107+
"\\u200c" # must not be escaped in a raw string
108+
"]+",
109+
flags=re.UNICODE,
110+
)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
import re
2+
3+
re.compile("\N{Partial Differential}") # with unsafe fix if python target is 3.8 or higher, else without fix

crates/ruff_linter/src/rules/ruff/mod.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,44 @@ mod tests {
554554
Ok(())
555555
}
556556

557+
#[test_case(Rule::UnrawRePattern, Path::new("RUF039_py_version_sensitive.py"))]
558+
fn preview_rules_py37(rule_code: Rule, path: &Path) -> Result<()> {
559+
let snapshot = format!(
560+
"preview__py37__{}_{}",
561+
rule_code.noqa_code(),
562+
path.to_string_lossy()
563+
);
564+
let diagnostics = test_path(
565+
Path::new("ruff").join(path).as_path(),
566+
&settings::LinterSettings {
567+
preview: PreviewMode::Enabled,
568+
unresolved_target_version: PythonVersion::PY37.into(),
569+
..settings::LinterSettings::for_rule(rule_code)
570+
},
571+
)?;
572+
assert_diagnostics!(snapshot, diagnostics);
573+
Ok(())
574+
}
575+
576+
#[test_case(Rule::UnrawRePattern, Path::new("RUF039_py_version_sensitive.py"))]
577+
fn preview_rules_py38(rule_code: Rule, path: &Path) -> Result<()> {
578+
let snapshot = format!(
579+
"preview__py38__{}_{}",
580+
rule_code.noqa_code(),
581+
path.to_string_lossy()
582+
);
583+
let diagnostics = test_path(
584+
Path::new("ruff").join(path).as_path(),
585+
&settings::LinterSettings {
586+
preview: PreviewMode::Enabled,
587+
unresolved_target_version: PythonVersion::PY38.into(),
588+
..settings::LinterSettings::for_rule(rule_code)
589+
},
590+
)?;
591+
assert_diagnostics!(snapshot, diagnostics);
592+
Ok(())
593+
}
594+
557595
#[test_case(Rule::UsedDummyVariable, Path::new("RUF052.py"), r"^_+", 1)]
558596
#[test_case(Rule::UsedDummyVariable, Path::new("RUF052.py"), r"", 2)]
559597
fn custom_regexp_preset(

crates/ruff_linter/src/rules/ruff/rules/unraw_re_pattern.rs

Lines changed: 95 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
use std::fmt::{Display, Formatter};
22
use std::str::FromStr;
33

4+
use ruff_diagnostics::Applicability;
45
use ruff_macros::{ViolationMetadata, derive_message_formats};
56
use ruff_python_ast::{
6-
BytesLiteral, Expr, ExprBytesLiteral, ExprCall, ExprStringLiteral, StringLiteral,
7+
BytesLiteral, Expr, ExprBytesLiteral, ExprCall, ExprStringLiteral, PythonVersion, StringLiteral,
78
};
89
use ruff_python_semantic::{Modules, SemanticModel};
9-
10-
use ruff_text_size::Ranged;
10+
use ruff_text_size::{Ranged, TextRange};
1111

1212
use crate::checkers::ast::Checker;
1313
use crate::{Edit, Fix, FixAvailability, Violation};
@@ -24,6 +24,21 @@ use crate::{Edit, Fix, FixAvailability, Violation};
2424
/// Regular expressions should be written
2525
/// using raw strings to avoid double escaping.
2626
///
27+
/// ## Fix safety
28+
/// If a fix is available and the string/bytes literal contains an escape sequence, the fix is
29+
/// marked unsafe because it alters the runtime value of the literal while retaining the regex
30+
/// semantics.
31+
///
32+
/// For example
33+
/// ```python
34+
/// # Literal is `1\n2`.
35+
/// re.compile("1\n2")
36+
///
37+
/// # Literal is `1\\n2`, but the regex library will interpret `\\n` and will still match a newline
38+
/// # character as before.
39+
/// re.compile(r"1\n2")
40+
/// ```
41+
///
2742
/// ## Example
2843
///
2944
/// ```python
@@ -163,20 +178,35 @@ fn check_string(checker: &Checker, literal: &StringLiteral, module: RegexModule,
163178
let range = literal.range;
164179
let mut diagnostic = checker.report_diagnostic(UnrawRePattern { module, func, kind }, range);
165180

166-
if
167-
// The (no-op) `u` prefix is a syntax error when combined with `r`
168-
!literal.flags.prefix().is_unicode()
169-
// We are looking for backslash characters
170-
// in the raw source code here, because `\n`
171-
// gets converted to a single character already
172-
// at the lexing stage.
173-
&&!checker.locator().slice(literal.range()).contains('\\')
174-
{
175-
diagnostic.set_fix(Fix::safe_edit(Edit::insertion(
176-
"r".to_string(),
177-
literal.range().start(),
178-
)));
181+
let Some(applicability) = raw_string_applicability(checker, literal) else {
182+
return;
183+
};
184+
185+
diagnostic.set_fix(Fix::applicable_edit(
186+
Edit::insertion("r".to_string(), literal.range().start()),
187+
applicability,
188+
));
189+
}
190+
191+
/// Check how same it is to prepend the `r` prefix to the sting.
192+
///
193+
/// ## Returns
194+
/// * `None` if the prefix cannot be added,
195+
/// * `Some(a)` if it can be added with applicability `a`.
196+
fn raw_string_applicability(checker: &Checker, literal: &StringLiteral) -> Option<Applicability> {
197+
if literal.flags.prefix().is_unicode() {
198+
// The (no-op) `u` prefix is a syntax error when combined with `r`
199+
return None;
179200
}
201+
202+
raw_applicability(checker, literal.range(), |escaped| {
203+
matches!(
204+
escaped,
205+
Some('a' | 'f' | 'n' | 'r' | 't' | 'u' | 'U' | 'v' | 'x')
206+
) || checker.target_version() >= PythonVersion::PY38 && escaped.is_some_and(|c| c == 'N')
207+
})
208+
209+
// re.compile("\a\f\n\N{Partial Differential}\r\t\u27F2\U0001F0A1\v\x41") # with unsafe fix
180210
}
181211

182212
fn check_bytes(checker: &Checker, literal: &BytesLiteral, module: RegexModule, func: &str) {
@@ -187,5 +217,53 @@ fn check_bytes(checker: &Checker, literal: &BytesLiteral, module: RegexModule, f
187217
let kind = PatternKind::Bytes;
188218
let func = func.to_string();
189219
let range = literal.range;
190-
checker.report_diagnostic(UnrawRePattern { module, func, kind }, range);
220+
let mut diagnostic = checker.report_diagnostic(UnrawRePattern { module, func, kind }, range);
221+
222+
let Some(applicability) = raw_byte_applicability(checker, literal) else {
223+
return;
224+
};
225+
226+
diagnostic.set_fix(Fix::applicable_edit(
227+
Edit::insertion("r".to_string(), literal.range().start()),
228+
applicability,
229+
));
230+
}
231+
232+
/// Check how same it is to prepend the `r` prefix to the byte sting.
233+
///
234+
/// ## Returns
235+
/// * `None` if the prefix cannot be added,
236+
/// * `Some(a)` if it can be added with applicability `a`.
237+
fn raw_byte_applicability(checker: &Checker, literal: &BytesLiteral) -> Option<Applicability> {
238+
raw_applicability(checker, literal.range(), |escaped| {
239+
matches!(escaped, Some('a' | 'f' | 'n' | 'r' | 't' | 'v' | 'x'))
240+
})
241+
}
242+
243+
fn raw_applicability(
244+
checker: &Checker,
245+
literal_range: TextRange,
246+
match_allowed_escape_sequence: impl Fn(Option<char>) -> bool,
247+
) -> Option<Applicability> {
248+
let mut found_slash = false;
249+
let mut chars = checker.locator().slice(literal_range).chars().peekable();
250+
while let Some(char) = chars.next() {
251+
if char == '\\' {
252+
found_slash = true;
253+
// Turning `"\uXXXX"` into `r"\uXXXX"` is behaviorally equivalent when passed
254+
// to `re`, however, it's not exactly the same runtime value.
255+
// Similarly, for the other escape sequences.
256+
if !match_allowed_escape_sequence(chars.peek().copied()) {
257+
// If the next character is not one of whitelisted one, we likely cannot safely turn
258+
// this into a raw string.
259+
return None;
260+
}
261+
}
262+
}
263+
264+
Some(if found_slash {
265+
Applicability::Unsafe
266+
} else {
267+
Applicability::Safe
268+
})
191269
}

0 commit comments

Comments
 (0)