Skip to content

Commit 2293a91

Browse files
committed
[ruff] Offer fixes for RUF039 in more cases
1 parent db3dcd8 commit 2293a91

9 files changed

+617
-27
lines changed

crates/ruff_linter/resources/test/fixtures/ruff/RUF039.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,16 @@
5353
regex.splititer(both, non_literal)
5454
regex.subf(f, lambda _: r'means', '"format"')
5555
regex.subfn(fn, f'''a$1n't''', lambda: "'function'")
56+
57+
58+
# https://github.com/astral-sh/ruff/issues/16713
59+
re.compile("\a\f\n\r\t\u27F2\U0001F0A1\v\x41") # with unsafe fix
60+
re.compile("\b") # without fix
61+
re.compile("\"") # without fix
62+
re.compile("\'") # without fix
63+
re.compile('\"') # without fix
64+
re.compile('\'') # without fix
65+
re.compile("\\") # without fix
66+
re.compile("\101") # without fix
67+
re.compile("a\
68+
b") # without fix

crates/ruff_linter/resources/test/fixtures/ruff/RUF039_concat.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,20 @@
9191
br''br""br''
9292
)
9393
regex.subfn(br'I\s\nee*d\s[O0o]me\x20\Qoffe\E, ' br'b')
94+
95+
96+
# https://github.com/astral-sh/ruff/issues/16713
97+
re.compile(
98+
"["
99+
"\U0001F600-\U0001F64F" # emoticons
100+
"\U0001F300-\U0001F5FF" # symbols & pictographs
101+
"\U0001F680-\U0001F6FF" # transport & map symbols
102+
"\U0001F1E0-\U0001F1FF" # flags (iOS)
103+
"\U00002702-\U000027B0"
104+
"\U000024C2-\U0001F251"
105+
"\u200d" # zero width joiner
106+
"\u200c" # zero width non-joiner
107+
"\\u200c" # must not be escaped in a raw string
108+
"]+",
109+
flags=re.UNICODE,
110+
)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
import re
2+
3+
re.compile("\N{Partial Differential}") # with unsafe fix if python target is 3.8 or higher, else without fix

crates/ruff_linter/src/rules/ruff/mod.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,44 @@ mod tests {
554554
Ok(())
555555
}
556556

557+
#[test_case(Rule::UnrawRePattern, Path::new("RUF039_py_version_sensitive.py"))]
558+
fn preview_rules_py37(rule_code: Rule, path: &Path) -> Result<()> {
559+
let snapshot = format!(
560+
"preview__py37__{}_{}",
561+
rule_code.noqa_code(),
562+
path.to_string_lossy()
563+
);
564+
let diagnostics = test_path(
565+
Path::new("ruff").join(path).as_path(),
566+
&settings::LinterSettings {
567+
preview: PreviewMode::Enabled,
568+
unresolved_target_version: PythonVersion::PY37.into(),
569+
..settings::LinterSettings::for_rule(rule_code)
570+
},
571+
)?;
572+
assert_diagnostics!(snapshot, diagnostics);
573+
Ok(())
574+
}
575+
576+
#[test_case(Rule::UnrawRePattern, Path::new("RUF039_py_version_sensitive.py"))]
577+
fn preview_rules_py38(rule_code: Rule, path: &Path) -> Result<()> {
578+
let snapshot = format!(
579+
"preview__py38__{}_{}",
580+
rule_code.noqa_code(),
581+
path.to_string_lossy()
582+
);
583+
let diagnostics = test_path(
584+
Path::new("ruff").join(path).as_path(),
585+
&settings::LinterSettings {
586+
preview: PreviewMode::Enabled,
587+
unresolved_target_version: PythonVersion::PY38.into(),
588+
..settings::LinterSettings::for_rule(rule_code)
589+
},
590+
)?;
591+
assert_diagnostics!(snapshot, diagnostics);
592+
Ok(())
593+
}
594+
557595
#[test_case(Rule::UsedDummyVariable, Path::new("RUF052.py"), r"^_+", 1)]
558596
#[test_case(Rule::UsedDummyVariable, Path::new("RUF052.py"), r"", 2)]
559597
fn custom_regexp_preset(

crates/ruff_linter/src/rules/ruff/rules/unraw_re_pattern.rs

Lines changed: 80 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
use std::fmt::{Display, Formatter};
22
use std::str::FromStr;
33

4+
use ruff_diagnostics::Applicability;
45
use ruff_macros::{ViolationMetadata, derive_message_formats};
56
use ruff_python_ast::{
6-
BytesLiteral, Expr, ExprBytesLiteral, ExprCall, ExprStringLiteral, StringLiteral,
7+
BytesLiteral, Expr, ExprBytesLiteral, ExprCall, ExprStringLiteral, PythonVersion, StringLiteral,
78
};
89
use ruff_python_semantic::{Modules, SemanticModel};
9-
10-
use ruff_text_size::Ranged;
10+
use ruff_text_size::{Ranged, TextRange};
1111

1212
use crate::checkers::ast::Checker;
1313
use crate::{Edit, Fix, FixAvailability, Violation};
@@ -163,20 +163,35 @@ fn check_string(checker: &Checker, literal: &StringLiteral, module: RegexModule,
163163
let range = literal.range;
164164
let mut diagnostic = checker.report_diagnostic(UnrawRePattern { module, func, kind }, range);
165165

166-
if
167-
// The (no-op) `u` prefix is a syntax error when combined with `r`
168-
!literal.flags.prefix().is_unicode()
169-
// We are looking for backslash characters
170-
// in the raw source code here, because `\n`
171-
// gets converted to a single character already
172-
// at the lexing stage.
173-
&&!checker.locator().slice(literal.range()).contains('\\')
174-
{
175-
diagnostic.set_fix(Fix::safe_edit(Edit::insertion(
176-
"r".to_string(),
177-
literal.range().start(),
178-
)));
166+
let Some(applicability) = raw_string_applicability(checker, literal) else {
167+
return;
168+
};
169+
170+
diagnostic.set_fix(Fix::applicable_edit(
171+
Edit::insertion("r".to_string(), literal.range().start()),
172+
applicability,
173+
));
174+
}
175+
176+
/// Check how safe it is to prepend the `r` prefix to the string.
177+
///
178+
/// ## Returns
179+
/// * `None` if the prefix cannot be added,
180+
/// * `Some(a)` if it can be added with applicability `a`.
181+
fn raw_string_applicability(checker: &Checker, literal: &StringLiteral) -> Option<Applicability> {
182+
if literal.flags.prefix().is_unicode() {
183+
// The (no-op) `u` prefix is a syntax error when combined with `r`
184+
return None;
179185
}
186+
187+
raw_applicability(checker, literal.range(), |escaped| {
188+
matches!(
189+
escaped,
190+
Some('a' | 'f' | 'n' | 'r' | 't' | 'u' | 'U' | 'v' | 'x')
191+
) || checker.target_version() >= PythonVersion::PY38 && escaped.is_some_and(|c| c == 'N')
192+
})
193+
194+
// re.compile("\a\f\n\N{Partial Differential}\r\t\u27F2\U0001F0A1\v\x41") # with unsafe fix
180195
}
181196

182197
fn check_bytes(checker: &Checker, literal: &BytesLiteral, module: RegexModule, func: &str) {
@@ -187,5 +202,53 @@ fn check_bytes(checker: &Checker, literal: &BytesLiteral, module: RegexModule, f
187202
let kind = PatternKind::Bytes;
188203
let func = func.to_string();
189204
let range = literal.range;
190-
checker.report_diagnostic(UnrawRePattern { module, func, kind }, range);
205+
let mut diagnostic = checker.report_diagnostic(UnrawRePattern { module, func, kind }, range);
206+
207+
let Some(applicability) = raw_byte_applicability(checker, literal) else {
208+
return;
209+
};
210+
211+
diagnostic.set_fix(Fix::applicable_edit(
212+
Edit::insertion("r".to_string(), literal.range().start()),
213+
applicability,
214+
));
215+
}
216+
217+
/// Check how safe it is to prepend the `r` prefix to the byte string.
218+
///
219+
/// ## Returns
220+
/// * `None` if the prefix cannot be added,
221+
/// * `Some(a)` if it can be added with applicability `a`.
222+
fn raw_byte_applicability(checker: &Checker, literal: &BytesLiteral) -> Option<Applicability> {
223+
raw_applicability(checker, literal.range(), |escaped| {
224+
matches!(escaped, Some('a' | 'f' | 'n' | 'r' | 't' | 'v' | 'x'))
225+
})
226+
}
227+
228+
fn raw_applicability(
229+
checker: &Checker,
230+
literal_range: TextRange,
231+
match_allowed_escape_sequence: impl Fn(Option<char>) -> bool,
232+
) -> Option<Applicability> {
233+
let mut found_slash = false;
234+
let mut chars = checker.locator().slice(literal_range).chars().peekable();
235+
while let Some(char) = chars.next() {
236+
if char == '\\' {
237+
found_slash = true;
238+
// Turning `"\uXXXX"` into `r"\uXXXX"` is behaviorally equivalent when passed
239+
// to `re`, however, it's not exactly the same runtime value.
240+
// Similarly, for the other escape sequences.
241+
if !match_allowed_escape_sequence(chars.peek().copied()) {
242+
// If the next character is not one of the whitelisted ones, we likely cannot safely turn
243+
// this into a raw string.
244+
return None;
245+
}
246+
}
247+
}
248+
249+
Some(if found_slash {
250+
Applicability::Unsafe
251+
} else {
252+
Applicability::Safe
253+
})
191254
}

0 commit comments

Comments
 (0)