astral-sh · ntBre · Jul 24, 2025 · Jul 1, 2025 · Jul 22, 2025 · Jul 22, 2025
diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/RUF039.py b/crates/ruff_linter/resources/test/fixtures/ruff/RUF039.py
@@ -53,3 +53,16 @@
 regex.splititer(both, non_literal)
 regex.subf(f, lambda _: r'means', '"format"')
 regex.subfn(fn, f'''a$1n't''', lambda: "'function'")
+
+
+# https://github.com/astral-sh/ruff/issues/16713
+re.compile("\a\f\n\r\t\u27F2\U0001F0A1\v\x41")  # with unsafe fix
+re.compile("\b")  # without fix
+re.compile("\"")  # without fix
+re.compile("\'")  # without fix
+re.compile('\"')  # without fix
+re.compile('\'')  # without fix
+re.compile("\\")  # without fix
+re.compile("\101")  # without fix
+re.compile("a\
+b")  # without fix
diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/RUF039_concat.py b/crates/ruff_linter/resources/test/fixtures/ruff/RUF039_concat.py
@@ -91,3 +91,20 @@
     br''br""br''
 )
 regex.subfn(br'I\s\nee*d\s[O0o]me\x20\Qoffe\E, ' br'b')
+
+
+# https://github.com/astral-sh/ruff/issues/16713
+re.compile(
+    "["
+    "\U0001F600-\U0001F64F"  # emoticons
+    "\U0001F300-\U0001F5FF"  # symbols & pictographs
+    "\U0001F680-\U0001F6FF"  # transport & map symbols
+    "\U0001F1E0-\U0001F1FF"  # flags (iOS)
+    "\U00002702-\U000027B0"
+    "\U000024C2-\U0001F251"
+    "\u200d"  # zero width joiner
+    "\u200c"  # zero width non-joiner
+    "\\u200c" # must not be escaped in a raw string
+    "]+",
+    flags=re.UNICODE,
+)
diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/RUF039_py_version_sensitive.py b/crates/ruff_linter/resources/test/fixtures/ruff/RUF039_py_version_sensitive.py
@@ -0,0 +1,3 @@
+import re
+
+re.compile("\N{Partial Differential}")  # with unsafe fix if python target is 3.8 or higher, else without fix
diff --git a/crates/ruff_linter/src/rules/ruff/mod.rs b/crates/ruff_linter/src/rules/ruff/mod.rs
@@ -554,6 +554,44 @@ mod tests {
         Ok(())
     }
 
+    #[test_case(Rule::UnrawRePattern, Path::new("RUF039_py_version_sensitive.py"))]
+    fn preview_rules_py37(rule_code: Rule, path: &Path) -> Result<()> {
+        let snapshot = format!(
+            "preview__py37__{}_{}",
+            rule_code.noqa_code(),
+            path.to_string_lossy()
+        );
+        let diagnostics = test_path(
+            Path::new("ruff").join(path).as_path(),
+            &settings::LinterSettings {
+                preview: PreviewMode::Enabled,
+                unresolved_target_version: PythonVersion::PY37.into(),
+                ..settings::LinterSettings::for_rule(rule_code)
+            },
+        )?;
+        assert_diagnostics!(snapshot, diagnostics);
+        Ok(())
+    }
+
+    #[test_case(Rule::UnrawRePattern, Path::new("RUF039_py_version_sensitive.py"))]
+    fn preview_rules_py38(rule_code: Rule, path: &Path) -> Result<()> {
+        let snapshot = format!(
+            "preview__py38__{}_{}",
+            rule_code.noqa_code(),
+            path.to_string_lossy()
+        );
+        let diagnostics = test_path(
+            Path::new("ruff").join(path).as_path(),
+            &settings::LinterSettings {
+                preview: PreviewMode::Enabled,
+                unresolved_target_version: PythonVersion::PY38.into(),
+                ..settings::LinterSettings::for_rule(rule_code)
+            },
+        )?;
+        assert_diagnostics!(snapshot, diagnostics);
+        Ok(())
+    }
+
     #[test_case(Rule::UsedDummyVariable, Path::new("RUF052.py"), r"^_+", 1)]
     #[test_case(Rule::UsedDummyVariable, Path::new("RUF052.py"), r"", 2)]
     fn custom_regexp_preset(

diff --git a/crates/ruff_linter/src/rules/ruff/rules/unraw_re_pattern.rs b/crates/ruff_linter/src/rules/ruff/rules/unraw_re_pattern.rs
@@ -1,13 +1,13 @@
 use std::fmt::{Display, Formatter};
 use std::str::FromStr;
 
+use ruff_diagnostics::Applicability;
 use ruff_macros::{ViolationMetadata, derive_message_formats};
 use ruff_python_ast::{
-    BytesLiteral, Expr, ExprBytesLiteral, ExprCall, ExprStringLiteral, StringLiteral,
+    BytesLiteral, Expr, ExprBytesLiteral, ExprCall, ExprStringLiteral, PythonVersion, StringLiteral,
 };
 use ruff_python_semantic::{Modules, SemanticModel};
-
-use ruff_text_size::Ranged;
+use ruff_text_size::{Ranged, TextRange};
 
 use crate::checkers::ast::Checker;
 use crate::{Edit, Fix, FixAvailability, Violation};
@@ -24,6 +24,29 @@ use crate::{Edit, Fix, FixAvailability, Violation};
 /// Regular expressions should be written
 /// using raw strings to avoid double escaping.
 ///
+/// ## Fix safety
+/// The fix is unsafe if the string/bytes literal contains an escape sequence because the fix alters
+/// the runtime value of the literal while retaining the regex semantics.
+///
+/// For example
+/// ```python
+/// # Literal is `1\n2`.
+/// re.compile("1\n2")
+///
+/// # Literal is `1\\n2`, but the regex library will interpret `\\n` and will still match a newline
+/// # character as before.
+/// re.compile(r"1\n2")
+/// ```
+///
+/// ## Fix availability
+///  A fix is not available if either
+///  * the argument is a string with a (no-op) `u` prefix (e.g., `u"foo"`) as the prefix is
+///    incompatible with the raw prefix `r`
+///  * the argument is a string or bytes literal with an escape sequence that has a different
+///    meaning in the context of a regular expression such as `\b`, which is word boundary or
+///    backspace in a regex, depending on the context, but always a backspace in string and bytes
+///    literals.
+///
 /// ## Example
 ///
 /// ```python
@@ -163,20 +186,44 @@ fn check_string(checker: &Checker, literal: &StringLiteral, module: RegexModule,
     let range = literal.range;
     let mut diagnostic = checker.report_diagnostic(UnrawRePattern { module, func, kind }, range);
 
-    if
-    // The (no-op) `u` prefix is a syntax error when combined with `r`
-    !literal.flags.prefix().is_unicode()
-    // We are looking for backslash characters
-    // in the raw source code here, because `\n`
-    // gets converted to a single character already
-    // at the lexing stage.
-    &&!checker.locator().slice(literal.range()).contains('\\')
-    {
-        diagnostic.set_fix(Fix::safe_edit(Edit::insertion(
-            "r".to_string(),
-            literal.range().start(),
-        )));
+    let Some(applicability) = raw_string_applicability(checker, literal) else {
+        return;
+    };
+
+    diagnostic.set_fix(Fix::applicable_edit(
+        Edit::insertion("r".to_string(), literal.range().start()),
+        applicability,
+    ));
+}
+
+/// Check how safe it is to prepend the `r` prefix to the string.
+///
+/// ## Returns
+///  * `None` if the prefix cannot be added,
+///  * `Some(a)` if it can be added with applicability `a`.
+fn raw_string_applicability(checker: &Checker, literal: &StringLiteral) -> Option<Applicability> {
+    if literal.flags.prefix().is_unicode() {
+        // The (no-op) `u` prefix is a syntax error when combined with `r`
+        return None;
     }
+
+    if checker.target_version() >= PythonVersion::PY38 {
+        raw_applicability(checker, literal.range(), |escaped| {
+            matches!(
+                escaped,
+                Some('a' | 'f' | 'n' | 'r' | 't' | 'u' | 'U' | 'v' | 'x' | 'N')
+            )
+        })
+    } else {
+        raw_applicability(checker, literal.range(), |escaped| {
+            matches!(
+                escaped,
+                Some('a' | 'f' | 'n' | 'r' | 't' | 'u' | 'U' | 'v' | 'x')
+            )
+        })
+    }
+
+    // re.compile("\a\f\n\N{Partial Differential}\r\t\u27F2\U0001F0A1\v\x41")  # with unsafe fix
 }
 
 fn check_bytes(checker: &Checker, literal: &BytesLiteral, module: RegexModule, func: &str) {
@@ -187,5 +234,53 @@ fn check_bytes(checker: &Checker, literal: &BytesLiteral, module: RegexModule, f
     let kind = PatternKind::Bytes;
     let func = func.to_string();
     let range = literal.range;
-    checker.report_diagnostic(UnrawRePattern { module, func, kind }, range);
+    let mut diagnostic = checker.report_diagnostic(UnrawRePattern { module, func, kind }, range);
+
+    let Some(applicability) = raw_byte_applicability(checker, literal) else {
+        return;
+    };
+
+    diagnostic.set_fix(Fix::applicable_edit(
+        Edit::insertion("r".to_string(), literal.range().start()),
+        applicability,
+    ));
+}
+
+/// Check how same it is to prepend the `r` prefix to the byte sting.
+///
+/// ## Returns
+///  * `None` if the prefix cannot be added,
+///  * `Some(a)` if it can be added with applicability `a`.
+fn raw_byte_applicability(checker: &Checker, literal: &BytesLiteral) -> Option<Applicability> {
+    raw_applicability(checker, literal.range(), |escaped| {
+        matches!(escaped, Some('a' | 'f' | 'n' | 'r' | 't' | 'v' | 'x'))
+    })
+}
+
+fn raw_applicability(
+    checker: &Checker,
+    literal_range: TextRange,
+    match_allowed_escape_sequence: impl Fn(Option<char>) -> bool,
+) -> Option<Applicability> {
+    let mut found_slash = false;
+    let mut chars = checker.locator().slice(literal_range).chars().peekable();
+    while let Some(char) = chars.next() {
+        if char == '\\' {
+            found_slash = true;
+            // Turning `"\uXXXX"` into `r"\uXXXX"` is behaviorally equivalent when passed
+            // to `re`, however, it's not exactly the same runtime value.
+            // Similarly, for the other escape sequences.
+            if !match_allowed_escape_sequence(chars.peek().copied()) {
+                // If the next character is not one of the whitelisted ones, we likely cannot safely turn
+                // this into a raw string.
+                return None;
+            }
+        }
+    }
 for (c, next) in lit.value.chars().tuple_windows() { 
     // `\0` (or any other ASCII digit) and `\g` have special meaning in `repl` strings. 
     // Meanwhile, nearly all other escapes of ASCII letters in a `repl` string causes 
     // `re.PatternError` to be raised at runtime. 
     // 
     // If we see that the escaped character is an alphanumeric ASCII character, 
     // we should only emit a diagnostic suggesting to replace the `re.sub()` call with 
     // `str.replace`if we can detect that the escaped character is one that is both 
     // valid in a `repl` string *and* does not have any special meaning in a REPL string. 
     // 
     // It's out of scope for this rule to change invalid `re.sub()` calls into something 
     // that would not raise an exception at runtime. They should be left as-is. 
     if c == '\\' && next.is_ascii_alphanumeric() { 
         if "abfnrtv".contains(next) { 
             fixable = false; 
         } else { 
             return None; 
         } 
     } 
 } 
 for (c, next) in lit.value.chars().tuple_windows() { 
     // `\0` (or any other ASCII digit) and `\g` have special meaning in `repl` strings. 
     // Meanwhile, nearly all other escapes of ASCII letters in a `repl` string causes 
     // `re.PatternError` to be raised at runtime. 
     // 
     // If we see that the escaped character is an alphanumeric ASCII character, 
     // we should only emit a diagnostic suggesting to replace the `re.sub()` call with 
     // `str.replace`if we can detect that the escaped character is one that is both 
     // valid in a `repl` string *and* does not have any special meaning in a REPL string. 
     // 
     // It's out of scope for this rule to change invalid `re.sub()` calls into something 
     // that would not raise an exception at runtime. They should be left as-is. 
     if c == '\\' && next.is_ascii_alphanumeric() { 
         if "abfnrtv".contains(next) { 
             fixable = false; 
         } else { 
             return None; 
         } 
     } 
 } 
+
+    Some(if found_slash {
+        Applicability::Unsafe
+    } else {
+        Applicability::Safe
+    })
 }
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		import re

		re.compile("\N{Partial Differential}") # with unsafe fix if python target is 3.8 or higher, else without fix