11use std:: fmt:: { Display , Formatter } ;
22use std:: str:: FromStr ;
33
4+ use ruff_diagnostics:: Applicability ;
45use ruff_macros:: { ViolationMetadata , derive_message_formats} ;
56use ruff_python_ast:: {
6- BytesLiteral , Expr , ExprBytesLiteral , ExprCall , ExprStringLiteral , StringLiteral ,
7+ BytesLiteral , Expr , ExprBytesLiteral , ExprCall , ExprStringLiteral , PythonVersion , StringLiteral ,
78} ;
89use ruff_python_semantic:: { Modules , SemanticModel } ;
9-
10- use ruff_text_size:: Ranged ;
10+ use ruff_text_size:: { Ranged , TextRange } ;
1111
1212use crate :: checkers:: ast:: Checker ;
1313use crate :: { Edit , Fix , FixAvailability , Violation } ;
@@ -163,20 +163,35 @@ fn check_string(checker: &Checker, literal: &StringLiteral, module: RegexModule,
163163 let range = literal. range ;
164164 let mut diagnostic = checker. report_diagnostic ( UnrawRePattern { module, func, kind } , range) ;
165165
166- if
167- // The (no-op) `u` prefix is a syntax error when combined with `r`
168- !literal. flags . prefix ( ) . is_unicode ( )
169- // We are looking for backslash characters
170- // in the raw source code here, because `\n`
171- // gets converted to a single character already
172- // at the lexing stage.
173- &&!checker. locator ( ) . slice ( literal. range ( ) ) . contains ( '\\' )
174- {
175- diagnostic. set_fix ( Fix :: safe_edit ( Edit :: insertion (
176- "r" . to_string ( ) ,
177- literal. range ( ) . start ( ) ,
178- ) ) ) ;
166+ let Some ( applicability) = raw_string_applicability ( checker, literal) else {
167+ return ;
168+ } ;
169+
170+ diagnostic. set_fix ( Fix :: applicable_edit (
171+ Edit :: insertion ( "r" . to_string ( ) , literal. range ( ) . start ( ) ) ,
172+ applicability,
173+ ) ) ;
174+ }
175+
176+ /// Check how same it is to prepend the `r` prefix to the sting.
177+ ///
178+ /// ## Returns
179+ /// * `None` if the prefix cannot be added,
180+ /// * `Some(a)` if it can be added with applicability `a`.
181+ fn raw_string_applicability ( checker : & Checker , literal : & StringLiteral ) -> Option < Applicability > {
182+ if literal. flags . prefix ( ) . is_unicode ( ) {
183+ // The (no-op) `u` prefix is a syntax error when combined with `r`
184+ return None ;
179185 }
186+
187+ raw_applicability ( checker, literal. range ( ) , |escaped| {
188+ matches ! (
189+ escaped,
190+ Some ( 'a' | 'f' | 'n' | 'r' | 't' | 'u' | 'U' | 'v' | 'x' )
191+ ) || checker. target_version ( ) >= PythonVersion :: PY38 && escaped. is_some_and ( |c| c == 'N' )
192+ } )
193+
194+ // re.compile("\a\f\n\N{Partial Differential}\r\t\u27F2\U0001F0A1\v\x41") # with unsafe fix
180195}
181196
182197fn check_bytes ( checker : & Checker , literal : & BytesLiteral , module : RegexModule , func : & str ) {
@@ -187,5 +202,53 @@ fn check_bytes(checker: &Checker, literal: &BytesLiteral, module: RegexModule, f
187202 let kind = PatternKind :: Bytes ;
188203 let func = func. to_string ( ) ;
189204 let range = literal. range ;
190- checker. report_diagnostic ( UnrawRePattern { module, func, kind } , range) ;
205+ let mut diagnostic = checker. report_diagnostic ( UnrawRePattern { module, func, kind } , range) ;
206+
207+ let Some ( applicability) = raw_byte_applicability ( checker, literal) else {
208+ return ;
209+ } ;
210+
211+ diagnostic. set_fix ( Fix :: applicable_edit (
212+ Edit :: insertion ( "r" . to_string ( ) , literal. range ( ) . start ( ) ) ,
213+ applicability,
214+ ) ) ;
215+ }
216+
217+ /// Check how same it is to prepend the `r` prefix to the byte sting.
218+ ///
219+ /// ## Returns
220+ /// * `None` if the prefix cannot be added,
221+ /// * `Some(a)` if it can be added with applicability `a`.
222+ fn raw_byte_applicability ( checker : & Checker , literal : & BytesLiteral ) -> Option < Applicability > {
223+ raw_applicability ( checker, literal. range ( ) , |escaped| {
224+ matches ! ( escaped, Some ( 'a' | 'f' | 'n' | 'r' | 't' | 'v' | 'x' ) )
225+ } )
226+ }
227+
228+ fn raw_applicability (
229+ checker : & Checker ,
230+ literal_range : TextRange ,
231+ match_allowed_escape_sequence : impl Fn ( Option < char > ) -> bool ,
232+ ) -> Option < Applicability > {
233+ let mut found_slash = false ;
234+ let mut chars = checker. locator ( ) . slice ( literal_range) . chars ( ) . peekable ( ) ;
235+ while let Some ( char) = chars. next ( ) {
236+ if char == '\\' {
237+ found_slash = true ;
238+ // Turning `"\uXXXX"` into `r"\uXXXX"` is behaviorally equivalent when passed
239+ // to `re`, however, it's not exactly the same runtime value.
240+ // Similarly, for the other escape sequences.
241+ if !match_allowed_escape_sequence ( chars. peek ( ) . copied ( ) ) {
242+ // If the next character is not one of whitelisted one, we likely cannot safely turn
243+ // this into a raw string.
244+ return None ;
245+ }
246+ }
247+ }
248+
249+ Some ( if found_slash {
250+ Applicability :: Unsafe
251+ } else {
252+ Applicability :: Safe
253+ } )
191254}
0 commit comments