@@ -52,10 +52,8 @@ pub enum EscapeError {
52
52
53
53
/// Unicode escape code in byte literal.
54
54
UnicodeEscapeInByte ,
55
- /// Non-ascii character in byte literal.
55
+ /// Non-ASCII character in byte literal, byte string literal, or raw byte string literal.
56
56
NonAsciiCharInByte ,
57
- /// Non-ascii character in byte string literal.
58
- NonAsciiCharInByteString ,
59
57
60
58
/// After a line ending with '\', the next line contains whitespace
61
59
/// characters that are not skipped.
@@ -78,54 +76,37 @@ impl EscapeError {
78
76
/// Takes the contents of a literal (without quotes) and produces a
79
77
/// sequence of unescaped characters or errors.
80
78
/// Values are returned by invoking the provided callback.
///
/// `callback` receives the byte range of the input that each result covers.
/// `Char`/`Byte` modes invoke the callback exactly once; the string modes
/// delegate to helpers that invoke it as they scan the input.
81
- pub fn unescape_literal < F > ( literal_text : & str , mode : Mode , callback : & mut F )
79
+ pub fn unescape_literal < F > ( src : & str , mode : Mode , callback : & mut F )
82
80
where
83
81
F : FnMut ( Range < usize > , Result < char , EscapeError > ) ,
84
82
{
85
83
match mode {
86
84
Mode :: Char | Mode :: Byte => {
// The reported range starts at 0 and ends where the `Chars` iterator
// stopped, i.e. it covers the input consumed while unescaping.
87
- let mut chars = literal_text. chars ( ) ;
88
- let result = unescape_char_or_byte ( & mut chars, mode) ;
89
- // The Chars iterator moved forward.
90
- callback ( 0 ..( literal_text. len ( ) - chars. as_str ( ) . len ( ) ) , result) ;
85
+ let mut chars = src. chars ( ) ;
86
+ let res = unescape_char_or_byte ( & mut chars, mode == Mode :: Byte ) ;
87
+ callback ( 0 ..( src. len ( ) - chars. as_str ( ) . len ( ) ) , res) ;
91
88
}
92
- Mode :: Str | Mode :: ByteStr => unescape_str_or_byte_str ( literal_text, mode, callback) ,
93
- // NOTE: Raw strings do not perform any explicit character escaping, here we
94
- // only translate CRLF to LF and produce errors on bare CR.
89
+ Mode :: Str | Mode :: ByteStr => unescape_str_or_byte_str ( src, mode == Mode :: ByteStr , callback) ,
95
90
Mode :: RawStr | Mode :: RawByteStr => {
96
- unescape_raw_str_or_raw_byte_str ( literal_text , mode, callback)
91
+ unescape_raw_str_or_raw_byte_str ( src , mode == Mode :: RawByteStr , callback)
97
92
}
98
93
}
99
94
}
100
95
101
- /// Takes a contents of a byte, byte string or raw byte string (without quotes)
102
- /// and produces a sequence of bytes or errors.
103
- /// Values are returned through invoking of the provided callback.
104
- pub fn unescape_byte_literal < F > ( literal_text : & str , mode : Mode , callback : & mut F )
105
- where
106
- F : FnMut ( Range < usize > , Result < u8 , EscapeError > ) ,
107
- {
108
- debug_assert ! ( mode. is_bytes( ) ) ;
109
- unescape_literal ( literal_text, mode, & mut |range, result| {
110
- callback ( range, result. map ( byte_from_char) ) ;
111
- } )
112
- }
113
-
114
96
/// Takes the contents of a char literal (without quotes) and returns the
115
97
/// unescaped char or an error.
///
/// On failure the error is paired with the number of input bytes that had
/// been consumed when it was detected.
116
- pub fn unescape_char ( literal_text : & str ) -> Result < char , ( usize , EscapeError ) > {
117
- let mut chars = literal_text. chars ( ) ;
118
- unescape_char_or_byte ( & mut chars, Mode :: Char )
119
- . map_err ( |err| ( literal_text. len ( ) - chars. as_str ( ) . len ( ) , err) )
98
+ pub fn unescape_char ( src : & str ) -> Result < char , ( usize , EscapeError ) > {
99
+ let mut chars = src. chars ( ) ;
100
+ unescape_char_or_byte ( & mut chars, false ) . map_err ( |err| ( src. len ( ) - chars. as_str ( ) . len ( ) , err) )
120
101
}
121
102
122
103
/// Takes the contents of a byte literal (without quotes) and returns the
123
104
/// unescaped byte or an error.
///
/// On failure the error is paired with the number of input bytes that had
/// been consumed when it was detected.
124
- pub fn unescape_byte ( literal_text : & str ) -> Result < u8 , ( usize , EscapeError ) > {
125
- let mut chars = literal_text . chars ( ) ;
126
- unescape_char_or_byte ( & mut chars, Mode :: Byte )
105
+ pub fn unescape_byte ( src : & str ) -> Result < u8 , ( usize , EscapeError ) > {
106
+ let mut chars = src . chars ( ) ;
107
+ unescape_char_or_byte ( & mut chars, true )
127
108
. map ( byte_from_char)
128
- . map_err ( |err| ( literal_text . len ( ) - chars. as_str ( ) . len ( ) , err) )
109
+ . map_err ( |err| ( src . len ( ) - chars. as_str ( ) . len ( ) , err) )
129
110
}
130
111
131
112
/// What kind of literal do we parse.
@@ -147,20 +128,17 @@ impl Mode {
147
128
}
148
129
}
149
130
/// Returns `true` for the byte-flavoured modes (`Byte`, `ByteStr`, `RawByteStr`), `false` otherwise.
150
- pub fn is_bytes ( self ) -> bool {
131
+ pub fn is_byte ( self ) -> bool {
151
132
match self {
152
133
Mode :: Byte | Mode :: ByteStr | Mode :: RawByteStr => true ,
153
134
Mode :: Char | Mode :: Str | Mode :: RawStr => false ,
154
135
}
155
136
}
156
137
}
157
138
158
- fn scan_escape ( chars : & mut Chars < ' _ > , mode : Mode ) -> Result < char , EscapeError > {
139
+ fn scan_escape ( chars : & mut Chars < ' _ > , is_byte : bool ) -> Result < char , EscapeError > {
159
140
// Previous character was '\\', unescape what follows.
160
-
161
- let second_char = chars. next ( ) . ok_or ( EscapeError :: LoneSlash ) ?;
162
-
163
- let res = match second_char {
141
+ let res = match chars. next ( ) . ok_or ( EscapeError :: LoneSlash ) ? {
164
142
'"' => '"' ,
165
143
'n' => '\n' ,
166
144
'r' => '\r' ,
@@ -181,7 +159,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
181
159
let value = hi * 16 + lo;
182
160
183
161
// For a non-byte literal verify that it is within ASCII range.
184
- if !mode . is_bytes ( ) && !is_ascii ( value) {
162
+ if !is_byte && !is_ascii ( value) {
185
163
return Err ( EscapeError :: OutOfRangeHexEscape ) ;
186
164
}
187
165
let value = value as u8 ;
@@ -217,7 +195,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
217
195
218
196
// Incorrect syntax has higher priority for error reporting
219
197
// than unallowed value for a literal.
220
- if mode . is_bytes ( ) {
198
+ if is_byte {
221
199
return Err ( EscapeError :: UnicodeEscapeInByte ) ;
222
200
}
223
201
@@ -249,23 +227,22 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
249
227
}
250
228
251
229
/// Passes the character through unchanged unless a byte-flavoured literal is
/// being parsed and the character is not ASCII, in which case
/// `EscapeError::NonAsciiCharInByte` is returned.
#[ inline]
252
- fn ascii_check ( first_char : char , mode : Mode ) -> Result < char , EscapeError > {
253
- if mode . is_bytes ( ) && !first_char . is_ascii ( ) {
230
+ fn ascii_check ( c : char , is_byte : bool ) -> Result < char , EscapeError > {
231
+ if is_byte && !c . is_ascii ( ) {
254
232
// A byte literal can't contain a non-ASCII character.
255
233
Err ( EscapeError :: NonAsciiCharInByte )
256
234
} else {
257
- Ok ( first_char )
235
+ Ok ( c )
258
236
}
259
237
}
260
238
261
- fn unescape_char_or_byte ( chars : & mut Chars < ' _ > , mode : Mode ) -> Result < char , EscapeError > {
262
- debug_assert ! ( mode == Mode :: Char || mode == Mode :: Byte ) ;
263
- let first_char = chars. next ( ) . ok_or ( EscapeError :: ZeroChars ) ?;
264
- let res = match first_char {
265
- '\\' => scan_escape ( chars, mode) ,
239
+ fn unescape_char_or_byte ( chars : & mut Chars < ' _ > , is_byte : bool ) -> Result < char , EscapeError > {
240
+ let c = chars. next ( ) . ok_or ( EscapeError :: ZeroChars ) ?;
241
+ let res = match c {
242
+ '\\' => scan_escape ( chars, is_byte) ,
266
243
'\n' | '\t' | '\'' => Err ( EscapeError :: EscapeOnlyChar ) ,
267
244
'\r' => Err ( EscapeError :: BareCarriageReturn ) ,
268
- _ => ascii_check ( first_char , mode ) ,
245
+ _ => ascii_check ( c , is_byte ) ,
269
246
} ?;
270
247
if chars. next ( ) . is_some ( ) {
271
248
return Err ( EscapeError :: MoreThanOneChar ) ;
@@ -275,20 +252,20 @@ fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, Esca
275
252
276
253
/// Takes the contents of a string literal (without quotes) and produces a
277
254
/// sequence of unescaped characters or errors.
278
- fn unescape_str_or_byte_str < F > ( src : & str , mode : Mode , callback : & mut F )
255
+ fn unescape_str_or_byte_str < F > ( src : & str , is_byte : bool , callback : & mut F )
279
256
where
280
257
F : FnMut ( Range < usize > , Result < char , EscapeError > ) ,
281
258
{
282
- debug_assert ! ( mode == Mode :: Str || mode == Mode :: ByteStr ) ;
283
- let initial_len = src. len ( ) ;
284
259
let mut chars = src. chars ( ) ;
285
- while let Some ( first_char) = chars. next ( ) {
286
- let start = initial_len - chars. as_str ( ) . len ( ) - first_char. len_utf8 ( ) ;
287
260
288
- let unescaped_char = match first_char {
261
+ // The `start` and `end` computation here is complicated because
262
+ // `skip_ascii_whitespace` makes us skip over chars without counting
263
+ // them in the range computation.
264
+ while let Some ( c) = chars. next ( ) {
265
+ let start = src. len ( ) - chars. as_str ( ) . len ( ) - c. len_utf8 ( ) ;
266
+ let res = match c {
289
267
'\\' => {
290
- let second_char = chars. clone ( ) . next ( ) ;
291
- match second_char {
268
+ match chars. clone ( ) . next ( ) {
292
269
Some ( '\n' ) => {
293
270
// Rust language specification requires us to skip whitespaces
294
271
// if unescaped '\' character is followed by '\n'.
@@ -297,17 +274,17 @@ where
297
274
skip_ascii_whitespace ( & mut chars, start, callback) ;
298
275
continue ;
299
276
}
300
- _ => scan_escape ( & mut chars, mode ) ,
277
+ _ => scan_escape ( & mut chars, is_byte ) ,
301
278
}
302
279
}
303
280
'\n' => Ok ( '\n' ) ,
304
281
'\t' => Ok ( '\t' ) ,
305
282
'"' => Err ( EscapeError :: EscapeOnlyChar ) ,
306
283
'\r' => Err ( EscapeError :: BareCarriageReturn ) ,
307
- _ => ascii_check ( first_char , mode ) ,
284
+ _ => ascii_check ( c , is_byte ) ,
308
285
} ;
309
- let end = initial_len - chars. as_str ( ) . len ( ) ;
310
- callback ( start..end, unescaped_char ) ;
286
+ let end = src . len ( ) - chars. as_str ( ) . len ( ) ;
287
+ callback ( start..end, res ) ;
311
288
}
312
289
313
290
fn skip_ascii_whitespace < F > ( chars : & mut Chars < ' _ > , start : usize , callback : & mut F )
@@ -340,30 +317,29 @@ where
340
317
/// Takes the contents of a raw string literal (without quotes) and produces a
341
318
/// sequence of characters or errors, reported through `callback` together
/// with the byte range of each character.
342
319
/// NOTE: Raw strings do not perform any explicit character escaping, here we
343
- /// only translate CRLF to LF and produce errors on bare CR.
344
- fn unescape_raw_str_or_raw_byte_str < F > ( literal_text : & str , mode : Mode , callback : & mut F )
320
+ /// only produce errors on bare CR.
321
+ fn unescape_raw_str_or_raw_byte_str < F > ( src : & str , is_byte : bool , callback : & mut F )
345
322
where
346
323
F : FnMut ( Range < usize > , Result < char , EscapeError > ) ,
347
324
{
348
- debug_assert ! ( mode == Mode :: RawStr || mode == Mode :: RawByteStr ) ;
349
- let initial_len = literal_text. len ( ) ;
350
-
351
- let mut chars = literal_text. chars ( ) ;
352
- while let Some ( curr) = chars. next ( ) {
353
- let start = initial_len - chars. as_str ( ) . len ( ) - curr. len_utf8 ( ) ;
325
+ let mut chars = src. chars ( ) ;
354
326
355
- let result = match curr {
327
+ // The `start` and `end` computation here matches the one in
328
+ // `unescape_str_or_byte_str` for consistency, even though this function
329
+ // doesn't have to worry about skipping any chars.
330
+ while let Some ( c) = chars. next ( ) {
331
+ let start = src. len ( ) - chars. as_str ( ) . len ( ) - c. len_utf8 ( ) ;
332
+ let res = match c {
// A bare CR is always rejected; any other character is rejected only
// when it is non-ASCII and a byte-flavoured literal is being parsed.
356
333
'\r' => Err ( EscapeError :: BareCarriageReturnInRawString ) ,
357
- c if mode. is_bytes ( ) && !c. is_ascii ( ) => Err ( EscapeError :: NonAsciiCharInByteString ) ,
358
- c => Ok ( c) ,
334
+ _ => ascii_check ( c, is_byte) ,
359
335
} ;
360
- let end = initial_len - chars. as_str ( ) . len ( ) ;
361
-
362
- callback ( start..end, result) ;
336
+ let end = src. len ( ) - chars. as_str ( ) . len ( ) ;
337
+ callback ( start..end, res) ;
363
338
}
364
339
}
365
340
366
- fn byte_from_char ( c : char ) -> u8 {
341
+ #[ inline]
342
+ pub fn byte_from_char ( c : char ) -> u8 {
367
343
let res = c as u32 ;
368
344
debug_assert ! ( res <= u8 :: MAX as u32 , "guaranteed because of Mode::ByteStr" ) ;
369
345
res as u8
0 commit comments