22//! turning escape sequences into the values they represent.
33
44use std:: ffi:: CStr ;
5+ use std:: num:: NonZero ;
56use std:: ops:: Range ;
67use std:: str:: Chars ;
78
@@ -105,7 +106,10 @@ pub fn check_raw_byte_str(src: &str, callback: impl FnMut(Range<usize>, Result<u
105106/// and produces a sequence of characters or errors,
106107/// which are returned by invoking `callback`.
107108/// NOTE: Does no escaping, but produces errors for bare carriage return ('\r').
108- pub fn check_raw_c_str ( src : & str , callback : impl FnMut ( Range < usize > , Result < char , EscapeError > ) ) {
109+ pub fn check_raw_c_str (
110+ src : & str ,
111+ callback : impl FnMut ( Range < usize > , Result < NonZero < char > , EscapeError > ) ,
112+ ) {
109113 CStr :: check_raw ( src, callback) ;
110114}
111115
@@ -178,14 +182,10 @@ fn char2byte(c: char) -> Result<u8, EscapeError> {
178182}
179183
180184impl CheckRaw for CStr {
181- type RawUnit = char ;
185+ type RawUnit = NonZero < char > ;
182186
183187 fn char2raw_unit ( c : char ) -> Result < Self :: RawUnit , EscapeError > {
184- if c == '\0' {
185- Err ( EscapeError :: NulInCStr )
186- } else {
187- Ok ( c)
188- }
188+ NonZero :: new ( c) . ok_or ( EscapeError :: NulInCStr )
189189 }
190190}
191191
@@ -247,40 +247,63 @@ pub enum MixedUnit {
247247 /// For example, if '¥' appears in a string it is represented here as
248248 /// `MixedUnit::Char('¥')`, and it will be appended to the relevant byte
249249 /// string as the two-byte UTF-8 sequence `[0xc2, 0xa5]`
250- Char ( char ) ,
250+ Char ( NonZero < char > ) ,
251251
252252 /// Used for high bytes (`\x80`..`\xff`).
253253 ///
254254 /// For example, if `\xa5` appears in a string it is represented here as
255255 /// `MixedUnit::HighByte(0xa5)`, and it will be appended to the relevant
256256 /// byte string as the single byte `0xa5`.
257- HighByte ( u8 ) ,
257+ HighByte ( NonZero < u8 > ) ,
258258}
259259
260- impl From < char > for MixedUnit {
261- fn from ( c : char ) -> Self {
260+ impl From < NonZero < char > > for MixedUnit {
261+ fn from ( c : NonZero < char > ) -> Self {
262262 MixedUnit :: Char ( c)
263263 }
264264}
265265
266- impl From < u8 > for MixedUnit {
267- fn from ( n : u8 ) -> Self {
268- if n . is_ascii ( ) {
269- MixedUnit :: Char ( n as char )
266+ impl From < NonZero < u8 > > for MixedUnit {
267+ fn from ( byte : NonZero < u8 > ) -> Self {
268+ if byte . get ( ) . is_ascii ( ) {
269+ MixedUnit :: Char ( NonZero :: new ( byte . get ( ) as char ) . unwrap ( ) )
270270 } else {
271- MixedUnit :: HighByte ( n )
271+ MixedUnit :: HighByte ( byte )
272272 }
273273 }
274274}
275275
276+ impl TryFrom < char > for MixedUnit {
277+ type Error = EscapeError ;
278+
279+ fn try_from ( c : char ) -> Result < Self , EscapeError > {
280+ NonZero :: new ( c)
281+ . map ( MixedUnit :: Char )
282+ . ok_or ( EscapeError :: NulInCStr )
283+ }
284+ }
285+
286+ impl TryFrom < u8 > for MixedUnit {
287+ type Error = EscapeError ;
288+
289+ fn try_from ( byte : u8 ) -> Result < Self , EscapeError > {
290+ NonZero :: new ( byte)
291+ . map ( From :: from)
292+ . ok_or ( EscapeError :: NulInCStr )
293+ }
294+ }
295+
276296/// Trait for unescaping escape sequences in strings
277297trait Unescape {
278298 /// Unit type of the implementing string type (`char` for string, `u8` for byte string)
279- type Unit : From < u8 > ;
299+ type Unit ;
280300
281301 /// Result of unescaping the zero char ('\0')
282302 const ZERO_RESULT : Result < Self :: Unit , EscapeError > ;
283303
304+ /// Converts non-zero bytes to the unit type
305+ fn nonzero_byte2unit ( b : NonZero < u8 > ) -> Self :: Unit ;
306+
284307 /// Converts chars to the unit type
285308 fn char2unit ( c : char ) -> Result < Self :: Unit , EscapeError > ;
286309
@@ -311,18 +334,20 @@ trait Unescape {
311334 if c == '0' {
312335 Self :: ZERO_RESULT
313336 } else {
314- simple_escape ( c) . map ( |b| b. into ( ) ) . or_else ( |c| match c {
315- 'x' => Self :: hex2unit ( hex_escape ( chars) ?) ,
316- 'u' => Self :: unicode2unit ( {
317- let value = unicode_escape ( chars) ?;
318- if value > char:: MAX as u32 {
319- Err ( EscapeError :: OutOfRangeUnicodeEscape )
320- } else {
321- char:: from_u32 ( value) . ok_or ( EscapeError :: LoneSurrogateUnicodeEscape )
322- }
323- } ) ,
324- _ => Err ( EscapeError :: InvalidEscape ) ,
325- } )
337+ simple_escape ( c)
338+ . map ( |b| Self :: nonzero_byte2unit ( b) )
339+ . or_else ( |c| match c {
340+ 'x' => Self :: hex2unit ( hex_escape ( chars) ?) ,
341+ 'u' => Self :: unicode2unit ( {
342+ let value = unicode_escape ( chars) ?;
343+ if value > char:: MAX as u32 {
344+ Err ( EscapeError :: OutOfRangeUnicodeEscape )
345+ } else {
346+ char:: from_u32 ( value) . ok_or ( EscapeError :: LoneSurrogateUnicodeEscape )
347+ }
348+ } ) ,
349+ _ => Err ( EscapeError :: InvalidEscape ) ,
350+ } )
326351 }
327352 }
328353
@@ -364,9 +389,9 @@ trait Unescape {
364389/// Interpret a non-nul ASCII escape
365390///
366391/// Parses the character of an ASCII escape (except nul) without the leading backslash.
367- fn simple_escape ( c : char ) -> Result < u8 , char > {
392+ fn simple_escape ( c : char ) -> Result < NonZero < u8 > , char > {
368393 // Previous character was '\\', unescape what follows.
369- Ok ( match c {
394+ Ok ( NonZero :: new ( match c {
370395 '"' => b'"' ,
371396 'n' => b'\n' ,
372397 'r' => b'\r' ,
@@ -375,6 +400,7 @@ fn simple_escape(c: char) -> Result<u8, char> {
375400 '\'' => b'\'' ,
376401 _ => Err ( c) ?,
377402 } )
403+ . unwrap ( ) )
378404}
379405
380406/// Interpret a hexadecimal escape
@@ -476,6 +502,10 @@ impl Unescape for str {
476502
477503 const ZERO_RESULT : Result < Self :: Unit , EscapeError > = Ok ( '\0' ) ;
478504
505+ fn nonzero_byte2unit ( b : NonZero < u8 > ) -> Self :: Unit {
506+ b. get ( ) . into ( )
507+ }
508+
479509 fn char2unit ( c : char ) -> Result < Self :: Unit , EscapeError > {
480510 Ok ( c)
481511 }
@@ -499,6 +529,10 @@ impl Unescape for [u8] {
499529
500530 const ZERO_RESULT : Result < Self :: Unit , EscapeError > = Ok ( b'\0' ) ;
501531
532+ fn nonzero_byte2unit ( b : NonZero < u8 > ) -> Self :: Unit {
533+ b. get ( )
534+ }
535+
502536 fn char2unit ( c : char ) -> Result < Self :: Unit , EscapeError > {
503537 char2byte ( c)
504538 }
@@ -518,22 +552,16 @@ impl Unescape for CStr {
518552
519553 const ZERO_RESULT : Result < Self :: Unit , EscapeError > = Err ( EscapeError :: NulInCStr ) ;
520554
555+ fn nonzero_byte2unit ( b : NonZero < u8 > ) -> Self :: Unit {
556+ b. into ( )
557+ }
558+
521559 fn char2unit ( c : char ) -> Result < Self :: Unit , EscapeError > {
522- if c == '\0' {
523- Err ( EscapeError :: NulInCStr )
524- } else {
525- Ok ( MixedUnit :: Char ( c) )
526- }
560+ c. try_into ( )
527561 }
528562
529563 fn hex2unit ( byte : u8 ) -> Result < Self :: Unit , EscapeError > {
530- if byte == b'\0' {
531- Err ( EscapeError :: NulInCStr )
532- } else if byte. is_ascii ( ) {
533- Ok ( MixedUnit :: Char ( byte as char ) )
534- } else {
535- Ok ( MixedUnit :: HighByte ( byte) )
536- }
564+ byte. try_into ( )
537565 }
538566
539567 /// Converts the result of a unicode escape to the unit type
0 commit comments