@@ -29,9 +29,11 @@ pub mod unescape;
2929#[ cfg( test) ]  
3030mod  tests; 
3131
32+ pub  use  crate :: cursor:: Cursor ; 
33+ 
3234use  self :: LiteralKind :: * ; 
3335use  self :: TokenKind :: * ; 
34- use  crate :: cursor:: { Cursor ,   EOF_CHAR } ; 
36+ use  crate :: cursor:: EOF_CHAR ; 
3537use  std:: convert:: TryFrom ; 
3638
3739/// Parsed token. 
@@ -139,6 +141,9 @@ pub enum TokenKind {
139141
140142    /// Unknown token, not expected by the lexer, e.g. "№" 
141143     Unknown , 
144+ 
145+     /// End of input. 
146+      Eof , 
142147} 
143148
144149#[ derive( Clone ,  Copy ,  Debug ,  PartialEq ,  Eq ) ]  
@@ -219,13 +224,6 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
219224    None 
220225} 
221226
222- /// Parses the first token from the provided input string. 
223- #[ inline]  
224- pub  fn  first_token ( input :  & str )  -> Token  { 
225-     debug_assert ! ( !input. is_empty( ) ) ; 
226-     Cursor :: new ( input) . advance_token ( ) 
227- } 
228- 
229227/// Validates a raw string literal. Used for getting more information about a 
230228/// problem with a `RawStr`/`RawByteStr` with a `None` field. 
231229#[ inline]  
@@ -243,12 +241,8 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError>
243241pub  fn  tokenize ( input :  & str )  -> impl  Iterator < Item  = Token >  + ' _  { 
244242    let  mut  cursor = Cursor :: new ( input) ; 
245243    std:: iter:: from_fn ( move  || { 
246-         if  cursor. is_eof ( )  { 
247-             None 
248-         }  else  { 
249-             cursor. reset_len_consumed ( ) ; 
250-             Some ( cursor. advance_token ( ) ) 
251-         } 
244+         let  token = cursor. advance_token ( ) ; 
245+         if  token. kind  != TokenKind :: Eof  {  Some ( token)  }  else  {  None  } 
252246    } ) 
253247} 
254248
@@ -311,8 +305,11 @@ pub fn is_ident(string: &str) -> bool {
311305
312306impl  Cursor < ' _ >  { 
313307    /// Parses a token from the input string. 
314-      fn  advance_token ( & mut  self )  -> Token  { 
315-         let  first_char = self . bump ( ) . unwrap ( ) ; 
308+      pub  fn  advance_token ( & mut  self )  -> Token  { 
309+         let  first_char = match  self . bump ( )  { 
310+             Some ( c)  => c, 
311+             None  => return  Token :: new ( TokenKind :: Eof ,  0 ) , 
312+         } ; 
316313        let  token_kind = match  first_char { 
317314            // Slash, comment or block comment. 
318315            '/'  => match  self . first ( )  { 
@@ -329,7 +326,7 @@ impl Cursor<'_> {
329326                ( '#' ,  c1)  if  is_id_start ( c1)  => self . raw_ident ( ) , 
330327                ( '#' ,  _)  | ( '"' ,  _)  => { 
331328                    let  res = self . raw_double_quoted_string ( 1 ) ; 
332-                     let  suffix_start = self . len_consumed ( ) ; 
329+                     let  suffix_start = self . pos_within_token ( ) ; 
333330                    if  res. is_ok ( )  { 
334331                        self . eat_literal_suffix ( ) ; 
335332                    } 
@@ -344,7 +341,7 @@ impl Cursor<'_> {
344341                ( '\'' ,  _)  => { 
345342                    self . bump ( ) ; 
346343                    let  terminated = self . single_quoted_string ( ) ; 
347-                     let  suffix_start = self . len_consumed ( ) ; 
344+                     let  suffix_start = self . pos_within_token ( ) ; 
348345                    if  terminated { 
349346                        self . eat_literal_suffix ( ) ; 
350347                    } 
@@ -354,7 +351,7 @@ impl Cursor<'_> {
354351                ( '"' ,  _)  => { 
355352                    self . bump ( ) ; 
356353                    let  terminated = self . double_quoted_string ( ) ; 
357-                     let  suffix_start = self . len_consumed ( ) ; 
354+                     let  suffix_start = self . pos_within_token ( ) ; 
358355                    if  terminated { 
359356                        self . eat_literal_suffix ( ) ; 
360357                    } 
@@ -364,7 +361,7 @@ impl Cursor<'_> {
364361                ( 'r' ,  '"' )  | ( 'r' ,  '#' )  => { 
365362                    self . bump ( ) ; 
366363                    let  res = self . raw_double_quoted_string ( 2 ) ; 
367-                     let  suffix_start = self . len_consumed ( ) ; 
364+                     let  suffix_start = self . pos_within_token ( ) ; 
368365                    if  res. is_ok ( )  { 
369366                        self . eat_literal_suffix ( ) ; 
370367                    } 
@@ -381,7 +378,7 @@ impl Cursor<'_> {
381378            // Numeric literal. 
382379            c @ '0' ..='9'  => { 
383380                let  literal_kind = self . number ( c) ; 
384-                 let  suffix_start = self . len_consumed ( ) ; 
381+                 let  suffix_start = self . pos_within_token ( ) ; 
385382                self . eat_literal_suffix ( ) ; 
386383                TokenKind :: Literal  {  kind :  literal_kind,  suffix_start } 
387384            } 
@@ -420,7 +417,7 @@ impl Cursor<'_> {
420417            // String literal. 
421418            '"'  => { 
422419                let  terminated = self . double_quoted_string ( ) ; 
423-                 let  suffix_start = self . len_consumed ( ) ; 
420+                 let  suffix_start = self . pos_within_token ( ) ; 
424421                if  terminated { 
425422                    self . eat_literal_suffix ( ) ; 
426423                } 
@@ -433,7 +430,9 @@ impl Cursor<'_> {
433430            } 
434431            _ => Unknown , 
435432        } ; 
436-         Token :: new ( token_kind,  self . len_consumed ( ) ) 
433+         let  res = Token :: new ( token_kind,  self . pos_within_token ( ) ) ; 
434+         self . reset_pos_within_token ( ) ; 
435+         res
437436    } 
438437
439438    fn  line_comment ( & mut  self )  -> TokenKind  { 
@@ -618,7 +617,7 @@ impl Cursor<'_> {
618617
619618        if  !can_be_a_lifetime { 
620619            let  terminated = self . single_quoted_string ( ) ; 
621-             let  suffix_start = self . len_consumed ( ) ; 
620+             let  suffix_start = self . pos_within_token ( ) ; 
622621            if  terminated { 
623622                self . eat_literal_suffix ( ) ; 
624623            } 
@@ -643,7 +642,7 @@ impl Cursor<'_> {
643642        if  self . first ( )  == '\''  { 
644643            self . bump ( ) ; 
645644            let  kind = Char  {  terminated :  true  } ; 
646-             Literal  {  kind,  suffix_start :  self . len_consumed ( )  } 
645+             Literal  {  kind,  suffix_start :  self . pos_within_token ( )  } 
647646        }  else  { 
648647            Lifetime  {  starts_with_number } 
649648        } 
@@ -724,7 +723,7 @@ impl Cursor<'_> {
724723
725724    fn  raw_string_unvalidated ( & mut  self ,  prefix_len :  u32 )  -> Result < u32 ,  RawStrError >  { 
726725        debug_assert ! ( self . prev( )  == 'r' ) ; 
727-         let  start_pos = self . len_consumed ( ) ; 
726+         let  start_pos = self . pos_within_token ( ) ; 
728727        let  mut  possible_terminator_offset = None ; 
729728        let  mut  max_hashes = 0 ; 
730729
@@ -778,7 +777,7 @@ impl Cursor<'_> {
778777                // Keep track of possible terminators to give a hint about 
779778                // where there might be a missing terminator 
780779                possible_terminator_offset =
781-                     Some ( self . len_consumed ( )  - start_pos - n_end_hashes + prefix_len) ; 
780+                     Some ( self . pos_within_token ( )  - start_pos - n_end_hashes + prefix_len) ; 
782781                max_hashes = n_end_hashes; 
783782            } 
784783        } 
0 commit comments