@@ -79,7 +79,7 @@ impl<'a> StringReader<'a> {
7979 /// preceded by whitespace.
8080 fn next_token ( & mut self ) -> ( Token , bool ) {
8181 let mut preceded_by_whitespace = false ;
82-
82+ let mut swallow_next_invalid = 0 ;
8383 // Skip trivial (whitespace & comments) tokens
8484 loop {
8585 let token = self . cursor . advance_token ( ) ;
@@ -232,19 +232,34 @@ impl<'a> StringReader<'a> {
232232 rustc_lexer:: TokenKind :: Percent => token:: BinOp ( token:: Percent ) ,
233233
234234 rustc_lexer:: TokenKind :: Unknown | rustc_lexer:: TokenKind :: InvalidIdent => {
235- let c = self . str_from ( start) . chars ( ) . next ( ) . unwrap ( ) ;
235+ // Don't emit diagnostics for sequences of the same invalid token
236+ if swallow_next_invalid > 0 {
237+ swallow_next_invalid -= 1 ;
238+ continue ;
239+ }
240+ let mut it = self . str_from_to_end ( start) . chars ( ) ;
241+ let c = it. next ( ) . unwrap ( ) ;
242+ let repeats = it. take_while ( |c1| * c1 == c) . count ( ) ;
236243 let mut err =
237- self . struct_err_span_char ( start, self . pos , "unknown start of token" , c) ;
244+ self . struct_err_span_char ( start, self . pos + Pos :: from_usize ( repeats * c . len_utf8 ( ) ) , "unknown start of token" , c) ;
238245 // FIXME: the lexer could be used to turn the ASCII version of unicode
239246 // homoglyphs, instead of keeping a table in `check_for_substitution` into the
240247 // token. Ideally, this should be inside `rustc_lexer`. However, we should
241248 // first remove compound tokens like `<<` from `rustc_lexer`, and then add
242249 // fancier error recovery to it, as there will be less overall work to do this
243250 // way.
244- let token = unicode_chars:: check_for_substitution ( self , start, c, & mut err) ;
251+ let token = unicode_chars:: check_for_substitution ( self , start, c, & mut err, repeats+ 1 ) ;
245252 if c == '\x00' {
246253 err. help ( "source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used" ) ;
247254 }
255+ if repeats > 0 {
256+ if repeats == 1 {
257+ err. note ( format ! ( "character appears once more" ) ) ;
258+ } else {
259+ err. note ( format ! ( "character appears {repeats} more times" ) ) ;
260+ }
261+ swallow_next_invalid = repeats;
262+ }
248263 err. emit ( ) ;
249264 if let Some ( token) = token {
250265 token
@@ -486,6 +501,11 @@ impl<'a> StringReader<'a> {
486501 & self . src [ self . src_index ( start) ..self . src_index ( end) ]
487502 }
488503
504+ /// Slice of the source text spanning from `start` until the end
505+ fn str_from_to_end ( & self , start : BytePos ) -> & str {
506+ & self . src [ self . src_index ( start) ..]
507+ }
508+
489509 fn report_raw_str_error ( & self , start : BytePos , prefix_len : u32 ) -> ! {
490510 match rustc_lexer:: validate_raw_str ( self . str_from ( start) , prefix_len) {
491511 Err ( RawStrError :: InvalidStarter { bad_char } ) => {
0 commit comments