From 0af255a5aa448efe6ab7e3252e85081128beaa9e Mon Sep 17 00:00:00 2001 From: yukang Date: Tue, 18 Oct 2022 02:00:06 +0800 Subject: [PATCH 1/2] Fix the bug of next_point in span --- compiler/rustc_expand/src/expand.rs | 7 +++---- compiler/rustc_expand/src/mbe/macro_rules.rs | 2 +- compiler/rustc_parse/src/parser/diagnostics.rs | 4 ++-- compiler/rustc_parse/src/parser/expr.rs | 2 +- compiler/rustc_parse/src/parser/item.rs | 2 +- compiler/rustc_resolve/src/late/diagnostics.rs | 2 +- compiler/rustc_span/src/source_map.rs | 7 ++++--- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/compiler/rustc_expand/src/expand.rs b/compiler/rustc_expand/src/expand.rs index 59f434b941e78..15e9a8db3c602 100644 --- a/compiler/rustc_expand/src/expand.rs +++ b/compiler/rustc_expand/src/expand.rs @@ -937,13 +937,12 @@ pub fn ensure_complete_parse<'a>( kind_name, ); err.note(&msg); - let semi_span = this.sess.source_map().next_point(span); - let semi_full_span = semi_span.to(this.sess.source_map().next_point(semi_span)); - match this.sess.source_map().span_to_snippet(semi_full_span) { + let semi_span = this.sess.source_map().next_point(span); + match this.sess.source_map().span_to_snippet(semi_span) { Ok(ref snippet) if &snippet[..] != ";" && kind_name == "expression" => { err.span_suggestion( - semi_span, + span.shrink_to_hi(), "you might be missing a semicolon here", ";", Applicability::MaybeIncorrect, diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs index 30aa4f0fa3482..0aa2b44a0f87a 100644 --- a/compiler/rustc_expand/src/mbe/macro_rules.rs +++ b/compiler/rustc_expand/src/mbe/macro_rules.rs @@ -82,7 +82,7 @@ fn emit_frag_parse_err( ); if !e.span.is_dummy() { // early end of macro arm (#52866) - e.replace_span_with(parser.sess.source_map().next_point(parser.token.span)); + e.replace_span_with(parser.token.span.shrink_to_hi()); } } if e.span.is_dummy() { diff --git a/compiler/rustc_parse/src/parser/diagnostics.rs b/compiler/rustc_parse/src/parser/diagnostics.rs index 828b7d2f2f7be..40d85c833a7dd 100644 --- a/compiler/rustc_parse/src/parser/diagnostics.rs +++ b/compiler/rustc_parse/src/parser/diagnostics.rs @@ -1461,7 +1461,7 @@ impl<'a> Parser<'a> { let (prev_sp, sp) = match (&self.token.kind, self.subparser_name) { // Point at the end of the macro call when reaching end of macro arguments. (token::Eof, Some(_)) => { - let sp = self.sess.source_map().next_point(self.prev_token.span); + let sp = self.prev_token.span.shrink_to_hi(); (sp, sp) } // We don't want to point at the following span after DUMMY_SP. @@ -2039,7 +2039,7 @@ impl<'a> Parser<'a> { pub(super) fn expected_expression_found(&self) -> DiagnosticBuilder<'a, ErrorGuaranteed> { let (span, msg) = match (&self.token.kind, self.subparser_name) { (&token::Eof, Some(origin)) => { - let sp = self.sess.source_map().next_point(self.prev_token.span); + let sp = self.prev_token.span.shrink_to_hi(); (sp, format!("expected expression, found end of {origin}")) } _ => ( diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 11301f03e48eb..afa116ce1bccd 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -2172,7 +2172,7 @@ impl<'a> Parser<'a> { }, ExprKind::Block(_, None) => { self.sess.emit_err(IfExpressionMissingCondition { - if_span: self.sess.source_map().next_point(lo), + if_span: lo.shrink_to_hi(), block_span: self.sess.source_map().start_point(cond_span), }); std::mem::replace(&mut cond, this.mk_expr_err(cond_span.shrink_to_hi())) diff --git a/compiler/rustc_parse/src/parser/item.rs b/compiler/rustc_parse/src/parser/item.rs index ebcbc75ba32c3..bda301c52e960 100644 --- a/compiler/rustc_parse/src/parser/item.rs +++ b/compiler/rustc_parse/src/parser/item.rs @@ -1601,7 +1601,7 @@ impl<'a> Parser<'a> { self.sess.emit_err(err); } else { if !seen_comma { - let sp = self.sess.source_map().next_point(previous_span); + let sp = previous_span.shrink_to_hi(); err.missing_comma = Some(sp); } return Err(err.into_diagnostic(&self.sess.span_diagnostic)); diff --git a/compiler/rustc_resolve/src/late/diagnostics.rs b/compiler/rustc_resolve/src/late/diagnostics.rs index 13cd7987e923d..4fd5bc1d60a47 100644 --- a/compiler/rustc_resolve/src/late/diagnostics.rs +++ b/compiler/rustc_resolve/src/late/diagnostics.rs @@ -1731,7 +1731,7 @@ impl<'a: 'ast, 'ast> LateResolutionVisitor<'a, '_, 'ast> { for _ in 0..100 { // Try to find an assignment sp = sm.next_point(sp); - let snippet = sm.span_to_snippet(sp.to(sm.next_point(sp))); + let snippet = sm.span_to_snippet(sp); match snippet { Ok(ref x) if x.as_str() == "=" => { err.span_suggestion( diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index 4d94c92d3f2b1..d3c2550aa2afb 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -859,14 +859,15 @@ impl SourceMap { } let start_of_next_point = sp.hi().0; - let width = self.find_width_of_character_at_span(sp.shrink_to_hi(), true); + let width = self.find_width_of_character_at_span(sp, true); + debug_assert!(width > 0); // If the width is 1, then the next span should point to the same `lo` and `hi`. However, // in the case of a multibyte character, where the width != 1, the next span should // span multiple bytes to include the whole character. let end_of_next_point = - start_of_next_point.checked_add(width - 1).unwrap_or(start_of_next_point); + start_of_next_point.checked_add(width).unwrap_or(start_of_next_point); - let end_of_next_point = BytePos(cmp::max(sp.lo().0 + 1, end_of_next_point)); + let end_of_next_point = BytePos(cmp::max(start_of_next_point + 1, end_of_next_point)); Span::new(BytePos(start_of_next_point), end_of_next_point, sp.ctxt(), None) } From eb8aa9759dc99b604145f94e5296b7add60e0a48 Mon Sep 17 00:00:00 2001 From: yukang Date: Wed, 19 Oct 2022 11:46:26 +0800 Subject: [PATCH 2/2] Add testcase for next_point, fix more trivial issues in find_width_of_character_at_span --- compiler/rustc_span/src/source_map.rs | 19 ++++++--- compiler/rustc_span/src/source_map/tests.rs | 45 +++++++++++++++++++++ src/tools/clippy/clippy_utils/src/sugg.rs | 3 +- 3 files changed, 59 insertions(+), 8 deletions(-) diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index d3c2550aa2afb..506ce6955d399 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -853,6 +853,10 @@ impl SourceMap { } /// Returns a new span representing the next character after the end-point of this span. + /// Special cases: + /// - if span is a dummy one, returns the same span + /// - if next_point reached the end of source, return span with lo = hi + /// - respect multi-byte characters pub fn next_point(&self, sp: Span) -> Span { if sp.is_dummy() { return sp; @@ -860,9 +864,11 @@ impl SourceMap { let start_of_next_point = sp.hi().0; let width = self.find_width_of_character_at_span(sp, true); - debug_assert!(width > 0); - // If the width is 1, then the next span should point to the same `lo` and `hi`. However, - // in the case of a multibyte character, where the width != 1, the next span should + if width == 0 { + return Span::new(sp.hi(), sp.hi(), sp.ctxt(), None); + } + // If the width is 1, then the next span should only contain the next char besides current ending. + // However, in the case of a multibyte character, where the width != 1, the next span should // span multiple bytes to include the whole character. let end_of_next_point = start_of_next_point.checked_add(width).unwrap_or(start_of_next_point); @@ -875,7 +881,8 @@ impl SourceMap { /// depending on the `forwards` parameter. fn find_width_of_character_at_span(&self, sp: Span, forwards: bool) -> u32 { let sp = sp.data(); - if sp.lo == sp.hi { + + if sp.lo == sp.hi && !forwards { debug!("find_width_of_character_at_span: early return empty span"); return 1; } @@ -909,9 +916,9 @@ impl SourceMap { let source_len = (local_begin.sf.end_pos - local_begin.sf.start_pos).to_usize(); debug!("find_width_of_character_at_span: source_len=`{:?}`", source_len); // Ensure indexes are also not malformed. - if start_index > end_index || end_index > source_len { + if start_index > end_index || end_index > source_len - 1 { debug!("find_width_of_character_at_span: source indexes are malformed"); - return 1; + return 0; } let src = local_begin.sf.external_src.borrow(); diff --git a/compiler/rustc_span/src/source_map/tests.rs b/compiler/rustc_span/src/source_map/tests.rs index 3058ec45a6468..1fd81018fa05c 100644 --- a/compiler/rustc_span/src/source_map/tests.rs +++ b/compiler/rustc_span/src/source_map/tests.rs @@ -479,3 +479,48 @@ fn path_prefix_remapping_expand_to_absolute() { RealFileName::Remapped { local_path: None, virtual_name: path("XYZ/src/main.rs") } ); } + +#[test] +fn test_next_point() { + let sm = SourceMap::new(FilePathMapping::empty()); + sm.new_source_file(PathBuf::from("example.rs").into(), "a…b".to_string()); + + // Dummy spans don't advance. + let span = DUMMY_SP; + let span = sm.next_point(span); + assert_eq!(span.lo().0, 0); + assert_eq!(span.hi().0, 0); + + // Span advance respect multi-byte character + let span = Span::with_root_ctxt(BytePos(0), BytePos(1)); + assert_eq!(sm.span_to_snippet(span), Ok("a".to_string())); + let span = sm.next_point(span); + assert_eq!(sm.span_to_snippet(span), Ok("…".to_string())); + assert_eq!(span.lo().0, 1); + assert_eq!(span.hi().0, 4); + + // An empty span pointing just before a multi-byte character should + // advance to contain the multi-byte character. + let span = Span::with_root_ctxt(BytePos(1), BytePos(1)); + let span = sm.next_point(span); + assert_eq!(span.lo().0, 1); + assert_eq!(span.hi().0, 4); + + let span = Span::with_root_ctxt(BytePos(1), BytePos(4)); + let span = sm.next_point(span); + assert_eq!(span.lo().0, 4); + assert_eq!(span.hi().0, 5); + + // A non-empty span at the last byte should advance to create an empty + // span pointing at the end of the file. + let span = Span::with_root_ctxt(BytePos(4), BytePos(5)); + let span = sm.next_point(span); + assert_eq!(span.lo().0, 5); + assert_eq!(span.hi().0, 5); + + // Empty span pointing just past the last byte. + let span = Span::with_root_ctxt(BytePos(5), BytePos(5)); + let span = sm.next_point(span); + assert_eq!(span.lo().0, 5); + assert_eq!(span.hi().0, 5); +} diff --git a/src/tools/clippy/clippy_utils/src/sugg.rs b/src/tools/clippy/clippy_utils/src/sugg.rs index 3c5dd92b9cd6d..3347342e412b6 100644 --- a/src/tools/clippy/clippy_utils/src/sugg.rs +++ b/src/tools/clippy/clippy_utils/src/sugg.rs @@ -769,8 +769,7 @@ impl DiagnosticExt for rustc_errors::Diagnostic { fn suggest_remove_item(&mut self, cx: &T, item: Span, msg: &str, applicability: Applicability) { let mut remove_span = item; - let hi = cx.sess().source_map().next_point(remove_span).hi(); - let fmpos = cx.sess().source_map().lookup_byte_offset(hi); + let fmpos = cx.sess().source_map().lookup_byte_offset(remove_span.hi()); if let Some(ref src) = fmpos.sf.src { let non_whitespace_offset = src[fmpos.pos.to_usize()..].find(|c| c != ' ' && c != '\t' && c != '\n');