Skip to content

Commit

Permalink
Be more accurate about calculating display_col from a BytePos
Browse files Browse the repository at this point in the history
No longer track "zero-width" chars in `SourceMap`, read directly from the line when calculating the `display_col` of a `BytePos`. Move `char_width` to `rustc_span` and use it from the emitter.

This change allows the following to properly align in terminals (depending on the font, the replaced control codepoints are rendered as 1 or 2 width, on my terminal they are rendered as 1, on VSCode text they are rendered as 2):

```
error: this file contains an unclosed delimiter
  --> $DIR/issue-68629.rs:5:17
   |
LL | ␜␟ts␀![{i
   |       -- unclosed delimiter
   |       |
   |       unclosed delimiter
LL | ␀␀  fn rݻoa>rݻm
   |                ^
```
  • Loading branch information
estebank committed Jul 16, 2024
1 parent c60b38c commit 42ed400
Show file tree
Hide file tree
Showing 60 changed files with 141 additions and 285 deletions.
1 change: 0 additions & 1 deletion Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3982,7 +3982,6 @@ dependencies = [
"termcolor",
"termize",
"tracing",
"unicode-width",
"windows",
]

Expand Down
1 change: 0 additions & 1 deletion compiler/rustc_errors/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ serde_json = "1.0.59"
termcolor = "1.2.0"
termize = "0.1.1"
tracing = "0.1"
unicode-width = "0.1.4"
# tidy-alphabetical-end

[target.'cfg(windows)'.dependencies.windows]
Expand Down
17 changes: 1 addition & 16 deletions compiler/rustc_errors/src/emitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
//! The output types are defined in `rustc_session::config::ErrorOutputType`.
use rustc_span::source_map::SourceMap;
use rustc_span::{FileLines, FileName, SourceFile, Span};
use rustc_span::{char_width, FileLines, FileName, SourceFile, Span};

use crate::snippet::{
Annotation, AnnotationColumn, AnnotationType, Line, MultilineAnnotation, Style, StyledString,
Expand Down Expand Up @@ -2614,21 +2614,6 @@ fn normalize_whitespace(str: &str) -> String {
s
}

fn char_width(ch: char) -> usize {
// FIXME: `unicode_width` sometimes disagrees with terminals on how wide a `char` is. For now,
// just accept that sometimes the code line will be longer than desired.
match ch {
'\t' => 4,
'\u{0000}' | '\u{0001}' | '\u{0002}' | '\u{0003}' | '\u{0004}' | '\u{0005}'
| '\u{0006}' | '\u{0007}' | '\u{0008}' | '\u{000B}' | '\u{000C}' | '\u{000D}'
| '\u{000E}' | '\u{000F}' | '\u{0010}' | '\u{0011}' | '\u{0012}' | '\u{0013}'
| '\u{0014}' | '\u{0015}' | '\u{0016}' | '\u{0017}' | '\u{0018}' | '\u{0019}'
| '\u{001A}' | '\u{001B}' | '\u{001C}' | '\u{001D}' | '\u{001E}' | '\u{001F}'
| '\u{007F}' => 1,
_ => unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1),
}
}

fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) {
buffer.puts(line, col, "| ", Style::LineNumber);
}
Expand Down
2 changes: 0 additions & 2 deletions compiler/rustc_metadata/src/rmeta/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1727,7 +1727,6 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
source_len,
lines,
multibyte_chars,
non_narrow_chars,
normalized_pos,
stable_id,
..
Expand Down Expand Up @@ -1779,7 +1778,6 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
self.cnum,
lines,
multibyte_chars,
non_narrow_chars,
normalized_pos,
source_file_index,
);
Expand Down
6 changes: 0 additions & 6 deletions compiler/rustc_query_system/src/ich/impls_syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
source_len: _,
lines: _,
ref multibyte_chars,
ref non_narrow_chars,
ref normalized_pos,
} = *self;

Expand All @@ -98,11 +97,6 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
char_pos.hash_stable(hcx, hasher);
}

non_narrow_chars.len().hash_stable(hcx, hasher);
for &char_pos in non_narrow_chars.iter() {
char_pos.hash_stable(hcx, hasher);
}

normalized_pos.len().hash_stable(hcx, hasher);
for &char_pos in normalized_pos.iter() {
char_pos.hash_stable(hcx, hasher);
Expand Down
53 changes: 12 additions & 41 deletions compiler/rustc_span/src/analyze_source_file.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use super::*;
use unicode_width::UnicodeWidthChar;

#[cfg(test)]
mod tests;
Expand All @@ -9,15 +8,12 @@ mod tests;
///
/// This function will use an SSE2 enhanced implementation if hardware support
/// is detected at runtime.
pub fn analyze_source_file(
src: &str,
) -> (Vec<RelativeBytePos>, Vec<MultiByteChar>, Vec<NonNarrowChar>) {
pub fn analyze_source_file(src: &str) -> (Vec<RelativeBytePos>, Vec<MultiByteChar>) {
let mut lines = vec![RelativeBytePos::from_u32(0)];
let mut multi_byte_chars = vec![];
let mut non_narrow_chars = vec![];

// Calls the right implementation, depending on hardware support available.
analyze_source_file_dispatch(src, &mut lines, &mut multi_byte_chars, &mut non_narrow_chars);
analyze_source_file_dispatch(src, &mut lines, &mut multi_byte_chars);

// The code above optimistically registers a new line *after* each \n
// it encounters. If that point is already outside the source_file, remove
Expand All @@ -30,29 +26,26 @@ pub fn analyze_source_file(
}
}

(lines, multi_byte_chars, non_narrow_chars)
(lines, multi_byte_chars)
}

cfg_match! {
cfg(any(target_arch = "x86", target_arch = "x86_64")) => {
fn analyze_source_file_dispatch(src: &str,
lines: &mut Vec<RelativeBytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
non_narrow_chars: &mut Vec<NonNarrowChar>) {
multi_byte_chars: &mut Vec<MultiByteChar>) {
if is_x86_feature_detected!("sse2") {
unsafe {
analyze_source_file_sse2(src,
lines,
multi_byte_chars,
non_narrow_chars);
multi_byte_chars);
}
} else {
analyze_source_file_generic(src,
src.len(),
RelativeBytePos::from_u32(0),
lines,
multi_byte_chars,
non_narrow_chars);
multi_byte_chars);

}
}
Expand All @@ -64,8 +57,7 @@ cfg_match! {
#[target_feature(enable = "sse2")]
unsafe fn analyze_source_file_sse2(src: &str,
lines: &mut Vec<RelativeBytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
non_narrow_chars: &mut Vec<NonNarrowChar>) {
multi_byte_chars: &mut Vec<MultiByteChar>) {
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
Expand Down Expand Up @@ -157,7 +149,6 @@ cfg_match! {
RelativeBytePos::from_usize(scan_start),
lines,
multi_byte_chars,
non_narrow_chars
);
}

Expand All @@ -168,23 +159,20 @@ cfg_match! {
src.len() - tail_start,
RelativeBytePos::from_usize(tail_start),
lines,
multi_byte_chars,
non_narrow_chars);
multi_byte_chars);
}
}
}
_ => {
// The target (or compiler version) does not support SSE2 ...
fn analyze_source_file_dispatch(src: &str,
lines: &mut Vec<RelativeBytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
non_narrow_chars: &mut Vec<NonNarrowChar>) {
multi_byte_chars: &mut Vec<MultiByteChar>) {
analyze_source_file_generic(src,
src.len(),
RelativeBytePos::from_u32(0),
lines,
multi_byte_chars,
non_narrow_chars);
multi_byte_chars);
}
}
}
Expand All @@ -198,7 +186,6 @@ fn analyze_source_file_generic(
output_offset: RelativeBytePos,
lines: &mut Vec<RelativeBytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
non_narrow_chars: &mut Vec<NonNarrowChar>,
) -> usize {
assert!(src.len() >= scan_len);
let mut i = 0;
Expand All @@ -220,16 +207,8 @@ fn analyze_source_file_generic(

let pos = RelativeBytePos::from_usize(i) + output_offset;

match byte {
b'\n' => {
lines.push(pos + RelativeBytePos(1));
}
b'\t' => {
non_narrow_chars.push(NonNarrowChar::Tab(pos));
}
_ => {
non_narrow_chars.push(NonNarrowChar::ZeroWidth(pos));
}
if let b'\n' = byte {
lines.push(pos + RelativeBytePos(1));
}
} else if byte >= 127 {
// The slow path:
Expand All @@ -245,14 +224,6 @@ fn analyze_source_file_generic(
let mbc = MultiByteChar { pos, bytes: char_len as u8 };
multi_byte_chars.push(mbc);
}

// Assume control characters are zero width.
// FIXME: How can we decide between `width` and `width_cjk`?
let char_width = UnicodeWidthChar::width(c).unwrap_or(0);

if char_width != 1 {
non_narrow_chars.push(NonNarrowChar::new(pos, char_width));
}
}

i += char_len;
Expand Down
24 changes: 2 additions & 22 deletions compiler/rustc_span/src/analyze_source_file/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@ macro_rules! test {
(case: $test_name:ident,
text: $text:expr,
lines: $lines:expr,
multi_byte_chars: $multi_byte_chars:expr,
non_narrow_chars: $non_narrow_chars:expr,) => {
multi_byte_chars: $multi_byte_chars:expr,) => {
#[test]
fn $test_name() {
let (lines, multi_byte_chars, non_narrow_chars) = analyze_source_file($text);
let (lines, multi_byte_chars) = analyze_source_file($text);

let expected_lines: Vec<RelativeBytePos> =
$lines.into_iter().map(RelativeBytePos).collect();
Expand All @@ -21,13 +20,6 @@ macro_rules! test {
.collect();

assert_eq!(multi_byte_chars, expected_mbcs);

let expected_nncs: Vec<NonNarrowChar> = $non_narrow_chars
.into_iter()
.map(|(pos, width)| NonNarrowChar::new(RelativeBytePos(pos), width))
.collect();

assert_eq!(non_narrow_chars, expected_nncs);
}
};
}
Expand All @@ -37,93 +29,81 @@ test!(
text: "",
lines: vec![],
multi_byte_chars: vec![],
non_narrow_chars: vec![],
);

test!(
case: newlines_short,
text: "a\nc",
lines: vec![0, 2],
multi_byte_chars: vec![],
non_narrow_chars: vec![],
);

test!(
case: newlines_long,
text: "012345678\nabcdef012345678\na",
lines: vec![0, 10, 26],
multi_byte_chars: vec![],
non_narrow_chars: vec![],
);

test!(
case: newline_and_multi_byte_char_in_same_chunk,
text: "01234β789\nbcdef0123456789abcdef",
lines: vec![0, 11],
multi_byte_chars: vec![(5, 2)],
non_narrow_chars: vec![],
);

test!(
case: newline_and_control_char_in_same_chunk,
text: "01234\u{07}6789\nbcdef0123456789abcdef",
lines: vec![0, 11],
multi_byte_chars: vec![],
non_narrow_chars: vec![(5, 0)],
);

test!(
case: multi_byte_char_short,
text: "aβc",
lines: vec![0],
multi_byte_chars: vec![(1, 2)],
non_narrow_chars: vec![],
);

test!(
case: multi_byte_char_long,
text: "0123456789abcΔf012345β",
lines: vec![0],
multi_byte_chars: vec![(13, 2), (22, 2)],
non_narrow_chars: vec![],
);

test!(
case: multi_byte_char_across_chunk_boundary,
text: "0123456789abcdeΔ123456789abcdef01234",
lines: vec![0],
multi_byte_chars: vec![(15, 2)],
non_narrow_chars: vec![],
);

test!(
case: multi_byte_char_across_chunk_boundary_tail,
text: "0123456789abcdeΔ....",
lines: vec![0],
multi_byte_chars: vec![(15, 2)],
non_narrow_chars: vec![],
);

test!(
case: non_narrow_short,
text: "0\t2",
lines: vec![0],
multi_byte_chars: vec![],
non_narrow_chars: vec![(1, 4)],
);

test!(
case: non_narrow_long,
text: "01\t3456789abcdef01234567\u{07}9",
lines: vec![0],
multi_byte_chars: vec![],
non_narrow_chars: vec![(2, 4), (24, 0)],
);

test!(
case: output_offset_all,
text: "01\t345\n789abcΔf01234567\u{07}9\nbcΔf",
lines: vec![0, 7, 27],
multi_byte_chars: vec![(13, 2), (29, 2)],
non_narrow_chars: vec![(2, 4), (24, 0)],
);
Loading

0 comments on commit 42ed400

Please sign in to comment.