Skip to content

Commit

Permalink
Replace ASCII control chars with Unicode Control Pictures
Browse files Browse the repository at this point in the history
```
error: bare CR not allowed in doc-comment
  --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:3:32
   |
LL | /// doc comment with bare CR: '␍'
   |                                ^
```
  • Loading branch information
estebank committed Jul 16, 2024
1 parent 24d2ac0 commit c60b38c
Show file tree
Hide file tree
Showing 15 changed files with 92 additions and 31 deletions.
69 changes: 54 additions & 15 deletions compiler/rustc_errors/src/emitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -677,10 +677,7 @@ impl HumanEmitter {
.skip(left)
.take_while(|ch| {
// Make sure that the trimming on the right will fall within the terminal width.
// FIXME: `unicode_width` sometimes disagrees with terminals on how wide a `char`
// is. For now, just accept that sometimes the code line will be longer than
// desired.
let next = unicode_width::UnicodeWidthChar::width(*ch).unwrap_or(1);
let next = char_width(*ch);
if taken + next > right - left {
return false;
}
Expand Down Expand Up @@ -742,11 +739,7 @@ impl HumanEmitter {
let left = margin.left(source_string.len());

// Account for unicode characters of width !=0 that were removed.
let left = source_string
.chars()
.take(left)
.map(|ch| unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1))
.sum();
let left = source_string.chars().take(left).map(|ch| char_width(ch)).sum();

self.draw_line(
buffer,
Expand Down Expand Up @@ -2039,7 +2032,7 @@ impl HumanEmitter {
let sub_len: usize =
if is_whitespace_addition { &part.snippet } else { part.snippet.trim() }
.chars()
.map(|ch| unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1))
.map(|ch| char_width(ch))
.sum();

let offset: isize = offsets
Expand Down Expand Up @@ -2076,11 +2069,8 @@ impl HumanEmitter {
}

// length of the code after substitution
let full_sub_len = part
.snippet
.chars()
.map(|ch| unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1))
.sum::<usize>() as isize;
let full_sub_len =
part.snippet.chars().map(|ch| char_width(ch)).sum::<usize>() as isize;

// length of the code to be substituted
let snippet_len = span_end_pos as isize - span_start_pos as isize;
Expand Down Expand Up @@ -2580,6 +2570,40 @@ const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
('\u{2068}', ""),
('\u{202C}', ""),
('\u{2069}', ""),
// In terminals without Unicode support the following will be garbled, but in *all* terminals
// the underlying codepoint will be as well. We could gate this replacement behind a "unicode
// support" gate.
('\u{0000}', "␀"),
('\u{0001}', "␁"),
('\u{0002}', "␂"),
('\u{0003}', "␃"),
('\u{0004}', "␄"),
('\u{0005}', "␅"),
('\u{0006}', "␆"),
('\u{0007}', "␇"),
('\u{0008}', "␈"),
('\u{000B}', "␋"),
('\u{000C}', "␌"),
('\u{000D}', "␍"),
('\u{000E}', "␎"),
('\u{000F}', "␏"),
('\u{0010}', "␐"),
('\u{0011}', "␑"),
('\u{0012}', "␒"),
('\u{0013}', "␓"),
('\u{0014}', "␔"),
('\u{0015}', "␕"),
('\u{0016}', "␖"),
('\u{0017}', "␗"),
('\u{0018}', "␘"),
('\u{0019}', "␙"),
('\u{001A}', "␚"),
('\u{001B}', "␛"),
('\u{001C}', "␜"),
('\u{001D}', "␝"),
('\u{001E}', "␞"),
('\u{001F}', "␟"),
('\u{007F}', "␡"),
];

fn normalize_whitespace(str: &str) -> String {
Expand All @@ -2590,6 +2614,21 @@ fn normalize_whitespace(str: &str) -> String {
s
}

/// Returns how many columns `ch` is counted as when measuring a rendered source line.
///
/// * A tab is counted as 4 columns.
/// * The ASCII control characters U+0000–U+0008, U+000B–U+001F and U+007F are counted as 1
///   column each (the same set that `OUTPUT_REPLACEMENTS` maps to single Control Picture
///   glyphs).
/// * Every other character uses the width reported by `unicode_width`, with 1 as the fallback
///   when no width is reported.
fn char_width(ch: char) -> usize {
    // FIXME: `unicode_width` sometimes disagrees with terminals on how wide a `char` is. For now,
    // just accept that sometimes the code line will be longer than desired.
    if ch == '\t' {
        return 4;
    }

    // Tab (U+0009) is handled above; line feed (U+000A) is intentionally not part of this set
    // and falls through to the `unicode_width` lookup below.
    let is_single_column_control =
        matches!(ch, '\u{0000}'..='\u{0008}' | '\u{000B}'..='\u{001F}' | '\u{007F}');

    if is_single_column_control {
        1
    } else {
        unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1)
    }
}

/// Writes the `"| "` column separator into `buffer` at position (`line`, `col`), styled as
/// `Style::LineNumber`.
fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) {
    let separator = "| ";
    buffer.puts(line, col, separator, Style::LineNumber);
}
Expand Down
11 changes: 11 additions & 0 deletions tests/rustdoc-ui/ice-unresolved-import-100241.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
error[E0432]: unresolved import `inner`
--> $DIR/ice-unresolved-import-100241.rs:9:13
|
LL | pub use inner::S;
| ^^^^^ maybe a missing crate `inner`?
|
= help: consider adding `extern crate inner` to use the `inner` crate

error: aborting due to 1 previous error

For more information about this error, try `rustc --explain E0432`.
14 changes: 7 additions & 7 deletions tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
error: bare CR not allowed in doc-comment
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:3:32
|
LL | /// doc comment with bare CR: ''
LL | /// doc comment with bare CR: '␍'
| ^

error: bare CR not allowed in block doc-comment
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:7:38
|
LL | /** block doc comment with bare CR: '' */
LL | /** block doc comment with bare CR: '␍' */
| ^

error: bare CR not allowed in doc-comment
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:12:36
|
LL | //! doc comment with bare CR: ''
LL | //! doc comment with bare CR: '␍'
| ^

error: bare CR not allowed in block doc-comment
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:15:42
|
LL | /*! block doc comment with bare CR: '' */
LL | /*! block doc comment with bare CR: '␍' */
| ^

error: bare CR not allowed in string, use `\r` instead
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:19:18
|
LL | let _s = "foobar";
LL | let _s = "foo␍bar";
| ^
|
help: escape the character
Expand All @@ -36,13 +36,13 @@ LL | let _s = "foo\rbar";
error: bare CR not allowed in raw string
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:22:19
|
LL | let _s = r"barfoo";
LL | let _s = r"bar␍foo";
| ^

error: unknown character escape: `\r`
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:25:19
|
LL | let _s = "foo\bar";
LL | let _s = "foo\␍bar";
| ^ unknown character escape
|
= help: this is an isolated carriage return; consider checking your editor and version control settings
Expand Down
Binary file modified tests/ui/parser/bad-char-literals.rs
Binary file not shown.
17 changes: 14 additions & 3 deletions tests/ui/parser/bad-char-literals.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,27 @@ LL | '\n';
error: character constant must be escaped: `\r`
--> $DIR/bad-char-literals.rs:15:6
|
LL | '';
LL | '␍';
| ^
|
help: escape the character
|
LL | '\r';
| ++

error: character literal may only contain one codepoint
--> $DIR/bad-char-literals.rs:18:5
|
LL | '-␀-';
| ^^^^
|
help: if you meant to write a string literal, use double quotes
|
LL | "-␀-";
| ~ ~

error: character constant must be escaped: `\t`
--> $DIR/bad-char-literals.rs:18:6
--> $DIR/bad-char-literals.rs:21:6
|
LL | ' ';
| ^^^^
Expand All @@ -44,5 +55,5 @@ help: escape the character
LL | '\t';
| ++

error: aborting due to 4 previous errors
error: aborting due to 5 previous errors

Binary file modified tests/ui/parser/issues/issue-66473.stderr
Binary file not shown.
Binary file modified tests/ui/parser/issues/issue-68629.stderr
Binary file not shown.
Binary file modified tests/ui/parser/issues/issue-68730.stderr
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/ui/parser/raw/raw-byte-string-literals.stderr
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
error: bare CR not allowed in raw string
--> $DIR/raw-byte-string-literals.rs:4:9
|
LL | br"a";
LL | br"a␍";
| ^

error: non-ASCII character in raw byte string literal
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
error: bare CR not allowed in doc-comment
--> $DIR/several-carriage-returns-in-doc-comment.rs:6:12
|
LL | /// This doc comment contains three isolated `\r` symbols
LL | /// This doc comment contains three isolated `\r` symbols
| ^

error: bare CR not allowed in doc-comment
--> $DIR/several-carriage-returns-in-doc-comment.rs:6:32
|
LL | /// This doc comment contains three isolated `\r` symbols
LL | /// This doc comment contains three isolated `\r` symbols
| ^

error: bare CR not allowed in doc-comment
--> $DIR/several-carriage-returns-in-doc-comment.rs:6:52
|
LL | /// This doc comment contains three isolated `\r` symbols
LL | /// This doc comment contains three isolated `\r` symbols
| ^

error: aborting due to 3 previous errors
Expand Down
2 changes: 1 addition & 1 deletion tests/ui/parser/trailing-carriage-return-in-string.stderr
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
error: unknown character escape: `\r`
--> $DIR/trailing-carriage-return-in-string.rs:10:25
|
LL | let bad = "This is \ a test";
LL | let bad = "This is \␍ a test";
| ^ unknown character escape
|
= help: this is an isolated carriage return; consider checking your editor and version control settings
Expand Down
Binary file modified tests/ui/parser/utf16-be-without-bom.stderr
Binary file not shown.
Binary file modified tests/ui/parser/utf16-le-without-bom.stderr
Binary file not shown.
Binary file modified tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/ui/str/str-escape.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ warning: whitespace symbol '\u{c}' is not skipped
|
LL | let s = b"a\
| ________________^
LL | | b";
LL | | b";
| | ^- whitespace symbol '\u{c}' is not skipped
| |____|
|
Expand Down

0 comments on commit c60b38c

Please sign in to comment.