Skip to content

Commit 66d1844

Browse files
authored
Rollup merge of #150790 - lexer/help-invisible-character, r=Kivooeo,tgross35
feat: invisible character help string

I was playing around with zero-width spaces in different programming languages and thought that this error message could be more helpful. Hopefully it's a good first contribution! :)
2 parents 0a7d5f9 + 2b597f5 commit 66d1844

File tree

5 files changed

+28
-0
lines changed

5 files changed

+28
-0
lines changed

‎compiler/rustc_parse/messages.ftl‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -967,6 +967,7 @@ parse_unknown_start_of_token = unknown start of token: {$escaped}
967967
.sugg_quotes = Unicode characters '“' (Left Double Quotation Mark) and '”' (Right Double Quotation Mark) look like '{$ascii_str}' ({$ascii_name}), but are not
968968
.sugg_other = Unicode character '{$ch}' ({$u_name}) looks like '{$ascii_str}' ({$ascii_name}), but it is not
969969
.help_null = source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used
970+
.help_invisible_char = invisible characters like '{$escaped}' are not usually visible in text editors
970971
.note_repeats = character appears {$repeats ->
971972
[one] once more
972973
*[other] {$repeats} more times

‎compiler/rustc_parse/src/errors.rs‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2369,6 +2369,8 @@ pub(crate) struct UnknownTokenStart {
23692369
pub null: Option<UnknownTokenNull>,
23702370
#[subdiagnostic]
23712371
pub repeat: Option<UnknownTokenRepeat>,
2372+
#[subdiagnostic]
2373+
pub invisible: Option<InvisibleCharacter>,
23722374
}
23732375

23742376
#[derive(Subdiagnostic)]
@@ -2409,6 +2411,10 @@ pub(crate) struct UnknownTokenRepeat {
24092411
pub repeats: usize,
24102412
}
24112413

2414+
#[derive(Subdiagnostic)]
2415+
#[help(parse_help_invisible_char)]
2416+
pub(crate) struct InvisibleCharacter;
2417+
24122418
#[derive(Subdiagnostic)]
24132419
#[help(parse_help_null)]
24142420
pub(crate) struct UnknownTokenNull;

‎compiler/rustc_parse/src/lexer/mod.rs‎

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ use unescape_error_reporting::{emit_unescape_error, escaped_char};
3636
#[cfg(target_pointer_width = "64")]
3737
rustc_data_structures::static_assert_size!(rustc_lexer::Token, 12);
3838

39+
const INVISIBLE_CHARACTERS: [char; 8] = [
40+
'\u{200b}', '\u{200c}', '\u{2060}', '\u{2061}', '\u{2062}', '\u{00ad}', '\u{034f}', '\u{061c}',
41+
];
42+
3943
#[derive(Clone, Debug)]
4044
pub(crate) struct UnmatchedDelim {
4145
pub found_delim: Option<Delimiter>,
@@ -456,6 +460,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
456460
escaped: escaped_char(c),
457461
sugg,
458462
null: if c == '\x00' { Some(errors::UnknownTokenNull) } else { None },
463+
invisible: if INVISIBLE_CHARACTERS.contains(&c) { Some(errors::InvisibleCharacter) } else { None },
459464
repeat: if repeats > 0 {
460465
swallow_next_invalid = repeats;
461466
Some(errors::UnknownTokenRepeat { repeats })
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// Provide extra help when a user has an invisible character in their code
2+
3+
fn main() {
4+
//~^ ERROR unknown start of token: \u{200b}
5+
//~| HELP invisible characters like '\u{200b}' are not usually visible in text editors
6+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
error: unknown start of token: \u{200b}
2+
--> $DIR/lex-invisible-characters.rs:3:8
3+
|
4+
LL | fn main​() {
5+
| ^
6+
|
7+
= help: invisible characters like '\u{200b}' are not usually visible in text editors
8+
9+
error: aborting due to 1 previous error
10+

0 commit comments

Comments
 (0)