diff --git a/crates/parser/src/lexed_str.rs b/crates/parser/src/lexed_str.rs
index 8e8bdce1ee..b9e7566fdf 100644
--- a/crates/parser/src/lexed_str.rs
+++ b/crates/parser/src/lexed_str.rs
@@ -274,7 +274,7 @@ impl<'a> Converter<'a> {
                     let text = &self.res.text[self.offset + 2..][..len - 2];
                     let i = text.rfind('\'').unwrap();
                     let text = &text[..i];
-                    if let Err(e) = rustc_lexer::unescape::unescape_char(text) {
+                    if let Err(e) = rustc_lexer::unescape::unescape_byte(text) {
                         err = error_to_diagnostic_message(e, Mode::Byte);
                     }
                 }
@@ -284,18 +284,33 @@ impl<'a> Converter<'a> {
             rustc_lexer::LiteralKind::Str { terminated } => {
                 if !terminated {
                     err = "Missing trailing `\"` symbol to terminate the string literal";
+                } else {
+                    let text = &self.res.text[self.offset + 1..][..len - 1];
+                    let i = text.rfind('"').unwrap();
+                    let text = &text[..i];
+                    err = unescape_string_error_message(text, Mode::Str);
                 }
                 STRING
             }
             rustc_lexer::LiteralKind::ByteStr { terminated } => {
                 if !terminated {
                     err = "Missing trailing `\"` symbol to terminate the byte string literal";
+                } else {
+                    let text = &self.res.text[self.offset + 2..][..len - 2];
+                    let i = text.rfind('"').unwrap();
+                    let text = &text[..i];
+                    err = unescape_string_error_message(text, Mode::ByteStr);
                 }
                 BYTE_STRING
             }
             rustc_lexer::LiteralKind::CStr { terminated } => {
                 if !terminated {
                     err = "Missing trailing `\"` symbol to terminate the string literal";
+                } else {
+                    let text = &self.res.text[self.offset + 2..][..len - 2];
+                    let i = text.rfind('"').unwrap();
+                    let text = &text[..i];
+                    err = unescape_string_error_message(text, Mode::CStr);
                 }
                 C_STRING
             }
@@ -360,3 +375,44 @@ fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
         EscapeError::MultipleSkippedLinesWarning => "",
     }
 }
+
+/// Unescapes the body of a string-like literal and returns a diagnostic message.
+///
+/// `text` is the literal with its prefix and surrounding quotes already removed by
+/// the caller. `Mode::CStr` is checked with `unescape_c_string`, `Mode::Str` and
+/// `Mode::ByteStr` with `unescape_literal`; any other mode is not checked.
+///
+/// Returns the message of the last escape error whose diagnostic text is
+/// non-empty, or `""` when no such error was reported.
+fn unescape_string_error_message(text: &str, mode: Mode) -> &'static str {
+    let mut error_message = "";
+    // `error_to_diagnostic_message` maps warning-only results (for example
+    // `EscapeError::MultipleSkippedLinesWarning`) to `""`. Skip those so a warning
+    // reported after a real error cannot erase the error's message.
+    let mut record = |e: EscapeError| {
+        let message = error_to_diagnostic_message(e, mode);
+        if !message.is_empty() {
+            error_message = message;
+        }
+    };
+    match mode {
+        Mode::CStr => {
+            rustc_lexer::unescape::unescape_c_string(text, mode, &mut |_, res| {
+                if let Err(e) = res {
+                    record(e);
+                }
+            });
+        }
+        Mode::ByteStr | Mode::Str => {
+            rustc_lexer::unescape::unescape_literal(text, mode, &mut |_, res| {
+                if let Err(e) = res {
+                    record(e);
+                }
+            });
+        }
+        _ => {
+            // Other Modes are not supported yet or do not apply
+        }
+    }
+    error_message
+}
diff --git a/crates/parser/test_data/lexer/err/byte_char_literals.rast b/crates/parser/test_data/lexer/err/byte_char_literals.rast
index 24892bc239..7603c9099d 100644
--- a/crates/parser/test_data/lexer/err/byte_char_literals.rast
+++ b/crates/parser/test_data/lexer/err/byte_char_literals.rast
@@ -22,9 +22,9 @@ BYTE "b'\\'a'" error: character literal may only contain one codepoint
 WHITESPACE "\n"
 BYTE "b'\\0a'" error: character literal may only contain one codepoint
 WHITESPACE "\n"
-BYTE "b'\\u{0}x'" error: character literal may only contain one codepoint
+BYTE "b'\\u{0}x'" error: unicode escape in byte string
 WHITESPACE "\n"
-BYTE "b'\\u{1F63b}}'" error: character literal may only contain one codepoint
+BYTE "b'\\u{1F63b}}'" error: unicode escape in byte string
 WHITESPACE "\n"
 BYTE "b'\\v'" error: unknown byte escape
 WHITESPACE "\n"
@@ -50,12 +50,6 @@ BYTE "b'\\x🦀'" error: invalid character in numeric character escape
 WHITESPACE "\n"
 BYTE "b'\\xtt'" error: invalid character in numeric character escape
 WHITESPACE "\n"
-BYTE "b'\\xff'" error: out of range hex escape
-WHITESPACE "\n"
-BYTE "b'\\xFF'" error: out of range hex escape
-WHITESPACE "\n"
-BYTE "b'\\x80'" error: out of range hex escape
-WHITESPACE "\n"
 BYTE "b'\\u'" error: incorrect unicode escape sequence
 WHITESPACE "\n"
 BYTE "b'\\u[0123]'" error: incorrect unicode escape sequence
@@ -72,21 +66,21 @@ BYTE "b'\\u{_0000}'" error: invalid start of unicode escape
 WHITESPACE "\n"
 BYTE "b'\\u{0000000}'" error: overlong unicode escape
 WHITESPACE "\n"
-BYTE "b'\\u{FFFFFF}'" error: invalid unicode character escape
+BYTE "b'\\u{FFFFFF}'" error: unicode escape in byte
string WHITESPACE "\n" -BYTE "b'\\u{ffffff}'" error: invalid unicode character escape +BYTE "b'\\u{ffffff}'" error: unicode escape in byte string WHITESPACE "\n" -BYTE "b'\\u{ffffff}'" error: invalid unicode character escape +BYTE "b'\\u{ffffff}'" error: unicode escape in byte string WHITESPACE "\n" -BYTE "b'\\u{DC00}'" error: invalid unicode character escape +BYTE "b'\\u{DC00}'" error: unicode escape in byte string WHITESPACE "\n" -BYTE "b'\\u{DDDD}'" error: invalid unicode character escape +BYTE "b'\\u{DDDD}'" error: unicode escape in byte string WHITESPACE "\n" -BYTE "b'\\u{DFFF}'" error: invalid unicode character escape +BYTE "b'\\u{DFFF}'" error: unicode escape in byte string WHITESPACE "\n" -BYTE "b'\\u{D800}'" error: invalid unicode character escape +BYTE "b'\\u{D800}'" error: unicode escape in byte string WHITESPACE "\n" -BYTE "b'\\u{DAAA}'" error: invalid unicode character escape +BYTE "b'\\u{DAAA}'" error: unicode escape in byte string WHITESPACE "\n" -BYTE "b'\\u{DBFF}'" error: invalid unicode character escape +BYTE "b'\\u{DBFF}'" error: unicode escape in byte string WHITESPACE "\n" diff --git a/crates/parser/test_data/lexer/err/byte_char_literals.rs b/crates/parser/test_data/lexer/err/byte_char_literals.rs index 9f2f4309e7..b2d06e490b 100644 --- a/crates/parser/test_data/lexer/err/byte_char_literals.rs +++ b/crates/parser/test_data/lexer/err/byte_char_literals.rs @@ -25,9 +25,6 @@ b'\xx' b'\xы' b'\x🦀' b'\xtt' -b'\xff' -b'\xFF' -b'\x80' b'\u' b'\u[0123]' b'\u{0x}' diff --git a/crates/parser/test_data/lexer/err/byte_strings.rast b/crates/parser/test_data/lexer/err/byte_strings.rast new file mode 100644 index 0000000000..e8d8ff8cef --- /dev/null +++ b/crates/parser/test_data/lexer/err/byte_strings.rast @@ -0,0 +1,28 @@ +BYTE_STRING "b\"\\💩\"" error: unknown byte escape +WHITESPACE "\n" +BYTE_STRING "b\"\\●\"" error: unknown byte escape +WHITESPACE "\n" +BYTE_STRING "b\"\\u{_0000}\"" error: invalid start of unicode escape +WHITESPACE "\n" +BYTE_STRING 
"b\"\\u{0000000}\"" error: overlong unicode escape +WHITESPACE "\n" +BYTE_STRING "b\"\\u{FFFFFF}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{ffffff}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{ffffff}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{DC00}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{DDDD}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{DFFF}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{D800}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{DAAA}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{DBFF}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\xы\"" error: invalid character in numeric character escape +WHITESPACE "\n" diff --git a/crates/parser/test_data/lexer/err/byte_strings.rs b/crates/parser/test_data/lexer/err/byte_strings.rs new file mode 100644 index 0000000000..e74847137b --- /dev/null +++ b/crates/parser/test_data/lexer/err/byte_strings.rs @@ -0,0 +1,14 @@ +b"\💩" +b"\●" +b"\u{_0000}" +b"\u{0000000}" +b"\u{FFFFFF}" +b"\u{ffffff}" +b"\u{ffffff}" +b"\u{DC00}" +b"\u{DDDD}" +b"\u{DFFF}" +b"\u{D800}" +b"\u{DAAA}" +b"\u{DBFF}" +b"\xы" diff --git a/crates/parser/test_data/lexer/err/c_strings.rast b/crates/parser/test_data/lexer/err/c_strings.rast new file mode 100644 index 0000000000..1b4424ba5c --- /dev/null +++ b/crates/parser/test_data/lexer/err/c_strings.rast @@ -0,0 +1,28 @@ +C_STRING "c\"\\💩\"" error: unknown character escape +WHITESPACE "\n" +C_STRING "c\"\\●\"" error: unknown character escape +WHITESPACE "\n" +C_STRING "c\"\\u{_0000}\"" error: invalid start of unicode escape +WHITESPACE "\n" +C_STRING "c\"\\u{0000000}\"" error: overlong unicode escape +WHITESPACE "\n" +C_STRING "c\"\\u{FFFFFF}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING 
"c\"\\u{ffffff}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{ffffff}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{DC00}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{DDDD}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{DFFF}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{D800}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{DAAA}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{DBFF}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\xы\"" error: invalid character in numeric character escape +WHITESPACE "\n" diff --git a/crates/parser/test_data/lexer/err/c_strings.rs b/crates/parser/test_data/lexer/err/c_strings.rs new file mode 100644 index 0000000000..1b78ffc28a --- /dev/null +++ b/crates/parser/test_data/lexer/err/c_strings.rs @@ -0,0 +1,14 @@ +c"\💩" +c"\●" +c"\u{_0000}" +c"\u{0000000}" +c"\u{FFFFFF}" +c"\u{ffffff}" +c"\u{ffffff}" +c"\u{DC00}" +c"\u{DDDD}" +c"\u{DFFF}" +c"\u{D800}" +c"\u{DAAA}" +c"\u{DBFF}" +c"\xы" diff --git a/crates/parser/test_data/lexer/err/strings.rast b/crates/parser/test_data/lexer/err/strings.rast new file mode 100644 index 0000000000..0cd1747208 --- /dev/null +++ b/crates/parser/test_data/lexer/err/strings.rast @@ -0,0 +1,28 @@ +STRING "\"\\💩\"" error: unknown character escape +WHITESPACE "\n" +STRING "\"\\●\"" error: unknown character escape +WHITESPACE "\n" +STRING "\"\\u{_0000}\"" error: invalid start of unicode escape +WHITESPACE "\n" +STRING "\"\\u{0000000}\"" error: overlong unicode escape +WHITESPACE "\n" +STRING "\"\\u{FFFFFF}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{ffffff}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{ffffff}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{DC00}\"" error: invalid 
unicode character escape +WHITESPACE "\n" +STRING "\"\\u{DDDD}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{DFFF}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{D800}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{DAAA}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{DBFF}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\xы\"" error: invalid character in numeric character escape +WHITESPACE "\n" diff --git a/crates/parser/test_data/lexer/err/strings.rs b/crates/parser/test_data/lexer/err/strings.rs new file mode 100644 index 0000000000..2499516d3f --- /dev/null +++ b/crates/parser/test_data/lexer/err/strings.rs @@ -0,0 +1,14 @@ +"\💩" +"\●" +"\u{_0000}" +"\u{0000000}" +"\u{FFFFFF}" +"\u{ffffff}" +"\u{ffffff}" +"\u{DC00}" +"\u{DDDD}" +"\u{DFFF}" +"\u{D800}" +"\u{DAAA}" +"\u{DBFF}" +"\xы"