Merge commit '3b7c7f97e4' into sync-from-ra

2025-09-26 20:09:19 +00:00 · 2023-11-08 08:15:03 +02:00 · 2023-11-08 08:15:03 +02:00 · d1d111d09e
commit d1d111d09e
parent 6eaf3f8bb2
177 changed files with 14930 additions and 2099 deletions
--- a/crates/parser/src/grammar/params.rs
+++ b/crates/parser/src/grammar/params.rs
@ -7,6 +7,9 @@ use super::*;
 // fn b(x: i32) {}
 // fn c(x: i32, ) {}
 // fn d(x: i32, y: ()) {}
+
+// test_err empty_param_slot
+// fn f(y: i32, ,t: i32) {}
 pub(super) fn param_list_fn_def(p: &mut Parser<'_>) {
    list_(p, Flavor::FnDef);
 }
@ -71,7 +74,11 @@ fn list_(p: &mut Parser<'_>, flavor: Flavor) {
        if !p.at_ts(PARAM_FIRST.union(ATTRIBUTE_FIRST)) {
            p.error("expected value parameter");
            m.abandon(p);
-            break;
+            if p.eat(T![,]) {
+                continue;
+            } else {
+                break;
+            }
        }
        param(p, m, flavor);
        if !p.at(T![,]) {
--- a/crates/parser/src/lexed_str.rs
+++ b/crates/parser/src/lexed_str.rs
@ -8,8 +8,12 @@
 //! Note that these tokens, unlike the tokens we feed into the parser, do
 //! include info about comments and whitespace.

+use rustc_dependencies::lexer as rustc_lexer;
+
 use std::ops;

+use rustc_lexer::unescape::{EscapeError, Mode};
+
 use crate::{
    SyntaxKind::{self, *},
    T,
@ -253,30 +257,60 @@ impl<'a> Converter<'a> {
            rustc_lexer::LiteralKind::Char { terminated } => {
                if !terminated {
                    err = "Missing trailing `'` symbol to terminate the character literal";
+                } else {
+                    let text = &self.res.text[self.offset + 1..][..len - 1];
+                    let i = text.rfind('\'').unwrap();
+                    let text = &text[..i];
+                    if let Err(e) = rustc_lexer::unescape::unescape_char(text) {
+                        err = error_to_diagnostic_message(e, Mode::Char);
+                    }
                }
                CHAR
            }
            rustc_lexer::LiteralKind::Byte { terminated } => {
                if !terminated {
                    err = "Missing trailing `'` symbol to terminate the byte literal";
+                } else {
+                    let text = &self.res.text[self.offset + 2..][..len - 2];
+                    let i = text.rfind('\'').unwrap();
+                    let text = &text[..i];
+                    if let Err(e) = rustc_lexer::unescape::unescape_byte(text) {
+                        err = error_to_diagnostic_message(e, Mode::Byte);
+                    }
                }
+
                BYTE
            }
            rustc_lexer::LiteralKind::Str { terminated } => {
                if !terminated {
                    err = "Missing trailing `\"` symbol to terminate the string literal";
+                } else {
+                    let text = &self.res.text[self.offset + 1..][..len - 1];
+                    let i = text.rfind('"').unwrap();
+                    let text = &text[..i];
+                    err = unescape_string_error_message(text, Mode::Str);
                }
                STRING
            }
            rustc_lexer::LiteralKind::ByteStr { terminated } => {
                if !terminated {
                    err = "Missing trailing `\"` symbol to terminate the byte string literal";
+                } else {
+                    let text = &self.res.text[self.offset + 2..][..len - 2];
+                    let i = text.rfind('"').unwrap();
+                    let text = &text[..i];
+                    err = unescape_string_error_message(text, Mode::ByteStr);
                }
                BYTE_STRING
            }
            rustc_lexer::LiteralKind::CStr { terminated } => {
                if !terminated {
                    err = "Missing trailing `\"` symbol to terminate the string literal";
+                } else {
+                    let text = &self.res.text[self.offset + 2..][..len - 2];
+                    let i = text.rfind('"').unwrap();
+                    let text = &text[..i];
+                    err = unescape_string_error_message(text, Mode::CStr);
                }
                C_STRING
            }
@ -304,3 +338,64 @@ impl<'a> Converter<'a> {
        self.push(syntax_kind, len, err);
    }
 }
+
+fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
+    match error {
+        EscapeError::ZeroChars => "empty character literal",
+        EscapeError::MoreThanOneChar => "character literal may only contain one codepoint",
+        EscapeError::LoneSlash => "",
+        EscapeError::InvalidEscape if mode == Mode::Byte || mode == Mode::ByteStr => {
+            "unknown byte escape"
+        }
+        EscapeError::InvalidEscape => "unknown character escape",
+        EscapeError::BareCarriageReturn => "",
+        EscapeError::BareCarriageReturnInRawString => "",
+        EscapeError::EscapeOnlyChar if mode == Mode::Byte => "byte constant must be escaped",
+        EscapeError::EscapeOnlyChar => "character constant must be escaped",
+        EscapeError::TooShortHexEscape => "numeric character escape is too short",
+        EscapeError::InvalidCharInHexEscape => "invalid character in numeric character escape",
+        EscapeError::OutOfRangeHexEscape => "out of range hex escape",
+        EscapeError::NoBraceInUnicodeEscape => "incorrect unicode escape sequence",
+        EscapeError::InvalidCharInUnicodeEscape => "invalid character in unicode escape",
+        EscapeError::EmptyUnicodeEscape => "empty unicode escape",
+        EscapeError::UnclosedUnicodeEscape => "unterminated unicode escape",
+        EscapeError::LeadingUnderscoreUnicodeEscape => "invalid start of unicode escape",
+        EscapeError::OverlongUnicodeEscape => "overlong unicode escape",
+        EscapeError::LoneSurrogateUnicodeEscape => "invalid unicode character escape",
+        EscapeError::OutOfRangeUnicodeEscape => "invalid unicode character escape",
+        EscapeError::UnicodeEscapeInByte => "unicode escape in byte string",
+        EscapeError::NonAsciiCharInByte if mode == Mode::Byte => {
+            "non-ASCII character in byte literal"
+        }
+        EscapeError::NonAsciiCharInByte if mode == Mode::ByteStr => {
+            "non-ASCII character in byte string literal"
+        }
+        EscapeError::NonAsciiCharInByte => "non-ASCII character in raw byte string literal",
+        EscapeError::UnskippedWhitespaceWarning => "",
+        EscapeError::MultipleSkippedLinesWarning => "",
+    }
+}
+
+fn unescape_string_error_message(text: &str, mode: Mode) -> &'static str {
+    let mut error_message = "";
+    match mode {
+        Mode::CStr => {
+            rustc_lexer::unescape::unescape_c_string(text, mode, &mut |_, res| {
+                if let Err(e) = res {
+                    error_message = error_to_diagnostic_message(e, mode);
+                }
+            });
+        }
+        Mode::ByteStr | Mode::Str => {
+            rustc_lexer::unescape::unescape_literal(text, mode, &mut |_, res| {
+                if let Err(e) = res {
+                    error_message = error_to_diagnostic_message(e, mode);
+                }
+            });
+        }
+        _ => {
+            // Other Modes are not supported yet or do not apply
+        }
+    }
+    error_message
+}
--- a/crates/parser/src/lib.rs
+++ b/crates/parser/src/lib.rs
@ -19,6 +19,7 @@

 #![warn(rust_2018_idioms, unused_lifetimes, semicolon_in_expressions_from_macros)]
 #![allow(rustdoc::private_intra_doc_links)]
+#![cfg_attr(feature = "in-rust-tree", feature(rustc_private))]

 mod lexed_str;
 mod token_set;
--- a/crates/parser/src/shortcuts.rs
+++ b/crates/parser/src/shortcuts.rs
@ -32,29 +32,27 @@ impl LexedStr<'_> {
            let kind = self.kind(i);
            if kind.is_trivia() {
                was_joint = false
+            } else if kind == SyntaxKind::IDENT {
+                let token_text = self.text(i);
+                let contextual_kw =
+                    SyntaxKind::from_contextual_keyword(token_text).unwrap_or(SyntaxKind::IDENT);
+                res.push_ident(contextual_kw);
            } else {
-                if kind == SyntaxKind::IDENT {
-                    let token_text = self.text(i);
-                    let contextual_kw = SyntaxKind::from_contextual_keyword(token_text)
-                        .unwrap_or(SyntaxKind::IDENT);
-                    res.push_ident(contextual_kw);
-                } else {
-                    if was_joint {
+                if was_joint {
+                    res.was_joint();
+                }
+                res.push(kind);
+                // Tag the token as joint if it is float with a fractional part
+                // we use this jointness to inform the parser about what token split
+                // event to emit when we encounter a float literal in a field access
+                if kind == SyntaxKind::FLOAT_NUMBER {
+                    if !self.text(i).ends_with('.') {
                        res.was_joint();
-                    }
-                    res.push(kind);
-                    // Tag the token as joint if it is float with a fractional part
-                    // we use this jointness to inform the parser about what token split
-                    // event to emit when we encounter a float literal in a field access
-                    if kind == SyntaxKind::FLOAT_NUMBER {
-                        if !self.text(i).ends_with('.') {
-                            res.was_joint();
-                        } else {
-                            was_joint = false;
-                        }
                    } else {
-                        was_joint = true;
+                        was_joint = false;
                    }
+                } else {
+                    was_joint = true;
                }
            }
        }